; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN

; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN

declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)

define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru) {
; RV32V-LABEL: mgather_v1i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vmv1r.v v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v1i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vmv1r.v v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v1i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vmv1r.v v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v1i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vfirst.m a1, v0
; RV64ZVE32F-NEXT:    bnez a1, .LBB0_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vle8.v v8, (a0)
; RV64ZVE32F-NEXT:  .LBB0_2: # %else
; RV64ZVE32F-NEXT:    ret
  %v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
  ret <1 x i8> %v
}

declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>)

define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vmv1r.v v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vmv1r.v v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vmv1r.v v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    bnez a3, .LBB1_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB1_4
; RV64ZVE32F-NEXT:  .LBB1_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB1_3: # %cond.load
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB1_2
; RV64ZVE32F-NEXT:  .LBB1_4: # %cond.load1
; RV64ZVE32F-NEXT:    lbu a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  ret <2 x i8> %v
}

define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT:    vsext.vf2 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT:    vsext.vf2 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf2 v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB2_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB2_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB2_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lbu a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB2_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vsext.vf2 v9, v8
; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT:    vzext.vf2 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT:    vzext.vf2 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vzext.vf2 v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB3_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB3_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB3_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lbu a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB3_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vzext.vf2 v9, v8
; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i32:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT:    vsext.vf4 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i32:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT:    vsext.vf4 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf4 v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB4_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB4_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB4_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lbu a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB4_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT:    vsext.vf4 v9, v8
; RV64ZVE32F-NEXT:    vmv.v.v v8, v9
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i32:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT:    vzext.vf4 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i32:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT:    vzext.vf4 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vzext.vf4 v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB5_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB5_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB5_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lbu a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB5_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT:    vzext.vf4 v9, v8
; RV64ZVE32F-NEXT:    vmv.v.v v8, v9
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT:    vsext.vf8 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT:    vsext.vf8 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT:    vmv.x.s a1, v9
; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
; RV32ZVE32F-NEXT:    srai a3, a1, 31
; RV32ZVE32F-NEXT:    srai a4, a2, 31
; RV32ZVE32F-NEXT:    sw a1, 0(a0)
; RV32ZVE32F-NEXT:    sw a3, 4(a0)
; RV32ZVE32F-NEXT:    sw a2, 8(a0)
; RV32ZVE32F-NEXT:    sw a4, 12(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB6_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB6_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB6_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lbu a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB6_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT:    vzext.vf8 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT:    vzext.vf8 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    sw zero, 12(a0)
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT:    vmv.x.s a1, v9
; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
; RV32ZVE32F-NEXT:    andi a1, a1, 255
; RV32ZVE32F-NEXT:    andi a2, a2, 255
; RV32ZVE32F-NEXT:    sw a1, 0(a0)
; RV32ZVE32F-NEXT:    sw zero, 4(a0)
; RV32ZVE32F-NEXT:    sw a2, 8(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB7_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB7_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB7_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lbu a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB7_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    andi a0, a0, 255
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    andi a1, a1, 255
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)

define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) {
; RV32-LABEL: mgather_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mgather_v4i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; RV64V-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT:    vmv1r.v v8, v10
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v4i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    bnez a2, .LBB8_5
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB8_6
; RV64ZVE32F-NEXT:  .LBB8_2: # %else2
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    bnez a2, .LBB8_7
; RV64ZVE32F-NEXT:  .LBB8_3: # %else5
; RV64ZVE32F-NEXT:    andi a1, a1, 8
; RV64ZVE32F-NEXT:    bnez a1, .LBB8_8
; RV64ZVE32F-NEXT:  .LBB8_4: # %else8
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB8_5: # %cond.load
; RV64ZVE32F-NEXT:    ld a2, 0(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB8_2
; RV64ZVE32F-NEXT:  .LBB8_6: # %cond.load1
; RV64ZVE32F-NEXT:    ld a2, 8(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    beqz a2, .LBB8_3
; RV64ZVE32F-NEXT:  .LBB8_7: # %cond.load4
; RV64ZVE32F-NEXT:    ld a2, 16(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT:    andi a1, a1, 8
; RV64ZVE32F-NEXT:    beqz a1, .LBB8_4
; RV64ZVE32F-NEXT:  .LBB8_8: # %cond.load7
; RV64ZVE32F-NEXT:    ld a0, 24(a0)
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT:    ret
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %m, <4 x i8> %passthru)
  ret <4 x i8> %v
}

define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_truemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mgather_truemask_v4i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT:    vluxei64.v v10, (zero), v8
; RV64V-NEXT:    vmv1r.v v8, v10
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 0(a0)
; RV64ZVE32F-NEXT:    ld a2, 8(a0)
; RV64ZVE32F-NEXT:    ld a3, 16(a0)
; RV64ZVE32F-NEXT:    ld a0, 24(a0)
; RV64ZVE32F-NEXT:    lbu a1, 0(a1)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    lbu a3, 0(a3)
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT:    ret
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru)
  ret <4 x i8> %v
}

define <4 x i8> @mgather_falsemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mgather_falsemask_v4i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64V-NEXT:    vmv1r.v v8, v10
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ret
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer, <4 x i8> %passthru)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)

define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64V-LABEL: mgather_v8i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV64V-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT:    vmv1r.v v8, v12
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v8i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_9
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_10
; RV64ZVE32F-NEXT:  .LBB11_2: # %else2
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_11
; RV64ZVE32F-NEXT:  .LBB11_3: # %else5
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_12
; RV64ZVE32F-NEXT:  .LBB11_4: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_13
; RV64ZVE32F-NEXT:  .LBB11_5: # %else11
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_14
; RV64ZVE32F-NEXT:  .LBB11_6: # %else14
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_15
; RV64ZVE32F-NEXT:  .LBB11_7: # %else17
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB11_16
; RV64ZVE32F-NEXT:  .LBB11_8: # %else20
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB11_9: # %cond.load
; RV64ZVE32F-NEXT:    ld a2, 0(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB11_2
; RV64ZVE32F-NEXT:  .LBB11_10: # %cond.load1
; RV64ZVE32F-NEXT:    ld a2, 8(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    beqz a2, .LBB11_3
; RV64ZVE32F-NEXT:  .LBB11_11: # %cond.load4
; RV64ZVE32F-NEXT:    ld a2, 16(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB11_4
; RV64ZVE32F-NEXT:  .LBB11_12: # %cond.load7
; RV64ZVE32F-NEXT:    ld a2, 24(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB11_5
; RV64ZVE32F-NEXT:  .LBB11_13: # %cond.load10
; RV64ZVE32F-NEXT:    ld a2, 32(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB11_6
; RV64ZVE32F-NEXT:  .LBB11_14: # %cond.load13
; RV64ZVE32F-NEXT:    ld a2, 40(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    beqz a2, .LBB11_7
; RV64ZVE32F-NEXT:  .LBB11_15: # %cond.load16
; RV64ZVE32F-NEXT:    ld a2, 48(a0)
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB11_8
; RV64ZVE32F-NEXT:  .LBB11_16: # %cond.load19
; RV64ZVE32F-NEXT:    ld a0, 56(a0)
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT:    ret
  %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
  ret <8 x i8> %v
}

define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mgather_baseidx_v8i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf8 v12, v8
; RV64V-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT:    vmv1r.v v8, v9
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB12_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:  .LBB12_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB12_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT:  .LBB12_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB12_14
; RV64ZVE32F-NEXT:  # %bb.5: # %else5
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB12_15
; RV64ZVE32F-NEXT:  .LBB12_6: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB12_16
; RV64ZVE32F-NEXT:  .LBB12_7: # %else11
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB12_9
; RV64ZVE32F-NEXT:  .LBB12_8: # %cond.load13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT:  .LBB12_9: # %else14
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB12_11
; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT:  .LBB12_11: # %else17
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB12_13
; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT:  .LBB12_13: # %else20
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB12_14: # %cond.load4
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB12_6
; RV64ZVE32F-NEXT:  .LBB12_15: # %cond.load7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB12_7
; RV64ZVE32F-NEXT:  .LBB12_16: # %cond.load10
; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB12_8
; RV64ZVE32F-NEXT:    j .LBB12_9
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
  ret <8 x i8> %v
}

declare <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i16>)

define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthru) {
; RV32V-LABEL: mgather_v1i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vmv1r.v v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v1i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vmv1r.v v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v1i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vmv1r.v v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v1i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vfirst.m a1, v0
; RV64ZVE32F-NEXT:    bnez a1, .LBB13_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vle16.v v8, (a0)
; RV64ZVE32F-NEXT:  .LBB13_2: # %else
; RV64ZVE32F-NEXT:    ret
  %v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
  ret <1 x i16> %v
}

declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)

define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vmv1r.v v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vmv1r.v v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vmv1r.v v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    bnez a3, .LBB14_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB14_4
; RV64ZVE32F-NEXT:  .LBB14_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB14_3: # %cond.load
; RV64ZVE32F-NEXT:    lh a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB14_2
; RV64ZVE32F-NEXT:  .LBB14_4: # %cond.load1
; RV64ZVE32F-NEXT:    lh a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  ret <2 x i16> %v
}

define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i32:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT:    vsext.vf2 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i16_sextload_v2i32:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT:    vsext.vf2 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf2 v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB15_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lh a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB15_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB15_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lh a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB15_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT:    vsext.vf2 v9, v8
; RV64ZVE32F-NEXT:    vmv.v.v v8, v9
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = sext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i32:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT:    vzext.vf2 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i16_zextload_v2i32:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT:    vzext.vf2 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vzext.vf2 v8, v9
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB16_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lh a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB16_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB16_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lh a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB16_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT:    vzext.vf2 v9, v8
; RV64ZVE32F-NEXT:    vmv.v.v v8, v9
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = zext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT:    vsext.vf4 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i16_sextload_v2i64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT:    vsext.vf4 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT:    vmv.x.s a1, v9
; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
; RV32ZVE32F-NEXT:    srai a3, a1, 31
; RV32ZVE32F-NEXT:    srai a4, a2, 31
; RV32ZVE32F-NEXT:    sw a1, 0(a0)
; RV32ZVE32F-NEXT:    sw a3, 4(a0)
; RV32ZVE32F-NEXT:    sw a2, 8(a0)
; RV32ZVE32F-NEXT:    sw a4, 12(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB17_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lh a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB17_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB17_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lh a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB17_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = sext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT:    vzext.vf4 v8, v9
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mgather_v2i16_zextload_v2i64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT:    vzext.vf4 v8, v9
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    lui a1, 16
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    addi a1, a1, -1
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT:    vmv.x.s a2, v9
; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
; RV32ZVE32F-NEXT:    and a2, a2, a1
; RV32ZVE32F-NEXT:    and a1, a3, a1
; RV32ZVE32F-NEXT:    sw a2, 0(a0)
; RV32ZVE32F-NEXT:    sw zero, 4(a0)
; RV32ZVE32F-NEXT:    sw a1, 8(a0)
; RV32ZVE32F-NEXT:    sw zero, 12(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    beqz a3, .LBB18_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
; RV64ZVE32F-NEXT:    lh a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:  .LBB18_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB18_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT:    lh a0, 0(a1)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:  .LBB18_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    lui a1, 16
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    addiw a1, a1, -1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    and a0, a0, a1
; RV64ZVE32F-NEXT:    and a1, a2, a1
; RV64ZVE32F-NEXT:    ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = zext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)

define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthru) {
; RV32-LABEL: mgather_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mgather_v4i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; RV64V-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT:    vmv1r.v v8, v10
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_v4i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    bnez a2, .LBB19_5
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB19_6
; RV64ZVE32F-NEXT:  .LBB19_2: # %else2
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    bnez a2, .LBB19_7
; RV64ZVE32F-NEXT:  .LBB19_3: # %else5
; RV64ZVE32F-NEXT:    andi a1, a1, 8
; RV64ZVE32F-NEXT:    bnez a1, .LBB19_8
; RV64ZVE32F-NEXT:  .LBB19_4: # %else8
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB19_5: # %cond.load
; RV64ZVE32F-NEXT:    ld a2, 0(a0)
; RV64ZVE32F-NEXT:    lh a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB19_2
; RV64ZVE32F-NEXT:  .LBB19_6: # %cond.load1
; RV64ZVE32F-NEXT:    ld a2, 8(a0)
; RV64ZVE32F-NEXT:    lh a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    beqz a2, .LBB19_3
; RV64ZVE32F-NEXT:  .LBB19_7: # %cond.load4
; RV64ZVE32F-NEXT:    ld a2, 16(a0)
; RV64ZVE32F-NEXT:    lh a2, 0(a2)
; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT:    andi a1, a1, 8
; RV64ZVE32F-NEXT:    beqz a1, .LBB19_4
; RV64ZVE32F-NEXT:  .LBB19_8: # %cond.load7
; RV64ZVE32F-NEXT:    ld a0, 24(a0)
; RV64ZVE32F-NEXT:    lh a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT:    ret
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x i16> %passthru)
  ret <4 x i16> %v
}

define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_truemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mgather_truemask_v4i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT:    vluxei64.v v10, (zero), v8
; RV64V-NEXT:    vmv1r.v v8, v10
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 0(a0)
; RV64ZVE32F-NEXT:    ld a2, 8(a0)
; RV64ZVE32F-NEXT:    ld a3, 16(a0)
; RV64ZVE32F-NEXT:    ld a0, 24(a0)
; RV64ZVE32F-NEXT:    lh a1, 0(a1)
; RV64ZVE32F-NEXT:    lh a2, 0(a2)
; RV64ZVE32F-NEXT:    lh a3, 0(a3)
; RV64ZVE32F-NEXT:    lh a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT:    ret
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru)
  ret <4 x i16> %v
}

define <4 x i16> @mgather_falsemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mgather_falsemask_v4i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64V-NEXT:    vmv1r.v v8, v10
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ret
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x i16> %passthru)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)

1274define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthru) {
1275; RV32-LABEL: mgather_v8i16:
1276; RV32:       # %bb.0:
1277; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
1278; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
1279; RV32-NEXT:    vmv.v.v v8, v10
1280; RV32-NEXT:    ret
1281;
1282; RV64V-LABEL: mgather_v8i16:
1283; RV64V:       # %bb.0:
1284; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
1285; RV64V-NEXT:    vluxei64.v v12, (zero), v8, v0.t
1286; RV64V-NEXT:    vmv.v.v v8, v12
1287; RV64V-NEXT:    ret
1288;
1289; RV64ZVE32F-LABEL: mgather_v8i16:
1290; RV64ZVE32F:       # %bb.0:
1291; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1292; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
1293; RV64ZVE32F-NEXT:    andi a2, a1, 1
1294; RV64ZVE32F-NEXT:    bnez a2, .LBB22_9
1295; RV64ZVE32F-NEXT:  # %bb.1: # %else
1296; RV64ZVE32F-NEXT:    andi a2, a1, 2
1297; RV64ZVE32F-NEXT:    bnez a2, .LBB22_10
1298; RV64ZVE32F-NEXT:  .LBB22_2: # %else2
1299; RV64ZVE32F-NEXT:    andi a2, a1, 4
1300; RV64ZVE32F-NEXT:    bnez a2, .LBB22_11
1301; RV64ZVE32F-NEXT:  .LBB22_3: # %else5
1302; RV64ZVE32F-NEXT:    andi a2, a1, 8
1303; RV64ZVE32F-NEXT:    bnez a2, .LBB22_12
1304; RV64ZVE32F-NEXT:  .LBB22_4: # %else8
1305; RV64ZVE32F-NEXT:    andi a2, a1, 16
1306; RV64ZVE32F-NEXT:    bnez a2, .LBB22_13
1307; RV64ZVE32F-NEXT:  .LBB22_5: # %else11
1308; RV64ZVE32F-NEXT:    andi a2, a1, 32
1309; RV64ZVE32F-NEXT:    bnez a2, .LBB22_14
1310; RV64ZVE32F-NEXT:  .LBB22_6: # %else14
1311; RV64ZVE32F-NEXT:    andi a2, a1, 64
1312; RV64ZVE32F-NEXT:    bnez a2, .LBB22_15
1313; RV64ZVE32F-NEXT:  .LBB22_7: # %else17
1314; RV64ZVE32F-NEXT:    andi a1, a1, -128
1315; RV64ZVE32F-NEXT:    bnez a1, .LBB22_16
1316; RV64ZVE32F-NEXT:  .LBB22_8: # %else20
1317; RV64ZVE32F-NEXT:    ret
1318; RV64ZVE32F-NEXT:  .LBB22_9: # %cond.load
1319; RV64ZVE32F-NEXT:    ld a2, 0(a0)
1320; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1321; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
1322; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1323; RV64ZVE32F-NEXT:    andi a2, a1, 2
1324; RV64ZVE32F-NEXT:    beqz a2, .LBB22_2
1325; RV64ZVE32F-NEXT:  .LBB22_10: # %cond.load1
1326; RV64ZVE32F-NEXT:    ld a2, 8(a0)
1327; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1328; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1329; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1330; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
1331; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
1332; RV64ZVE32F-NEXT:    andi a2, a1, 4
1333; RV64ZVE32F-NEXT:    beqz a2, .LBB22_3
1334; RV64ZVE32F-NEXT:  .LBB22_11: # %cond.load4
1335; RV64ZVE32F-NEXT:    ld a2, 16(a0)
1336; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1337; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
1338; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1339; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
1340; RV64ZVE32F-NEXT:    andi a2, a1, 8
1341; RV64ZVE32F-NEXT:    beqz a2, .LBB22_4
1342; RV64ZVE32F-NEXT:  .LBB22_12: # %cond.load7
1343; RV64ZVE32F-NEXT:    ld a2, 24(a0)
1344; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1345; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
1346; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1347; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
1348; RV64ZVE32F-NEXT:    andi a2, a1, 16
1349; RV64ZVE32F-NEXT:    beqz a2, .LBB22_5
1350; RV64ZVE32F-NEXT:  .LBB22_13: # %cond.load10
1351; RV64ZVE32F-NEXT:    ld a2, 32(a0)
1352; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1353; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
1354; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1355; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 4
1356; RV64ZVE32F-NEXT:    andi a2, a1, 32
1357; RV64ZVE32F-NEXT:    beqz a2, .LBB22_6
1358; RV64ZVE32F-NEXT:  .LBB22_14: # %cond.load13
1359; RV64ZVE32F-NEXT:    ld a2, 40(a0)
1360; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1361; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
1362; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1363; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 5
1364; RV64ZVE32F-NEXT:    andi a2, a1, 64
1365; RV64ZVE32F-NEXT:    beqz a2, .LBB22_7
1366; RV64ZVE32F-NEXT:  .LBB22_15: # %cond.load16
1367; RV64ZVE32F-NEXT:    ld a2, 48(a0)
1368; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1369; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
1370; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1371; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 6
1372; RV64ZVE32F-NEXT:    andi a1, a1, -128
1373; RV64ZVE32F-NEXT:    beqz a1, .LBB22_8
1374; RV64ZVE32F-NEXT:  .LBB22_16: # %cond.load19
1375; RV64ZVE32F-NEXT:    ld a0, 56(a0)
1376; RV64ZVE32F-NEXT:    lh a0, 0(a0)
1377; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1378; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
1379; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 7
1380; RV64ZVE32F-NEXT:    ret
1381  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
1382  ret <8 x i16> %v
1383}
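; RV64ZVE32F cannot hold 64-bit pointers in vector registers, so the <8 x ptr>
; operand is passed indirectly through a0. The mask is moved to a scalar with
; vmv.x.s, and each set bit selects a scalar lh plus a vmv.s.x/vslideup insert
; into the passthru, instead of the single masked vluxei used by the RVV configs.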
1384
1385define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1386; RV32-LABEL: mgather_baseidx_v8i8_v8i16:
1387; RV32:       # %bb.0:
1388; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1389; RV32-NEXT:    vsext.vf4 v10, v8
1390; RV32-NEXT:    vadd.vv v10, v10, v10
1391; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
1392; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
1393; RV32-NEXT:    vmv.v.v v8, v9
1394; RV32-NEXT:    ret
1395;
1396; RV64V-LABEL: mgather_baseidx_v8i8_v8i16:
1397; RV64V:       # %bb.0:
1398; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1399; RV64V-NEXT:    vsext.vf8 v12, v8
1400; RV64V-NEXT:    vadd.vv v12, v12, v12
1401; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
1402; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
1403; RV64V-NEXT:    vmv.v.v v8, v9
1404; RV64V-NEXT:    ret
1405;
1406; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i16:
1407; RV64ZVE32F:       # %bb.0:
1408; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1409; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
1410; RV64ZVE32F-NEXT:    andi a2, a1, 1
1411; RV64ZVE32F-NEXT:    beqz a2, .LBB23_2
1412; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
1413; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1414; RV64ZVE32F-NEXT:    slli a2, a2, 1
1415; RV64ZVE32F-NEXT:    add a2, a0, a2
1416; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1417; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
1418; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1419; RV64ZVE32F-NEXT:  .LBB23_2: # %else
1420; RV64ZVE32F-NEXT:    andi a2, a1, 2
1421; RV64ZVE32F-NEXT:    beqz a2, .LBB23_4
1422; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
1423; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1424; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
1425; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
1426; RV64ZVE32F-NEXT:    slli a2, a2, 1
1427; RV64ZVE32F-NEXT:    add a2, a0, a2
1428; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1429; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1430; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
1431; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
1432; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
1433; RV64ZVE32F-NEXT:  .LBB23_4: # %else2
1434; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
1435; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
1436; RV64ZVE32F-NEXT:    andi a2, a1, 4
1437; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1438; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
1439; RV64ZVE32F-NEXT:    bnez a2, .LBB23_14
1440; RV64ZVE32F-NEXT:  # %bb.5: # %else5
1441; RV64ZVE32F-NEXT:    andi a2, a1, 8
1442; RV64ZVE32F-NEXT:    bnez a2, .LBB23_15
1443; RV64ZVE32F-NEXT:  .LBB23_6: # %else8
1444; RV64ZVE32F-NEXT:    andi a2, a1, 16
1445; RV64ZVE32F-NEXT:    bnez a2, .LBB23_16
1446; RV64ZVE32F-NEXT:  .LBB23_7: # %else11
1447; RV64ZVE32F-NEXT:    andi a2, a1, 32
1448; RV64ZVE32F-NEXT:    beqz a2, .LBB23_9
1449; RV64ZVE32F-NEXT:  .LBB23_8: # %cond.load13
1450; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1451; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
1452; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1453; RV64ZVE32F-NEXT:    slli a2, a2, 1
1454; RV64ZVE32F-NEXT:    add a2, a0, a2
1455; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1456; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1457; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1458; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
1459; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
1460; RV64ZVE32F-NEXT:  .LBB23_9: # %else14
1461; RV64ZVE32F-NEXT:    andi a2, a1, 64
1462; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1463; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
1464; RV64ZVE32F-NEXT:    beqz a2, .LBB23_11
1465; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
1466; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1467; RV64ZVE32F-NEXT:    slli a2, a2, 1
1468; RV64ZVE32F-NEXT:    add a2, a0, a2
1469; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1470; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1471; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
1472; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
1473; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
1474; RV64ZVE32F-NEXT:  .LBB23_11: # %else17
1475; RV64ZVE32F-NEXT:    andi a1, a1, -128
1476; RV64ZVE32F-NEXT:    beqz a1, .LBB23_13
1477; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
1478; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1479; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
1480; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
1481; RV64ZVE32F-NEXT:    slli a1, a1, 1
1482; RV64ZVE32F-NEXT:    add a0, a0, a1
1483; RV64ZVE32F-NEXT:    lh a0, 0(a0)
1484; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1485; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
1486; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1487; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
1488; RV64ZVE32F-NEXT:  .LBB23_13: # %else20
1489; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1490; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
1491; RV64ZVE32F-NEXT:    ret
1492; RV64ZVE32F-NEXT:  .LBB23_14: # %cond.load4
1493; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1494; RV64ZVE32F-NEXT:    slli a2, a2, 1
1495; RV64ZVE32F-NEXT:    add a2, a0, a2
1496; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1497; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1498; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
1499; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
1500; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
1501; RV64ZVE32F-NEXT:    andi a2, a1, 8
1502; RV64ZVE32F-NEXT:    beqz a2, .LBB23_6
1503; RV64ZVE32F-NEXT:  .LBB23_15: # %cond.load7
1504; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1505; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
1506; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1507; RV64ZVE32F-NEXT:    slli a2, a2, 1
1508; RV64ZVE32F-NEXT:    add a2, a0, a2
1509; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1510; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1511; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1512; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
1513; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
1514; RV64ZVE32F-NEXT:    andi a2, a1, 16
1515; RV64ZVE32F-NEXT:    beqz a2, .LBB23_7
1516; RV64ZVE32F-NEXT:  .LBB23_16: # %cond.load10
1517; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1518; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
1519; RV64ZVE32F-NEXT:    slli a2, a2, 1
1520; RV64ZVE32F-NEXT:    add a2, a0, a2
1521; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1522; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1523; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1524; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
1525; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
1526; RV64ZVE32F-NEXT:    andi a2, a1, 32
1527; RV64ZVE32F-NEXT:    bnez a2, .LBB23_8
1528; RV64ZVE32F-NEXT:    j .LBB23_9
1529  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
1530  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
1531  ret <8 x i16> %v
1532}
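; For the i8 index vector the RVV configs sign-extend the indices to the
; pointer-index width and double them (vadd.vv x, x is the *2 scale for i16
; elements) before the indexed load; RV64ZVE32F extracts each index, scales it
; with slli 1, and does a scalar lh.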
1533
1534define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1535; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1536; RV32:       # %bb.0:
1537; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1538; RV32-NEXT:    vsext.vf4 v10, v8
1539; RV32-NEXT:    vadd.vv v10, v10, v10
1540; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
1541; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
1542; RV32-NEXT:    vmv.v.v v8, v9
1543; RV32-NEXT:    ret
1544;
1545; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1546; RV64V:       # %bb.0:
1547; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1548; RV64V-NEXT:    vsext.vf8 v12, v8
1549; RV64V-NEXT:    vadd.vv v12, v12, v12
1550; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
1551; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
1552; RV64V-NEXT:    vmv.v.v v8, v9
1553; RV64V-NEXT:    ret
1554;
1555; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1556; RV64ZVE32F:       # %bb.0:
1557; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1558; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
1559; RV64ZVE32F-NEXT:    andi a2, a1, 1
1560; RV64ZVE32F-NEXT:    beqz a2, .LBB24_2
1561; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
1562; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1563; RV64ZVE32F-NEXT:    slli a2, a2, 1
1564; RV64ZVE32F-NEXT:    add a2, a0, a2
1565; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1566; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
1567; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1568; RV64ZVE32F-NEXT:  .LBB24_2: # %else
1569; RV64ZVE32F-NEXT:    andi a2, a1, 2
1570; RV64ZVE32F-NEXT:    beqz a2, .LBB24_4
1571; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
1572; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1573; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
1574; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
1575; RV64ZVE32F-NEXT:    slli a2, a2, 1
1576; RV64ZVE32F-NEXT:    add a2, a0, a2
1577; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1578; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1579; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
1580; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
1581; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
1582; RV64ZVE32F-NEXT:  .LBB24_4: # %else2
1583; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
1584; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
1585; RV64ZVE32F-NEXT:    andi a2, a1, 4
1586; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1587; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
1588; RV64ZVE32F-NEXT:    bnez a2, .LBB24_14
1589; RV64ZVE32F-NEXT:  # %bb.5: # %else5
1590; RV64ZVE32F-NEXT:    andi a2, a1, 8
1591; RV64ZVE32F-NEXT:    bnez a2, .LBB24_15
1592; RV64ZVE32F-NEXT:  .LBB24_6: # %else8
1593; RV64ZVE32F-NEXT:    andi a2, a1, 16
1594; RV64ZVE32F-NEXT:    bnez a2, .LBB24_16
1595; RV64ZVE32F-NEXT:  .LBB24_7: # %else11
1596; RV64ZVE32F-NEXT:    andi a2, a1, 32
1597; RV64ZVE32F-NEXT:    beqz a2, .LBB24_9
1598; RV64ZVE32F-NEXT:  .LBB24_8: # %cond.load13
1599; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1600; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
1601; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1602; RV64ZVE32F-NEXT:    slli a2, a2, 1
1603; RV64ZVE32F-NEXT:    add a2, a0, a2
1604; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1605; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1606; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1607; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
1608; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
1609; RV64ZVE32F-NEXT:  .LBB24_9: # %else14
1610; RV64ZVE32F-NEXT:    andi a2, a1, 64
1611; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1612; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
1613; RV64ZVE32F-NEXT:    beqz a2, .LBB24_11
1614; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
1615; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1616; RV64ZVE32F-NEXT:    slli a2, a2, 1
1617; RV64ZVE32F-NEXT:    add a2, a0, a2
1618; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1619; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1620; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
1621; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
1622; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
1623; RV64ZVE32F-NEXT:  .LBB24_11: # %else17
1624; RV64ZVE32F-NEXT:    andi a1, a1, -128
1625; RV64ZVE32F-NEXT:    beqz a1, .LBB24_13
1626; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
1627; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1628; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
1629; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
1630; RV64ZVE32F-NEXT:    slli a1, a1, 1
1631; RV64ZVE32F-NEXT:    add a0, a0, a1
1632; RV64ZVE32F-NEXT:    lh a0, 0(a0)
1633; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1634; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
1635; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1636; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
1637; RV64ZVE32F-NEXT:  .LBB24_13: # %else20
1638; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1639; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
1640; RV64ZVE32F-NEXT:    ret
1641; RV64ZVE32F-NEXT:  .LBB24_14: # %cond.load4
1642; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1643; RV64ZVE32F-NEXT:    slli a2, a2, 1
1644; RV64ZVE32F-NEXT:    add a2, a0, a2
1645; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1646; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1647; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
1648; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
1649; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
1650; RV64ZVE32F-NEXT:    andi a2, a1, 8
1651; RV64ZVE32F-NEXT:    beqz a2, .LBB24_6
1652; RV64ZVE32F-NEXT:  .LBB24_15: # %cond.load7
1653; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1654; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
1655; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1656; RV64ZVE32F-NEXT:    slli a2, a2, 1
1657; RV64ZVE32F-NEXT:    add a2, a0, a2
1658; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1659; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1660; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1661; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
1662; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
1663; RV64ZVE32F-NEXT:    andi a2, a1, 16
1664; RV64ZVE32F-NEXT:    beqz a2, .LBB24_7
1665; RV64ZVE32F-NEXT:  .LBB24_16: # %cond.load10
1666; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1667; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
1668; RV64ZVE32F-NEXT:    slli a2, a2, 1
1669; RV64ZVE32F-NEXT:    add a2, a0, a2
1670; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1671; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1672; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1673; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
1674; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
1675; RV64ZVE32F-NEXT:    andi a2, a1, 32
1676; RV64ZVE32F-NEXT:    bnez a2, .LBB24_8
1677; RV64ZVE32F-NEXT:    j .LBB24_9
1678  %eidxs = sext <8 x i8> %idxs to <8 x i16>
1679  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1680  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
1681  ret <8 x i16> %v
1682}
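; Explicitly sign-extending the i8 indices gives the same code as the test
; above: the sext folds away because getelementptr sign-extends narrow vector
; indices anyway.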
1683
1684define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1685; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1686; RV32:       # %bb.0:
1687; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
1688; RV32-NEXT:    vwaddu.vv v10, v8, v8
1689; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
1690; RV32-NEXT:    vluxei16.v v9, (a0), v10, v0.t
1691; RV32-NEXT:    vmv.v.v v8, v9
1692; RV32-NEXT:    ret
1693;
1694; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1695; RV64V:       # %bb.0:
1696; RV64V-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
1697; RV64V-NEXT:    vwaddu.vv v10, v8, v8
1698; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
1699; RV64V-NEXT:    vluxei16.v v9, (a0), v10, v0.t
1700; RV64V-NEXT:    vmv.v.v v8, v9
1701; RV64V-NEXT:    ret
1702;
1703; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1704; RV64ZVE32F:       # %bb.0:
1705; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1706; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
1707; RV64ZVE32F-NEXT:    andi a2, a1, 1
1708; RV64ZVE32F-NEXT:    beqz a2, .LBB25_2
1709; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
1710; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1711; RV64ZVE32F-NEXT:    andi a2, a2, 255
1712; RV64ZVE32F-NEXT:    slli a2, a2, 1
1713; RV64ZVE32F-NEXT:    add a2, a0, a2
1714; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1715; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
1716; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1717; RV64ZVE32F-NEXT:  .LBB25_2: # %else
1718; RV64ZVE32F-NEXT:    andi a2, a1, 2
1719; RV64ZVE32F-NEXT:    beqz a2, .LBB25_4
1720; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
1721; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1722; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
1723; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
1724; RV64ZVE32F-NEXT:    andi a2, a2, 255
1725; RV64ZVE32F-NEXT:    slli a2, a2, 1
1726; RV64ZVE32F-NEXT:    add a2, a0, a2
1727; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1728; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1729; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
1730; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
1731; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
1732; RV64ZVE32F-NEXT:  .LBB25_4: # %else2
1733; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
1734; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
1735; RV64ZVE32F-NEXT:    andi a2, a1, 4
1736; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1737; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
1738; RV64ZVE32F-NEXT:    bnez a2, .LBB25_14
1739; RV64ZVE32F-NEXT:  # %bb.5: # %else5
1740; RV64ZVE32F-NEXT:    andi a2, a1, 8
1741; RV64ZVE32F-NEXT:    bnez a2, .LBB25_15
1742; RV64ZVE32F-NEXT:  .LBB25_6: # %else8
1743; RV64ZVE32F-NEXT:    andi a2, a1, 16
1744; RV64ZVE32F-NEXT:    bnez a2, .LBB25_16
1745; RV64ZVE32F-NEXT:  .LBB25_7: # %else11
1746; RV64ZVE32F-NEXT:    andi a2, a1, 32
1747; RV64ZVE32F-NEXT:    beqz a2, .LBB25_9
1748; RV64ZVE32F-NEXT:  .LBB25_8: # %cond.load13
1749; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1750; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
1751; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1752; RV64ZVE32F-NEXT:    andi a2, a2, 255
1753; RV64ZVE32F-NEXT:    slli a2, a2, 1
1754; RV64ZVE32F-NEXT:    add a2, a0, a2
1755; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1756; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1757; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1758; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
1759; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
1760; RV64ZVE32F-NEXT:  .LBB25_9: # %else14
1761; RV64ZVE32F-NEXT:    andi a2, a1, 64
1762; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1763; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
1764; RV64ZVE32F-NEXT:    beqz a2, .LBB25_11
1765; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
1766; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1767; RV64ZVE32F-NEXT:    andi a2, a2, 255
1768; RV64ZVE32F-NEXT:    slli a2, a2, 1
1769; RV64ZVE32F-NEXT:    add a2, a0, a2
1770; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1771; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1772; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
1773; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
1774; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
1775; RV64ZVE32F-NEXT:  .LBB25_11: # %else17
1776; RV64ZVE32F-NEXT:    andi a1, a1, -128
1777; RV64ZVE32F-NEXT:    beqz a1, .LBB25_13
1778; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
1779; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1780; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
1781; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
1782; RV64ZVE32F-NEXT:    andi a1, a1, 255
1783; RV64ZVE32F-NEXT:    slli a1, a1, 1
1784; RV64ZVE32F-NEXT:    add a0, a0, a1
1785; RV64ZVE32F-NEXT:    lh a0, 0(a0)
1786; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1787; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
1788; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1789; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
1790; RV64ZVE32F-NEXT:  .LBB25_13: # %else20
1791; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1792; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
1793; RV64ZVE32F-NEXT:    ret
1794; RV64ZVE32F-NEXT:  .LBB25_14: # %cond.load4
1795; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1796; RV64ZVE32F-NEXT:    andi a2, a2, 255
1797; RV64ZVE32F-NEXT:    slli a2, a2, 1
1798; RV64ZVE32F-NEXT:    add a2, a0, a2
1799; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1800; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1801; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
1802; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
1803; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
1804; RV64ZVE32F-NEXT:    andi a2, a1, 8
1805; RV64ZVE32F-NEXT:    beqz a2, .LBB25_6
1806; RV64ZVE32F-NEXT:  .LBB25_15: # %cond.load7
1807; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
1808; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
1809; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1810; RV64ZVE32F-NEXT:    andi a2, a2, 255
1811; RV64ZVE32F-NEXT:    slli a2, a2, 1
1812; RV64ZVE32F-NEXT:    add a2, a0, a2
1813; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1814; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
1815; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1816; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
1817; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
1818; RV64ZVE32F-NEXT:    andi a2, a1, 16
1819; RV64ZVE32F-NEXT:    beqz a2, .LBB25_7
1820; RV64ZVE32F-NEXT:  .LBB25_16: # %cond.load10
1821; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1822; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
1823; RV64ZVE32F-NEXT:    andi a2, a2, 255
1824; RV64ZVE32F-NEXT:    slli a2, a2, 1
1825; RV64ZVE32F-NEXT:    add a2, a0, a2
1826; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1827; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1828; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1829; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
1830; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
1831; RV64ZVE32F-NEXT:    andi a2, a1, 32
1832; RV64ZVE32F-NEXT:    bnez a2, .LBB25_8
1833; RV64ZVE32F-NEXT:    j .LBB25_9
1834  %eidxs = zext <8 x i8> %idxs to <8 x i16>
1835  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1836  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
1837  ret <8 x i16> %v
1838}
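; With zero-extended i8 indices the RVV configs use vwaddu.vv, which widens and
; doubles in one step and keeps the offsets at e16 for vluxei16; RV64ZVE32F
; clears the upper bits of each extracted index with andi 255 before scaling.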
1839
1840define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1841; RV32-LABEL: mgather_baseidx_v8i16:
1842; RV32:       # %bb.0:
1843; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
1844; RV32-NEXT:    vwadd.vv v10, v8, v8
1845; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
1846; RV32-NEXT:    vmv.v.v v8, v9
1847; RV32-NEXT:    ret
1848;
1849; RV64V-LABEL: mgather_baseidx_v8i16:
1850; RV64V:       # %bb.0:
1851; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1852; RV64V-NEXT:    vsext.vf4 v12, v8
1853; RV64V-NEXT:    vadd.vv v12, v12, v12
1854; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
1855; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
1856; RV64V-NEXT:    vmv.v.v v8, v9
1857; RV64V-NEXT:    ret
1858;
1859; RV64ZVE32F-LABEL: mgather_baseidx_v8i16:
1860; RV64ZVE32F:       # %bb.0:
1861; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1862; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
1863; RV64ZVE32F-NEXT:    andi a2, a1, 1
1864; RV64ZVE32F-NEXT:    beqz a2, .LBB26_2
1865; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
1866; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
1867; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1868; RV64ZVE32F-NEXT:    slli a2, a2, 1
1869; RV64ZVE32F-NEXT:    add a2, a0, a2
1870; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1871; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
1872; RV64ZVE32F-NEXT:  .LBB26_2: # %else
1873; RV64ZVE32F-NEXT:    andi a2, a1, 2
1874; RV64ZVE32F-NEXT:    beqz a2, .LBB26_4
1875; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
1876; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
1877; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
1878; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
1879; RV64ZVE32F-NEXT:    slli a2, a2, 1
1880; RV64ZVE32F-NEXT:    add a2, a0, a2
1881; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1882; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
1883; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
1884; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
1885; RV64ZVE32F-NEXT:  .LBB26_4: # %else2
1886; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
1887; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
1888; RV64ZVE32F-NEXT:    andi a2, a1, 4
1889; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
1890; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
1891; RV64ZVE32F-NEXT:    bnez a2, .LBB26_14
1892; RV64ZVE32F-NEXT:  # %bb.5: # %else5
1893; RV64ZVE32F-NEXT:    andi a2, a1, 8
1894; RV64ZVE32F-NEXT:    bnez a2, .LBB26_15
1895; RV64ZVE32F-NEXT:  .LBB26_6: # %else8
1896; RV64ZVE32F-NEXT:    andi a2, a1, 16
1897; RV64ZVE32F-NEXT:    bnez a2, .LBB26_16
1898; RV64ZVE32F-NEXT:  .LBB26_7: # %else11
1899; RV64ZVE32F-NEXT:    andi a2, a1, 32
1900; RV64ZVE32F-NEXT:    beqz a2, .LBB26_9
1901; RV64ZVE32F-NEXT:  .LBB26_8: # %cond.load13
1902; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
1903; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
1904; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1905; RV64ZVE32F-NEXT:    slli a2, a2, 1
1906; RV64ZVE32F-NEXT:    add a2, a0, a2
1907; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1908; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1909; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
1910; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
1911; RV64ZVE32F-NEXT:  .LBB26_9: # %else14
1912; RV64ZVE32F-NEXT:    andi a2, a1, 64
1913; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
1914; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
1915; RV64ZVE32F-NEXT:    beqz a2, .LBB26_11
1916; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
1917; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1918; RV64ZVE32F-NEXT:    slli a2, a2, 1
1919; RV64ZVE32F-NEXT:    add a2, a0, a2
1920; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1921; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
1922; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
1923; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
1924; RV64ZVE32F-NEXT:  .LBB26_11: # %else17
1925; RV64ZVE32F-NEXT:    andi a1, a1, -128
1926; RV64ZVE32F-NEXT:    beqz a1, .LBB26_13
1927; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
1928; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
1929; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
1930; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
1931; RV64ZVE32F-NEXT:    slli a1, a1, 1
1932; RV64ZVE32F-NEXT:    add a0, a0, a1
1933; RV64ZVE32F-NEXT:    lh a0, 0(a0)
1934; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
1935; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1936; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
1937; RV64ZVE32F-NEXT:  .LBB26_13: # %else20
1938; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1939; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
1940; RV64ZVE32F-NEXT:    ret
1941; RV64ZVE32F-NEXT:  .LBB26_14: # %cond.load4
1942; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1943; RV64ZVE32F-NEXT:    slli a2, a2, 1
1944; RV64ZVE32F-NEXT:    add a2, a0, a2
1945; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1946; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
1947; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
1948; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
1949; RV64ZVE32F-NEXT:    andi a2, a1, 8
1950; RV64ZVE32F-NEXT:    beqz a2, .LBB26_6
1951; RV64ZVE32F-NEXT:  .LBB26_15: # %cond.load7
1952; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
1953; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
1954; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
1955; RV64ZVE32F-NEXT:    slli a2, a2, 1
1956; RV64ZVE32F-NEXT:    add a2, a0, a2
1957; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1958; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1959; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
1960; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
1961; RV64ZVE32F-NEXT:    andi a2, a1, 16
1962; RV64ZVE32F-NEXT:    beqz a2, .LBB26_7
1963; RV64ZVE32F-NEXT:  .LBB26_16: # %cond.load10
1964; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
1965; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
1966; RV64ZVE32F-NEXT:    slli a2, a2, 1
1967; RV64ZVE32F-NEXT:    add a2, a0, a2
1968; RV64ZVE32F-NEXT:    lh a2, 0(a2)
1969; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
1970; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
1971; RV64ZVE32F-NEXT:    andi a2, a1, 32
1972; RV64ZVE32F-NEXT:    bnez a2, .LBB26_8
1973; RV64ZVE32F-NEXT:    j .LBB26_9
1974  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
1975  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
1976  ret <8 x i16> %v
1977}
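; With native i16 indices RV32 widens and doubles in a single vwadd.vv to get
; e32 offsets, while RV64V still needs vsext.vf4 to e64 before vluxei64.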
1978
1979declare <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i32>)
1980
1981define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthru) {
1982; RV32V-LABEL: mgather_v1i32:
1983; RV32V:       # %bb.0:
1984; RV32V-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
1985; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1986; RV32V-NEXT:    vmv1r.v v8, v9
1987; RV32V-NEXT:    ret
1988;
1989; RV64V-LABEL: mgather_v1i32:
1990; RV64V:       # %bb.0:
1991; RV64V-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
1992; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
1993; RV64V-NEXT:    vmv1r.v v8, v9
1994; RV64V-NEXT:    ret
1995;
1996; RV32ZVE32F-LABEL: mgather_v1i32:
1997; RV32ZVE32F:       # %bb.0:
1998; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
1999; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
2000; RV32ZVE32F-NEXT:    vmv.v.v v8, v9
2001; RV32ZVE32F-NEXT:    ret
2002;
2003; RV64ZVE32F-LABEL: mgather_v1i32:
2004; RV64ZVE32F:       # %bb.0:
2005; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
2006; RV64ZVE32F-NEXT:    vfirst.m a1, v0
2007; RV64ZVE32F-NEXT:    bnez a1, .LBB27_2
2008; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
2009; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
2010; RV64ZVE32F-NEXT:    vle32.v v8, (a0)
2011; RV64ZVE32F-NEXT:  .LBB27_2: # %else
2012; RV64ZVE32F-NEXT:    ret
2013  %v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
2014  ret <1 x i32> %v
2015}
2016
2017declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
2018
2019define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2020; RV32V-LABEL: mgather_v2i32:
2021; RV32V:       # %bb.0:
2022; RV32V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
2023; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
2024; RV32V-NEXT:    vmv1r.v v8, v9
2025; RV32V-NEXT:    ret
2026;
2027; RV64V-LABEL: mgather_v2i32:
2028; RV64V:       # %bb.0:
2029; RV64V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
2030; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
2031; RV64V-NEXT:    vmv1r.v v8, v9
2032; RV64V-NEXT:    ret
2033;
2034; RV32ZVE32F-LABEL: mgather_v2i32:
2035; RV32ZVE32F:       # %bb.0:
2036; RV32ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
2037; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
2038; RV32ZVE32F-NEXT:    vmv.v.v v8, v9
2039; RV32ZVE32F-NEXT:    ret
2040;
2041; RV64ZVE32F-LABEL: mgather_v2i32:
2042; RV64ZVE32F:       # %bb.0:
2043; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2044; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
2045; RV64ZVE32F-NEXT:    andi a3, a2, 1
2046; RV64ZVE32F-NEXT:    bnez a3, .LBB28_3
2047; RV64ZVE32F-NEXT:  # %bb.1: # %else
2048; RV64ZVE32F-NEXT:    andi a2, a2, 2
2049; RV64ZVE32F-NEXT:    bnez a2, .LBB28_4
2050; RV64ZVE32F-NEXT:  .LBB28_2: # %else2
2051; RV64ZVE32F-NEXT:    ret
2052; RV64ZVE32F-NEXT:  .LBB28_3: # %cond.load
2053; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2054; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2055; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
2056; RV64ZVE32F-NEXT:    andi a2, a2, 2
2057; RV64ZVE32F-NEXT:    beqz a2, .LBB28_2
2058; RV64ZVE32F-NEXT:  .LBB28_4: # %cond.load1
2059; RV64ZVE32F-NEXT:    lw a0, 0(a1)
2060; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2061; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
2062; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
2063; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
2064; RV64ZVE32F-NEXT:    ret
2065  %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
2066  ret <2 x i32> %v
2067}
2068
2069define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2070; RV32V-LABEL: mgather_v2i32_sextload_v2i64:
2071; RV32V:       # %bb.0:
2072; RV32V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
2073; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
2074; RV32V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
2075; RV32V-NEXT:    vsext.vf2 v8, v9
2076; RV32V-NEXT:    ret
2077;
2078; RV64V-LABEL: mgather_v2i32_sextload_v2i64:
2079; RV64V:       # %bb.0:
2080; RV64V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
2081; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
2082; RV64V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
2083; RV64V-NEXT:    vsext.vf2 v8, v9
2084; RV64V-NEXT:    ret
2085;
2086; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
2087; RV32ZVE32F:       # %bb.0:
2088; RV32ZVE32F-NEXT:    addi a1, a0, 8
2089; RV32ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
2090; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
2091; RV32ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
2092; RV32ZVE32F-NEXT:    vmv.x.s a2, v9
2093; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
2094; RV32ZVE32F-NEXT:    vse32.v v9, (a0)
2095; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
2096; RV32ZVE32F-NEXT:    srai a2, a2, 31
2097; RV32ZVE32F-NEXT:    vse32.v v8, (a1)
2098; RV32ZVE32F-NEXT:    srai a3, a3, 31
2099; RV32ZVE32F-NEXT:    sw a2, 4(a0)
2100; RV32ZVE32F-NEXT:    sw a3, 12(a0)
2101; RV32ZVE32F-NEXT:    ret
2102;
2103; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
2104; RV64ZVE32F:       # %bb.0:
2105; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2106; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
2107; RV64ZVE32F-NEXT:    andi a3, a2, 1
2108; RV64ZVE32F-NEXT:    beqz a3, .LBB29_2
2109; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
2110; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2111; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2112; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
2113; RV64ZVE32F-NEXT:  .LBB29_2: # %else
2114; RV64ZVE32F-NEXT:    andi a2, a2, 2
2115; RV64ZVE32F-NEXT:    beqz a2, .LBB29_4
2116; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
2117; RV64ZVE32F-NEXT:    lw a0, 0(a1)
2118; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2119; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
2120; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
2121; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
2122; RV64ZVE32F-NEXT:  .LBB29_4: # %else2
2123; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
2124; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
2125; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2126; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
2127; RV64ZVE32F-NEXT:    ret
2128  %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
2129  %ev = sext <2 x i32> %v to <2 x i64>
2130  ret <2 x i64> %ev
2131}
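; Gather followed by a sign-extending use: the RVV configs gather at e32 and
; vsext.vf2 to e64. RV32ZVE32F returns the <2 x i64> indirectly, storing the
; 32-bit lanes plus srai-derived high words; RV64ZVE32F extracts the two lanes
; into a0/a1, where vmv.x.s already sign-extends to XLEN.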
2132
2133define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2134; RV32V-LABEL: mgather_v2i32_zextload_v2i64:
2135; RV32V:       # %bb.0:
2136; RV32V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
2137; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
2138; RV32V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
2139; RV32V-NEXT:    vzext.vf2 v8, v9
2140; RV32V-NEXT:    ret
2141;
2142; RV64V-LABEL: mgather_v2i32_zextload_v2i64:
2143; RV64V:       # %bb.0:
2144; RV64V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
2145; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
2146; RV64V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
2147; RV64V-NEXT:    vzext.vf2 v8, v9
2148; RV64V-NEXT:    ret
2149;
2150; RV32ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
2151; RV32ZVE32F:       # %bb.0:
2152; RV32ZVE32F-NEXT:    addi a1, a0, 8
2153; RV32ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
2154; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
2155; RV32ZVE32F-NEXT:    sw zero, 4(a0)
2156; RV32ZVE32F-NEXT:    sw zero, 12(a0)
2157; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
2158; RV32ZVE32F-NEXT:    vse32.v v9, (a0)
2159; RV32ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
2160; RV32ZVE32F-NEXT:    vse32.v v8, (a1)
2161; RV32ZVE32F-NEXT:    ret
2162;
2163; RV64ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
2164; RV64ZVE32F:       # %bb.0:
2165; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2166; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
2167; RV64ZVE32F-NEXT:    andi a3, a2, 1
2168; RV64ZVE32F-NEXT:    beqz a3, .LBB30_2
2169; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
2170; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2171; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2172; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
2173; RV64ZVE32F-NEXT:  .LBB30_2: # %else
2174; RV64ZVE32F-NEXT:    andi a2, a2, 2
2175; RV64ZVE32F-NEXT:    beqz a2, .LBB30_4
2176; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
2177; RV64ZVE32F-NEXT:    lw a0, 0(a1)
2178; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2179; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
2180; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
2181; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
2182; RV64ZVE32F-NEXT:  .LBB30_4: # %else2
2183; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
2184; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
2185; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2186; RV64ZVE32F-NEXT:    slli a0, a0, 32
2187; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
2188; RV64ZVE32F-NEXT:    srli a0, a0, 32
2189; RV64ZVE32F-NEXT:    slli a1, a1, 32
2190; RV64ZVE32F-NEXT:    srli a1, a1, 32
2191; RV64ZVE32F-NEXT:    ret
2192  %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
2193  %ev = zext <2 x i32> %v to <2 x i64>
2194  ret <2 x i64> %ev
2195}
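; The zero-extending variant instead stores zero high words on RV32ZVE32F and
; clears the upper 32 bits with an slli/srli pair on RV64ZVE32F; the RVV
; configs simply use vzext.vf2.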
2196
2197declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
2198
2199define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthru) {
2200; RV32-LABEL: mgather_v4i32:
2201; RV32:       # %bb.0:
2202; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
2203; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
2204; RV32-NEXT:    vmv.v.v v8, v9
2205; RV32-NEXT:    ret
2206;
2207; RV64V-LABEL: mgather_v4i32:
2208; RV64V:       # %bb.0:
2209; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
2210; RV64V-NEXT:    vluxei64.v v10, (zero), v8, v0.t
2211; RV64V-NEXT:    vmv.v.v v8, v10
2212; RV64V-NEXT:    ret
2213;
2214; RV64ZVE32F-LABEL: mgather_v4i32:
2215; RV64ZVE32F:       # %bb.0:
2216; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2217; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
2218; RV64ZVE32F-NEXT:    andi a2, a1, 1
2219; RV64ZVE32F-NEXT:    bnez a2, .LBB31_5
2220; RV64ZVE32F-NEXT:  # %bb.1: # %else
2221; RV64ZVE32F-NEXT:    andi a2, a1, 2
2222; RV64ZVE32F-NEXT:    bnez a2, .LBB31_6
2223; RV64ZVE32F-NEXT:  .LBB31_2: # %else2
2224; RV64ZVE32F-NEXT:    andi a2, a1, 4
2225; RV64ZVE32F-NEXT:    bnez a2, .LBB31_7
2226; RV64ZVE32F-NEXT:  .LBB31_3: # %else5
2227; RV64ZVE32F-NEXT:    andi a1, a1, 8
2228; RV64ZVE32F-NEXT:    bnez a1, .LBB31_8
2229; RV64ZVE32F-NEXT:  .LBB31_4: # %else8
2230; RV64ZVE32F-NEXT:    ret
2231; RV64ZVE32F-NEXT:  .LBB31_5: # %cond.load
2232; RV64ZVE32F-NEXT:    ld a2, 0(a0)
2233; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2234; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2235; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
2236; RV64ZVE32F-NEXT:    andi a2, a1, 2
2237; RV64ZVE32F-NEXT:    beqz a2, .LBB31_2
2238; RV64ZVE32F-NEXT:  .LBB31_6: # %cond.load1
2239; RV64ZVE32F-NEXT:    ld a2, 8(a0)
2240; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2241; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2242; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
2243; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
2244; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
2245; RV64ZVE32F-NEXT:    andi a2, a1, 4
2246; RV64ZVE32F-NEXT:    beqz a2, .LBB31_3
2247; RV64ZVE32F-NEXT:  .LBB31_7: # %cond.load4
2248; RV64ZVE32F-NEXT:    ld a2, 16(a0)
2249; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2250; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
2251; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
2252; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
2253; RV64ZVE32F-NEXT:    andi a1, a1, 8
2254; RV64ZVE32F-NEXT:    beqz a1, .LBB31_4
2255; RV64ZVE32F-NEXT:  .LBB31_8: # %cond.load7
2256; RV64ZVE32F-NEXT:    ld a0, 24(a0)
2257; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2258; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2259; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
2260; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
2261; RV64ZVE32F-NEXT:    ret
2262  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> %passthru)
2263  ret <4 x i32> %v
2264}
2265
2266define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
2267; RV32-LABEL: mgather_truemask_v4i32:
2268; RV32:       # %bb.0:
2269; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2270; RV32-NEXT:    vluxei32.v v8, (zero), v8
2271; RV32-NEXT:    ret
2272;
2273; RV64V-LABEL: mgather_truemask_v4i32:
2274; RV64V:       # %bb.0:
2275; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2276; RV64V-NEXT:    vluxei64.v v10, (zero), v8
2277; RV64V-NEXT:    vmv.v.v v8, v10
2278; RV64V-NEXT:    ret
2279;
2280; RV64ZVE32F-LABEL: mgather_truemask_v4i32:
2281; RV64ZVE32F:       # %bb.0:
2282; RV64ZVE32F-NEXT:    ld a1, 0(a0)
2283; RV64ZVE32F-NEXT:    ld a2, 8(a0)
2284; RV64ZVE32F-NEXT:    ld a3, 16(a0)
2285; RV64ZVE32F-NEXT:    ld a0, 24(a0)
2286; RV64ZVE32F-NEXT:    lw a1, 0(a1)
2287; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2288; RV64ZVE32F-NEXT:    lw a3, 0(a3)
2289; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2290; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2291; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
2292; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
2293; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
2294; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
2295; RV64ZVE32F-NEXT:    ret
2296  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru)
2297  ret <4 x i32> %v
2298}
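; An all-ones mask becomes an unmasked vluxei on the RVV configs (RV32 can
; gather in place into v8); RV64ZVE32F loads all four pointers and values with
; scalar loads and rebuilds the result with vmv.v.x/vslide1down.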
2299
2300define <4 x i32> @mgather_falsemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
2301; RV32-LABEL: mgather_falsemask_v4i32:
2302; RV32:       # %bb.0:
2303; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2304; RV32-NEXT:    vmv1r.v v8, v9
2305; RV32-NEXT:    ret
2306;
2307; RV64V-LABEL: mgather_falsemask_v4i32:
2308; RV64V:       # %bb.0:
2309; RV64V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2310; RV64V-NEXT:    vmv1r.v v8, v10
2311; RV64V-NEXT:    ret
2312;
2313; RV64ZVE32F-LABEL: mgather_falsemask_v4i32:
2314; RV64ZVE32F:       # %bb.0:
2315; RV64ZVE32F-NEXT:    ret
2316  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x i32> %passthru)
2317  ret <4 x i32> %v
2318}
2319
2320declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
2321
2322define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthru) {
2323; RV32-LABEL: mgather_v8i32:
2324; RV32:       # %bb.0:
2325; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
2326; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
2327; RV32-NEXT:    vmv.v.v v8, v10
2328; RV32-NEXT:    ret
2329;
2330; RV64V-LABEL: mgather_v8i32:
2331; RV64V:       # %bb.0:
2332; RV64V-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
2333; RV64V-NEXT:    vluxei64.v v12, (zero), v8, v0.t
2334; RV64V-NEXT:    vmv.v.v v8, v12
2335; RV64V-NEXT:    ret
2336;
2337; RV64ZVE32F-LABEL: mgather_v8i32:
2338; RV64ZVE32F:       # %bb.0:
2339; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2340; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
2341; RV64ZVE32F-NEXT:    andi a2, a1, 1
2342; RV64ZVE32F-NEXT:    bnez a2, .LBB34_9
2343; RV64ZVE32F-NEXT:  # %bb.1: # %else
2344; RV64ZVE32F-NEXT:    andi a2, a1, 2
2345; RV64ZVE32F-NEXT:    bnez a2, .LBB34_10
2346; RV64ZVE32F-NEXT:  .LBB34_2: # %else2
2347; RV64ZVE32F-NEXT:    andi a2, a1, 4
2348; RV64ZVE32F-NEXT:    bnez a2, .LBB34_11
2349; RV64ZVE32F-NEXT:  .LBB34_3: # %else5
2350; RV64ZVE32F-NEXT:    andi a2, a1, 8
2351; RV64ZVE32F-NEXT:    bnez a2, .LBB34_12
2352; RV64ZVE32F-NEXT:  .LBB34_4: # %else8
2353; RV64ZVE32F-NEXT:    andi a2, a1, 16
2354; RV64ZVE32F-NEXT:    bnez a2, .LBB34_13
2355; RV64ZVE32F-NEXT:  .LBB34_5: # %else11
2356; RV64ZVE32F-NEXT:    andi a2, a1, 32
2357; RV64ZVE32F-NEXT:    bnez a2, .LBB34_14
2358; RV64ZVE32F-NEXT:  .LBB34_6: # %else14
2359; RV64ZVE32F-NEXT:    andi a2, a1, 64
2360; RV64ZVE32F-NEXT:    bnez a2, .LBB34_15
2361; RV64ZVE32F-NEXT:  .LBB34_7: # %else17
2362; RV64ZVE32F-NEXT:    andi a1, a1, -128
2363; RV64ZVE32F-NEXT:    bnez a1, .LBB34_16
2364; RV64ZVE32F-NEXT:  .LBB34_8: # %else20
2365; RV64ZVE32F-NEXT:    ret
2366; RV64ZVE32F-NEXT:  .LBB34_9: # %cond.load
2367; RV64ZVE32F-NEXT:    ld a2, 0(a0)
2368; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2369; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2370; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
2371; RV64ZVE32F-NEXT:    andi a2, a1, 2
2372; RV64ZVE32F-NEXT:    beqz a2, .LBB34_2
2373; RV64ZVE32F-NEXT:  .LBB34_10: # %cond.load1
2374; RV64ZVE32F-NEXT:    ld a2, 8(a0)
2375; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2376; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2377; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2378; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
2379; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 1
2380; RV64ZVE32F-NEXT:    andi a2, a1, 4
2381; RV64ZVE32F-NEXT:    beqz a2, .LBB34_3
2382; RV64ZVE32F-NEXT:  .LBB34_11: # %cond.load4
2383; RV64ZVE32F-NEXT:    ld a2, 16(a0)
2384; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2385; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
2386; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2387; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 2
2388; RV64ZVE32F-NEXT:    andi a2, a1, 8
2389; RV64ZVE32F-NEXT:    beqz a2, .LBB34_4
2390; RV64ZVE32F-NEXT:  .LBB34_12: # %cond.load7
2391; RV64ZVE32F-NEXT:    ld a2, 24(a0)
2392; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2393; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
2394; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2395; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 3
2396; RV64ZVE32F-NEXT:    andi a2, a1, 16
2397; RV64ZVE32F-NEXT:    beqz a2, .LBB34_5
2398; RV64ZVE32F-NEXT:  .LBB34_13: # %cond.load10
2399; RV64ZVE32F-NEXT:    ld a2, 32(a0)
2400; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2401; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
2402; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2403; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 4
2404; RV64ZVE32F-NEXT:    andi a2, a1, 32
2405; RV64ZVE32F-NEXT:    beqz a2, .LBB34_6
2406; RV64ZVE32F-NEXT:  .LBB34_14: # %cond.load13
2407; RV64ZVE32F-NEXT:    ld a2, 40(a0)
2408; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2409; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
2410; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2411; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 5
2412; RV64ZVE32F-NEXT:    andi a2, a1, 64
2413; RV64ZVE32F-NEXT:    beqz a2, .LBB34_7
2414; RV64ZVE32F-NEXT:  .LBB34_15: # %cond.load16
2415; RV64ZVE32F-NEXT:    ld a2, 48(a0)
2416; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2417; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
2418; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2419; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 6
2420; RV64ZVE32F-NEXT:    andi a1, a1, -128
2421; RV64ZVE32F-NEXT:    beqz a1, .LBB34_8
2422; RV64ZVE32F-NEXT:  .LBB34_16: # %cond.load19
2423; RV64ZVE32F-NEXT:    ld a0, 56(a0)
2424; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2425; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2426; RV64ZVE32F-NEXT:    vmv.s.x v10, a0
2427; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 7
2428; RV64ZVE32F-NEXT:    ret
2429  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
2430  ret <8 x i32> %v
2431}
2432
2433define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2434; RV32-LABEL: mgather_baseidx_v8i8_v8i32:
2435; RV32:       # %bb.0:
2436; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
2437; RV32-NEXT:    vsext.vf4 v12, v8
2438; RV32-NEXT:    vsll.vi v8, v12, 2
2439; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
2440; RV32-NEXT:    vmv.v.v v8, v10
2441; RV32-NEXT:    ret
2442;
2443; RV64V-LABEL: mgather_baseidx_v8i8_v8i32:
2444; RV64V:       # %bb.0:
2445; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
2446; RV64V-NEXT:    vsext.vf8 v12, v8
2447; RV64V-NEXT:    vsll.vi v12, v12, 2
2448; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
2449; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
2450; RV64V-NEXT:    vmv.v.v v8, v10
2451; RV64V-NEXT:    ret
2452;
2453; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i32:
2454; RV64ZVE32F:       # %bb.0:
2455; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2456; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
2457; RV64ZVE32F-NEXT:    andi a2, a1, 1
2458; RV64ZVE32F-NEXT:    beqz a2, .LBB35_2
2459; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
2460; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2461; RV64ZVE32F-NEXT:    slli a2, a2, 2
2462; RV64ZVE32F-NEXT:    add a2, a0, a2
2463; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2464; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2465; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2466; RV64ZVE32F-NEXT:  .LBB35_2: # %else
2467; RV64ZVE32F-NEXT:    andi a2, a1, 2
2468; RV64ZVE32F-NEXT:    beqz a2, .LBB35_4
2469; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
2470; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2471; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
2472; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
2473; RV64ZVE32F-NEXT:    slli a2, a2, 2
2474; RV64ZVE32F-NEXT:    add a2, a0, a2
2475; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2476; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2477; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
2478; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
2479; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
2480; RV64ZVE32F-NEXT:  .LBB35_4: # %else2
2481; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
2482; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
2483; RV64ZVE32F-NEXT:    andi a2, a1, 4
2484; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2485; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
2486; RV64ZVE32F-NEXT:    bnez a2, .LBB35_14
2487; RV64ZVE32F-NEXT:  # %bb.5: # %else5
2488; RV64ZVE32F-NEXT:    andi a2, a1, 8
2489; RV64ZVE32F-NEXT:    bnez a2, .LBB35_15
2490; RV64ZVE32F-NEXT:  .LBB35_6: # %else8
2491; RV64ZVE32F-NEXT:    andi a2, a1, 16
2492; RV64ZVE32F-NEXT:    bnez a2, .LBB35_16
2493; RV64ZVE32F-NEXT:  .LBB35_7: # %else11
2494; RV64ZVE32F-NEXT:    andi a2, a1, 32
2495; RV64ZVE32F-NEXT:    beqz a2, .LBB35_9
2496; RV64ZVE32F-NEXT:  .LBB35_8: # %cond.load13
2497; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2498; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
2499; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2500; RV64ZVE32F-NEXT:    slli a2, a2, 2
2501; RV64ZVE32F-NEXT:    add a2, a0, a2
2502; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2503; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2504; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2505; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
2506; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
2507; RV64ZVE32F-NEXT:  .LBB35_9: # %else14
2508; RV64ZVE32F-NEXT:    andi a2, a1, 64
2509; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2510; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
2511; RV64ZVE32F-NEXT:    beqz a2, .LBB35_11
2512; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
2513; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2514; RV64ZVE32F-NEXT:    slli a2, a2, 2
2515; RV64ZVE32F-NEXT:    add a2, a0, a2
2516; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2517; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2518; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2519; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
2520; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
2521; RV64ZVE32F-NEXT:  .LBB35_11: # %else17
2522; RV64ZVE32F-NEXT:    andi a1, a1, -128
2523; RV64ZVE32F-NEXT:    beqz a1, .LBB35_13
2524; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
2525; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2526; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2527; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
2528; RV64ZVE32F-NEXT:    slli a1, a1, 2
2529; RV64ZVE32F-NEXT:    add a0, a0, a1
2530; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2531; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2532; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
2533; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2534; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
2535; RV64ZVE32F-NEXT:  .LBB35_13: # %else20
2536; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2537; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
2538; RV64ZVE32F-NEXT:    ret
2539; RV64ZVE32F-NEXT:  .LBB35_14: # %cond.load4
2540; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2541; RV64ZVE32F-NEXT:    slli a2, a2, 2
2542; RV64ZVE32F-NEXT:    add a2, a0, a2
2543; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2544; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2545; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2546; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
2547; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
2548; RV64ZVE32F-NEXT:    andi a2, a1, 8
2549; RV64ZVE32F-NEXT:    beqz a2, .LBB35_6
2550; RV64ZVE32F-NEXT:  .LBB35_15: # %cond.load7
2551; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2552; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2553; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2554; RV64ZVE32F-NEXT:    slli a2, a2, 2
2555; RV64ZVE32F-NEXT:    add a2, a0, a2
2556; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2557; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2558; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
2559; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
2560; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
2561; RV64ZVE32F-NEXT:    andi a2, a1, 16
2562; RV64ZVE32F-NEXT:    beqz a2, .LBB35_7
2563; RV64ZVE32F-NEXT:  .LBB35_16: # %cond.load10
2564; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2565; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
2566; RV64ZVE32F-NEXT:    slli a2, a2, 2
2567; RV64ZVE32F-NEXT:    add a2, a0, a2
2568; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2569; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2570; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2571; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
2572; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
2573; RV64ZVE32F-NEXT:    andi a2, a1, 32
2574; RV64ZVE32F-NEXT:    bnez a2, .LBB35_8
2575; RV64ZVE32F-NEXT:    j .LBB35_9
2576  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
2577  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
2578  ret <8 x i32> %v
2579}
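; Same structure as the i16 base-index tests, but the i32 element size means
; the indices are scaled by 4: vsll.vi 2 on the RVV configs and slli 2 in the
; RV64ZVE32F scalar sequence.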
2580
2581define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2582; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2583; RV32:       # %bb.0:
2584; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
2585; RV32-NEXT:    vsext.vf4 v12, v8
2586; RV32-NEXT:    vsll.vi v8, v12, 2
2587; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
2588; RV32-NEXT:    vmv.v.v v8, v10
2589; RV32-NEXT:    ret
2590;
2591; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2592; RV64V:       # %bb.0:
2593; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
2594; RV64V-NEXT:    vsext.vf8 v12, v8
2595; RV64V-NEXT:    vsll.vi v12, v12, 2
2596; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
2597; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
2598; RV64V-NEXT:    vmv.v.v v8, v10
2599; RV64V-NEXT:    ret
2600;
2601; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2602; RV64ZVE32F:       # %bb.0:
2603; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2604; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
2605; RV64ZVE32F-NEXT:    andi a2, a1, 1
2606; RV64ZVE32F-NEXT:    beqz a2, .LBB36_2
2607; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
2608; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2609; RV64ZVE32F-NEXT:    slli a2, a2, 2
2610; RV64ZVE32F-NEXT:    add a2, a0, a2
2611; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2612; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2613; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2614; RV64ZVE32F-NEXT:  .LBB36_2: # %else
2615; RV64ZVE32F-NEXT:    andi a2, a1, 2
2616; RV64ZVE32F-NEXT:    beqz a2, .LBB36_4
2617; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
2618; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2619; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
2620; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
2621; RV64ZVE32F-NEXT:    slli a2, a2, 2
2622; RV64ZVE32F-NEXT:    add a2, a0, a2
2623; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2624; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2625; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
2626; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
2627; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
2628; RV64ZVE32F-NEXT:  .LBB36_4: # %else2
2629; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
2630; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
2631; RV64ZVE32F-NEXT:    andi a2, a1, 4
2632; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2633; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
2634; RV64ZVE32F-NEXT:    bnez a2, .LBB36_14
2635; RV64ZVE32F-NEXT:  # %bb.5: # %else5
2636; RV64ZVE32F-NEXT:    andi a2, a1, 8
2637; RV64ZVE32F-NEXT:    bnez a2, .LBB36_15
2638; RV64ZVE32F-NEXT:  .LBB36_6: # %else8
2639; RV64ZVE32F-NEXT:    andi a2, a1, 16
2640; RV64ZVE32F-NEXT:    bnez a2, .LBB36_16
2641; RV64ZVE32F-NEXT:  .LBB36_7: # %else11
2642; RV64ZVE32F-NEXT:    andi a2, a1, 32
2643; RV64ZVE32F-NEXT:    beqz a2, .LBB36_9
2644; RV64ZVE32F-NEXT:  .LBB36_8: # %cond.load13
2645; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2646; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
2647; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2648; RV64ZVE32F-NEXT:    slli a2, a2, 2
2649; RV64ZVE32F-NEXT:    add a2, a0, a2
2650; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2651; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2652; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2653; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
2654; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
2655; RV64ZVE32F-NEXT:  .LBB36_9: # %else14
2656; RV64ZVE32F-NEXT:    andi a2, a1, 64
2657; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2658; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
2659; RV64ZVE32F-NEXT:    beqz a2, .LBB36_11
2660; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
2661; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2662; RV64ZVE32F-NEXT:    slli a2, a2, 2
2663; RV64ZVE32F-NEXT:    add a2, a0, a2
2664; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2665; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2666; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2667; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
2668; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
2669; RV64ZVE32F-NEXT:  .LBB36_11: # %else17
2670; RV64ZVE32F-NEXT:    andi a1, a1, -128
2671; RV64ZVE32F-NEXT:    beqz a1, .LBB36_13
2672; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
2673; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2674; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2675; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
2676; RV64ZVE32F-NEXT:    slli a1, a1, 2
2677; RV64ZVE32F-NEXT:    add a0, a0, a1
2678; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2679; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2680; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
2681; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2682; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
2683; RV64ZVE32F-NEXT:  .LBB36_13: # %else20
2684; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2685; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
2686; RV64ZVE32F-NEXT:    ret
2687; RV64ZVE32F-NEXT:  .LBB36_14: # %cond.load4
2688; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2689; RV64ZVE32F-NEXT:    slli a2, a2, 2
2690; RV64ZVE32F-NEXT:    add a2, a0, a2
2691; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2692; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2693; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2694; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
2695; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
2696; RV64ZVE32F-NEXT:    andi a2, a1, 8
2697; RV64ZVE32F-NEXT:    beqz a2, .LBB36_6
2698; RV64ZVE32F-NEXT:  .LBB36_15: # %cond.load7
2699; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2700; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2701; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2702; RV64ZVE32F-NEXT:    slli a2, a2, 2
2703; RV64ZVE32F-NEXT:    add a2, a0, a2
2704; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2705; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2706; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
2707; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
2708; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
2709; RV64ZVE32F-NEXT:    andi a2, a1, 16
2710; RV64ZVE32F-NEXT:    beqz a2, .LBB36_7
2711; RV64ZVE32F-NEXT:  .LBB36_16: # %cond.load10
2712; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2713; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
2714; RV64ZVE32F-NEXT:    slli a2, a2, 2
2715; RV64ZVE32F-NEXT:    add a2, a0, a2
2716; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2717; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2718; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2719; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
2720; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
2721; RV64ZVE32F-NEXT:    andi a2, a1, 32
2722; RV64ZVE32F-NEXT:    bnez a2, .LBB36_8
2723; RV64ZVE32F-NEXT:    j .LBB36_9
2724  %eidxs = sext <8 x i8> %idxs to <8 x i32>
2725  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2726  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
2727  ret <8 x i32> %v
2728}
2729
2730define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2731; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2732; RV32:       # %bb.0:
2733; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2734; RV32-NEXT:    vzext.vf2 v9, v8
2735; RV32-NEXT:    vsll.vi v8, v9, 2
2736; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
2737; RV32-NEXT:    vluxei16.v v10, (a0), v8, v0.t
2738; RV32-NEXT:    vmv.v.v v8, v10
2739; RV32-NEXT:    ret
2740;
2741; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2742; RV64V:       # %bb.0:
2743; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2744; RV64V-NEXT:    vzext.vf2 v9, v8
2745; RV64V-NEXT:    vsll.vi v8, v9, 2
2746; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
2747; RV64V-NEXT:    vluxei16.v v10, (a0), v8, v0.t
2748; RV64V-NEXT:    vmv.v.v v8, v10
2749; RV64V-NEXT:    ret
2750;
2751; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2752; RV64ZVE32F:       # %bb.0:
2753; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2754; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
2755; RV64ZVE32F-NEXT:    andi a2, a1, 1
2756; RV64ZVE32F-NEXT:    beqz a2, .LBB37_2
2757; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
2758; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2759; RV64ZVE32F-NEXT:    andi a2, a2, 255
2760; RV64ZVE32F-NEXT:    slli a2, a2, 2
2761; RV64ZVE32F-NEXT:    add a2, a0, a2
2762; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2763; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2764; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2765; RV64ZVE32F-NEXT:  .LBB37_2: # %else
2766; RV64ZVE32F-NEXT:    andi a2, a1, 2
2767; RV64ZVE32F-NEXT:    beqz a2, .LBB37_4
2768; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
2769; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2770; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
2771; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
2772; RV64ZVE32F-NEXT:    andi a2, a2, 255
2773; RV64ZVE32F-NEXT:    slli a2, a2, 2
2774; RV64ZVE32F-NEXT:    add a2, a0, a2
2775; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2776; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2777; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
2778; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
2779; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
2780; RV64ZVE32F-NEXT:  .LBB37_4: # %else2
2781; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
2782; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
2783; RV64ZVE32F-NEXT:    andi a2, a1, 4
2784; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2785; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
2786; RV64ZVE32F-NEXT:    bnez a2, .LBB37_14
2787; RV64ZVE32F-NEXT:  # %bb.5: # %else5
2788; RV64ZVE32F-NEXT:    andi a2, a1, 8
2789; RV64ZVE32F-NEXT:    bnez a2, .LBB37_15
2790; RV64ZVE32F-NEXT:  .LBB37_6: # %else8
2791; RV64ZVE32F-NEXT:    andi a2, a1, 16
2792; RV64ZVE32F-NEXT:    bnez a2, .LBB37_16
2793; RV64ZVE32F-NEXT:  .LBB37_7: # %else11
2794; RV64ZVE32F-NEXT:    andi a2, a1, 32
2795; RV64ZVE32F-NEXT:    beqz a2, .LBB37_9
2796; RV64ZVE32F-NEXT:  .LBB37_8: # %cond.load13
2797; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2798; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
2799; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2800; RV64ZVE32F-NEXT:    andi a2, a2, 255
2801; RV64ZVE32F-NEXT:    slli a2, a2, 2
2802; RV64ZVE32F-NEXT:    add a2, a0, a2
2803; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2804; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2805; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2806; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
2807; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
2808; RV64ZVE32F-NEXT:  .LBB37_9: # %else14
2809; RV64ZVE32F-NEXT:    andi a2, a1, 64
2810; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2811; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
2812; RV64ZVE32F-NEXT:    beqz a2, .LBB37_11
2813; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
2814; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2815; RV64ZVE32F-NEXT:    andi a2, a2, 255
2816; RV64ZVE32F-NEXT:    slli a2, a2, 2
2817; RV64ZVE32F-NEXT:    add a2, a0, a2
2818; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2819; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2820; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2821; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
2822; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
2823; RV64ZVE32F-NEXT:  .LBB37_11: # %else17
2824; RV64ZVE32F-NEXT:    andi a1, a1, -128
2825; RV64ZVE32F-NEXT:    beqz a1, .LBB37_13
2826; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
2827; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2828; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2829; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
2830; RV64ZVE32F-NEXT:    andi a1, a1, 255
2831; RV64ZVE32F-NEXT:    slli a1, a1, 2
2832; RV64ZVE32F-NEXT:    add a0, a0, a1
2833; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2834; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2835; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
2836; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2837; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
2838; RV64ZVE32F-NEXT:  .LBB37_13: # %else20
2839; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2840; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
2841; RV64ZVE32F-NEXT:    ret
2842; RV64ZVE32F-NEXT:  .LBB37_14: # %cond.load4
2843; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2844; RV64ZVE32F-NEXT:    andi a2, a2, 255
2845; RV64ZVE32F-NEXT:    slli a2, a2, 2
2846; RV64ZVE32F-NEXT:    add a2, a0, a2
2847; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2848; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2849; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2850; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
2851; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
2852; RV64ZVE32F-NEXT:    andi a2, a1, 8
2853; RV64ZVE32F-NEXT:    beqz a2, .LBB37_6
2854; RV64ZVE32F-NEXT:  .LBB37_15: # %cond.load7
2855; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
2856; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2857; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2858; RV64ZVE32F-NEXT:    andi a2, a2, 255
2859; RV64ZVE32F-NEXT:    slli a2, a2, 2
2860; RV64ZVE32F-NEXT:    add a2, a0, a2
2861; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2862; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2863; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
2864; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
2865; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
2866; RV64ZVE32F-NEXT:    andi a2, a1, 16
2867; RV64ZVE32F-NEXT:    beqz a2, .LBB37_7
2868; RV64ZVE32F-NEXT:  .LBB37_16: # %cond.load10
2869; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2870; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
2871; RV64ZVE32F-NEXT:    andi a2, a2, 255
2872; RV64ZVE32F-NEXT:    slli a2, a2, 2
2873; RV64ZVE32F-NEXT:    add a2, a0, a2
2874; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2875; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
2876; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2877; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
2878; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
2879; RV64ZVE32F-NEXT:    andi a2, a1, 32
2880; RV64ZVE32F-NEXT:    bnez a2, .LBB37_8
2881; RV64ZVE32F-NEXT:    j .LBB37_9
2882  %eidxs = zext <8 x i8> %idxs to <8 x i32>
2883  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2884  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
2885  ret <8 x i32> %v
2886}
2887
2888define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2889; RV32-LABEL: mgather_baseidx_v8i16_v8i32:
2890; RV32:       # %bb.0:
2891; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
2892; RV32-NEXT:    vsext.vf2 v12, v8
2893; RV32-NEXT:    vsll.vi v8, v12, 2
2894; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
2895; RV32-NEXT:    vmv.v.v v8, v10
2896; RV32-NEXT:    ret
2897;
2898; RV64V-LABEL: mgather_baseidx_v8i16_v8i32:
2899; RV64V:       # %bb.0:
2900; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
2901; RV64V-NEXT:    vsext.vf4 v12, v8
2902; RV64V-NEXT:    vsll.vi v12, v12, 2
2903; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
2904; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
2905; RV64V-NEXT:    vmv.v.v v8, v10
2906; RV64V-NEXT:    ret
2907;
2908; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i32:
2909; RV64ZVE32F:       # %bb.0:
2910; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2911; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
2912; RV64ZVE32F-NEXT:    andi a2, a1, 1
2913; RV64ZVE32F-NEXT:    beqz a2, .LBB38_2
2914; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
2915; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
2916; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2917; RV64ZVE32F-NEXT:    slli a2, a2, 2
2918; RV64ZVE32F-NEXT:    add a2, a0, a2
2919; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2920; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
2921; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
2922; RV64ZVE32F-NEXT:  .LBB38_2: # %else
2923; RV64ZVE32F-NEXT:    andi a2, a1, 2
2924; RV64ZVE32F-NEXT:    beqz a2, .LBB38_4
2925; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
2926; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
2927; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
2928; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
2929; RV64ZVE32F-NEXT:    slli a2, a2, 2
2930; RV64ZVE32F-NEXT:    add a2, a0, a2
2931; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2932; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2933; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
2934; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
2935; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
2936; RV64ZVE32F-NEXT:  .LBB38_4: # %else2
2937; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
2938; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
2939; RV64ZVE32F-NEXT:    andi a2, a1, 4
2940; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
2941; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
2942; RV64ZVE32F-NEXT:    bnez a2, .LBB38_14
2943; RV64ZVE32F-NEXT:  # %bb.5: # %else5
2944; RV64ZVE32F-NEXT:    andi a2, a1, 8
2945; RV64ZVE32F-NEXT:    bnez a2, .LBB38_15
2946; RV64ZVE32F-NEXT:  .LBB38_6: # %else8
2947; RV64ZVE32F-NEXT:    andi a2, a1, 16
2948; RV64ZVE32F-NEXT:    bnez a2, .LBB38_16
2949; RV64ZVE32F-NEXT:  .LBB38_7: # %else11
2950; RV64ZVE32F-NEXT:    andi a2, a1, 32
2951; RV64ZVE32F-NEXT:    beqz a2, .LBB38_9
2952; RV64ZVE32F-NEXT:  .LBB38_8: # %cond.load13
2953; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
2954; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
2955; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2956; RV64ZVE32F-NEXT:    slli a2, a2, 2
2957; RV64ZVE32F-NEXT:    add a2, a0, a2
2958; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2959; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2960; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2961; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
2962; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
2963; RV64ZVE32F-NEXT:  .LBB38_9: # %else14
2964; RV64ZVE32F-NEXT:    andi a2, a1, 64
2965; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
2966; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
2967; RV64ZVE32F-NEXT:    beqz a2, .LBB38_11
2968; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
2969; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2970; RV64ZVE32F-NEXT:    slli a2, a2, 2
2971; RV64ZVE32F-NEXT:    add a2, a0, a2
2972; RV64ZVE32F-NEXT:    lw a2, 0(a2)
2973; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2974; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
2975; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
2976; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
2977; RV64ZVE32F-NEXT:  .LBB38_11: # %else17
2978; RV64ZVE32F-NEXT:    andi a1, a1, -128
2979; RV64ZVE32F-NEXT:    beqz a1, .LBB38_13
2980; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
2981; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
2982; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
2983; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
2984; RV64ZVE32F-NEXT:    slli a1, a1, 2
2985; RV64ZVE32F-NEXT:    add a0, a0, a1
2986; RV64ZVE32F-NEXT:    lw a0, 0(a0)
2987; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
2988; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
2989; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
2990; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
2991; RV64ZVE32F-NEXT:  .LBB38_13: # %else20
2992; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2993; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
2994; RV64ZVE32F-NEXT:    ret
2995; RV64ZVE32F-NEXT:  .LBB38_14: # %cond.load4
2996; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
2997; RV64ZVE32F-NEXT:    slli a2, a2, 2
2998; RV64ZVE32F-NEXT:    add a2, a0, a2
2999; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3000; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3001; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3002; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
3003; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
3004; RV64ZVE32F-NEXT:    andi a2, a1, 8
3005; RV64ZVE32F-NEXT:    beqz a2, .LBB38_6
3006; RV64ZVE32F-NEXT:  .LBB38_15: # %cond.load7
3007; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3008; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
3009; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3010; RV64ZVE32F-NEXT:    slli a2, a2, 2
3011; RV64ZVE32F-NEXT:    add a2, a0, a2
3012; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3013; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3014; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
3015; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
3016; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
3017; RV64ZVE32F-NEXT:    andi a2, a1, 16
3018; RV64ZVE32F-NEXT:    beqz a2, .LBB38_7
3019; RV64ZVE32F-NEXT:  .LBB38_16: # %cond.load10
3020; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
3021; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
3022; RV64ZVE32F-NEXT:    slli a2, a2, 2
3023; RV64ZVE32F-NEXT:    add a2, a0, a2
3024; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3025; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3026; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3027; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
3028; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
3029; RV64ZVE32F-NEXT:    andi a2, a1, 32
3030; RV64ZVE32F-NEXT:    bnez a2, .LBB38_8
3031; RV64ZVE32F-NEXT:    j .LBB38_9
3032  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
3033  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
3034  ret <8 x i32> %v
3035}
3036
3037define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3038; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3039; RV32:       # %bb.0:
3040; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
3041; RV32-NEXT:    vsext.vf2 v12, v8
3042; RV32-NEXT:    vsll.vi v8, v12, 2
3043; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
3044; RV32-NEXT:    vmv.v.v v8, v10
3045; RV32-NEXT:    ret
3046;
3047; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3048; RV64V:       # %bb.0:
3049; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
3050; RV64V-NEXT:    vsext.vf4 v12, v8
3051; RV64V-NEXT:    vsll.vi v12, v12, 2
3052; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
3053; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
3054; RV64V-NEXT:    vmv.v.v v8, v10
3055; RV64V-NEXT:    ret
3056;
3057; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3058; RV64ZVE32F:       # %bb.0:
3059; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3060; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
3061; RV64ZVE32F-NEXT:    andi a2, a1, 1
3062; RV64ZVE32F-NEXT:    beqz a2, .LBB39_2
3063; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
3064; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
3065; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3066; RV64ZVE32F-NEXT:    slli a2, a2, 2
3067; RV64ZVE32F-NEXT:    add a2, a0, a2
3068; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3069; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
3070; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
3071; RV64ZVE32F-NEXT:  .LBB39_2: # %else
3072; RV64ZVE32F-NEXT:    andi a2, a1, 2
3073; RV64ZVE32F-NEXT:    beqz a2, .LBB39_4
3074; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
3075; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3076; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
3077; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
3078; RV64ZVE32F-NEXT:    slli a2, a2, 2
3079; RV64ZVE32F-NEXT:    add a2, a0, a2
3080; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3081; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3082; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
3083; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
3084; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
3085; RV64ZVE32F-NEXT:  .LBB39_4: # %else2
3086; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
3087; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
3088; RV64ZVE32F-NEXT:    andi a2, a1, 4
3089; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
3090; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
3091; RV64ZVE32F-NEXT:    bnez a2, .LBB39_14
3092; RV64ZVE32F-NEXT:  # %bb.5: # %else5
3093; RV64ZVE32F-NEXT:    andi a2, a1, 8
3094; RV64ZVE32F-NEXT:    bnez a2, .LBB39_15
3095; RV64ZVE32F-NEXT:  .LBB39_6: # %else8
3096; RV64ZVE32F-NEXT:    andi a2, a1, 16
3097; RV64ZVE32F-NEXT:    bnez a2, .LBB39_16
3098; RV64ZVE32F-NEXT:  .LBB39_7: # %else11
3099; RV64ZVE32F-NEXT:    andi a2, a1, 32
3100; RV64ZVE32F-NEXT:    beqz a2, .LBB39_9
3101; RV64ZVE32F-NEXT:  .LBB39_8: # %cond.load13
3102; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3103; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
3104; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3105; RV64ZVE32F-NEXT:    slli a2, a2, 2
3106; RV64ZVE32F-NEXT:    add a2, a0, a2
3107; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3108; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3109; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3110; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
3111; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
3112; RV64ZVE32F-NEXT:  .LBB39_9: # %else14
3113; RV64ZVE32F-NEXT:    andi a2, a1, 64
3114; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
3115; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
3116; RV64ZVE32F-NEXT:    beqz a2, .LBB39_11
3117; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
3118; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3119; RV64ZVE32F-NEXT:    slli a2, a2, 2
3120; RV64ZVE32F-NEXT:    add a2, a0, a2
3121; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3122; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3123; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3124; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
3125; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
3126; RV64ZVE32F-NEXT:  .LBB39_11: # %else17
3127; RV64ZVE32F-NEXT:    andi a1, a1, -128
3128; RV64ZVE32F-NEXT:    beqz a1, .LBB39_13
3129; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
3130; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3131; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
3132; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
3133; RV64ZVE32F-NEXT:    slli a1, a1, 2
3134; RV64ZVE32F-NEXT:    add a0, a0, a1
3135; RV64ZVE32F-NEXT:    lw a0, 0(a0)
3136; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3137; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
3138; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3139; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
3140; RV64ZVE32F-NEXT:  .LBB39_13: # %else20
3141; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3142; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
3143; RV64ZVE32F-NEXT:    ret
3144; RV64ZVE32F-NEXT:  .LBB39_14: # %cond.load4
3145; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3146; RV64ZVE32F-NEXT:    slli a2, a2, 2
3147; RV64ZVE32F-NEXT:    add a2, a0, a2
3148; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3149; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3150; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3151; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
3152; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
3153; RV64ZVE32F-NEXT:    andi a2, a1, 8
3154; RV64ZVE32F-NEXT:    beqz a2, .LBB39_6
3155; RV64ZVE32F-NEXT:  .LBB39_15: # %cond.load7
3156; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3157; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
3158; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3159; RV64ZVE32F-NEXT:    slli a2, a2, 2
3160; RV64ZVE32F-NEXT:    add a2, a0, a2
3161; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3162; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3163; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
3164; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
3165; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
3166; RV64ZVE32F-NEXT:    andi a2, a1, 16
3167; RV64ZVE32F-NEXT:    beqz a2, .LBB39_7
3168; RV64ZVE32F-NEXT:  .LBB39_16: # %cond.load10
3169; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
3170; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
3171; RV64ZVE32F-NEXT:    slli a2, a2, 2
3172; RV64ZVE32F-NEXT:    add a2, a0, a2
3173; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3174; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3175; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3176; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
3177; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
3178; RV64ZVE32F-NEXT:    andi a2, a1, 32
3179; RV64ZVE32F-NEXT:    bnez a2, .LBB39_8
3180; RV64ZVE32F-NEXT:    j .LBB39_9
3181  %eidxs = sext <8 x i16> %idxs to <8 x i32>
3182  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
3183  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
3184  ret <8 x i32> %v
3185}
3186
3187define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3188; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3189; RV32:       # %bb.0:
3190; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
3191; RV32-NEXT:    vzext.vf2 v12, v8
3192; RV32-NEXT:    vsll.vi v8, v12, 2
3193; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
3194; RV32-NEXT:    vmv.v.v v8, v10
3195; RV32-NEXT:    ret
3196;
3197; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3198; RV64V:       # %bb.0:
3199; RV64V-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
3200; RV64V-NEXT:    vzext.vf2 v12, v8
3201; RV64V-NEXT:    vsll.vi v8, v12, 2
3202; RV64V-NEXT:    vluxei32.v v10, (a0), v8, v0.t
3203; RV64V-NEXT:    vmv.v.v v8, v10
3204; RV64V-NEXT:    ret
3205;
3206; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3207; RV64ZVE32F:       # %bb.0:
3208; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3209; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
3210; RV64ZVE32F-NEXT:    andi a2, a1, 1
3211; RV64ZVE32F-NEXT:    beqz a2, .LBB40_2
3212; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
3213; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
3214; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3215; RV64ZVE32F-NEXT:    slli a2, a2, 48
3216; RV64ZVE32F-NEXT:    srli a2, a2, 46
3217; RV64ZVE32F-NEXT:    add a2, a0, a2
3218; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3219; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
3220; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
3221; RV64ZVE32F-NEXT:  .LBB40_2: # %else
3222; RV64ZVE32F-NEXT:    andi a2, a1, 2
3223; RV64ZVE32F-NEXT:    beqz a2, .LBB40_4
3224; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
3225; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3226; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
3227; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
3228; RV64ZVE32F-NEXT:    slli a2, a2, 48
3229; RV64ZVE32F-NEXT:    srli a2, a2, 46
3230; RV64ZVE32F-NEXT:    add a2, a0, a2
3231; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3232; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3233; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
3234; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
3235; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
3236; RV64ZVE32F-NEXT:  .LBB40_4: # %else2
3237; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
3238; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
3239; RV64ZVE32F-NEXT:    andi a2, a1, 4
3240; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
3241; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
3242; RV64ZVE32F-NEXT:    bnez a2, .LBB40_14
3243; RV64ZVE32F-NEXT:  # %bb.5: # %else5
3244; RV64ZVE32F-NEXT:    andi a2, a1, 8
3245; RV64ZVE32F-NEXT:    bnez a2, .LBB40_15
3246; RV64ZVE32F-NEXT:  .LBB40_6: # %else8
3247; RV64ZVE32F-NEXT:    andi a2, a1, 16
3248; RV64ZVE32F-NEXT:    bnez a2, .LBB40_16
3249; RV64ZVE32F-NEXT:  .LBB40_7: # %else11
3250; RV64ZVE32F-NEXT:    andi a2, a1, 32
3251; RV64ZVE32F-NEXT:    beqz a2, .LBB40_9
3252; RV64ZVE32F-NEXT:  .LBB40_8: # %cond.load13
3253; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3254; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
3255; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3256; RV64ZVE32F-NEXT:    slli a2, a2, 48
3257; RV64ZVE32F-NEXT:    srli a2, a2, 46
3258; RV64ZVE32F-NEXT:    add a2, a0, a2
3259; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3260; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3261; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3262; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
3263; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
3264; RV64ZVE32F-NEXT:  .LBB40_9: # %else14
3265; RV64ZVE32F-NEXT:    andi a2, a1, 64
3266; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
3267; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
3268; RV64ZVE32F-NEXT:    beqz a2, .LBB40_11
3269; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
3270; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3271; RV64ZVE32F-NEXT:    slli a2, a2, 48
3272; RV64ZVE32F-NEXT:    srli a2, a2, 46
3273; RV64ZVE32F-NEXT:    add a2, a0, a2
3274; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3275; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3276; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3277; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
3278; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
3279; RV64ZVE32F-NEXT:  .LBB40_11: # %else17
3280; RV64ZVE32F-NEXT:    andi a1, a1, -128
3281; RV64ZVE32F-NEXT:    beqz a1, .LBB40_13
3282; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
3283; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3284; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
3285; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
3286; RV64ZVE32F-NEXT:    slli a1, a1, 48
3287; RV64ZVE32F-NEXT:    srli a1, a1, 46
3288; RV64ZVE32F-NEXT:    add a0, a0, a1
3289; RV64ZVE32F-NEXT:    lw a0, 0(a0)
3290; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3291; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
3292; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3293; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
3294; RV64ZVE32F-NEXT:  .LBB40_13: # %else20
3295; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3296; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
3297; RV64ZVE32F-NEXT:    ret
3298; RV64ZVE32F-NEXT:  .LBB40_14: # %cond.load4
3299; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3300; RV64ZVE32F-NEXT:    slli a2, a2, 48
3301; RV64ZVE32F-NEXT:    srli a2, a2, 46
3302; RV64ZVE32F-NEXT:    add a2, a0, a2
3303; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3304; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3305; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3306; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
3307; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
3308; RV64ZVE32F-NEXT:    andi a2, a1, 8
3309; RV64ZVE32F-NEXT:    beqz a2, .LBB40_6
3310; RV64ZVE32F-NEXT:  .LBB40_15: # %cond.load7
3311; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
3312; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
3313; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3314; RV64ZVE32F-NEXT:    slli a2, a2, 48
3315; RV64ZVE32F-NEXT:    srli a2, a2, 46
3316; RV64ZVE32F-NEXT:    add a2, a0, a2
3317; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3318; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3319; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
3320; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
3321; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
3322; RV64ZVE32F-NEXT:    andi a2, a1, 16
3323; RV64ZVE32F-NEXT:    beqz a2, .LBB40_7
3324; RV64ZVE32F-NEXT:  .LBB40_16: # %cond.load10
3325; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
3326; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
3327; RV64ZVE32F-NEXT:    slli a2, a2, 48
3328; RV64ZVE32F-NEXT:    srli a2, a2, 46
3329; RV64ZVE32F-NEXT:    add a2, a0, a2
3330; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3331; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
3332; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3333; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
3334; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
3335; RV64ZVE32F-NEXT:    andi a2, a1, 32
3336; RV64ZVE32F-NEXT:    bnez a2, .LBB40_8
3337; RV64ZVE32F-NEXT:    j .LBB40_9
3338  %eidxs = zext <8 x i16> %idxs to <8 x i32>
3339  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
3340  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
3341  ret <8 x i32> %v
3342}
3343
3344define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3345; RV32-LABEL: mgather_baseidx_v8i32:
3346; RV32:       # %bb.0:
3347; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
3348; RV32-NEXT:    vsll.vi v8, v8, 2
3349; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
3350; RV32-NEXT:    vmv.v.v v8, v10
3351; RV32-NEXT:    ret
3352;
3353; RV64V-LABEL: mgather_baseidx_v8i32:
3354; RV64V:       # %bb.0:
3355; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
3356; RV64V-NEXT:    vsext.vf2 v12, v8
3357; RV64V-NEXT:    vsll.vi v12, v12, 2
3358; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
3359; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
3360; RV64V-NEXT:    vmv.v.v v8, v10
3361; RV64V-NEXT:    ret
3362;
3363; RV64ZVE32F-LABEL: mgather_baseidx_v8i32:
3364; RV64ZVE32F:       # %bb.0:
3365; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3366; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
3367; RV64ZVE32F-NEXT:    andi a2, a1, 1
3368; RV64ZVE32F-NEXT:    beqz a2, .LBB41_2
3369; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
3370; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
3371; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3372; RV64ZVE32F-NEXT:    slli a2, a2, 2
3373; RV64ZVE32F-NEXT:    add a2, a0, a2
3374; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3375; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
3376; RV64ZVE32F-NEXT:  .LBB41_2: # %else
3377; RV64ZVE32F-NEXT:    andi a2, a1, 2
3378; RV64ZVE32F-NEXT:    beqz a2, .LBB41_4
3379; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
3380; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
3381; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 1
3382; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
3383; RV64ZVE32F-NEXT:    slli a2, a2, 2
3384; RV64ZVE32F-NEXT:    add a2, a0, a2
3385; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3386; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3387; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 1
3388; RV64ZVE32F-NEXT:  .LBB41_4: # %else2
3389; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
3390; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
3391; RV64ZVE32F-NEXT:    andi a2, a1, 4
3392; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
3393; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
3394; RV64ZVE32F-NEXT:    bnez a2, .LBB41_14
3395; RV64ZVE32F-NEXT:  # %bb.5: # %else5
3396; RV64ZVE32F-NEXT:    andi a2, a1, 8
3397; RV64ZVE32F-NEXT:    bnez a2, .LBB41_15
3398; RV64ZVE32F-NEXT:  .LBB41_6: # %else8
3399; RV64ZVE32F-NEXT:    andi a2, a1, 16
3400; RV64ZVE32F-NEXT:    bnez a2, .LBB41_16
3401; RV64ZVE32F-NEXT:  .LBB41_7: # %else11
3402; RV64ZVE32F-NEXT:    andi a2, a1, 32
3403; RV64ZVE32F-NEXT:    beqz a2, .LBB41_9
3404; RV64ZVE32F-NEXT:  .LBB41_8: # %cond.load13
3405; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3406; RV64ZVE32F-NEXT:    vslidedown.vi v8, v12, 1
3407; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3408; RV64ZVE32F-NEXT:    slli a2, a2, 2
3409; RV64ZVE32F-NEXT:    add a2, a0, a2
3410; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3411; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
3412; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
3413; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
3414; RV64ZVE32F-NEXT:  .LBB41_9: # %else14
3415; RV64ZVE32F-NEXT:    andi a2, a1, 64
3416; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
3417; RV64ZVE32F-NEXT:    vslidedown.vi v8, v12, 2
3418; RV64ZVE32F-NEXT:    beqz a2, .LBB41_11
3419; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
3420; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3421; RV64ZVE32F-NEXT:    slli a2, a2, 2
3422; RV64ZVE32F-NEXT:    add a2, a0, a2
3423; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3424; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
3425; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
3426; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
3427; RV64ZVE32F-NEXT:  .LBB41_11: # %else17
3428; RV64ZVE32F-NEXT:    andi a1, a1, -128
3429; RV64ZVE32F-NEXT:    beqz a1, .LBB41_13
3430; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
3431; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3432; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
3433; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
3434; RV64ZVE32F-NEXT:    slli a1, a1, 2
3435; RV64ZVE32F-NEXT:    add a0, a0, a1
3436; RV64ZVE32F-NEXT:    lw a0, 0(a0)
3437; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
3438; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
3439; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
3440; RV64ZVE32F-NEXT:  .LBB41_13: # %else20
3441; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3442; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
3443; RV64ZVE32F-NEXT:    ret
3444; RV64ZVE32F-NEXT:  .LBB41_14: # %cond.load4
3445; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3446; RV64ZVE32F-NEXT:    slli a2, a2, 2
3447; RV64ZVE32F-NEXT:    add a2, a0, a2
3448; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3449; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
3450; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
3451; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 2
3452; RV64ZVE32F-NEXT:    andi a2, a1, 8
3453; RV64ZVE32F-NEXT:    beqz a2, .LBB41_6
3454; RV64ZVE32F-NEXT:  .LBB41_15: # %cond.load7
3455; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
3456; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
3457; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
3458; RV64ZVE32F-NEXT:    slli a2, a2, 2
3459; RV64ZVE32F-NEXT:    add a2, a0, a2
3460; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3461; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
3462; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
3463; RV64ZVE32F-NEXT:    andi a2, a1, 16
3464; RV64ZVE32F-NEXT:    beqz a2, .LBB41_7
3465; RV64ZVE32F-NEXT:  .LBB41_16: # %cond.load10
3466; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
3467; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
3468; RV64ZVE32F-NEXT:    slli a2, a2, 2
3469; RV64ZVE32F-NEXT:    add a2, a0, a2
3470; RV64ZVE32F-NEXT:    lw a2, 0(a2)
3471; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
3472; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
3473; RV64ZVE32F-NEXT:    andi a2, a1, 32
3474; RV64ZVE32F-NEXT:    bnez a2, .LBB41_8
3475; RV64ZVE32F-NEXT:    j .LBB41_9
3476  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
3477  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
3478  ret <8 x i32> %v
3479}
3480
3481declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
3482
3483define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthru) {
3484; RV32V-LABEL: mgather_v1i64:
3485; RV32V:       # %bb.0:
3486; RV32V-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
3487; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
3488; RV32V-NEXT:    vmv.v.v v8, v9
3489; RV32V-NEXT:    ret
3490;
3491; RV64V-LABEL: mgather_v1i64:
3492; RV64V:       # %bb.0:
3493; RV64V-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
3494; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
3495; RV64V-NEXT:    vmv.v.v v8, v9
3496; RV64V-NEXT:    ret
3497;
3498; RV32ZVE32F-LABEL: mgather_v1i64:
3499; RV32ZVE32F:       # %bb.0:
3500; RV32ZVE32F-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
3501; RV32ZVE32F-NEXT:    vfirst.m a2, v0
3502; RV32ZVE32F-NEXT:    bnez a2, .LBB42_2
3503; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
3504; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
3505; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
3506; RV32ZVE32F-NEXT:    lw a0, 0(a1)
3507; RV32ZVE32F-NEXT:    lw a1, 4(a1)
3508; RV32ZVE32F-NEXT:  .LBB42_2: # %else
3509; RV32ZVE32F-NEXT:    ret
3510;
3511; RV64ZVE32F-LABEL: mgather_v1i64:
3512; RV64ZVE32F:       # %bb.0:
3513; RV64ZVE32F-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
3514; RV64ZVE32F-NEXT:    vfirst.m a2, v0
3515; RV64ZVE32F-NEXT:    bnez a2, .LBB42_2
3516; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
3517; RV64ZVE32F-NEXT:    ld a1, 0(a0)
3518; RV64ZVE32F-NEXT:  .LBB42_2: # %else
3519; RV64ZVE32F-NEXT:    mv a0, a1
3520; RV64ZVE32F-NEXT:    ret
3521  %v = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x i64> %passthru)
3522  ret <1 x i64> %v
3523}
3524
3525declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
3526
3527define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthru) {
3528; RV32V-LABEL: mgather_v2i64:
3529; RV32V:       # %bb.0:
3530; RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
3531; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
3532; RV32V-NEXT:    vmv.v.v v8, v9
3533; RV32V-NEXT:    ret
3534;
3535; RV64V-LABEL: mgather_v2i64:
3536; RV64V:       # %bb.0:
3537; RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
3538; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
3539; RV64V-NEXT:    vmv.v.v v8, v9
3540; RV64V-NEXT:    ret
3541;
3542; RV32ZVE32F-LABEL: mgather_v2i64:
3543; RV32ZVE32F:       # %bb.0:
3544; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3545; RV32ZVE32F-NEXT:    vmv.x.s a4, v0
3546; RV32ZVE32F-NEXT:    andi a2, a4, 1
3547; RV32ZVE32F-NEXT:    beqz a2, .LBB43_3
3548; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
3549; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
3550; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
3551; RV32ZVE32F-NEXT:    lw a2, 0(a3)
3552; RV32ZVE32F-NEXT:    lw a3, 4(a3)
3553; RV32ZVE32F-NEXT:    andi a4, a4, 2
3554; RV32ZVE32F-NEXT:    bnez a4, .LBB43_4
3555; RV32ZVE32F-NEXT:  .LBB43_2:
3556; RV32ZVE32F-NEXT:    lw a4, 8(a1)
3557; RV32ZVE32F-NEXT:    lw a1, 12(a1)
3558; RV32ZVE32F-NEXT:    j .LBB43_5
3559; RV32ZVE32F-NEXT:  .LBB43_3:
3560; RV32ZVE32F-NEXT:    lw a2, 0(a1)
3561; RV32ZVE32F-NEXT:    lw a3, 4(a1)
3562; RV32ZVE32F-NEXT:    andi a4, a4, 2
3563; RV32ZVE32F-NEXT:    beqz a4, .LBB43_2
3564; RV32ZVE32F-NEXT:  .LBB43_4: # %cond.load1
3565; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3566; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
3567; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
3568; RV32ZVE32F-NEXT:    lw a4, 0(a1)
3569; RV32ZVE32F-NEXT:    lw a1, 4(a1)
3570; RV32ZVE32F-NEXT:  .LBB43_5: # %else2
3571; RV32ZVE32F-NEXT:    sw a2, 0(a0)
3572; RV32ZVE32F-NEXT:    sw a3, 4(a0)
3573; RV32ZVE32F-NEXT:    sw a4, 8(a0)
3574; RV32ZVE32F-NEXT:    sw a1, 12(a0)
3575; RV32ZVE32F-NEXT:    ret
3576;
3577; RV64ZVE32F-LABEL: mgather_v2i64:
3578; RV64ZVE32F:       # %bb.0:
3579; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3580; RV64ZVE32F-NEXT:    vmv.x.s a4, v0
3581; RV64ZVE32F-NEXT:    andi a5, a4, 1
3582; RV64ZVE32F-NEXT:    beqz a5, .LBB43_2
3583; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
3584; RV64ZVE32F-NEXT:    ld a2, 0(a0)
3585; RV64ZVE32F-NEXT:  .LBB43_2: # %else
3586; RV64ZVE32F-NEXT:    andi a4, a4, 2
3587; RV64ZVE32F-NEXT:    beqz a4, .LBB43_4
3588; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
3589; RV64ZVE32F-NEXT:    ld a3, 0(a1)
3590; RV64ZVE32F-NEXT:  .LBB43_4: # %else2
3591; RV64ZVE32F-NEXT:    mv a0, a2
3592; RV64ZVE32F-NEXT:    mv a1, a3
3593; RV64ZVE32F-NEXT:    ret
3594  %v = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x i64> %passthru)
3595  ret <2 x i64> %v
3596}
3597
3598declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
3599
3600define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthru) {
3601; RV32V-LABEL: mgather_v4i64:
3602; RV32V:       # %bb.0:
3603; RV32V-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
3604; RV32V-NEXT:    vluxei32.v v10, (zero), v8, v0.t
3605; RV32V-NEXT:    vmv.v.v v8, v10
3606; RV32V-NEXT:    ret
3607;
3608; RV64V-LABEL: mgather_v4i64:
3609; RV64V:       # %bb.0:
3610; RV64V-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
3611; RV64V-NEXT:    vluxei64.v v10, (zero), v8, v0.t
3612; RV64V-NEXT:    vmv.v.v v8, v10
3613; RV64V-NEXT:    ret
3614;
3615; RV32ZVE32F-LABEL: mgather_v4i64:
3616; RV32ZVE32F:       # %bb.0:
3617; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3618; RV32ZVE32F-NEXT:    vmv.x.s a6, v0
3619; RV32ZVE32F-NEXT:    andi a2, a6, 1
3620; RV32ZVE32F-NEXT:    beqz a2, .LBB44_5
3621; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
3622; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
3623; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
3624; RV32ZVE32F-NEXT:    lw a2, 0(a3)
3625; RV32ZVE32F-NEXT:    lw a3, 4(a3)
3626; RV32ZVE32F-NEXT:    andi a4, a6, 2
3627; RV32ZVE32F-NEXT:    bnez a4, .LBB44_6
3628; RV32ZVE32F-NEXT:  .LBB44_2:
3629; RV32ZVE32F-NEXT:    lw a4, 8(a1)
3630; RV32ZVE32F-NEXT:    lw a5, 12(a1)
3631; RV32ZVE32F-NEXT:    andi a7, a6, 4
3632; RV32ZVE32F-NEXT:    bnez a7, .LBB44_7
3633; RV32ZVE32F-NEXT:  .LBB44_3:
3634; RV32ZVE32F-NEXT:    lw a7, 16(a1)
3635; RV32ZVE32F-NEXT:    lw t0, 20(a1)
3636; RV32ZVE32F-NEXT:    andi a6, a6, 8
3637; RV32ZVE32F-NEXT:    bnez a6, .LBB44_8
3638; RV32ZVE32F-NEXT:  .LBB44_4:
3639; RV32ZVE32F-NEXT:    lw a6, 24(a1)
3640; RV32ZVE32F-NEXT:    lw a1, 28(a1)
3641; RV32ZVE32F-NEXT:    j .LBB44_9
3642; RV32ZVE32F-NEXT:  .LBB44_5:
3643; RV32ZVE32F-NEXT:    lw a2, 0(a1)
3644; RV32ZVE32F-NEXT:    lw a3, 4(a1)
3645; RV32ZVE32F-NEXT:    andi a4, a6, 2
3646; RV32ZVE32F-NEXT:    beqz a4, .LBB44_2
3647; RV32ZVE32F-NEXT:  .LBB44_6: # %cond.load1
3648; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3649; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
3650; RV32ZVE32F-NEXT:    vmv.x.s a5, v9
3651; RV32ZVE32F-NEXT:    lw a4, 0(a5)
3652; RV32ZVE32F-NEXT:    lw a5, 4(a5)
3653; RV32ZVE32F-NEXT:    andi a7, a6, 4
3654; RV32ZVE32F-NEXT:    beqz a7, .LBB44_3
3655; RV32ZVE32F-NEXT:  .LBB44_7: # %cond.load4
3656; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3657; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
3658; RV32ZVE32F-NEXT:    vmv.x.s t0, v9
3659; RV32ZVE32F-NEXT:    lw a7, 0(t0)
3660; RV32ZVE32F-NEXT:    lw t0, 4(t0)
3661; RV32ZVE32F-NEXT:    andi a6, a6, 8
3662; RV32ZVE32F-NEXT:    beqz a6, .LBB44_4
3663; RV32ZVE32F-NEXT:  .LBB44_8: # %cond.load7
3664; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3665; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
3666; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
3667; RV32ZVE32F-NEXT:    lw a6, 0(a1)
3668; RV32ZVE32F-NEXT:    lw a1, 4(a1)
3669; RV32ZVE32F-NEXT:  .LBB44_9: # %else8
3670; RV32ZVE32F-NEXT:    sw a2, 0(a0)
3671; RV32ZVE32F-NEXT:    sw a3, 4(a0)
3672; RV32ZVE32F-NEXT:    sw a4, 8(a0)
3673; RV32ZVE32F-NEXT:    sw a5, 12(a0)
3674; RV32ZVE32F-NEXT:    sw a7, 16(a0)
3675; RV32ZVE32F-NEXT:    sw t0, 20(a0)
3676; RV32ZVE32F-NEXT:    sw a6, 24(a0)
3677; RV32ZVE32F-NEXT:    sw a1, 28(a0)
3678; RV32ZVE32F-NEXT:    ret
3679;
3680; RV64ZVE32F-LABEL: mgather_v4i64:
3681; RV64ZVE32F:       # %bb.0:
3682; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3683; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
3684; RV64ZVE32F-NEXT:    andi a3, a5, 1
3685; RV64ZVE32F-NEXT:    beqz a3, .LBB44_5
3686; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
3687; RV64ZVE32F-NEXT:    ld a3, 0(a1)
3688; RV64ZVE32F-NEXT:    ld a3, 0(a3)
3689; RV64ZVE32F-NEXT:    andi a4, a5, 2
3690; RV64ZVE32F-NEXT:    bnez a4, .LBB44_6
3691; RV64ZVE32F-NEXT:  .LBB44_2:
3692; RV64ZVE32F-NEXT:    ld a4, 8(a2)
3693; RV64ZVE32F-NEXT:    andi a6, a5, 4
3694; RV64ZVE32F-NEXT:    bnez a6, .LBB44_7
3695; RV64ZVE32F-NEXT:  .LBB44_3:
3696; RV64ZVE32F-NEXT:    ld a6, 16(a2)
3697; RV64ZVE32F-NEXT:    andi a5, a5, 8
3698; RV64ZVE32F-NEXT:    bnez a5, .LBB44_8
3699; RV64ZVE32F-NEXT:  .LBB44_4:
3700; RV64ZVE32F-NEXT:    ld a1, 24(a2)
3701; RV64ZVE32F-NEXT:    j .LBB44_9
3702; RV64ZVE32F-NEXT:  .LBB44_5:
3703; RV64ZVE32F-NEXT:    ld a3, 0(a2)
3704; RV64ZVE32F-NEXT:    andi a4, a5, 2
3705; RV64ZVE32F-NEXT:    beqz a4, .LBB44_2
3706; RV64ZVE32F-NEXT:  .LBB44_6: # %cond.load1
3707; RV64ZVE32F-NEXT:    ld a4, 8(a1)
3708; RV64ZVE32F-NEXT:    ld a4, 0(a4)
3709; RV64ZVE32F-NEXT:    andi a6, a5, 4
3710; RV64ZVE32F-NEXT:    beqz a6, .LBB44_3
3711; RV64ZVE32F-NEXT:  .LBB44_7: # %cond.load4
3712; RV64ZVE32F-NEXT:    ld a6, 16(a1)
3713; RV64ZVE32F-NEXT:    ld a6, 0(a6)
3714; RV64ZVE32F-NEXT:    andi a5, a5, 8
3715; RV64ZVE32F-NEXT:    beqz a5, .LBB44_4
3716; RV64ZVE32F-NEXT:  .LBB44_8: # %cond.load7
3717; RV64ZVE32F-NEXT:    ld a1, 24(a1)
3718; RV64ZVE32F-NEXT:    ld a1, 0(a1)
3719; RV64ZVE32F-NEXT:  .LBB44_9: # %else8
3720; RV64ZVE32F-NEXT:    sd a3, 0(a0)
3721; RV64ZVE32F-NEXT:    sd a4, 8(a0)
3722; RV64ZVE32F-NEXT:    sd a6, 16(a0)
3723; RV64ZVE32F-NEXT:    sd a1, 24(a0)
3724; RV64ZVE32F-NEXT:    ret
3725  %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x i64> %passthru)
3726  ret <4 x i64> %v
3727}
3728
3729define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
3730; RV32V-LABEL: mgather_truemask_v4i64:
3731; RV32V:       # %bb.0:
3732; RV32V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
3733; RV32V-NEXT:    vluxei32.v v10, (zero), v8
3734; RV32V-NEXT:    vmv.v.v v8, v10
3735; RV32V-NEXT:    ret
3736;
3737; RV64V-LABEL: mgather_truemask_v4i64:
3738; RV64V:       # %bb.0:
3739; RV64V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
3740; RV64V-NEXT:    vluxei64.v v8, (zero), v8
3741; RV64V-NEXT:    ret
3742;
3743; RV32ZVE32F-LABEL: mgather_truemask_v4i64:
3744; RV32ZVE32F:       # %bb.0:
3745; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3746; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
3747; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
3748; RV32ZVE32F-NEXT:    vmv.x.s a2, v9
3749; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
3750; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
3751; RV32ZVE32F-NEXT:    lw a3, 0(a1)
3752; RV32ZVE32F-NEXT:    lw a1, 4(a1)
3753; RV32ZVE32F-NEXT:    vmv.x.s a4, v9
3754; RV32ZVE32F-NEXT:    vmv.x.s a5, v8
3755; RV32ZVE32F-NEXT:    lw a6, 0(a2)
3756; RV32ZVE32F-NEXT:    lw a2, 4(a2)
3757; RV32ZVE32F-NEXT:    lw a7, 0(a4)
3758; RV32ZVE32F-NEXT:    lw a4, 4(a4)
3759; RV32ZVE32F-NEXT:    lw t0, 0(a5)
3760; RV32ZVE32F-NEXT:    lw a5, 4(a5)
3761; RV32ZVE32F-NEXT:    sw a7, 16(a0)
3762; RV32ZVE32F-NEXT:    sw a4, 20(a0)
3763; RV32ZVE32F-NEXT:    sw t0, 24(a0)
3764; RV32ZVE32F-NEXT:    sw a5, 28(a0)
3765; RV32ZVE32F-NEXT:    sw a3, 0(a0)
3766; RV32ZVE32F-NEXT:    sw a1, 4(a0)
3767; RV32ZVE32F-NEXT:    sw a6, 8(a0)
3768; RV32ZVE32F-NEXT:    sw a2, 12(a0)
3769; RV32ZVE32F-NEXT:    ret
3770;
3771; RV64ZVE32F-LABEL: mgather_truemask_v4i64:
3772; RV64ZVE32F:       # %bb.0:
3773; RV64ZVE32F-NEXT:    ld a2, 0(a1)
3774; RV64ZVE32F-NEXT:    ld a3, 8(a1)
3775; RV64ZVE32F-NEXT:    ld a4, 16(a1)
3776; RV64ZVE32F-NEXT:    ld a1, 24(a1)
3777; RV64ZVE32F-NEXT:    ld a2, 0(a2)
3778; RV64ZVE32F-NEXT:    ld a3, 0(a3)
3779; RV64ZVE32F-NEXT:    ld a4, 0(a4)
3780; RV64ZVE32F-NEXT:    ld a1, 0(a1)
3781; RV64ZVE32F-NEXT:    sd a2, 0(a0)
3782; RV64ZVE32F-NEXT:    sd a3, 8(a0)
3783; RV64ZVE32F-NEXT:    sd a4, 16(a0)
3784; RV64ZVE32F-NEXT:    sd a1, 24(a0)
3785; RV64ZVE32F-NEXT:    ret
3786  %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x i64> %passthru)
3787  ret <4 x i64> %v
3788}
3789
3790define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
3791; RV32V-LABEL: mgather_falsemask_v4i64:
3792; RV32V:       # %bb.0:
3793; RV32V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3794; RV32V-NEXT:    vmv2r.v v8, v10
3795; RV32V-NEXT:    ret
3796;
3797; RV64V-LABEL: mgather_falsemask_v4i64:
3798; RV64V:       # %bb.0:
3799; RV64V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3800; RV64V-NEXT:    vmv2r.v v8, v10
3801; RV64V-NEXT:    ret
3802;
3803; RV32ZVE32F-LABEL: mgather_falsemask_v4i64:
3804; RV32ZVE32F:       # %bb.0:
3805; RV32ZVE32F-NEXT:    lw a2, 0(a1)
3806; RV32ZVE32F-NEXT:    lw a3, 4(a1)
3807; RV32ZVE32F-NEXT:    lw a4, 8(a1)
3808; RV32ZVE32F-NEXT:    lw a5, 12(a1)
3809; RV32ZVE32F-NEXT:    lw a6, 16(a1)
3810; RV32ZVE32F-NEXT:    lw a7, 20(a1)
3811; RV32ZVE32F-NEXT:    lw t0, 24(a1)
3812; RV32ZVE32F-NEXT:    lw a1, 28(a1)
3813; RV32ZVE32F-NEXT:    sw a6, 16(a0)
3814; RV32ZVE32F-NEXT:    sw a7, 20(a0)
3815; RV32ZVE32F-NEXT:    sw t0, 24(a0)
3816; RV32ZVE32F-NEXT:    sw a1, 28(a0)
3817; RV32ZVE32F-NEXT:    sw a2, 0(a0)
3818; RV32ZVE32F-NEXT:    sw a3, 4(a0)
3819; RV32ZVE32F-NEXT:    sw a4, 8(a0)
3820; RV32ZVE32F-NEXT:    sw a5, 12(a0)
3821; RV32ZVE32F-NEXT:    ret
3822;
3823; RV64ZVE32F-LABEL: mgather_falsemask_v4i64:
3824; RV64ZVE32F:       # %bb.0:
3825; RV64ZVE32F-NEXT:    ld a1, 0(a2)
3826; RV64ZVE32F-NEXT:    ld a3, 8(a2)
3827; RV64ZVE32F-NEXT:    ld a4, 16(a2)
3828; RV64ZVE32F-NEXT:    ld a2, 24(a2)
3829; RV64ZVE32F-NEXT:    sd a1, 0(a0)
3830; RV64ZVE32F-NEXT:    sd a3, 8(a0)
3831; RV64ZVE32F-NEXT:    sd a4, 16(a0)
3832; RV64ZVE32F-NEXT:    sd a2, 24(a0)
3833; RV64ZVE32F-NEXT:    ret
3834  %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x i64> %passthru)
3835  ret <4 x i64> %v
3836}
3837
3838declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
3839
3840define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthru) {
3841; RV32V-LABEL: mgather_v8i64:
3842; RV32V:       # %bb.0:
3843; RV32V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
3844; RV32V-NEXT:    vluxei32.v v12, (zero), v8, v0.t
3845; RV32V-NEXT:    vmv.v.v v8, v12
3846; RV32V-NEXT:    ret
3847;
3848; RV64V-LABEL: mgather_v8i64:
3849; RV64V:       # %bb.0:
3850; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
3851; RV64V-NEXT:    vluxei64.v v12, (zero), v8, v0.t
3852; RV64V-NEXT:    vmv.v.v v8, v12
3853; RV64V-NEXT:    ret
3854;
3855; RV32ZVE32F-LABEL: mgather_v8i64:
3856; RV32ZVE32F:       # %bb.0:
3857; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3858; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
3859; RV32ZVE32F-NEXT:    andi a2, t0, 1
3860; RV32ZVE32F-NEXT:    beqz a2, .LBB47_7
3861; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
3862; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
3863; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
3864; RV32ZVE32F-NEXT:    lw a2, 0(a3)
3865; RV32ZVE32F-NEXT:    lw a3, 4(a3)
3866; RV32ZVE32F-NEXT:    andi a4, t0, 2
3867; RV32ZVE32F-NEXT:    bnez a4, .LBB47_8
3868; RV32ZVE32F-NEXT:  .LBB47_2:
3869; RV32ZVE32F-NEXT:    lw a4, 8(a1)
3870; RV32ZVE32F-NEXT:    lw a5, 12(a1)
3871; RV32ZVE32F-NEXT:    andi a6, t0, 4
3872; RV32ZVE32F-NEXT:    bnez a6, .LBB47_9
3873; RV32ZVE32F-NEXT:  .LBB47_3:
3874; RV32ZVE32F-NEXT:    lw a6, 16(a1)
3875; RV32ZVE32F-NEXT:    lw a7, 20(a1)
3876; RV32ZVE32F-NEXT:    andi t1, t0, 8
3877; RV32ZVE32F-NEXT:    bnez t1, .LBB47_10
3878; RV32ZVE32F-NEXT:  .LBB47_4:
3879; RV32ZVE32F-NEXT:    lw t1, 24(a1)
3880; RV32ZVE32F-NEXT:    lw t2, 28(a1)
3881; RV32ZVE32F-NEXT:    andi t3, t0, 16
3882; RV32ZVE32F-NEXT:    bnez t3, .LBB47_11
3883; RV32ZVE32F-NEXT:  .LBB47_5:
3884; RV32ZVE32F-NEXT:    lw t3, 32(a1)
3885; RV32ZVE32F-NEXT:    lw t4, 36(a1)
3886; RV32ZVE32F-NEXT:    andi t5, t0, 32
3887; RV32ZVE32F-NEXT:    bnez t5, .LBB47_12
3888; RV32ZVE32F-NEXT:  .LBB47_6:
3889; RV32ZVE32F-NEXT:    lw t5, 40(a1)
3890; RV32ZVE32F-NEXT:    lw t6, 44(a1)
3891; RV32ZVE32F-NEXT:    j .LBB47_13
3892; RV32ZVE32F-NEXT:  .LBB47_7:
3893; RV32ZVE32F-NEXT:    lw a2, 0(a1)
3894; RV32ZVE32F-NEXT:    lw a3, 4(a1)
3895; RV32ZVE32F-NEXT:    andi a4, t0, 2
3896; RV32ZVE32F-NEXT:    beqz a4, .LBB47_2
3897; RV32ZVE32F-NEXT:  .LBB47_8: # %cond.load1
3898; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3899; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
3900; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
3901; RV32ZVE32F-NEXT:    lw a4, 0(a5)
3902; RV32ZVE32F-NEXT:    lw a5, 4(a5)
3903; RV32ZVE32F-NEXT:    andi a6, t0, 4
3904; RV32ZVE32F-NEXT:    beqz a6, .LBB47_3
3905; RV32ZVE32F-NEXT:  .LBB47_9: # %cond.load4
3906; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3907; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
3908; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
3909; RV32ZVE32F-NEXT:    lw a6, 0(a7)
3910; RV32ZVE32F-NEXT:    lw a7, 4(a7)
3911; RV32ZVE32F-NEXT:    andi t1, t0, 8
3912; RV32ZVE32F-NEXT:    beqz t1, .LBB47_4
3913; RV32ZVE32F-NEXT:  .LBB47_10: # %cond.load7
3914; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
3915; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
3916; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
3917; RV32ZVE32F-NEXT:    lw t1, 0(t2)
3918; RV32ZVE32F-NEXT:    lw t2, 4(t2)
3919; RV32ZVE32F-NEXT:    andi t3, t0, 16
3920; RV32ZVE32F-NEXT:    beqz t3, .LBB47_5
3921; RV32ZVE32F-NEXT:  .LBB47_11: # %cond.load10
3922; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
3923; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
3924; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
3925; RV32ZVE32F-NEXT:    lw t3, 0(t4)
3926; RV32ZVE32F-NEXT:    lw t4, 4(t4)
3927; RV32ZVE32F-NEXT:    andi t5, t0, 32
3928; RV32ZVE32F-NEXT:    beqz t5, .LBB47_6
3929; RV32ZVE32F-NEXT:  .LBB47_12: # %cond.load13
3930; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
3931; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
3932; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
3933; RV32ZVE32F-NEXT:    lw t5, 0(t6)
3934; RV32ZVE32F-NEXT:    lw t6, 4(t6)
3935; RV32ZVE32F-NEXT:  .LBB47_13: # %else14
3936; RV32ZVE32F-NEXT:    addi sp, sp, -16
3937; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
3938; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
3939; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
3940; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
3941; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
3942; RV32ZVE32F-NEXT:    andi s0, t0, 64
3943; RV32ZVE32F-NEXT:    beqz s0, .LBB47_16
3944; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
3945; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
3946; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
3947; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
3948; RV32ZVE32F-NEXT:    lw s0, 0(s1)
3949; RV32ZVE32F-NEXT:    lw s1, 4(s1)
3950; RV32ZVE32F-NEXT:    andi t0, t0, -128
3951; RV32ZVE32F-NEXT:    bnez t0, .LBB47_17
3952; RV32ZVE32F-NEXT:  .LBB47_15:
3953; RV32ZVE32F-NEXT:    lw t0, 56(a1)
3954; RV32ZVE32F-NEXT:    lw a1, 60(a1)
3955; RV32ZVE32F-NEXT:    j .LBB47_18
3956; RV32ZVE32F-NEXT:  .LBB47_16:
3957; RV32ZVE32F-NEXT:    lw s0, 48(a1)
3958; RV32ZVE32F-NEXT:    lw s1, 52(a1)
3959; RV32ZVE32F-NEXT:    andi t0, t0, -128
3960; RV32ZVE32F-NEXT:    beqz t0, .LBB47_15
3961; RV32ZVE32F-NEXT:  .LBB47_17: # %cond.load19
3962; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
3963; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
3964; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
3965; RV32ZVE32F-NEXT:    lw t0, 0(a1)
3966; RV32ZVE32F-NEXT:    lw a1, 4(a1)
3967; RV32ZVE32F-NEXT:  .LBB47_18: # %else20
3968; RV32ZVE32F-NEXT:    sw a2, 0(a0)
3969; RV32ZVE32F-NEXT:    sw a3, 4(a0)
3970; RV32ZVE32F-NEXT:    sw a4, 8(a0)
3971; RV32ZVE32F-NEXT:    sw a5, 12(a0)
3972; RV32ZVE32F-NEXT:    sw a6, 16(a0)
3973; RV32ZVE32F-NEXT:    sw a7, 20(a0)
3974; RV32ZVE32F-NEXT:    sw t1, 24(a0)
3975; RV32ZVE32F-NEXT:    sw t2, 28(a0)
3976; RV32ZVE32F-NEXT:    sw t3, 32(a0)
3977; RV32ZVE32F-NEXT:    sw t4, 36(a0)
3978; RV32ZVE32F-NEXT:    sw t5, 40(a0)
3979; RV32ZVE32F-NEXT:    sw t6, 44(a0)
3980; RV32ZVE32F-NEXT:    sw s0, 48(a0)
3981; RV32ZVE32F-NEXT:    sw s1, 52(a0)
3982; RV32ZVE32F-NEXT:    sw t0, 56(a0)
3983; RV32ZVE32F-NEXT:    sw a1, 60(a0)
3984; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
3985; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
3986; RV32ZVE32F-NEXT:    .cfi_restore s0
3987; RV32ZVE32F-NEXT:    .cfi_restore s1
3988; RV32ZVE32F-NEXT:    addi sp, sp, 16
3989; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
3990; RV32ZVE32F-NEXT:    ret
3991;
3992; RV64ZVE32F-LABEL: mgather_v8i64:
3993; RV64ZVE32F:       # %bb.0:
3994; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
3995; RV64ZVE32F-NEXT:    vmv.x.s a6, v0
3996; RV64ZVE32F-NEXT:    andi a3, a6, 1
3997; RV64ZVE32F-NEXT:    beqz a3, .LBB47_9
3998; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
3999; RV64ZVE32F-NEXT:    ld a3, 0(a1)
4000; RV64ZVE32F-NEXT:    ld a3, 0(a3)
4001; RV64ZVE32F-NEXT:    andi a4, a6, 2
4002; RV64ZVE32F-NEXT:    bnez a4, .LBB47_10
4003; RV64ZVE32F-NEXT:  .LBB47_2:
4004; RV64ZVE32F-NEXT:    ld a4, 8(a2)
4005; RV64ZVE32F-NEXT:    andi a5, a6, 4
4006; RV64ZVE32F-NEXT:    bnez a5, .LBB47_11
4007; RV64ZVE32F-NEXT:  .LBB47_3:
4008; RV64ZVE32F-NEXT:    ld a5, 16(a2)
4009; RV64ZVE32F-NEXT:    andi a7, a6, 8
4010; RV64ZVE32F-NEXT:    bnez a7, .LBB47_12
4011; RV64ZVE32F-NEXT:  .LBB47_4:
4012; RV64ZVE32F-NEXT:    ld a7, 24(a2)
4013; RV64ZVE32F-NEXT:    andi t0, a6, 16
4014; RV64ZVE32F-NEXT:    bnez t0, .LBB47_13
4015; RV64ZVE32F-NEXT:  .LBB47_5:
4016; RV64ZVE32F-NEXT:    ld t0, 32(a2)
4017; RV64ZVE32F-NEXT:    andi t1, a6, 32
4018; RV64ZVE32F-NEXT:    bnez t1, .LBB47_14
4019; RV64ZVE32F-NEXT:  .LBB47_6:
4020; RV64ZVE32F-NEXT:    ld t1, 40(a2)
4021; RV64ZVE32F-NEXT:    andi t2, a6, 64
4022; RV64ZVE32F-NEXT:    bnez t2, .LBB47_15
4023; RV64ZVE32F-NEXT:  .LBB47_7:
4024; RV64ZVE32F-NEXT:    ld t2, 48(a2)
4025; RV64ZVE32F-NEXT:    andi a6, a6, -128
4026; RV64ZVE32F-NEXT:    bnez a6, .LBB47_16
4027; RV64ZVE32F-NEXT:  .LBB47_8:
4028; RV64ZVE32F-NEXT:    ld a1, 56(a2)
4029; RV64ZVE32F-NEXT:    j .LBB47_17
4030; RV64ZVE32F-NEXT:  .LBB47_9:
4031; RV64ZVE32F-NEXT:    ld a3, 0(a2)
4032; RV64ZVE32F-NEXT:    andi a4, a6, 2
4033; RV64ZVE32F-NEXT:    beqz a4, .LBB47_2
4034; RV64ZVE32F-NEXT:  .LBB47_10: # %cond.load1
4035; RV64ZVE32F-NEXT:    ld a4, 8(a1)
4036; RV64ZVE32F-NEXT:    ld a4, 0(a4)
4037; RV64ZVE32F-NEXT:    andi a5, a6, 4
4038; RV64ZVE32F-NEXT:    beqz a5, .LBB47_3
4039; RV64ZVE32F-NEXT:  .LBB47_11: # %cond.load4
4040; RV64ZVE32F-NEXT:    ld a5, 16(a1)
4041; RV64ZVE32F-NEXT:    ld a5, 0(a5)
4042; RV64ZVE32F-NEXT:    andi a7, a6, 8
4043; RV64ZVE32F-NEXT:    beqz a7, .LBB47_4
4044; RV64ZVE32F-NEXT:  .LBB47_12: # %cond.load7
4045; RV64ZVE32F-NEXT:    ld a7, 24(a1)
4046; RV64ZVE32F-NEXT:    ld a7, 0(a7)
4047; RV64ZVE32F-NEXT:    andi t0, a6, 16
4048; RV64ZVE32F-NEXT:    beqz t0, .LBB47_5
4049; RV64ZVE32F-NEXT:  .LBB47_13: # %cond.load10
4050; RV64ZVE32F-NEXT:    ld t0, 32(a1)
4051; RV64ZVE32F-NEXT:    ld t0, 0(t0)
4052; RV64ZVE32F-NEXT:    andi t1, a6, 32
4053; RV64ZVE32F-NEXT:    beqz t1, .LBB47_6
4054; RV64ZVE32F-NEXT:  .LBB47_14: # %cond.load13
4055; RV64ZVE32F-NEXT:    ld t1, 40(a1)
4056; RV64ZVE32F-NEXT:    ld t1, 0(t1)
4057; RV64ZVE32F-NEXT:    andi t2, a6, 64
4058; RV64ZVE32F-NEXT:    beqz t2, .LBB47_7
4059; RV64ZVE32F-NEXT:  .LBB47_15: # %cond.load16
4060; RV64ZVE32F-NEXT:    ld t2, 48(a1)
4061; RV64ZVE32F-NEXT:    ld t2, 0(t2)
4062; RV64ZVE32F-NEXT:    andi a6, a6, -128
4063; RV64ZVE32F-NEXT:    beqz a6, .LBB47_8
4064; RV64ZVE32F-NEXT:  .LBB47_16: # %cond.load19
4065; RV64ZVE32F-NEXT:    ld a1, 56(a1)
4066; RV64ZVE32F-NEXT:    ld a1, 0(a1)
4067; RV64ZVE32F-NEXT:  .LBB47_17: # %else20
4068; RV64ZVE32F-NEXT:    sd a3, 0(a0)
4069; RV64ZVE32F-NEXT:    sd a4, 8(a0)
4070; RV64ZVE32F-NEXT:    sd a5, 16(a0)
4071; RV64ZVE32F-NEXT:    sd a7, 24(a0)
4072; RV64ZVE32F-NEXT:    sd t0, 32(a0)
4073; RV64ZVE32F-NEXT:    sd t1, 40(a0)
4074; RV64ZVE32F-NEXT:    sd t2, 48(a0)
4075; RV64ZVE32F-NEXT:    sd a1, 56(a0)
4076; RV64ZVE32F-NEXT:    ret
4077  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4078  ret <8 x i64> %v
4079}
4080
4081define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4082; RV32V-LABEL: mgather_baseidx_v8i8_v8i64:
4083; RV32V:       # %bb.0:
4084; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
4085; RV32V-NEXT:    vsext.vf4 v10, v8
4086; RV32V-NEXT:    vsll.vi v8, v10, 3
4087; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
4088; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
4089; RV32V-NEXT:    vmv.v.v v8, v12
4090; RV32V-NEXT:    ret
4091;
4092; RV64V-LABEL: mgather_baseidx_v8i8_v8i64:
4093; RV64V:       # %bb.0:
4094; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
4095; RV64V-NEXT:    vsext.vf8 v16, v8
4096; RV64V-NEXT:    vsll.vi v8, v16, 3
4097; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
4098; RV64V-NEXT:    vmv.v.v v8, v12
4099; RV64V-NEXT:    ret
4100;
4101; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
4102; RV32ZVE32F:       # %bb.0:
4103; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
4104; RV32ZVE32F-NEXT:    vsext.vf4 v10, v8
4105; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
4106; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
4107; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4108; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
4109; RV32ZVE32F-NEXT:    andi a3, t0, 1
4110; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
4111; RV32ZVE32F-NEXT:    beqz a3, .LBB48_7
4112; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
4113; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
4114; RV32ZVE32F-NEXT:    lw a1, 0(a3)
4115; RV32ZVE32F-NEXT:    lw a3, 4(a3)
4116; RV32ZVE32F-NEXT:    andi a4, t0, 2
4117; RV32ZVE32F-NEXT:    bnez a4, .LBB48_8
4118; RV32ZVE32F-NEXT:  .LBB48_2:
4119; RV32ZVE32F-NEXT:    lw a4, 8(a2)
4120; RV32ZVE32F-NEXT:    lw a5, 12(a2)
4121; RV32ZVE32F-NEXT:    andi a6, t0, 4
4122; RV32ZVE32F-NEXT:    bnez a6, .LBB48_9
4123; RV32ZVE32F-NEXT:  .LBB48_3:
4124; RV32ZVE32F-NEXT:    lw a6, 16(a2)
4125; RV32ZVE32F-NEXT:    lw a7, 20(a2)
4126; RV32ZVE32F-NEXT:    andi t1, t0, 8
4127; RV32ZVE32F-NEXT:    bnez t1, .LBB48_10
4128; RV32ZVE32F-NEXT:  .LBB48_4:
4129; RV32ZVE32F-NEXT:    lw t1, 24(a2)
4130; RV32ZVE32F-NEXT:    lw t2, 28(a2)
4131; RV32ZVE32F-NEXT:    andi t3, t0, 16
4132; RV32ZVE32F-NEXT:    bnez t3, .LBB48_11
4133; RV32ZVE32F-NEXT:  .LBB48_5:
4134; RV32ZVE32F-NEXT:    lw t3, 32(a2)
4135; RV32ZVE32F-NEXT:    lw t4, 36(a2)
4136; RV32ZVE32F-NEXT:    andi t5, t0, 32
4137; RV32ZVE32F-NEXT:    bnez t5, .LBB48_12
4138; RV32ZVE32F-NEXT:  .LBB48_6:
4139; RV32ZVE32F-NEXT:    lw t5, 40(a2)
4140; RV32ZVE32F-NEXT:    lw t6, 44(a2)
4141; RV32ZVE32F-NEXT:    j .LBB48_13
4142; RV32ZVE32F-NEXT:  .LBB48_7:
4143; RV32ZVE32F-NEXT:    lw a1, 0(a2)
4144; RV32ZVE32F-NEXT:    lw a3, 4(a2)
4145; RV32ZVE32F-NEXT:    andi a4, t0, 2
4146; RV32ZVE32F-NEXT:    beqz a4, .LBB48_2
4147; RV32ZVE32F-NEXT:  .LBB48_8: # %cond.load1
4148; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4149; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
4150; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
4151; RV32ZVE32F-NEXT:    lw a4, 0(a5)
4152; RV32ZVE32F-NEXT:    lw a5, 4(a5)
4153; RV32ZVE32F-NEXT:    andi a6, t0, 4
4154; RV32ZVE32F-NEXT:    beqz a6, .LBB48_3
4155; RV32ZVE32F-NEXT:  .LBB48_9: # %cond.load4
4156; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4157; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
4158; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
4159; RV32ZVE32F-NEXT:    lw a6, 0(a7)
4160; RV32ZVE32F-NEXT:    lw a7, 4(a7)
4161; RV32ZVE32F-NEXT:    andi t1, t0, 8
4162; RV32ZVE32F-NEXT:    beqz t1, .LBB48_4
4163; RV32ZVE32F-NEXT:  .LBB48_10: # %cond.load7
4164; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4165; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
4166; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
4167; RV32ZVE32F-NEXT:    lw t1, 0(t2)
4168; RV32ZVE32F-NEXT:    lw t2, 4(t2)
4169; RV32ZVE32F-NEXT:    andi t3, t0, 16
4170; RV32ZVE32F-NEXT:    beqz t3, .LBB48_5
4171; RV32ZVE32F-NEXT:  .LBB48_11: # %cond.load10
4172; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4173; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
4174; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
4175; RV32ZVE32F-NEXT:    lw t3, 0(t4)
4176; RV32ZVE32F-NEXT:    lw t4, 4(t4)
4177; RV32ZVE32F-NEXT:    andi t5, t0, 32
4178; RV32ZVE32F-NEXT:    beqz t5, .LBB48_6
4179; RV32ZVE32F-NEXT:  .LBB48_12: # %cond.load13
4180; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4181; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
4182; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
4183; RV32ZVE32F-NEXT:    lw t5, 0(t6)
4184; RV32ZVE32F-NEXT:    lw t6, 4(t6)
4185; RV32ZVE32F-NEXT:  .LBB48_13: # %else14
4186; RV32ZVE32F-NEXT:    addi sp, sp, -16
4187; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
4188; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
4189; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
4190; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
4191; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
4192; RV32ZVE32F-NEXT:    andi s0, t0, 64
4193; RV32ZVE32F-NEXT:    beqz s0, .LBB48_16
4194; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
4195; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4196; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
4197; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
4198; RV32ZVE32F-NEXT:    lw s0, 0(s1)
4199; RV32ZVE32F-NEXT:    lw s1, 4(s1)
4200; RV32ZVE32F-NEXT:    andi t0, t0, -128
4201; RV32ZVE32F-NEXT:    bnez t0, .LBB48_17
4202; RV32ZVE32F-NEXT:  .LBB48_15:
4203; RV32ZVE32F-NEXT:    lw t0, 56(a2)
4204; RV32ZVE32F-NEXT:    lw a2, 60(a2)
4205; RV32ZVE32F-NEXT:    j .LBB48_18
4206; RV32ZVE32F-NEXT:  .LBB48_16:
4207; RV32ZVE32F-NEXT:    lw s0, 48(a2)
4208; RV32ZVE32F-NEXT:    lw s1, 52(a2)
4209; RV32ZVE32F-NEXT:    andi t0, t0, -128
4210; RV32ZVE32F-NEXT:    beqz t0, .LBB48_15
4211; RV32ZVE32F-NEXT:  .LBB48_17: # %cond.load19
4212; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4213; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
4214; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
4215; RV32ZVE32F-NEXT:    lw t0, 0(a2)
4216; RV32ZVE32F-NEXT:    lw a2, 4(a2)
4217; RV32ZVE32F-NEXT:  .LBB48_18: # %else20
4218; RV32ZVE32F-NEXT:    sw a1, 0(a0)
4219; RV32ZVE32F-NEXT:    sw a3, 4(a0)
4220; RV32ZVE32F-NEXT:    sw a4, 8(a0)
4221; RV32ZVE32F-NEXT:    sw a5, 12(a0)
4222; RV32ZVE32F-NEXT:    sw a6, 16(a0)
4223; RV32ZVE32F-NEXT:    sw a7, 20(a0)
4224; RV32ZVE32F-NEXT:    sw t1, 24(a0)
4225; RV32ZVE32F-NEXT:    sw t2, 28(a0)
4226; RV32ZVE32F-NEXT:    sw t3, 32(a0)
4227; RV32ZVE32F-NEXT:    sw t4, 36(a0)
4228; RV32ZVE32F-NEXT:    sw t5, 40(a0)
4229; RV32ZVE32F-NEXT:    sw t6, 44(a0)
4230; RV32ZVE32F-NEXT:    sw s0, 48(a0)
4231; RV32ZVE32F-NEXT:    sw s1, 52(a0)
4232; RV32ZVE32F-NEXT:    sw t0, 56(a0)
4233; RV32ZVE32F-NEXT:    sw a2, 60(a0)
4234; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
4235; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
4236; RV32ZVE32F-NEXT:    .cfi_restore s0
4237; RV32ZVE32F-NEXT:    .cfi_restore s1
4238; RV32ZVE32F-NEXT:    addi sp, sp, 16
4239; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
4240; RV32ZVE32F-NEXT:    ret
4241;
4242; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
4243; RV64ZVE32F:       # %bb.0:
4244; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
4245; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
4246; RV64ZVE32F-NEXT:    andi a3, a5, 1
4247; RV64ZVE32F-NEXT:    beqz a3, .LBB48_3
4248; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
4249; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
4250; RV64ZVE32F-NEXT:    slli a3, a3, 3
4251; RV64ZVE32F-NEXT:    add a3, a1, a3
4252; RV64ZVE32F-NEXT:    ld a3, 0(a3)
4253; RV64ZVE32F-NEXT:    andi a4, a5, 2
4254; RV64ZVE32F-NEXT:    bnez a4, .LBB48_4
4255; RV64ZVE32F-NEXT:  .LBB48_2:
4256; RV64ZVE32F-NEXT:    ld a4, 8(a2)
4257; RV64ZVE32F-NEXT:    j .LBB48_5
4258; RV64ZVE32F-NEXT:  .LBB48_3:
4259; RV64ZVE32F-NEXT:    ld a3, 0(a2)
4260; RV64ZVE32F-NEXT:    andi a4, a5, 2
4261; RV64ZVE32F-NEXT:    beqz a4, .LBB48_2
4262; RV64ZVE32F-NEXT:  .LBB48_4: # %cond.load1
4263; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
4264; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
4265; RV64ZVE32F-NEXT:    vmv.x.s a4, v9
4266; RV64ZVE32F-NEXT:    slli a4, a4, 3
4267; RV64ZVE32F-NEXT:    add a4, a1, a4
4268; RV64ZVE32F-NEXT:    ld a4, 0(a4)
4269; RV64ZVE32F-NEXT:  .LBB48_5: # %else2
4270; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
4271; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
4272; RV64ZVE32F-NEXT:    andi a6, a5, 4
4273; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
4274; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
4275; RV64ZVE32F-NEXT:    beqz a6, .LBB48_10
4276; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
4277; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
4278; RV64ZVE32F-NEXT:    slli a6, a6, 3
4279; RV64ZVE32F-NEXT:    add a6, a1, a6
4280; RV64ZVE32F-NEXT:    ld a6, 0(a6)
4281; RV64ZVE32F-NEXT:    andi a7, a5, 8
4282; RV64ZVE32F-NEXT:    bnez a7, .LBB48_11
4283; RV64ZVE32F-NEXT:  .LBB48_7:
4284; RV64ZVE32F-NEXT:    ld a7, 24(a2)
4285; RV64ZVE32F-NEXT:    andi t0, a5, 16
4286; RV64ZVE32F-NEXT:    bnez t0, .LBB48_12
4287; RV64ZVE32F-NEXT:  .LBB48_8:
4288; RV64ZVE32F-NEXT:    ld t0, 32(a2)
4289; RV64ZVE32F-NEXT:    andi t1, a5, 32
4290; RV64ZVE32F-NEXT:    bnez t1, .LBB48_13
4291; RV64ZVE32F-NEXT:  .LBB48_9:
4292; RV64ZVE32F-NEXT:    ld t1, 40(a2)
4293; RV64ZVE32F-NEXT:    j .LBB48_14
4294; RV64ZVE32F-NEXT:  .LBB48_10:
4295; RV64ZVE32F-NEXT:    ld a6, 16(a2)
4296; RV64ZVE32F-NEXT:    andi a7, a5, 8
4297; RV64ZVE32F-NEXT:    beqz a7, .LBB48_7
4298; RV64ZVE32F-NEXT:  .LBB48_11: # %cond.load7
4299; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
4300; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
4301; RV64ZVE32F-NEXT:    slli a7, a7, 3
4302; RV64ZVE32F-NEXT:    add a7, a1, a7
4303; RV64ZVE32F-NEXT:    ld a7, 0(a7)
4304; RV64ZVE32F-NEXT:    andi t0, a5, 16
4305; RV64ZVE32F-NEXT:    beqz t0, .LBB48_8
4306; RV64ZVE32F-NEXT:  .LBB48_12: # %cond.load10
4307; RV64ZVE32F-NEXT:    vmv.x.s t0, v9
4308; RV64ZVE32F-NEXT:    slli t0, t0, 3
4309; RV64ZVE32F-NEXT:    add t0, a1, t0
4310; RV64ZVE32F-NEXT:    ld t0, 0(t0)
4311; RV64ZVE32F-NEXT:    andi t1, a5, 32
4312; RV64ZVE32F-NEXT:    beqz t1, .LBB48_9
4313; RV64ZVE32F-NEXT:  .LBB48_13: # %cond.load13
4314; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
4315; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
4316; RV64ZVE32F-NEXT:    slli t1, t1, 3
4317; RV64ZVE32F-NEXT:    add t1, a1, t1
4318; RV64ZVE32F-NEXT:    ld t1, 0(t1)
4319; RV64ZVE32F-NEXT:  .LBB48_14: # %else14
4320; RV64ZVE32F-NEXT:    andi t2, a5, 64
4321; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
4322; RV64ZVE32F-NEXT:    beqz t2, .LBB48_17
4323; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
4324; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
4325; RV64ZVE32F-NEXT:    slli t2, t2, 3
4326; RV64ZVE32F-NEXT:    add t2, a1, t2
4327; RV64ZVE32F-NEXT:    ld t2, 0(t2)
4328; RV64ZVE32F-NEXT:    andi a5, a5, -128
4329; RV64ZVE32F-NEXT:    bnez a5, .LBB48_18
4330; RV64ZVE32F-NEXT:  .LBB48_16:
4331; RV64ZVE32F-NEXT:    ld a1, 56(a2)
4332; RV64ZVE32F-NEXT:    j .LBB48_19
4333; RV64ZVE32F-NEXT:  .LBB48_17:
4334; RV64ZVE32F-NEXT:    ld t2, 48(a2)
4335; RV64ZVE32F-NEXT:    andi a5, a5, -128
4336; RV64ZVE32F-NEXT:    beqz a5, .LBB48_16
4337; RV64ZVE32F-NEXT:  .LBB48_18: # %cond.load19
4338; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
4339; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
4340; RV64ZVE32F-NEXT:    slli a2, a2, 3
4341; RV64ZVE32F-NEXT:    add a1, a1, a2
4342; RV64ZVE32F-NEXT:    ld a1, 0(a1)
4343; RV64ZVE32F-NEXT:  .LBB48_19: # %else20
4344; RV64ZVE32F-NEXT:    sd a3, 0(a0)
4345; RV64ZVE32F-NEXT:    sd a4, 8(a0)
4346; RV64ZVE32F-NEXT:    sd a6, 16(a0)
4347; RV64ZVE32F-NEXT:    sd a7, 24(a0)
4348; RV64ZVE32F-NEXT:    sd t0, 32(a0)
4349; RV64ZVE32F-NEXT:    sd t1, 40(a0)
4350; RV64ZVE32F-NEXT:    sd t2, 48(a0)
4351; RV64ZVE32F-NEXT:    sd a1, 56(a0)
4352; RV64ZVE32F-NEXT:    ret
4353  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
4354  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4355  ret <8 x i64> %v
4356}
4357
4358define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4359; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4360; RV32V:       # %bb.0:
4361; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
4362; RV32V-NEXT:    vsext.vf4 v10, v8
4363; RV32V-NEXT:    vsll.vi v8, v10, 3
4364; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
4365; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
4366; RV32V-NEXT:    vmv.v.v v8, v12
4367; RV32V-NEXT:    ret
4368;
4369; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4370; RV64V:       # %bb.0:
4371; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
4372; RV64V-NEXT:    vsext.vf8 v16, v8
4373; RV64V-NEXT:    vsll.vi v8, v16, 3
4374; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
4375; RV64V-NEXT:    vmv.v.v v8, v12
4376; RV64V-NEXT:    ret
4377;
4378; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4379; RV32ZVE32F:       # %bb.0:
4380; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
4381; RV32ZVE32F-NEXT:    vsext.vf4 v10, v8
4382; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
4383; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
4384; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4385; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
4386; RV32ZVE32F-NEXT:    andi a3, t0, 1
4387; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
4388; RV32ZVE32F-NEXT:    beqz a3, .LBB49_7
4389; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
4390; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
4391; RV32ZVE32F-NEXT:    lw a1, 0(a3)
4392; RV32ZVE32F-NEXT:    lw a3, 4(a3)
4393; RV32ZVE32F-NEXT:    andi a4, t0, 2
4394; RV32ZVE32F-NEXT:    bnez a4, .LBB49_8
4395; RV32ZVE32F-NEXT:  .LBB49_2:
4396; RV32ZVE32F-NEXT:    lw a4, 8(a2)
4397; RV32ZVE32F-NEXT:    lw a5, 12(a2)
4398; RV32ZVE32F-NEXT:    andi a6, t0, 4
4399; RV32ZVE32F-NEXT:    bnez a6, .LBB49_9
4400; RV32ZVE32F-NEXT:  .LBB49_3:
4401; RV32ZVE32F-NEXT:    lw a6, 16(a2)
4402; RV32ZVE32F-NEXT:    lw a7, 20(a2)
4403; RV32ZVE32F-NEXT:    andi t1, t0, 8
4404; RV32ZVE32F-NEXT:    bnez t1, .LBB49_10
4405; RV32ZVE32F-NEXT:  .LBB49_4:
4406; RV32ZVE32F-NEXT:    lw t1, 24(a2)
4407; RV32ZVE32F-NEXT:    lw t2, 28(a2)
4408; RV32ZVE32F-NEXT:    andi t3, t0, 16
4409; RV32ZVE32F-NEXT:    bnez t3, .LBB49_11
4410; RV32ZVE32F-NEXT:  .LBB49_5:
4411; RV32ZVE32F-NEXT:    lw t3, 32(a2)
4412; RV32ZVE32F-NEXT:    lw t4, 36(a2)
4413; RV32ZVE32F-NEXT:    andi t5, t0, 32
4414; RV32ZVE32F-NEXT:    bnez t5, .LBB49_12
4415; RV32ZVE32F-NEXT:  .LBB49_6:
4416; RV32ZVE32F-NEXT:    lw t5, 40(a2)
4417; RV32ZVE32F-NEXT:    lw t6, 44(a2)
4418; RV32ZVE32F-NEXT:    j .LBB49_13
4419; RV32ZVE32F-NEXT:  .LBB49_7:
4420; RV32ZVE32F-NEXT:    lw a1, 0(a2)
4421; RV32ZVE32F-NEXT:    lw a3, 4(a2)
4422; RV32ZVE32F-NEXT:    andi a4, t0, 2
4423; RV32ZVE32F-NEXT:    beqz a4, .LBB49_2
4424; RV32ZVE32F-NEXT:  .LBB49_8: # %cond.load1
4425; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4426; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
4427; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
4428; RV32ZVE32F-NEXT:    lw a4, 0(a5)
4429; RV32ZVE32F-NEXT:    lw a5, 4(a5)
4430; RV32ZVE32F-NEXT:    andi a6, t0, 4
4431; RV32ZVE32F-NEXT:    beqz a6, .LBB49_3
4432; RV32ZVE32F-NEXT:  .LBB49_9: # %cond.load4
4433; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4434; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
4435; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
4436; RV32ZVE32F-NEXT:    lw a6, 0(a7)
4437; RV32ZVE32F-NEXT:    lw a7, 4(a7)
4438; RV32ZVE32F-NEXT:    andi t1, t0, 8
4439; RV32ZVE32F-NEXT:    beqz t1, .LBB49_4
4440; RV32ZVE32F-NEXT:  .LBB49_10: # %cond.load7
4441; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4442; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
4443; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
4444; RV32ZVE32F-NEXT:    lw t1, 0(t2)
4445; RV32ZVE32F-NEXT:    lw t2, 4(t2)
4446; RV32ZVE32F-NEXT:    andi t3, t0, 16
4447; RV32ZVE32F-NEXT:    beqz t3, .LBB49_5
4448; RV32ZVE32F-NEXT:  .LBB49_11: # %cond.load10
4449; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4450; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
4451; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
4452; RV32ZVE32F-NEXT:    lw t3, 0(t4)
4453; RV32ZVE32F-NEXT:    lw t4, 4(t4)
4454; RV32ZVE32F-NEXT:    andi t5, t0, 32
4455; RV32ZVE32F-NEXT:    beqz t5, .LBB49_6
4456; RV32ZVE32F-NEXT:  .LBB49_12: # %cond.load13
4457; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4458; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
4459; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
4460; RV32ZVE32F-NEXT:    lw t5, 0(t6)
4461; RV32ZVE32F-NEXT:    lw t6, 4(t6)
4462; RV32ZVE32F-NEXT:  .LBB49_13: # %else14
4463; RV32ZVE32F-NEXT:    addi sp, sp, -16
4464; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
4465; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
4466; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
4467; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
4468; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
4469; RV32ZVE32F-NEXT:    andi s0, t0, 64
4470; RV32ZVE32F-NEXT:    beqz s0, .LBB49_16
4471; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
4472; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4473; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
4474; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
4475; RV32ZVE32F-NEXT:    lw s0, 0(s1)
4476; RV32ZVE32F-NEXT:    lw s1, 4(s1)
4477; RV32ZVE32F-NEXT:    andi t0, t0, -128
4478; RV32ZVE32F-NEXT:    bnez t0, .LBB49_17
4479; RV32ZVE32F-NEXT:  .LBB49_15:
4480; RV32ZVE32F-NEXT:    lw t0, 56(a2)
4481; RV32ZVE32F-NEXT:    lw a2, 60(a2)
4482; RV32ZVE32F-NEXT:    j .LBB49_18
4483; RV32ZVE32F-NEXT:  .LBB49_16:
4484; RV32ZVE32F-NEXT:    lw s0, 48(a2)
4485; RV32ZVE32F-NEXT:    lw s1, 52(a2)
4486; RV32ZVE32F-NEXT:    andi t0, t0, -128
4487; RV32ZVE32F-NEXT:    beqz t0, .LBB49_15
4488; RV32ZVE32F-NEXT:  .LBB49_17: # %cond.load19
4489; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4490; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
4491; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
4492; RV32ZVE32F-NEXT:    lw t0, 0(a2)
4493; RV32ZVE32F-NEXT:    lw a2, 4(a2)
4494; RV32ZVE32F-NEXT:  .LBB49_18: # %else20
4495; RV32ZVE32F-NEXT:    sw a1, 0(a0)
4496; RV32ZVE32F-NEXT:    sw a3, 4(a0)
4497; RV32ZVE32F-NEXT:    sw a4, 8(a0)
4498; RV32ZVE32F-NEXT:    sw a5, 12(a0)
4499; RV32ZVE32F-NEXT:    sw a6, 16(a0)
4500; RV32ZVE32F-NEXT:    sw a7, 20(a0)
4501; RV32ZVE32F-NEXT:    sw t1, 24(a0)
4502; RV32ZVE32F-NEXT:    sw t2, 28(a0)
4503; RV32ZVE32F-NEXT:    sw t3, 32(a0)
4504; RV32ZVE32F-NEXT:    sw t4, 36(a0)
4505; RV32ZVE32F-NEXT:    sw t5, 40(a0)
4506; RV32ZVE32F-NEXT:    sw t6, 44(a0)
4507; RV32ZVE32F-NEXT:    sw s0, 48(a0)
4508; RV32ZVE32F-NEXT:    sw s1, 52(a0)
4509; RV32ZVE32F-NEXT:    sw t0, 56(a0)
4510; RV32ZVE32F-NEXT:    sw a2, 60(a0)
4511; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
4512; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
4513; RV32ZVE32F-NEXT:    .cfi_restore s0
4514; RV32ZVE32F-NEXT:    .cfi_restore s1
4515; RV32ZVE32F-NEXT:    addi sp, sp, 16
4516; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
4517; RV32ZVE32F-NEXT:    ret
4518;
4519; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4520; RV64ZVE32F:       # %bb.0:
4521; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
4522; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
4523; RV64ZVE32F-NEXT:    andi a3, a5, 1
4524; RV64ZVE32F-NEXT:    beqz a3, .LBB49_3
4525; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
4526; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
4527; RV64ZVE32F-NEXT:    slli a3, a3, 3
4528; RV64ZVE32F-NEXT:    add a3, a1, a3
4529; RV64ZVE32F-NEXT:    ld a3, 0(a3)
4530; RV64ZVE32F-NEXT:    andi a4, a5, 2
4531; RV64ZVE32F-NEXT:    bnez a4, .LBB49_4
4532; RV64ZVE32F-NEXT:  .LBB49_2:
4533; RV64ZVE32F-NEXT:    ld a4, 8(a2)
4534; RV64ZVE32F-NEXT:    j .LBB49_5
4535; RV64ZVE32F-NEXT:  .LBB49_3:
4536; RV64ZVE32F-NEXT:    ld a3, 0(a2)
4537; RV64ZVE32F-NEXT:    andi a4, a5, 2
4538; RV64ZVE32F-NEXT:    beqz a4, .LBB49_2
4539; RV64ZVE32F-NEXT:  .LBB49_4: # %cond.load1
4540; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
4541; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
4542; RV64ZVE32F-NEXT:    vmv.x.s a4, v9
4543; RV64ZVE32F-NEXT:    slli a4, a4, 3
4544; RV64ZVE32F-NEXT:    add a4, a1, a4
4545; RV64ZVE32F-NEXT:    ld a4, 0(a4)
4546; RV64ZVE32F-NEXT:  .LBB49_5: # %else2
4547; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
4548; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
4549; RV64ZVE32F-NEXT:    andi a6, a5, 4
4550; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
4551; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
4552; RV64ZVE32F-NEXT:    beqz a6, .LBB49_10
4553; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
4554; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
4555; RV64ZVE32F-NEXT:    slli a6, a6, 3
4556; RV64ZVE32F-NEXT:    add a6, a1, a6
4557; RV64ZVE32F-NEXT:    ld a6, 0(a6)
4558; RV64ZVE32F-NEXT:    andi a7, a5, 8
4559; RV64ZVE32F-NEXT:    bnez a7, .LBB49_11
4560; RV64ZVE32F-NEXT:  .LBB49_7:
4561; RV64ZVE32F-NEXT:    ld a7, 24(a2)
4562; RV64ZVE32F-NEXT:    andi t0, a5, 16
4563; RV64ZVE32F-NEXT:    bnez t0, .LBB49_12
4564; RV64ZVE32F-NEXT:  .LBB49_8:
4565; RV64ZVE32F-NEXT:    ld t0, 32(a2)
4566; RV64ZVE32F-NEXT:    andi t1, a5, 32
4567; RV64ZVE32F-NEXT:    bnez t1, .LBB49_13
4568; RV64ZVE32F-NEXT:  .LBB49_9:
4569; RV64ZVE32F-NEXT:    ld t1, 40(a2)
4570; RV64ZVE32F-NEXT:    j .LBB49_14
4571; RV64ZVE32F-NEXT:  .LBB49_10:
4572; RV64ZVE32F-NEXT:    ld a6, 16(a2)
4573; RV64ZVE32F-NEXT:    andi a7, a5, 8
4574; RV64ZVE32F-NEXT:    beqz a7, .LBB49_7
4575; RV64ZVE32F-NEXT:  .LBB49_11: # %cond.load7
4576; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
4577; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
4578; RV64ZVE32F-NEXT:    slli a7, a7, 3
4579; RV64ZVE32F-NEXT:    add a7, a1, a7
4580; RV64ZVE32F-NEXT:    ld a7, 0(a7)
4581; RV64ZVE32F-NEXT:    andi t0, a5, 16
4582; RV64ZVE32F-NEXT:    beqz t0, .LBB49_8
4583; RV64ZVE32F-NEXT:  .LBB49_12: # %cond.load10
4584; RV64ZVE32F-NEXT:    vmv.x.s t0, v9
4585; RV64ZVE32F-NEXT:    slli t0, t0, 3
4586; RV64ZVE32F-NEXT:    add t0, a1, t0
4587; RV64ZVE32F-NEXT:    ld t0, 0(t0)
4588; RV64ZVE32F-NEXT:    andi t1, a5, 32
4589; RV64ZVE32F-NEXT:    beqz t1, .LBB49_9
4590; RV64ZVE32F-NEXT:  .LBB49_13: # %cond.load13
4591; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
4592; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
4593; RV64ZVE32F-NEXT:    slli t1, t1, 3
4594; RV64ZVE32F-NEXT:    add t1, a1, t1
4595; RV64ZVE32F-NEXT:    ld t1, 0(t1)
4596; RV64ZVE32F-NEXT:  .LBB49_14: # %else14
4597; RV64ZVE32F-NEXT:    andi t2, a5, 64
4598; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
4599; RV64ZVE32F-NEXT:    beqz t2, .LBB49_17
4600; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
4601; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
4602; RV64ZVE32F-NEXT:    slli t2, t2, 3
4603; RV64ZVE32F-NEXT:    add t2, a1, t2
4604; RV64ZVE32F-NEXT:    ld t2, 0(t2)
4605; RV64ZVE32F-NEXT:    andi a5, a5, -128
4606; RV64ZVE32F-NEXT:    bnez a5, .LBB49_18
4607; RV64ZVE32F-NEXT:  .LBB49_16:
4608; RV64ZVE32F-NEXT:    ld a1, 56(a2)
4609; RV64ZVE32F-NEXT:    j .LBB49_19
4610; RV64ZVE32F-NEXT:  .LBB49_17:
4611; RV64ZVE32F-NEXT:    ld t2, 48(a2)
4612; RV64ZVE32F-NEXT:    andi a5, a5, -128
4613; RV64ZVE32F-NEXT:    beqz a5, .LBB49_16
4614; RV64ZVE32F-NEXT:  .LBB49_18: # %cond.load19
4615; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
4616; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
4617; RV64ZVE32F-NEXT:    slli a2, a2, 3
4618; RV64ZVE32F-NEXT:    add a1, a1, a2
4619; RV64ZVE32F-NEXT:    ld a1, 0(a1)
4620; RV64ZVE32F-NEXT:  .LBB49_19: # %else20
4621; RV64ZVE32F-NEXT:    sd a3, 0(a0)
4622; RV64ZVE32F-NEXT:    sd a4, 8(a0)
4623; RV64ZVE32F-NEXT:    sd a6, 16(a0)
4624; RV64ZVE32F-NEXT:    sd a7, 24(a0)
4625; RV64ZVE32F-NEXT:    sd t0, 32(a0)
4626; RV64ZVE32F-NEXT:    sd t1, 40(a0)
4627; RV64ZVE32F-NEXT:    sd t2, 48(a0)
4628; RV64ZVE32F-NEXT:    sd a1, 56(a0)
4629; RV64ZVE32F-NEXT:    ret
4630  %eidxs = sext <8 x i8> %idxs to <8 x i64>
4631  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4632  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4633  ret <8 x i64> %v
4634}
4635
4636define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4637; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4638; RV32V:       # %bb.0:
4639; RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4640; RV32V-NEXT:    vzext.vf2 v9, v8
4641; RV32V-NEXT:    vsll.vi v8, v9, 3
4642; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
4643; RV32V-NEXT:    vluxei16.v v12, (a0), v8, v0.t
4644; RV32V-NEXT:    vmv.v.v v8, v12
4645; RV32V-NEXT:    ret
4646;
4647; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4648; RV64V:       # %bb.0:
4649; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4650; RV64V-NEXT:    vzext.vf2 v9, v8
4651; RV64V-NEXT:    vsll.vi v8, v9, 3
4652; RV64V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
4653; RV64V-NEXT:    vluxei16.v v12, (a0), v8, v0.t
4654; RV64V-NEXT:    vmv.v.v v8, v12
4655; RV64V-NEXT:    ret
4656;
4657; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4658; RV32ZVE32F:       # %bb.0:
4659; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
4660; RV32ZVE32F-NEXT:    vzext.vf4 v10, v8
4661; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
4662; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
4663; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4664; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
4665; RV32ZVE32F-NEXT:    andi a3, t0, 1
4666; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
4667; RV32ZVE32F-NEXT:    beqz a3, .LBB50_7
4668; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
4669; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
4670; RV32ZVE32F-NEXT:    lw a1, 0(a3)
4671; RV32ZVE32F-NEXT:    lw a3, 4(a3)
4672; RV32ZVE32F-NEXT:    andi a4, t0, 2
4673; RV32ZVE32F-NEXT:    bnez a4, .LBB50_8
4674; RV32ZVE32F-NEXT:  .LBB50_2:
4675; RV32ZVE32F-NEXT:    lw a4, 8(a2)
4676; RV32ZVE32F-NEXT:    lw a5, 12(a2)
4677; RV32ZVE32F-NEXT:    andi a6, t0, 4
4678; RV32ZVE32F-NEXT:    bnez a6, .LBB50_9
4679; RV32ZVE32F-NEXT:  .LBB50_3:
4680; RV32ZVE32F-NEXT:    lw a6, 16(a2)
4681; RV32ZVE32F-NEXT:    lw a7, 20(a2)
4682; RV32ZVE32F-NEXT:    andi t1, t0, 8
4683; RV32ZVE32F-NEXT:    bnez t1, .LBB50_10
4684; RV32ZVE32F-NEXT:  .LBB50_4:
4685; RV32ZVE32F-NEXT:    lw t1, 24(a2)
4686; RV32ZVE32F-NEXT:    lw t2, 28(a2)
4687; RV32ZVE32F-NEXT:    andi t3, t0, 16
4688; RV32ZVE32F-NEXT:    bnez t3, .LBB50_11
4689; RV32ZVE32F-NEXT:  .LBB50_5:
4690; RV32ZVE32F-NEXT:    lw t3, 32(a2)
4691; RV32ZVE32F-NEXT:    lw t4, 36(a2)
4692; RV32ZVE32F-NEXT:    andi t5, t0, 32
4693; RV32ZVE32F-NEXT:    bnez t5, .LBB50_12
4694; RV32ZVE32F-NEXT:  .LBB50_6:
4695; RV32ZVE32F-NEXT:    lw t5, 40(a2)
4696; RV32ZVE32F-NEXT:    lw t6, 44(a2)
4697; RV32ZVE32F-NEXT:    j .LBB50_13
4698; RV32ZVE32F-NEXT:  .LBB50_7:
4699; RV32ZVE32F-NEXT:    lw a1, 0(a2)
4700; RV32ZVE32F-NEXT:    lw a3, 4(a2)
4701; RV32ZVE32F-NEXT:    andi a4, t0, 2
4702; RV32ZVE32F-NEXT:    beqz a4, .LBB50_2
4703; RV32ZVE32F-NEXT:  .LBB50_8: # %cond.load1
4704; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4705; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
4706; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
4707; RV32ZVE32F-NEXT:    lw a4, 0(a5)
4708; RV32ZVE32F-NEXT:    lw a5, 4(a5)
4709; RV32ZVE32F-NEXT:    andi a6, t0, 4
4710; RV32ZVE32F-NEXT:    beqz a6, .LBB50_3
4711; RV32ZVE32F-NEXT:  .LBB50_9: # %cond.load4
4712; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4713; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
4714; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
4715; RV32ZVE32F-NEXT:    lw a6, 0(a7)
4716; RV32ZVE32F-NEXT:    lw a7, 4(a7)
4717; RV32ZVE32F-NEXT:    andi t1, t0, 8
4718; RV32ZVE32F-NEXT:    beqz t1, .LBB50_4
4719; RV32ZVE32F-NEXT:  .LBB50_10: # %cond.load7
4720; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4721; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
4722; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
4723; RV32ZVE32F-NEXT:    lw t1, 0(t2)
4724; RV32ZVE32F-NEXT:    lw t2, 4(t2)
4725; RV32ZVE32F-NEXT:    andi t3, t0, 16
4726; RV32ZVE32F-NEXT:    beqz t3, .LBB50_5
4727; RV32ZVE32F-NEXT:  .LBB50_11: # %cond.load10
4728; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4729; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
4730; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
4731; RV32ZVE32F-NEXT:    lw t3, 0(t4)
4732; RV32ZVE32F-NEXT:    lw t4, 4(t4)
4733; RV32ZVE32F-NEXT:    andi t5, t0, 32
4734; RV32ZVE32F-NEXT:    beqz t5, .LBB50_6
4735; RV32ZVE32F-NEXT:  .LBB50_12: # %cond.load13
4736; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4737; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
4738; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
4739; RV32ZVE32F-NEXT:    lw t5, 0(t6)
4740; RV32ZVE32F-NEXT:    lw t6, 4(t6)
4741; RV32ZVE32F-NEXT:  .LBB50_13: # %else14
4742; RV32ZVE32F-NEXT:    addi sp, sp, -16
4743; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
4744; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
4745; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
4746; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
4747; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
4748; RV32ZVE32F-NEXT:    andi s0, t0, 64
4749; RV32ZVE32F-NEXT:    beqz s0, .LBB50_16
4750; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
4751; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4752; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
4753; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
4754; RV32ZVE32F-NEXT:    lw s0, 0(s1)
4755; RV32ZVE32F-NEXT:    lw s1, 4(s1)
4756; RV32ZVE32F-NEXT:    andi t0, t0, -128
4757; RV32ZVE32F-NEXT:    bnez t0, .LBB50_17
4758; RV32ZVE32F-NEXT:  .LBB50_15:
4759; RV32ZVE32F-NEXT:    lw t0, 56(a2)
4760; RV32ZVE32F-NEXT:    lw a2, 60(a2)
4761; RV32ZVE32F-NEXT:    j .LBB50_18
4762; RV32ZVE32F-NEXT:  .LBB50_16:
4763; RV32ZVE32F-NEXT:    lw s0, 48(a2)
4764; RV32ZVE32F-NEXT:    lw s1, 52(a2)
4765; RV32ZVE32F-NEXT:    andi t0, t0, -128
4766; RV32ZVE32F-NEXT:    beqz t0, .LBB50_15
4767; RV32ZVE32F-NEXT:  .LBB50_17: # %cond.load19
4768; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
4769; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
4770; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
4771; RV32ZVE32F-NEXT:    lw t0, 0(a2)
4772; RV32ZVE32F-NEXT:    lw a2, 4(a2)
4773; RV32ZVE32F-NEXT:  .LBB50_18: # %else20
4774; RV32ZVE32F-NEXT:    sw a1, 0(a0)
4775; RV32ZVE32F-NEXT:    sw a3, 4(a0)
4776; RV32ZVE32F-NEXT:    sw a4, 8(a0)
4777; RV32ZVE32F-NEXT:    sw a5, 12(a0)
4778; RV32ZVE32F-NEXT:    sw a6, 16(a0)
4779; RV32ZVE32F-NEXT:    sw a7, 20(a0)
4780; RV32ZVE32F-NEXT:    sw t1, 24(a0)
4781; RV32ZVE32F-NEXT:    sw t2, 28(a0)
4782; RV32ZVE32F-NEXT:    sw t3, 32(a0)
4783; RV32ZVE32F-NEXT:    sw t4, 36(a0)
4784; RV32ZVE32F-NEXT:    sw t5, 40(a0)
4785; RV32ZVE32F-NEXT:    sw t6, 44(a0)
4786; RV32ZVE32F-NEXT:    sw s0, 48(a0)
4787; RV32ZVE32F-NEXT:    sw s1, 52(a0)
4788; RV32ZVE32F-NEXT:    sw t0, 56(a0)
4789; RV32ZVE32F-NEXT:    sw a2, 60(a0)
4790; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
4791; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
4792; RV32ZVE32F-NEXT:    .cfi_restore s0
4793; RV32ZVE32F-NEXT:    .cfi_restore s1
4794; RV32ZVE32F-NEXT:    addi sp, sp, 16
4795; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
4796; RV32ZVE32F-NEXT:    ret
4797;
4798; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4799; RV64ZVE32F:       # %bb.0:
4800; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
4801; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
4802; RV64ZVE32F-NEXT:    andi a3, a5, 1
4803; RV64ZVE32F-NEXT:    beqz a3, .LBB50_3
4804; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
4805; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
4806; RV64ZVE32F-NEXT:    andi a3, a3, 255
4807; RV64ZVE32F-NEXT:    slli a3, a3, 3
4808; RV64ZVE32F-NEXT:    add a3, a1, a3
4809; RV64ZVE32F-NEXT:    ld a3, 0(a3)
4810; RV64ZVE32F-NEXT:    andi a4, a5, 2
4811; RV64ZVE32F-NEXT:    bnez a4, .LBB50_4
4812; RV64ZVE32F-NEXT:  .LBB50_2:
4813; RV64ZVE32F-NEXT:    ld a4, 8(a2)
4814; RV64ZVE32F-NEXT:    j .LBB50_5
4815; RV64ZVE32F-NEXT:  .LBB50_3:
4816; RV64ZVE32F-NEXT:    ld a3, 0(a2)
4817; RV64ZVE32F-NEXT:    andi a4, a5, 2
4818; RV64ZVE32F-NEXT:    beqz a4, .LBB50_2
4819; RV64ZVE32F-NEXT:  .LBB50_4: # %cond.load1
4820; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
4821; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
4822; RV64ZVE32F-NEXT:    vmv.x.s a4, v9
4823; RV64ZVE32F-NEXT:    andi a4, a4, 255
4824; RV64ZVE32F-NEXT:    slli a4, a4, 3
4825; RV64ZVE32F-NEXT:    add a4, a1, a4
4826; RV64ZVE32F-NEXT:    ld a4, 0(a4)
4827; RV64ZVE32F-NEXT:  .LBB50_5: # %else2
4828; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
4829; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
4830; RV64ZVE32F-NEXT:    andi a6, a5, 4
4831; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
4832; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
4833; RV64ZVE32F-NEXT:    beqz a6, .LBB50_10
4834; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
4835; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
4836; RV64ZVE32F-NEXT:    andi a6, a6, 255
4837; RV64ZVE32F-NEXT:    slli a6, a6, 3
4838; RV64ZVE32F-NEXT:    add a6, a1, a6
4839; RV64ZVE32F-NEXT:    ld a6, 0(a6)
4840; RV64ZVE32F-NEXT:    andi a7, a5, 8
4841; RV64ZVE32F-NEXT:    bnez a7, .LBB50_11
4842; RV64ZVE32F-NEXT:  .LBB50_7:
4843; RV64ZVE32F-NEXT:    ld a7, 24(a2)
4844; RV64ZVE32F-NEXT:    andi t0, a5, 16
4845; RV64ZVE32F-NEXT:    bnez t0, .LBB50_12
4846; RV64ZVE32F-NEXT:  .LBB50_8:
4847; RV64ZVE32F-NEXT:    ld t0, 32(a2)
4848; RV64ZVE32F-NEXT:    andi t1, a5, 32
4849; RV64ZVE32F-NEXT:    bnez t1, .LBB50_13
4850; RV64ZVE32F-NEXT:  .LBB50_9:
4851; RV64ZVE32F-NEXT:    ld t1, 40(a2)
4852; RV64ZVE32F-NEXT:    j .LBB50_14
4853; RV64ZVE32F-NEXT:  .LBB50_10:
4854; RV64ZVE32F-NEXT:    ld a6, 16(a2)
4855; RV64ZVE32F-NEXT:    andi a7, a5, 8
4856; RV64ZVE32F-NEXT:    beqz a7, .LBB50_7
4857; RV64ZVE32F-NEXT:  .LBB50_11: # %cond.load7
4858; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
4859; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
4860; RV64ZVE32F-NEXT:    andi a7, a7, 255
4861; RV64ZVE32F-NEXT:    slli a7, a7, 3
4862; RV64ZVE32F-NEXT:    add a7, a1, a7
4863; RV64ZVE32F-NEXT:    ld a7, 0(a7)
4864; RV64ZVE32F-NEXT:    andi t0, a5, 16
4865; RV64ZVE32F-NEXT:    beqz t0, .LBB50_8
4866; RV64ZVE32F-NEXT:  .LBB50_12: # %cond.load10
4867; RV64ZVE32F-NEXT:    vmv.x.s t0, v9
4868; RV64ZVE32F-NEXT:    andi t0, t0, 255
4869; RV64ZVE32F-NEXT:    slli t0, t0, 3
4870; RV64ZVE32F-NEXT:    add t0, a1, t0
4871; RV64ZVE32F-NEXT:    ld t0, 0(t0)
4872; RV64ZVE32F-NEXT:    andi t1, a5, 32
4873; RV64ZVE32F-NEXT:    beqz t1, .LBB50_9
4874; RV64ZVE32F-NEXT:  .LBB50_13: # %cond.load13
4875; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
4876; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
4877; RV64ZVE32F-NEXT:    andi t1, t1, 255
4878; RV64ZVE32F-NEXT:    slli t1, t1, 3
4879; RV64ZVE32F-NEXT:    add t1, a1, t1
4880; RV64ZVE32F-NEXT:    ld t1, 0(t1)
4881; RV64ZVE32F-NEXT:  .LBB50_14: # %else14
4882; RV64ZVE32F-NEXT:    andi t2, a5, 64
4883; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
4884; RV64ZVE32F-NEXT:    beqz t2, .LBB50_17
4885; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
4886; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
4887; RV64ZVE32F-NEXT:    andi t2, t2, 255
4888; RV64ZVE32F-NEXT:    slli t2, t2, 3
4889; RV64ZVE32F-NEXT:    add t2, a1, t2
4890; RV64ZVE32F-NEXT:    ld t2, 0(t2)
4891; RV64ZVE32F-NEXT:    andi a5, a5, -128
4892; RV64ZVE32F-NEXT:    bnez a5, .LBB50_18
4893; RV64ZVE32F-NEXT:  .LBB50_16:
4894; RV64ZVE32F-NEXT:    ld a1, 56(a2)
4895; RV64ZVE32F-NEXT:    j .LBB50_19
4896; RV64ZVE32F-NEXT:  .LBB50_17:
4897; RV64ZVE32F-NEXT:    ld t2, 48(a2)
4898; RV64ZVE32F-NEXT:    andi a5, a5, -128
4899; RV64ZVE32F-NEXT:    beqz a5, .LBB50_16
4900; RV64ZVE32F-NEXT:  .LBB50_18: # %cond.load19
4901; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
4902; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
4903; RV64ZVE32F-NEXT:    andi a2, a2, 255
4904; RV64ZVE32F-NEXT:    slli a2, a2, 3
4905; RV64ZVE32F-NEXT:    add a1, a1, a2
4906; RV64ZVE32F-NEXT:    ld a1, 0(a1)
4907; RV64ZVE32F-NEXT:  .LBB50_19: # %else20
4908; RV64ZVE32F-NEXT:    sd a3, 0(a0)
4909; RV64ZVE32F-NEXT:    sd a4, 8(a0)
4910; RV64ZVE32F-NEXT:    sd a6, 16(a0)
4911; RV64ZVE32F-NEXT:    sd a7, 24(a0)
4912; RV64ZVE32F-NEXT:    sd t0, 32(a0)
4913; RV64ZVE32F-NEXT:    sd t1, 40(a0)
4914; RV64ZVE32F-NEXT:    sd t2, 48(a0)
4915; RV64ZVE32F-NEXT:    sd a1, 56(a0)
4916; RV64ZVE32F-NEXT:    ret
4917  %eidxs = zext <8 x i8> %idxs to <8 x i64>
4918  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4919  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4920  ret <8 x i64> %v
4921}
4922
4923define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4924; RV32V-LABEL: mgather_baseidx_v8i16_v8i64:
4925; RV32V:       # %bb.0:
4926; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
4927; RV32V-NEXT:    vsext.vf2 v10, v8
4928; RV32V-NEXT:    vsll.vi v8, v10, 3
4929; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
4930; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
4931; RV32V-NEXT:    vmv.v.v v8, v12
4932; RV32V-NEXT:    ret
4933;
4934; RV64V-LABEL: mgather_baseidx_v8i16_v8i64:
4935; RV64V:       # %bb.0:
4936; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
4937; RV64V-NEXT:    vsext.vf4 v16, v8
4938; RV64V-NEXT:    vsll.vi v8, v16, 3
4939; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
4940; RV64V-NEXT:    vmv.v.v v8, v12
4941; RV64V-NEXT:    ret
4942;
4943; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
4944; RV32ZVE32F:       # %bb.0:
4945; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
4946; RV32ZVE32F-NEXT:    vsext.vf2 v10, v8
4947; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
4948; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
4949; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
4950; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
4951; RV32ZVE32F-NEXT:    andi a3, t0, 1
4952; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
4953; RV32ZVE32F-NEXT:    beqz a3, .LBB51_7
4954; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
4955; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
4956; RV32ZVE32F-NEXT:    lw a1, 0(a3)
4957; RV32ZVE32F-NEXT:    lw a3, 4(a3)
4958; RV32ZVE32F-NEXT:    andi a4, t0, 2
4959; RV32ZVE32F-NEXT:    bnez a4, .LBB51_8
4960; RV32ZVE32F-NEXT:  .LBB51_2:
4961; RV32ZVE32F-NEXT:    lw a4, 8(a2)
4962; RV32ZVE32F-NEXT:    lw a5, 12(a2)
4963; RV32ZVE32F-NEXT:    andi a6, t0, 4
4964; RV32ZVE32F-NEXT:    bnez a6, .LBB51_9
4965; RV32ZVE32F-NEXT:  .LBB51_3:
4966; RV32ZVE32F-NEXT:    lw a6, 16(a2)
4967; RV32ZVE32F-NEXT:    lw a7, 20(a2)
4968; RV32ZVE32F-NEXT:    andi t1, t0, 8
4969; RV32ZVE32F-NEXT:    bnez t1, .LBB51_10
4970; RV32ZVE32F-NEXT:  .LBB51_4:
4971; RV32ZVE32F-NEXT:    lw t1, 24(a2)
4972; RV32ZVE32F-NEXT:    lw t2, 28(a2)
4973; RV32ZVE32F-NEXT:    andi t3, t0, 16
4974; RV32ZVE32F-NEXT:    bnez t3, .LBB51_11
4975; RV32ZVE32F-NEXT:  .LBB51_5:
4976; RV32ZVE32F-NEXT:    lw t3, 32(a2)
4977; RV32ZVE32F-NEXT:    lw t4, 36(a2)
4978; RV32ZVE32F-NEXT:    andi t5, t0, 32
4979; RV32ZVE32F-NEXT:    bnez t5, .LBB51_12
4980; RV32ZVE32F-NEXT:  .LBB51_6:
4981; RV32ZVE32F-NEXT:    lw t5, 40(a2)
4982; RV32ZVE32F-NEXT:    lw t6, 44(a2)
4983; RV32ZVE32F-NEXT:    j .LBB51_13
4984; RV32ZVE32F-NEXT:  .LBB51_7:
4985; RV32ZVE32F-NEXT:    lw a1, 0(a2)
4986; RV32ZVE32F-NEXT:    lw a3, 4(a2)
4987; RV32ZVE32F-NEXT:    andi a4, t0, 2
4988; RV32ZVE32F-NEXT:    beqz a4, .LBB51_2
4989; RV32ZVE32F-NEXT:  .LBB51_8: # %cond.load1
4990; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4991; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
4992; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
4993; RV32ZVE32F-NEXT:    lw a4, 0(a5)
4994; RV32ZVE32F-NEXT:    lw a5, 4(a5)
4995; RV32ZVE32F-NEXT:    andi a6, t0, 4
4996; RV32ZVE32F-NEXT:    beqz a6, .LBB51_3
4997; RV32ZVE32F-NEXT:  .LBB51_9: # %cond.load4
4998; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
4999; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
5000; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
5001; RV32ZVE32F-NEXT:    lw a6, 0(a7)
5002; RV32ZVE32F-NEXT:    lw a7, 4(a7)
5003; RV32ZVE32F-NEXT:    andi t1, t0, 8
5004; RV32ZVE32F-NEXT:    beqz t1, .LBB51_4
5005; RV32ZVE32F-NEXT:  .LBB51_10: # %cond.load7
5006; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5007; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
5008; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
5009; RV32ZVE32F-NEXT:    lw t1, 0(t2)
5010; RV32ZVE32F-NEXT:    lw t2, 4(t2)
5011; RV32ZVE32F-NEXT:    andi t3, t0, 16
5012; RV32ZVE32F-NEXT:    beqz t3, .LBB51_5
5013; RV32ZVE32F-NEXT:  .LBB51_11: # %cond.load10
5014; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5015; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
5016; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
5017; RV32ZVE32F-NEXT:    lw t3, 0(t4)
5018; RV32ZVE32F-NEXT:    lw t4, 4(t4)
5019; RV32ZVE32F-NEXT:    andi t5, t0, 32
5020; RV32ZVE32F-NEXT:    beqz t5, .LBB51_6
5021; RV32ZVE32F-NEXT:  .LBB51_12: # %cond.load13
5022; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5023; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
5024; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
5025; RV32ZVE32F-NEXT:    lw t5, 0(t6)
5026; RV32ZVE32F-NEXT:    lw t6, 4(t6)
5027; RV32ZVE32F-NEXT:  .LBB51_13: # %else14
5028; RV32ZVE32F-NEXT:    addi sp, sp, -16
5029; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
5030; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
5031; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
5032; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
5033; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
5034; RV32ZVE32F-NEXT:    andi s0, t0, 64
5035; RV32ZVE32F-NEXT:    beqz s0, .LBB51_16
5036; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
5037; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5038; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
5039; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
5040; RV32ZVE32F-NEXT:    lw s0, 0(s1)
5041; RV32ZVE32F-NEXT:    lw s1, 4(s1)
5042; RV32ZVE32F-NEXT:    andi t0, t0, -128
5043; RV32ZVE32F-NEXT:    bnez t0, .LBB51_17
5044; RV32ZVE32F-NEXT:  .LBB51_15:
5045; RV32ZVE32F-NEXT:    lw t0, 56(a2)
5046; RV32ZVE32F-NEXT:    lw a2, 60(a2)
5047; RV32ZVE32F-NEXT:    j .LBB51_18
5048; RV32ZVE32F-NEXT:  .LBB51_16:
5049; RV32ZVE32F-NEXT:    lw s0, 48(a2)
5050; RV32ZVE32F-NEXT:    lw s1, 52(a2)
5051; RV32ZVE32F-NEXT:    andi t0, t0, -128
5052; RV32ZVE32F-NEXT:    beqz t0, .LBB51_15
5053; RV32ZVE32F-NEXT:  .LBB51_17: # %cond.load19
5054; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5055; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
5056; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
5057; RV32ZVE32F-NEXT:    lw t0, 0(a2)
5058; RV32ZVE32F-NEXT:    lw a2, 4(a2)
5059; RV32ZVE32F-NEXT:  .LBB51_18: # %else20
5060; RV32ZVE32F-NEXT:    sw a1, 0(a0)
5061; RV32ZVE32F-NEXT:    sw a3, 4(a0)
5062; RV32ZVE32F-NEXT:    sw a4, 8(a0)
5063; RV32ZVE32F-NEXT:    sw a5, 12(a0)
5064; RV32ZVE32F-NEXT:    sw a6, 16(a0)
5065; RV32ZVE32F-NEXT:    sw a7, 20(a0)
5066; RV32ZVE32F-NEXT:    sw t1, 24(a0)
5067; RV32ZVE32F-NEXT:    sw t2, 28(a0)
5068; RV32ZVE32F-NEXT:    sw t3, 32(a0)
5069; RV32ZVE32F-NEXT:    sw t4, 36(a0)
5070; RV32ZVE32F-NEXT:    sw t5, 40(a0)
5071; RV32ZVE32F-NEXT:    sw t6, 44(a0)
5072; RV32ZVE32F-NEXT:    sw s0, 48(a0)
5073; RV32ZVE32F-NEXT:    sw s1, 52(a0)
5074; RV32ZVE32F-NEXT:    sw t0, 56(a0)
5075; RV32ZVE32F-NEXT:    sw a2, 60(a0)
5076; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
5077; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
5078; RV32ZVE32F-NEXT:    .cfi_restore s0
5079; RV32ZVE32F-NEXT:    .cfi_restore s1
5080; RV32ZVE32F-NEXT:    addi sp, sp, 16
5081; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
5082; RV32ZVE32F-NEXT:    ret
5083;
5084; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
5085; RV64ZVE32F:       # %bb.0:
5086; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
5087; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
5088; RV64ZVE32F-NEXT:    andi a3, a5, 1
5089; RV64ZVE32F-NEXT:    beqz a3, .LBB51_3
5090; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
5091; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
5092; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
5093; RV64ZVE32F-NEXT:    slli a3, a3, 3
5094; RV64ZVE32F-NEXT:    add a3, a1, a3
5095; RV64ZVE32F-NEXT:    ld a3, 0(a3)
5096; RV64ZVE32F-NEXT:    andi a4, a5, 2
5097; RV64ZVE32F-NEXT:    bnez a4, .LBB51_4
5098; RV64ZVE32F-NEXT:  .LBB51_2:
5099; RV64ZVE32F-NEXT:    ld a4, 8(a2)
5100; RV64ZVE32F-NEXT:    j .LBB51_5
5101; RV64ZVE32F-NEXT:  .LBB51_3:
5102; RV64ZVE32F-NEXT:    ld a3, 0(a2)
5103; RV64ZVE32F-NEXT:    andi a4, a5, 2
5104; RV64ZVE32F-NEXT:    beqz a4, .LBB51_2
5105; RV64ZVE32F-NEXT:  .LBB51_4: # %cond.load1
5106; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
5107; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
5108; RV64ZVE32F-NEXT:    vmv.x.s a4, v9
5109; RV64ZVE32F-NEXT:    slli a4, a4, 3
5110; RV64ZVE32F-NEXT:    add a4, a1, a4
5111; RV64ZVE32F-NEXT:    ld a4, 0(a4)
5112; RV64ZVE32F-NEXT:  .LBB51_5: # %else2
5113; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
5114; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
5115; RV64ZVE32F-NEXT:    andi a6, a5, 4
5116; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
5117; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
5118; RV64ZVE32F-NEXT:    beqz a6, .LBB51_10
5119; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
5120; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
5121; RV64ZVE32F-NEXT:    slli a6, a6, 3
5122; RV64ZVE32F-NEXT:    add a6, a1, a6
5123; RV64ZVE32F-NEXT:    ld a6, 0(a6)
5124; RV64ZVE32F-NEXT:    andi a7, a5, 8
5125; RV64ZVE32F-NEXT:    bnez a7, .LBB51_11
5126; RV64ZVE32F-NEXT:  .LBB51_7:
5127; RV64ZVE32F-NEXT:    ld a7, 24(a2)
5128; RV64ZVE32F-NEXT:    andi t0, a5, 16
5129; RV64ZVE32F-NEXT:    bnez t0, .LBB51_12
5130; RV64ZVE32F-NEXT:  .LBB51_8:
5131; RV64ZVE32F-NEXT:    ld t0, 32(a2)
5132; RV64ZVE32F-NEXT:    andi t1, a5, 32
5133; RV64ZVE32F-NEXT:    bnez t1, .LBB51_13
5134; RV64ZVE32F-NEXT:  .LBB51_9:
5135; RV64ZVE32F-NEXT:    ld t1, 40(a2)
5136; RV64ZVE32F-NEXT:    j .LBB51_14
5137; RV64ZVE32F-NEXT:  .LBB51_10:
5138; RV64ZVE32F-NEXT:    ld a6, 16(a2)
5139; RV64ZVE32F-NEXT:    andi a7, a5, 8
5140; RV64ZVE32F-NEXT:    beqz a7, .LBB51_7
5141; RV64ZVE32F-NEXT:  .LBB51_11: # %cond.load7
5142; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
5143; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
5144; RV64ZVE32F-NEXT:    slli a7, a7, 3
5145; RV64ZVE32F-NEXT:    add a7, a1, a7
5146; RV64ZVE32F-NEXT:    ld a7, 0(a7)
5147; RV64ZVE32F-NEXT:    andi t0, a5, 16
5148; RV64ZVE32F-NEXT:    beqz t0, .LBB51_8
5149; RV64ZVE32F-NEXT:  .LBB51_12: # %cond.load10
5150; RV64ZVE32F-NEXT:    vmv.x.s t0, v9
5151; RV64ZVE32F-NEXT:    slli t0, t0, 3
5152; RV64ZVE32F-NEXT:    add t0, a1, t0
5153; RV64ZVE32F-NEXT:    ld t0, 0(t0)
5154; RV64ZVE32F-NEXT:    andi t1, a5, 32
5155; RV64ZVE32F-NEXT:    beqz t1, .LBB51_9
5156; RV64ZVE32F-NEXT:  .LBB51_13: # %cond.load13
5157; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
5158; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
5159; RV64ZVE32F-NEXT:    slli t1, t1, 3
5160; RV64ZVE32F-NEXT:    add t1, a1, t1
5161; RV64ZVE32F-NEXT:    ld t1, 0(t1)
5162; RV64ZVE32F-NEXT:  .LBB51_14: # %else14
5163; RV64ZVE32F-NEXT:    andi t2, a5, 64
5164; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
5165; RV64ZVE32F-NEXT:    beqz t2, .LBB51_17
5166; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
5167; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
5168; RV64ZVE32F-NEXT:    slli t2, t2, 3
5169; RV64ZVE32F-NEXT:    add t2, a1, t2
5170; RV64ZVE32F-NEXT:    ld t2, 0(t2)
5171; RV64ZVE32F-NEXT:    andi a5, a5, -128
5172; RV64ZVE32F-NEXT:    bnez a5, .LBB51_18
5173; RV64ZVE32F-NEXT:  .LBB51_16:
5174; RV64ZVE32F-NEXT:    ld a1, 56(a2)
5175; RV64ZVE32F-NEXT:    j .LBB51_19
5176; RV64ZVE32F-NEXT:  .LBB51_17:
5177; RV64ZVE32F-NEXT:    ld t2, 48(a2)
5178; RV64ZVE32F-NEXT:    andi a5, a5, -128
5179; RV64ZVE32F-NEXT:    beqz a5, .LBB51_16
5180; RV64ZVE32F-NEXT:  .LBB51_18: # %cond.load19
5181; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
5182; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
5183; RV64ZVE32F-NEXT:    slli a2, a2, 3
5184; RV64ZVE32F-NEXT:    add a1, a1, a2
5185; RV64ZVE32F-NEXT:    ld a1, 0(a1)
5186; RV64ZVE32F-NEXT:  .LBB51_19: # %else20
5187; RV64ZVE32F-NEXT:    sd a3, 0(a0)
5188; RV64ZVE32F-NEXT:    sd a4, 8(a0)
5189; RV64ZVE32F-NEXT:    sd a6, 16(a0)
5190; RV64ZVE32F-NEXT:    sd a7, 24(a0)
5191; RV64ZVE32F-NEXT:    sd t0, 32(a0)
5192; RV64ZVE32F-NEXT:    sd t1, 40(a0)
5193; RV64ZVE32F-NEXT:    sd t2, 48(a0)
5194; RV64ZVE32F-NEXT:    sd a1, 56(a0)
5195; RV64ZVE32F-NEXT:    ret
5196  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
5197  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5198  ret <8 x i64> %v
5199}
5200
5201define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5202; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5203; RV32V:       # %bb.0:
5204; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
5205; RV32V-NEXT:    vsext.vf2 v10, v8
5206; RV32V-NEXT:    vsll.vi v8, v10, 3
5207; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
5208; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
5209; RV32V-NEXT:    vmv.v.v v8, v12
5210; RV32V-NEXT:    ret
5211;
5212; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5213; RV64V:       # %bb.0:
5214; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
5215; RV64V-NEXT:    vsext.vf4 v16, v8
5216; RV64V-NEXT:    vsll.vi v8, v16, 3
5217; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
5218; RV64V-NEXT:    vmv.v.v v8, v12
5219; RV64V-NEXT:    ret
5220;
5221; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5222; RV32ZVE32F:       # %bb.0:
5223; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
5224; RV32ZVE32F-NEXT:    vsext.vf2 v10, v8
5225; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
5226; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
5227; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5228; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
5229; RV32ZVE32F-NEXT:    andi a3, t0, 1
5230; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
5231; RV32ZVE32F-NEXT:    beqz a3, .LBB52_7
5232; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
5233; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
5234; RV32ZVE32F-NEXT:    lw a1, 0(a3)
5235; RV32ZVE32F-NEXT:    lw a3, 4(a3)
5236; RV32ZVE32F-NEXT:    andi a4, t0, 2
5237; RV32ZVE32F-NEXT:    bnez a4, .LBB52_8
5238; RV32ZVE32F-NEXT:  .LBB52_2:
5239; RV32ZVE32F-NEXT:    lw a4, 8(a2)
5240; RV32ZVE32F-NEXT:    lw a5, 12(a2)
5241; RV32ZVE32F-NEXT:    andi a6, t0, 4
5242; RV32ZVE32F-NEXT:    bnez a6, .LBB52_9
5243; RV32ZVE32F-NEXT:  .LBB52_3:
5244; RV32ZVE32F-NEXT:    lw a6, 16(a2)
5245; RV32ZVE32F-NEXT:    lw a7, 20(a2)
5246; RV32ZVE32F-NEXT:    andi t1, t0, 8
5247; RV32ZVE32F-NEXT:    bnez t1, .LBB52_10
5248; RV32ZVE32F-NEXT:  .LBB52_4:
5249; RV32ZVE32F-NEXT:    lw t1, 24(a2)
5250; RV32ZVE32F-NEXT:    lw t2, 28(a2)
5251; RV32ZVE32F-NEXT:    andi t3, t0, 16
5252; RV32ZVE32F-NEXT:    bnez t3, .LBB52_11
5253; RV32ZVE32F-NEXT:  .LBB52_5:
5254; RV32ZVE32F-NEXT:    lw t3, 32(a2)
5255; RV32ZVE32F-NEXT:    lw t4, 36(a2)
5256; RV32ZVE32F-NEXT:    andi t5, t0, 32
5257; RV32ZVE32F-NEXT:    bnez t5, .LBB52_12
5258; RV32ZVE32F-NEXT:  .LBB52_6:
5259; RV32ZVE32F-NEXT:    lw t5, 40(a2)
5260; RV32ZVE32F-NEXT:    lw t6, 44(a2)
5261; RV32ZVE32F-NEXT:    j .LBB52_13
5262; RV32ZVE32F-NEXT:  .LBB52_7:
5263; RV32ZVE32F-NEXT:    lw a1, 0(a2)
5264; RV32ZVE32F-NEXT:    lw a3, 4(a2)
5265; RV32ZVE32F-NEXT:    andi a4, t0, 2
5266; RV32ZVE32F-NEXT:    beqz a4, .LBB52_2
5267; RV32ZVE32F-NEXT:  .LBB52_8: # %cond.load1
5268; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5269; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
5270; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
5271; RV32ZVE32F-NEXT:    lw a4, 0(a5)
5272; RV32ZVE32F-NEXT:    lw a5, 4(a5)
5273; RV32ZVE32F-NEXT:    andi a6, t0, 4
5274; RV32ZVE32F-NEXT:    beqz a6, .LBB52_3
5275; RV32ZVE32F-NEXT:  .LBB52_9: # %cond.load4
5276; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5277; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
5278; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
5279; RV32ZVE32F-NEXT:    lw a6, 0(a7)
5280; RV32ZVE32F-NEXT:    lw a7, 4(a7)
5281; RV32ZVE32F-NEXT:    andi t1, t0, 8
5282; RV32ZVE32F-NEXT:    beqz t1, .LBB52_4
5283; RV32ZVE32F-NEXT:  .LBB52_10: # %cond.load7
5284; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5285; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
5286; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
5287; RV32ZVE32F-NEXT:    lw t1, 0(t2)
5288; RV32ZVE32F-NEXT:    lw t2, 4(t2)
5289; RV32ZVE32F-NEXT:    andi t3, t0, 16
5290; RV32ZVE32F-NEXT:    beqz t3, .LBB52_5
5291; RV32ZVE32F-NEXT:  .LBB52_11: # %cond.load10
5292; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5293; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
5294; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
5295; RV32ZVE32F-NEXT:    lw t3, 0(t4)
5296; RV32ZVE32F-NEXT:    lw t4, 4(t4)
5297; RV32ZVE32F-NEXT:    andi t5, t0, 32
5298; RV32ZVE32F-NEXT:    beqz t5, .LBB52_6
5299; RV32ZVE32F-NEXT:  .LBB52_12: # %cond.load13
5300; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5301; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
5302; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
5303; RV32ZVE32F-NEXT:    lw t5, 0(t6)
5304; RV32ZVE32F-NEXT:    lw t6, 4(t6)
5305; RV32ZVE32F-NEXT:  .LBB52_13: # %else14
5306; RV32ZVE32F-NEXT:    addi sp, sp, -16
5307; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
5308; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
5309; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
5310; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
5311; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
5312; RV32ZVE32F-NEXT:    andi s0, t0, 64
5313; RV32ZVE32F-NEXT:    beqz s0, .LBB52_16
5314; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
5315; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5316; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
5317; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
5318; RV32ZVE32F-NEXT:    lw s0, 0(s1)
5319; RV32ZVE32F-NEXT:    lw s1, 4(s1)
5320; RV32ZVE32F-NEXT:    andi t0, t0, -128
5321; RV32ZVE32F-NEXT:    bnez t0, .LBB52_17
5322; RV32ZVE32F-NEXT:  .LBB52_15:
5323; RV32ZVE32F-NEXT:    lw t0, 56(a2)
5324; RV32ZVE32F-NEXT:    lw a2, 60(a2)
5325; RV32ZVE32F-NEXT:    j .LBB52_18
5326; RV32ZVE32F-NEXT:  .LBB52_16:
5327; RV32ZVE32F-NEXT:    lw s0, 48(a2)
5328; RV32ZVE32F-NEXT:    lw s1, 52(a2)
5329; RV32ZVE32F-NEXT:    andi t0, t0, -128
5330; RV32ZVE32F-NEXT:    beqz t0, .LBB52_15
5331; RV32ZVE32F-NEXT:  .LBB52_17: # %cond.load19
5332; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5333; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
5334; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
5335; RV32ZVE32F-NEXT:    lw t0, 0(a2)
5336; RV32ZVE32F-NEXT:    lw a2, 4(a2)
5337; RV32ZVE32F-NEXT:  .LBB52_18: # %else20
5338; RV32ZVE32F-NEXT:    sw a1, 0(a0)
5339; RV32ZVE32F-NEXT:    sw a3, 4(a0)
5340; RV32ZVE32F-NEXT:    sw a4, 8(a0)
5341; RV32ZVE32F-NEXT:    sw a5, 12(a0)
5342; RV32ZVE32F-NEXT:    sw a6, 16(a0)
5343; RV32ZVE32F-NEXT:    sw a7, 20(a0)
5344; RV32ZVE32F-NEXT:    sw t1, 24(a0)
5345; RV32ZVE32F-NEXT:    sw t2, 28(a0)
5346; RV32ZVE32F-NEXT:    sw t3, 32(a0)
5347; RV32ZVE32F-NEXT:    sw t4, 36(a0)
5348; RV32ZVE32F-NEXT:    sw t5, 40(a0)
5349; RV32ZVE32F-NEXT:    sw t6, 44(a0)
5350; RV32ZVE32F-NEXT:    sw s0, 48(a0)
5351; RV32ZVE32F-NEXT:    sw s1, 52(a0)
5352; RV32ZVE32F-NEXT:    sw t0, 56(a0)
5353; RV32ZVE32F-NEXT:    sw a2, 60(a0)
5354; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
5355; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
5356; RV32ZVE32F-NEXT:    .cfi_restore s0
5357; RV32ZVE32F-NEXT:    .cfi_restore s1
5358; RV32ZVE32F-NEXT:    addi sp, sp, 16
5359; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
5360; RV32ZVE32F-NEXT:    ret
5361;
5362; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5363; RV64ZVE32F:       # %bb.0:
5364; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
5365; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
5366; RV64ZVE32F-NEXT:    andi a3, a5, 1
5367; RV64ZVE32F-NEXT:    beqz a3, .LBB52_3
5368; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
5369; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
5370; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
5371; RV64ZVE32F-NEXT:    slli a3, a3, 3
5372; RV64ZVE32F-NEXT:    add a3, a1, a3
5373; RV64ZVE32F-NEXT:    ld a3, 0(a3)
5374; RV64ZVE32F-NEXT:    andi a4, a5, 2
5375; RV64ZVE32F-NEXT:    bnez a4, .LBB52_4
5376; RV64ZVE32F-NEXT:  .LBB52_2:
5377; RV64ZVE32F-NEXT:    ld a4, 8(a2)
5378; RV64ZVE32F-NEXT:    j .LBB52_5
5379; RV64ZVE32F-NEXT:  .LBB52_3:
5380; RV64ZVE32F-NEXT:    ld a3, 0(a2)
5381; RV64ZVE32F-NEXT:    andi a4, a5, 2
5382; RV64ZVE32F-NEXT:    beqz a4, .LBB52_2
5383; RV64ZVE32F-NEXT:  .LBB52_4: # %cond.load1
5384; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
5385; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
5386; RV64ZVE32F-NEXT:    vmv.x.s a4, v9
5387; RV64ZVE32F-NEXT:    slli a4, a4, 3
5388; RV64ZVE32F-NEXT:    add a4, a1, a4
5389; RV64ZVE32F-NEXT:    ld a4, 0(a4)
5390; RV64ZVE32F-NEXT:  .LBB52_5: # %else2
5391; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
5392; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
5393; RV64ZVE32F-NEXT:    andi a6, a5, 4
5394; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
5395; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
5396; RV64ZVE32F-NEXT:    beqz a6, .LBB52_10
5397; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
5398; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
5399; RV64ZVE32F-NEXT:    slli a6, a6, 3
5400; RV64ZVE32F-NEXT:    add a6, a1, a6
5401; RV64ZVE32F-NEXT:    ld a6, 0(a6)
5402; RV64ZVE32F-NEXT:    andi a7, a5, 8
5403; RV64ZVE32F-NEXT:    bnez a7, .LBB52_11
5404; RV64ZVE32F-NEXT:  .LBB52_7:
5405; RV64ZVE32F-NEXT:    ld a7, 24(a2)
5406; RV64ZVE32F-NEXT:    andi t0, a5, 16
5407; RV64ZVE32F-NEXT:    bnez t0, .LBB52_12
5408; RV64ZVE32F-NEXT:  .LBB52_8:
5409; RV64ZVE32F-NEXT:    ld t0, 32(a2)
5410; RV64ZVE32F-NEXT:    andi t1, a5, 32
5411; RV64ZVE32F-NEXT:    bnez t1, .LBB52_13
5412; RV64ZVE32F-NEXT:  .LBB52_9:
5413; RV64ZVE32F-NEXT:    ld t1, 40(a2)
5414; RV64ZVE32F-NEXT:    j .LBB52_14
5415; RV64ZVE32F-NEXT:  .LBB52_10:
5416; RV64ZVE32F-NEXT:    ld a6, 16(a2)
5417; RV64ZVE32F-NEXT:    andi a7, a5, 8
5418; RV64ZVE32F-NEXT:    beqz a7, .LBB52_7
5419; RV64ZVE32F-NEXT:  .LBB52_11: # %cond.load7
5420; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
5421; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
5422; RV64ZVE32F-NEXT:    slli a7, a7, 3
5423; RV64ZVE32F-NEXT:    add a7, a1, a7
5424; RV64ZVE32F-NEXT:    ld a7, 0(a7)
5425; RV64ZVE32F-NEXT:    andi t0, a5, 16
5426; RV64ZVE32F-NEXT:    beqz t0, .LBB52_8
5427; RV64ZVE32F-NEXT:  .LBB52_12: # %cond.load10
5428; RV64ZVE32F-NEXT:    vmv.x.s t0, v9
5429; RV64ZVE32F-NEXT:    slli t0, t0, 3
5430; RV64ZVE32F-NEXT:    add t0, a1, t0
5431; RV64ZVE32F-NEXT:    ld t0, 0(t0)
5432; RV64ZVE32F-NEXT:    andi t1, a5, 32
5433; RV64ZVE32F-NEXT:    beqz t1, .LBB52_9
5434; RV64ZVE32F-NEXT:  .LBB52_13: # %cond.load13
5435; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
5436; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
5437; RV64ZVE32F-NEXT:    slli t1, t1, 3
5438; RV64ZVE32F-NEXT:    add t1, a1, t1
5439; RV64ZVE32F-NEXT:    ld t1, 0(t1)
5440; RV64ZVE32F-NEXT:  .LBB52_14: # %else14
5441; RV64ZVE32F-NEXT:    andi t2, a5, 64
5442; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
5443; RV64ZVE32F-NEXT:    beqz t2, .LBB52_17
5444; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
5445; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
5446; RV64ZVE32F-NEXT:    slli t2, t2, 3
5447; RV64ZVE32F-NEXT:    add t2, a1, t2
5448; RV64ZVE32F-NEXT:    ld t2, 0(t2)
5449; RV64ZVE32F-NEXT:    andi a5, a5, -128
5450; RV64ZVE32F-NEXT:    bnez a5, .LBB52_18
5451; RV64ZVE32F-NEXT:  .LBB52_16:
5452; RV64ZVE32F-NEXT:    ld a1, 56(a2)
5453; RV64ZVE32F-NEXT:    j .LBB52_19
5454; RV64ZVE32F-NEXT:  .LBB52_17:
5455; RV64ZVE32F-NEXT:    ld t2, 48(a2)
5456; RV64ZVE32F-NEXT:    andi a5, a5, -128
5457; RV64ZVE32F-NEXT:    beqz a5, .LBB52_16
5458; RV64ZVE32F-NEXT:  .LBB52_18: # %cond.load19
5459; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
5460; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
5461; RV64ZVE32F-NEXT:    slli a2, a2, 3
5462; RV64ZVE32F-NEXT:    add a1, a1, a2
5463; RV64ZVE32F-NEXT:    ld a1, 0(a1)
5464; RV64ZVE32F-NEXT:  .LBB52_19: # %else20
5465; RV64ZVE32F-NEXT:    sd a3, 0(a0)
5466; RV64ZVE32F-NEXT:    sd a4, 8(a0)
5467; RV64ZVE32F-NEXT:    sd a6, 16(a0)
5468; RV64ZVE32F-NEXT:    sd a7, 24(a0)
5469; RV64ZVE32F-NEXT:    sd t0, 32(a0)
5470; RV64ZVE32F-NEXT:    sd t1, 40(a0)
5471; RV64ZVE32F-NEXT:    sd t2, 48(a0)
5472; RV64ZVE32F-NEXT:    sd a1, 56(a0)
5473; RV64ZVE32F-NEXT:    ret
5474  %eidxs = sext <8 x i16> %idxs to <8 x i64>
5475  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5476  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5477  ret <8 x i64> %v
5478}
5479
5480define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5481; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5482; RV32V:       # %bb.0:
5483; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
5484; RV32V-NEXT:    vzext.vf2 v10, v8
5485; RV32V-NEXT:    vsll.vi v8, v10, 3
5486; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
5487; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
5488; RV32V-NEXT:    vmv.v.v v8, v12
5489; RV32V-NEXT:    ret
5490;
5491; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5492; RV64V:       # %bb.0:
5493; RV64V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
5494; RV64V-NEXT:    vzext.vf2 v10, v8
5495; RV64V-NEXT:    vsll.vi v8, v10, 3
5496; RV64V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
5497; RV64V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
5498; RV64V-NEXT:    vmv.v.v v8, v12
5499; RV64V-NEXT:    ret
5500;
5501; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5502; RV32ZVE32F:       # %bb.0:
5503; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
5504; RV32ZVE32F-NEXT:    vzext.vf2 v10, v8
5505; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
5506; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
5507; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5508; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
5509; RV32ZVE32F-NEXT:    andi a3, t0, 1
5510; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
5511; RV32ZVE32F-NEXT:    beqz a3, .LBB53_7
5512; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
5513; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
5514; RV32ZVE32F-NEXT:    lw a1, 0(a3)
5515; RV32ZVE32F-NEXT:    lw a3, 4(a3)
5516; RV32ZVE32F-NEXT:    andi a4, t0, 2
5517; RV32ZVE32F-NEXT:    bnez a4, .LBB53_8
5518; RV32ZVE32F-NEXT:  .LBB53_2:
5519; RV32ZVE32F-NEXT:    lw a4, 8(a2)
5520; RV32ZVE32F-NEXT:    lw a5, 12(a2)
5521; RV32ZVE32F-NEXT:    andi a6, t0, 4
5522; RV32ZVE32F-NEXT:    bnez a6, .LBB53_9
5523; RV32ZVE32F-NEXT:  .LBB53_3:
5524; RV32ZVE32F-NEXT:    lw a6, 16(a2)
5525; RV32ZVE32F-NEXT:    lw a7, 20(a2)
5526; RV32ZVE32F-NEXT:    andi t1, t0, 8
5527; RV32ZVE32F-NEXT:    bnez t1, .LBB53_10
5528; RV32ZVE32F-NEXT:  .LBB53_4:
5529; RV32ZVE32F-NEXT:    lw t1, 24(a2)
5530; RV32ZVE32F-NEXT:    lw t2, 28(a2)
5531; RV32ZVE32F-NEXT:    andi t3, t0, 16
5532; RV32ZVE32F-NEXT:    bnez t3, .LBB53_11
5533; RV32ZVE32F-NEXT:  .LBB53_5:
5534; RV32ZVE32F-NEXT:    lw t3, 32(a2)
5535; RV32ZVE32F-NEXT:    lw t4, 36(a2)
5536; RV32ZVE32F-NEXT:    andi t5, t0, 32
5537; RV32ZVE32F-NEXT:    bnez t5, .LBB53_12
5538; RV32ZVE32F-NEXT:  .LBB53_6:
5539; RV32ZVE32F-NEXT:    lw t5, 40(a2)
5540; RV32ZVE32F-NEXT:    lw t6, 44(a2)
5541; RV32ZVE32F-NEXT:    j .LBB53_13
5542; RV32ZVE32F-NEXT:  .LBB53_7:
5543; RV32ZVE32F-NEXT:    lw a1, 0(a2)
5544; RV32ZVE32F-NEXT:    lw a3, 4(a2)
5545; RV32ZVE32F-NEXT:    andi a4, t0, 2
5546; RV32ZVE32F-NEXT:    beqz a4, .LBB53_2
5547; RV32ZVE32F-NEXT:  .LBB53_8: # %cond.load1
5548; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5549; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
5550; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
5551; RV32ZVE32F-NEXT:    lw a4, 0(a5)
5552; RV32ZVE32F-NEXT:    lw a5, 4(a5)
5553; RV32ZVE32F-NEXT:    andi a6, t0, 4
5554; RV32ZVE32F-NEXT:    beqz a6, .LBB53_3
5555; RV32ZVE32F-NEXT:  .LBB53_9: # %cond.load4
5556; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5557; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
5558; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
5559; RV32ZVE32F-NEXT:    lw a6, 0(a7)
5560; RV32ZVE32F-NEXT:    lw a7, 4(a7)
5561; RV32ZVE32F-NEXT:    andi t1, t0, 8
5562; RV32ZVE32F-NEXT:    beqz t1, .LBB53_4
5563; RV32ZVE32F-NEXT:  .LBB53_10: # %cond.load7
5564; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5565; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
5566; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
5567; RV32ZVE32F-NEXT:    lw t1, 0(t2)
5568; RV32ZVE32F-NEXT:    lw t2, 4(t2)
5569; RV32ZVE32F-NEXT:    andi t3, t0, 16
5570; RV32ZVE32F-NEXT:    beqz t3, .LBB53_5
5571; RV32ZVE32F-NEXT:  .LBB53_11: # %cond.load10
5572; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5573; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
5574; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
5575; RV32ZVE32F-NEXT:    lw t3, 0(t4)
5576; RV32ZVE32F-NEXT:    lw t4, 4(t4)
5577; RV32ZVE32F-NEXT:    andi t5, t0, 32
5578; RV32ZVE32F-NEXT:    beqz t5, .LBB53_6
5579; RV32ZVE32F-NEXT:  .LBB53_12: # %cond.load13
5580; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5581; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
5582; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
5583; RV32ZVE32F-NEXT:    lw t5, 0(t6)
5584; RV32ZVE32F-NEXT:    lw t6, 4(t6)
5585; RV32ZVE32F-NEXT:  .LBB53_13: # %else14
5586; RV32ZVE32F-NEXT:    addi sp, sp, -16
5587; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
5588; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
5589; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
5590; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
5591; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
5592; RV32ZVE32F-NEXT:    andi s0, t0, 64
5593; RV32ZVE32F-NEXT:    beqz s0, .LBB53_16
5594; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
5595; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5596; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
5597; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
5598; RV32ZVE32F-NEXT:    lw s0, 0(s1)
5599; RV32ZVE32F-NEXT:    lw s1, 4(s1)
5600; RV32ZVE32F-NEXT:    andi t0, t0, -128
5601; RV32ZVE32F-NEXT:    bnez t0, .LBB53_17
5602; RV32ZVE32F-NEXT:  .LBB53_15:
5603; RV32ZVE32F-NEXT:    lw t0, 56(a2)
5604; RV32ZVE32F-NEXT:    lw a2, 60(a2)
5605; RV32ZVE32F-NEXT:    j .LBB53_18
5606; RV32ZVE32F-NEXT:  .LBB53_16:
5607; RV32ZVE32F-NEXT:    lw s0, 48(a2)
5608; RV32ZVE32F-NEXT:    lw s1, 52(a2)
5609; RV32ZVE32F-NEXT:    andi t0, t0, -128
5610; RV32ZVE32F-NEXT:    beqz t0, .LBB53_15
5611; RV32ZVE32F-NEXT:  .LBB53_17: # %cond.load19
5612; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5613; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
5614; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
5615; RV32ZVE32F-NEXT:    lw t0, 0(a2)
5616; RV32ZVE32F-NEXT:    lw a2, 4(a2)
5617; RV32ZVE32F-NEXT:  .LBB53_18: # %else20
5618; RV32ZVE32F-NEXT:    sw a1, 0(a0)
5619; RV32ZVE32F-NEXT:    sw a3, 4(a0)
5620; RV32ZVE32F-NEXT:    sw a4, 8(a0)
5621; RV32ZVE32F-NEXT:    sw a5, 12(a0)
5622; RV32ZVE32F-NEXT:    sw a6, 16(a0)
5623; RV32ZVE32F-NEXT:    sw a7, 20(a0)
5624; RV32ZVE32F-NEXT:    sw t1, 24(a0)
5625; RV32ZVE32F-NEXT:    sw t2, 28(a0)
5626; RV32ZVE32F-NEXT:    sw t3, 32(a0)
5627; RV32ZVE32F-NEXT:    sw t4, 36(a0)
5628; RV32ZVE32F-NEXT:    sw t5, 40(a0)
5629; RV32ZVE32F-NEXT:    sw t6, 44(a0)
5630; RV32ZVE32F-NEXT:    sw s0, 48(a0)
5631; RV32ZVE32F-NEXT:    sw s1, 52(a0)
5632; RV32ZVE32F-NEXT:    sw t0, 56(a0)
5633; RV32ZVE32F-NEXT:    sw a2, 60(a0)
5634; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
5635; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
5636; RV32ZVE32F-NEXT:    .cfi_restore s0
5637; RV32ZVE32F-NEXT:    .cfi_restore s1
5638; RV32ZVE32F-NEXT:    addi sp, sp, 16
5639; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
5640; RV32ZVE32F-NEXT:    ret
5641;
5642; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5643; RV64ZVE32F:       # %bb.0:
5644; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
5645; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
5646; RV64ZVE32F-NEXT:    andi a3, a5, 1
5647; RV64ZVE32F-NEXT:    beqz a3, .LBB53_3
5648; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
5649; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
5650; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
5651; RV64ZVE32F-NEXT:    slli a3, a3, 48
5652; RV64ZVE32F-NEXT:    srli a3, a3, 45
5653; RV64ZVE32F-NEXT:    add a3, a1, a3
5654; RV64ZVE32F-NEXT:    ld a3, 0(a3)
5655; RV64ZVE32F-NEXT:    andi a4, a5, 2
5656; RV64ZVE32F-NEXT:    bnez a4, .LBB53_4
5657; RV64ZVE32F-NEXT:  .LBB53_2:
5658; RV64ZVE32F-NEXT:    ld a4, 8(a2)
5659; RV64ZVE32F-NEXT:    j .LBB53_5
5660; RV64ZVE32F-NEXT:  .LBB53_3:
5661; RV64ZVE32F-NEXT:    ld a3, 0(a2)
5662; RV64ZVE32F-NEXT:    andi a4, a5, 2
5663; RV64ZVE32F-NEXT:    beqz a4, .LBB53_2
5664; RV64ZVE32F-NEXT:  .LBB53_4: # %cond.load1
5665; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
5666; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
5667; RV64ZVE32F-NEXT:    vmv.x.s a4, v9
5668; RV64ZVE32F-NEXT:    slli a4, a4, 48
5669; RV64ZVE32F-NEXT:    srli a4, a4, 45
5670; RV64ZVE32F-NEXT:    add a4, a1, a4
5671; RV64ZVE32F-NEXT:    ld a4, 0(a4)
5672; RV64ZVE32F-NEXT:  .LBB53_5: # %else2
5673; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
5674; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
5675; RV64ZVE32F-NEXT:    andi a6, a5, 4
5676; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
5677; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
5678; RV64ZVE32F-NEXT:    beqz a6, .LBB53_10
5679; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
5680; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
5681; RV64ZVE32F-NEXT:    slli a6, a6, 48
5682; RV64ZVE32F-NEXT:    srli a6, a6, 45
5683; RV64ZVE32F-NEXT:    add a6, a1, a6
5684; RV64ZVE32F-NEXT:    ld a6, 0(a6)
5685; RV64ZVE32F-NEXT:    andi a7, a5, 8
5686; RV64ZVE32F-NEXT:    bnez a7, .LBB53_11
5687; RV64ZVE32F-NEXT:  .LBB53_7:
5688; RV64ZVE32F-NEXT:    ld a7, 24(a2)
5689; RV64ZVE32F-NEXT:    andi t0, a5, 16
5690; RV64ZVE32F-NEXT:    bnez t0, .LBB53_12
5691; RV64ZVE32F-NEXT:  .LBB53_8:
5692; RV64ZVE32F-NEXT:    ld t0, 32(a2)
5693; RV64ZVE32F-NEXT:    andi t1, a5, 32
5694; RV64ZVE32F-NEXT:    bnez t1, .LBB53_13
5695; RV64ZVE32F-NEXT:  .LBB53_9:
5696; RV64ZVE32F-NEXT:    ld t1, 40(a2)
5697; RV64ZVE32F-NEXT:    j .LBB53_14
5698; RV64ZVE32F-NEXT:  .LBB53_10:
5699; RV64ZVE32F-NEXT:    ld a6, 16(a2)
5700; RV64ZVE32F-NEXT:    andi a7, a5, 8
5701; RV64ZVE32F-NEXT:    beqz a7, .LBB53_7
5702; RV64ZVE32F-NEXT:  .LBB53_11: # %cond.load7
5703; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
5704; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
5705; RV64ZVE32F-NEXT:    slli a7, a7, 48
5706; RV64ZVE32F-NEXT:    srli a7, a7, 45
5707; RV64ZVE32F-NEXT:    add a7, a1, a7
5708; RV64ZVE32F-NEXT:    ld a7, 0(a7)
5709; RV64ZVE32F-NEXT:    andi t0, a5, 16
5710; RV64ZVE32F-NEXT:    beqz t0, .LBB53_8
5711; RV64ZVE32F-NEXT:  .LBB53_12: # %cond.load10
5712; RV64ZVE32F-NEXT:    vmv.x.s t0, v9
5713; RV64ZVE32F-NEXT:    slli t0, t0, 48
5714; RV64ZVE32F-NEXT:    srli t0, t0, 45
5715; RV64ZVE32F-NEXT:    add t0, a1, t0
5716; RV64ZVE32F-NEXT:    ld t0, 0(t0)
5717; RV64ZVE32F-NEXT:    andi t1, a5, 32
5718; RV64ZVE32F-NEXT:    beqz t1, .LBB53_9
5719; RV64ZVE32F-NEXT:  .LBB53_13: # %cond.load13
5720; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
5721; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
5722; RV64ZVE32F-NEXT:    slli t1, t1, 48
5723; RV64ZVE32F-NEXT:    srli t1, t1, 45
5724; RV64ZVE32F-NEXT:    add t1, a1, t1
5725; RV64ZVE32F-NEXT:    ld t1, 0(t1)
5726; RV64ZVE32F-NEXT:  .LBB53_14: # %else14
5727; RV64ZVE32F-NEXT:    andi t2, a5, 64
5728; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
5729; RV64ZVE32F-NEXT:    beqz t2, .LBB53_17
5730; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
5731; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
5732; RV64ZVE32F-NEXT:    slli t2, t2, 48
5733; RV64ZVE32F-NEXT:    srli t2, t2, 45
5734; RV64ZVE32F-NEXT:    add t2, a1, t2
5735; RV64ZVE32F-NEXT:    ld t2, 0(t2)
5736; RV64ZVE32F-NEXT:    andi a5, a5, -128
5737; RV64ZVE32F-NEXT:    bnez a5, .LBB53_18
5738; RV64ZVE32F-NEXT:  .LBB53_16:
5739; RV64ZVE32F-NEXT:    ld a1, 56(a2)
5740; RV64ZVE32F-NEXT:    j .LBB53_19
5741; RV64ZVE32F-NEXT:  .LBB53_17:
5742; RV64ZVE32F-NEXT:    ld t2, 48(a2)
5743; RV64ZVE32F-NEXT:    andi a5, a5, -128
5744; RV64ZVE32F-NEXT:    beqz a5, .LBB53_16
5745; RV64ZVE32F-NEXT:  .LBB53_18: # %cond.load19
5746; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
5747; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
5748; RV64ZVE32F-NEXT:    slli a2, a2, 48
5749; RV64ZVE32F-NEXT:    srli a2, a2, 45
5750; RV64ZVE32F-NEXT:    add a1, a1, a2
5751; RV64ZVE32F-NEXT:    ld a1, 0(a1)
5752; RV64ZVE32F-NEXT:  .LBB53_19: # %else20
5753; RV64ZVE32F-NEXT:    sd a3, 0(a0)
5754; RV64ZVE32F-NEXT:    sd a4, 8(a0)
5755; RV64ZVE32F-NEXT:    sd a6, 16(a0)
5756; RV64ZVE32F-NEXT:    sd a7, 24(a0)
5757; RV64ZVE32F-NEXT:    sd t0, 32(a0)
5758; RV64ZVE32F-NEXT:    sd t1, 40(a0)
5759; RV64ZVE32F-NEXT:    sd t2, 48(a0)
5760; RV64ZVE32F-NEXT:    sd a1, 56(a0)
5761; RV64ZVE32F-NEXT:    ret
5762  %eidxs = zext <8 x i16> %idxs to <8 x i64>
5763  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5764  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5765  ret <8 x i64> %v
5766}
5767
5768define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5769; RV32V-LABEL: mgather_baseidx_v8i32_v8i64:
5770; RV32V:       # %bb.0:
5771; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
5772; RV32V-NEXT:    vsll.vi v8, v8, 3
5773; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
5774; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
5775; RV32V-NEXT:    vmv.v.v v8, v12
5776; RV32V-NEXT:    ret
5777;
5778; RV64V-LABEL: mgather_baseidx_v8i32_v8i64:
5779; RV64V:       # %bb.0:
5780; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
5781; RV64V-NEXT:    vsext.vf2 v16, v8
5782; RV64V-NEXT:    vsll.vi v8, v16, 3
5783; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
5784; RV64V-NEXT:    vmv.v.v v8, v12
5785; RV64V-NEXT:    ret
5786;
5787; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
5788; RV32ZVE32F:       # %bb.0:
5789; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
5790; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
5791; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
5792; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
5793; RV32ZVE32F-NEXT:    andi a3, t0, 1
5794; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
5795; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
5796; RV32ZVE32F-NEXT:    beqz a3, .LBB54_7
5797; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
5798; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
5799; RV32ZVE32F-NEXT:    lw a1, 0(a3)
5800; RV32ZVE32F-NEXT:    lw a3, 4(a3)
5801; RV32ZVE32F-NEXT:    andi a4, t0, 2
5802; RV32ZVE32F-NEXT:    bnez a4, .LBB54_8
5803; RV32ZVE32F-NEXT:  .LBB54_2:
5804; RV32ZVE32F-NEXT:    lw a4, 8(a2)
5805; RV32ZVE32F-NEXT:    lw a5, 12(a2)
5806; RV32ZVE32F-NEXT:    andi a6, t0, 4
5807; RV32ZVE32F-NEXT:    bnez a6, .LBB54_9
5808; RV32ZVE32F-NEXT:  .LBB54_3:
5809; RV32ZVE32F-NEXT:    lw a6, 16(a2)
5810; RV32ZVE32F-NEXT:    lw a7, 20(a2)
5811; RV32ZVE32F-NEXT:    andi t1, t0, 8
5812; RV32ZVE32F-NEXT:    bnez t1, .LBB54_10
5813; RV32ZVE32F-NEXT:  .LBB54_4:
5814; RV32ZVE32F-NEXT:    lw t1, 24(a2)
5815; RV32ZVE32F-NEXT:    lw t2, 28(a2)
5816; RV32ZVE32F-NEXT:    andi t3, t0, 16
5817; RV32ZVE32F-NEXT:    bnez t3, .LBB54_11
5818; RV32ZVE32F-NEXT:  .LBB54_5:
5819; RV32ZVE32F-NEXT:    lw t3, 32(a2)
5820; RV32ZVE32F-NEXT:    lw t4, 36(a2)
5821; RV32ZVE32F-NEXT:    andi t5, t0, 32
5822; RV32ZVE32F-NEXT:    bnez t5, .LBB54_12
5823; RV32ZVE32F-NEXT:  .LBB54_6:
5824; RV32ZVE32F-NEXT:    lw t5, 40(a2)
5825; RV32ZVE32F-NEXT:    lw t6, 44(a2)
5826; RV32ZVE32F-NEXT:    j .LBB54_13
5827; RV32ZVE32F-NEXT:  .LBB54_7:
5828; RV32ZVE32F-NEXT:    lw a1, 0(a2)
5829; RV32ZVE32F-NEXT:    lw a3, 4(a2)
5830; RV32ZVE32F-NEXT:    andi a4, t0, 2
5831; RV32ZVE32F-NEXT:    beqz a4, .LBB54_2
5832; RV32ZVE32F-NEXT:  .LBB54_8: # %cond.load1
5833; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5834; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
5835; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
5836; RV32ZVE32F-NEXT:    lw a4, 0(a5)
5837; RV32ZVE32F-NEXT:    lw a5, 4(a5)
5838; RV32ZVE32F-NEXT:    andi a6, t0, 4
5839; RV32ZVE32F-NEXT:    beqz a6, .LBB54_3
5840; RV32ZVE32F-NEXT:  .LBB54_9: # %cond.load4
5841; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5842; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
5843; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
5844; RV32ZVE32F-NEXT:    lw a6, 0(a7)
5845; RV32ZVE32F-NEXT:    lw a7, 4(a7)
5846; RV32ZVE32F-NEXT:    andi t1, t0, 8
5847; RV32ZVE32F-NEXT:    beqz t1, .LBB54_4
5848; RV32ZVE32F-NEXT:  .LBB54_10: # %cond.load7
5849; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5850; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
5851; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
5852; RV32ZVE32F-NEXT:    lw t1, 0(t2)
5853; RV32ZVE32F-NEXT:    lw t2, 4(t2)
5854; RV32ZVE32F-NEXT:    andi t3, t0, 16
5855; RV32ZVE32F-NEXT:    beqz t3, .LBB54_5
5856; RV32ZVE32F-NEXT:  .LBB54_11: # %cond.load10
5857; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5858; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
5859; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
5860; RV32ZVE32F-NEXT:    lw t3, 0(t4)
5861; RV32ZVE32F-NEXT:    lw t4, 4(t4)
5862; RV32ZVE32F-NEXT:    andi t5, t0, 32
5863; RV32ZVE32F-NEXT:    beqz t5, .LBB54_6
5864; RV32ZVE32F-NEXT:  .LBB54_12: # %cond.load13
5865; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5866; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
5867; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
5868; RV32ZVE32F-NEXT:    lw t5, 0(t6)
5869; RV32ZVE32F-NEXT:    lw t6, 4(t6)
5870; RV32ZVE32F-NEXT:  .LBB54_13: # %else14
5871; RV32ZVE32F-NEXT:    addi sp, sp, -16
5872; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
5873; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
5874; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
5875; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
5876; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
5877; RV32ZVE32F-NEXT:    andi s0, t0, 64
5878; RV32ZVE32F-NEXT:    beqz s0, .LBB54_16
5879; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
5880; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5881; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
5882; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
5883; RV32ZVE32F-NEXT:    lw s0, 0(s1)
5884; RV32ZVE32F-NEXT:    lw s1, 4(s1)
5885; RV32ZVE32F-NEXT:    andi t0, t0, -128
5886; RV32ZVE32F-NEXT:    bnez t0, .LBB54_17
5887; RV32ZVE32F-NEXT:  .LBB54_15:
5888; RV32ZVE32F-NEXT:    lw t0, 56(a2)
5889; RV32ZVE32F-NEXT:    lw a2, 60(a2)
5890; RV32ZVE32F-NEXT:    j .LBB54_18
5891; RV32ZVE32F-NEXT:  .LBB54_16:
5892; RV32ZVE32F-NEXT:    lw s0, 48(a2)
5893; RV32ZVE32F-NEXT:    lw s1, 52(a2)
5894; RV32ZVE32F-NEXT:    andi t0, t0, -128
5895; RV32ZVE32F-NEXT:    beqz t0, .LBB54_15
5896; RV32ZVE32F-NEXT:  .LBB54_17: # %cond.load19
5897; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
5898; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
5899; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
5900; RV32ZVE32F-NEXT:    lw t0, 0(a2)
5901; RV32ZVE32F-NEXT:    lw a2, 4(a2)
5902; RV32ZVE32F-NEXT:  .LBB54_18: # %else20
5903; RV32ZVE32F-NEXT:    sw a1, 0(a0)
5904; RV32ZVE32F-NEXT:    sw a3, 4(a0)
5905; RV32ZVE32F-NEXT:    sw a4, 8(a0)
5906; RV32ZVE32F-NEXT:    sw a5, 12(a0)
5907; RV32ZVE32F-NEXT:    sw a6, 16(a0)
5908; RV32ZVE32F-NEXT:    sw a7, 20(a0)
5909; RV32ZVE32F-NEXT:    sw t1, 24(a0)
5910; RV32ZVE32F-NEXT:    sw t2, 28(a0)
5911; RV32ZVE32F-NEXT:    sw t3, 32(a0)
5912; RV32ZVE32F-NEXT:    sw t4, 36(a0)
5913; RV32ZVE32F-NEXT:    sw t5, 40(a0)
5914; RV32ZVE32F-NEXT:    sw t6, 44(a0)
5915; RV32ZVE32F-NEXT:    sw s0, 48(a0)
5916; RV32ZVE32F-NEXT:    sw s1, 52(a0)
5917; RV32ZVE32F-NEXT:    sw t0, 56(a0)
5918; RV32ZVE32F-NEXT:    sw a2, 60(a0)
5919; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
5920; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
5921; RV32ZVE32F-NEXT:    .cfi_restore s0
5922; RV32ZVE32F-NEXT:    .cfi_restore s1
5923; RV32ZVE32F-NEXT:    addi sp, sp, 16
5924; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
5925; RV32ZVE32F-NEXT:    ret
5926;
5927; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
5928; RV64ZVE32F:       # %bb.0:
5929; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
5930; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
5931; RV64ZVE32F-NEXT:    andi a3, a5, 1
5932; RV64ZVE32F-NEXT:    beqz a3, .LBB54_3
5933; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
5934; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
5935; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
5936; RV64ZVE32F-NEXT:    slli a3, a3, 3
5937; RV64ZVE32F-NEXT:    add a3, a1, a3
5938; RV64ZVE32F-NEXT:    ld a3, 0(a3)
5939; RV64ZVE32F-NEXT:    andi a4, a5, 2
5940; RV64ZVE32F-NEXT:    bnez a4, .LBB54_4
5941; RV64ZVE32F-NEXT:  .LBB54_2:
5942; RV64ZVE32F-NEXT:    ld a4, 8(a2)
5943; RV64ZVE32F-NEXT:    j .LBB54_5
5944; RV64ZVE32F-NEXT:  .LBB54_3:
5945; RV64ZVE32F-NEXT:    ld a3, 0(a2)
5946; RV64ZVE32F-NEXT:    andi a4, a5, 2
5947; RV64ZVE32F-NEXT:    beqz a4, .LBB54_2
5948; RV64ZVE32F-NEXT:  .LBB54_4: # %cond.load1
5949; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
5950; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
5951; RV64ZVE32F-NEXT:    vmv.x.s a4, v10
5952; RV64ZVE32F-NEXT:    slli a4, a4, 3
5953; RV64ZVE32F-NEXT:    add a4, a1, a4
5954; RV64ZVE32F-NEXT:    ld a4, 0(a4)
5955; RV64ZVE32F-NEXT:  .LBB54_5: # %else2
5956; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
5957; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
5958; RV64ZVE32F-NEXT:    andi a6, a5, 4
5959; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
5960; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
5961; RV64ZVE32F-NEXT:    beqz a6, .LBB54_10
5962; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
5963; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
5964; RV64ZVE32F-NEXT:    slli a6, a6, 3
5965; RV64ZVE32F-NEXT:    add a6, a1, a6
5966; RV64ZVE32F-NEXT:    ld a6, 0(a6)
5967; RV64ZVE32F-NEXT:    andi a7, a5, 8
5968; RV64ZVE32F-NEXT:    bnez a7, .LBB54_11
5969; RV64ZVE32F-NEXT:  .LBB54_7:
5970; RV64ZVE32F-NEXT:    ld a7, 24(a2)
5971; RV64ZVE32F-NEXT:    andi t0, a5, 16
5972; RV64ZVE32F-NEXT:    bnez t0, .LBB54_12
5973; RV64ZVE32F-NEXT:  .LBB54_8:
5974; RV64ZVE32F-NEXT:    ld t0, 32(a2)
5975; RV64ZVE32F-NEXT:    andi t1, a5, 32
5976; RV64ZVE32F-NEXT:    bnez t1, .LBB54_13
5977; RV64ZVE32F-NEXT:  .LBB54_9:
5978; RV64ZVE32F-NEXT:    ld t1, 40(a2)
5979; RV64ZVE32F-NEXT:    j .LBB54_14
5980; RV64ZVE32F-NEXT:  .LBB54_10:
5981; RV64ZVE32F-NEXT:    ld a6, 16(a2)
5982; RV64ZVE32F-NEXT:    andi a7, a5, 8
5983; RV64ZVE32F-NEXT:    beqz a7, .LBB54_7
5984; RV64ZVE32F-NEXT:  .LBB54_11: # %cond.load7
5985; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
5986; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
5987; RV64ZVE32F-NEXT:    slli a7, a7, 3
5988; RV64ZVE32F-NEXT:    add a7, a1, a7
5989; RV64ZVE32F-NEXT:    ld a7, 0(a7)
5990; RV64ZVE32F-NEXT:    andi t0, a5, 16
5991; RV64ZVE32F-NEXT:    beqz t0, .LBB54_8
5992; RV64ZVE32F-NEXT:  .LBB54_12: # %cond.load10
5993; RV64ZVE32F-NEXT:    vmv.x.s t0, v10
5994; RV64ZVE32F-NEXT:    slli t0, t0, 3
5995; RV64ZVE32F-NEXT:    add t0, a1, t0
5996; RV64ZVE32F-NEXT:    ld t0, 0(t0)
5997; RV64ZVE32F-NEXT:    andi t1, a5, 32
5998; RV64ZVE32F-NEXT:    beqz t1, .LBB54_9
5999; RV64ZVE32F-NEXT:  .LBB54_13: # %cond.load13
6000; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
6001; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
6002; RV64ZVE32F-NEXT:    slli t1, t1, 3
6003; RV64ZVE32F-NEXT:    add t1, a1, t1
6004; RV64ZVE32F-NEXT:    ld t1, 0(t1)
6005; RV64ZVE32F-NEXT:  .LBB54_14: # %else14
6006; RV64ZVE32F-NEXT:    andi t2, a5, 64
6007; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
6008; RV64ZVE32F-NEXT:    beqz t2, .LBB54_17
6009; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
6010; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
6011; RV64ZVE32F-NEXT:    slli t2, t2, 3
6012; RV64ZVE32F-NEXT:    add t2, a1, t2
6013; RV64ZVE32F-NEXT:    ld t2, 0(t2)
6014; RV64ZVE32F-NEXT:    andi a5, a5, -128
6015; RV64ZVE32F-NEXT:    bnez a5, .LBB54_18
6016; RV64ZVE32F-NEXT:  .LBB54_16:
6017; RV64ZVE32F-NEXT:    ld a1, 56(a2)
6018; RV64ZVE32F-NEXT:    j .LBB54_19
6019; RV64ZVE32F-NEXT:  .LBB54_17:
6020; RV64ZVE32F-NEXT:    ld t2, 48(a2)
6021; RV64ZVE32F-NEXT:    andi a5, a5, -128
6022; RV64ZVE32F-NEXT:    beqz a5, .LBB54_16
6023; RV64ZVE32F-NEXT:  .LBB54_18: # %cond.load19
6024; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
6025; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
6026; RV64ZVE32F-NEXT:    slli a2, a2, 3
6027; RV64ZVE32F-NEXT:    add a1, a1, a2
6028; RV64ZVE32F-NEXT:    ld a1, 0(a1)
6029; RV64ZVE32F-NEXT:  .LBB54_19: # %else20
6030; RV64ZVE32F-NEXT:    sd a3, 0(a0)
6031; RV64ZVE32F-NEXT:    sd a4, 8(a0)
6032; RV64ZVE32F-NEXT:    sd a6, 16(a0)
6033; RV64ZVE32F-NEXT:    sd a7, 24(a0)
6034; RV64ZVE32F-NEXT:    sd t0, 32(a0)
6035; RV64ZVE32F-NEXT:    sd t1, 40(a0)
6036; RV64ZVE32F-NEXT:    sd t2, 48(a0)
6037; RV64ZVE32F-NEXT:    sd a1, 56(a0)
6038; RV64ZVE32F-NEXT:    ret
6039  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
6040  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
6041  ret <8 x i64> %v
6042}
6043
6044define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6045; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6046; RV32V:       # %bb.0:
6047; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
6048; RV32V-NEXT:    vsll.vi v8, v8, 3
6049; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
6050; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
6051; RV32V-NEXT:    vmv.v.v v8, v12
6052; RV32V-NEXT:    ret
6053;
6054; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6055; RV64V:       # %bb.0:
6056; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
6057; RV64V-NEXT:    vsext.vf2 v16, v8
6058; RV64V-NEXT:    vsll.vi v8, v16, 3
6059; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
6060; RV64V-NEXT:    vmv.v.v v8, v12
6061; RV64V-NEXT:    ret
6062;
6063; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6064; RV32ZVE32F:       # %bb.0:
6065; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
6066; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
6067; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
6068; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
6069; RV32ZVE32F-NEXT:    andi a3, t0, 1
6070; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
6071; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
6072; RV32ZVE32F-NEXT:    beqz a3, .LBB55_7
6073; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
6074; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
6075; RV32ZVE32F-NEXT:    lw a1, 0(a3)
6076; RV32ZVE32F-NEXT:    lw a3, 4(a3)
6077; RV32ZVE32F-NEXT:    andi a4, t0, 2
6078; RV32ZVE32F-NEXT:    bnez a4, .LBB55_8
6079; RV32ZVE32F-NEXT:  .LBB55_2:
6080; RV32ZVE32F-NEXT:    lw a4, 8(a2)
6081; RV32ZVE32F-NEXT:    lw a5, 12(a2)
6082; RV32ZVE32F-NEXT:    andi a6, t0, 4
6083; RV32ZVE32F-NEXT:    bnez a6, .LBB55_9
6084; RV32ZVE32F-NEXT:  .LBB55_3:
6085; RV32ZVE32F-NEXT:    lw a6, 16(a2)
6086; RV32ZVE32F-NEXT:    lw a7, 20(a2)
6087; RV32ZVE32F-NEXT:    andi t1, t0, 8
6088; RV32ZVE32F-NEXT:    bnez t1, .LBB55_10
6089; RV32ZVE32F-NEXT:  .LBB55_4:
6090; RV32ZVE32F-NEXT:    lw t1, 24(a2)
6091; RV32ZVE32F-NEXT:    lw t2, 28(a2)
6092; RV32ZVE32F-NEXT:    andi t3, t0, 16
6093; RV32ZVE32F-NEXT:    bnez t3, .LBB55_11
6094; RV32ZVE32F-NEXT:  .LBB55_5:
6095; RV32ZVE32F-NEXT:    lw t3, 32(a2)
6096; RV32ZVE32F-NEXT:    lw t4, 36(a2)
6097; RV32ZVE32F-NEXT:    andi t5, t0, 32
6098; RV32ZVE32F-NEXT:    bnez t5, .LBB55_12
6099; RV32ZVE32F-NEXT:  .LBB55_6:
6100; RV32ZVE32F-NEXT:    lw t5, 40(a2)
6101; RV32ZVE32F-NEXT:    lw t6, 44(a2)
6102; RV32ZVE32F-NEXT:    j .LBB55_13
6103; RV32ZVE32F-NEXT:  .LBB55_7:
6104; RV32ZVE32F-NEXT:    lw a1, 0(a2)
6105; RV32ZVE32F-NEXT:    lw a3, 4(a2)
6106; RV32ZVE32F-NEXT:    andi a4, t0, 2
6107; RV32ZVE32F-NEXT:    beqz a4, .LBB55_2
6108; RV32ZVE32F-NEXT:  .LBB55_8: # %cond.load1
6109; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6110; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
6111; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
6112; RV32ZVE32F-NEXT:    lw a4, 0(a5)
6113; RV32ZVE32F-NEXT:    lw a5, 4(a5)
6114; RV32ZVE32F-NEXT:    andi a6, t0, 4
6115; RV32ZVE32F-NEXT:    beqz a6, .LBB55_3
6116; RV32ZVE32F-NEXT:  .LBB55_9: # %cond.load4
6117; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6118; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
6119; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
6120; RV32ZVE32F-NEXT:    lw a6, 0(a7)
6121; RV32ZVE32F-NEXT:    lw a7, 4(a7)
6122; RV32ZVE32F-NEXT:    andi t1, t0, 8
6123; RV32ZVE32F-NEXT:    beqz t1, .LBB55_4
6124; RV32ZVE32F-NEXT:  .LBB55_10: # %cond.load7
6125; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6126; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
6127; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
6128; RV32ZVE32F-NEXT:    lw t1, 0(t2)
6129; RV32ZVE32F-NEXT:    lw t2, 4(t2)
6130; RV32ZVE32F-NEXT:    andi t3, t0, 16
6131; RV32ZVE32F-NEXT:    beqz t3, .LBB55_5
6132; RV32ZVE32F-NEXT:  .LBB55_11: # %cond.load10
6133; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6134; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
6135; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
6136; RV32ZVE32F-NEXT:    lw t3, 0(t4)
6137; RV32ZVE32F-NEXT:    lw t4, 4(t4)
6138; RV32ZVE32F-NEXT:    andi t5, t0, 32
6139; RV32ZVE32F-NEXT:    beqz t5, .LBB55_6
6140; RV32ZVE32F-NEXT:  .LBB55_12: # %cond.load13
6141; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6142; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
6143; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
6144; RV32ZVE32F-NEXT:    lw t5, 0(t6)
6145; RV32ZVE32F-NEXT:    lw t6, 4(t6)
6146; RV32ZVE32F-NEXT:  .LBB55_13: # %else14
6147; RV32ZVE32F-NEXT:    addi sp, sp, -16
6148; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
6149; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
6150; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
6151; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
6152; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
6153; RV32ZVE32F-NEXT:    andi s0, t0, 64
6154; RV32ZVE32F-NEXT:    beqz s0, .LBB55_16
6155; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
6156; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6157; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
6158; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
6159; RV32ZVE32F-NEXT:    lw s0, 0(s1)
6160; RV32ZVE32F-NEXT:    lw s1, 4(s1)
6161; RV32ZVE32F-NEXT:    andi t0, t0, -128
6162; RV32ZVE32F-NEXT:    bnez t0, .LBB55_17
6163; RV32ZVE32F-NEXT:  .LBB55_15:
6164; RV32ZVE32F-NEXT:    lw t0, 56(a2)
6165; RV32ZVE32F-NEXT:    lw a2, 60(a2)
6166; RV32ZVE32F-NEXT:    j .LBB55_18
6167; RV32ZVE32F-NEXT:  .LBB55_16:
6168; RV32ZVE32F-NEXT:    lw s0, 48(a2)
6169; RV32ZVE32F-NEXT:    lw s1, 52(a2)
6170; RV32ZVE32F-NEXT:    andi t0, t0, -128
6171; RV32ZVE32F-NEXT:    beqz t0, .LBB55_15
6172; RV32ZVE32F-NEXT:  .LBB55_17: # %cond.load19
6173; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6174; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
6175; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
6176; RV32ZVE32F-NEXT:    lw t0, 0(a2)
6177; RV32ZVE32F-NEXT:    lw a2, 4(a2)
6178; RV32ZVE32F-NEXT:  .LBB55_18: # %else20
6179; RV32ZVE32F-NEXT:    sw a1, 0(a0)
6180; RV32ZVE32F-NEXT:    sw a3, 4(a0)
6181; RV32ZVE32F-NEXT:    sw a4, 8(a0)
6182; RV32ZVE32F-NEXT:    sw a5, 12(a0)
6183; RV32ZVE32F-NEXT:    sw a6, 16(a0)
6184; RV32ZVE32F-NEXT:    sw a7, 20(a0)
6185; RV32ZVE32F-NEXT:    sw t1, 24(a0)
6186; RV32ZVE32F-NEXT:    sw t2, 28(a0)
6187; RV32ZVE32F-NEXT:    sw t3, 32(a0)
6188; RV32ZVE32F-NEXT:    sw t4, 36(a0)
6189; RV32ZVE32F-NEXT:    sw t5, 40(a0)
6190; RV32ZVE32F-NEXT:    sw t6, 44(a0)
6191; RV32ZVE32F-NEXT:    sw s0, 48(a0)
6192; RV32ZVE32F-NEXT:    sw s1, 52(a0)
6193; RV32ZVE32F-NEXT:    sw t0, 56(a0)
6194; RV32ZVE32F-NEXT:    sw a2, 60(a0)
6195; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
6196; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
6197; RV32ZVE32F-NEXT:    .cfi_restore s0
6198; RV32ZVE32F-NEXT:    .cfi_restore s1
6199; RV32ZVE32F-NEXT:    addi sp, sp, 16
6200; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
6201; RV32ZVE32F-NEXT:    ret
6202;
6203; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6204; RV64ZVE32F:       # %bb.0:
6205; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
6206; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
6207; RV64ZVE32F-NEXT:    andi a3, a5, 1
6208; RV64ZVE32F-NEXT:    beqz a3, .LBB55_3
6209; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
6210; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
6211; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
6212; RV64ZVE32F-NEXT:    slli a3, a3, 3
6213; RV64ZVE32F-NEXT:    add a3, a1, a3
6214; RV64ZVE32F-NEXT:    ld a3, 0(a3)
6215; RV64ZVE32F-NEXT:    andi a4, a5, 2
6216; RV64ZVE32F-NEXT:    bnez a4, .LBB55_4
6217; RV64ZVE32F-NEXT:  .LBB55_2:
6218; RV64ZVE32F-NEXT:    ld a4, 8(a2)
6219; RV64ZVE32F-NEXT:    j .LBB55_5
6220; RV64ZVE32F-NEXT:  .LBB55_3:
6221; RV64ZVE32F-NEXT:    ld a3, 0(a2)
6222; RV64ZVE32F-NEXT:    andi a4, a5, 2
6223; RV64ZVE32F-NEXT:    beqz a4, .LBB55_2
6224; RV64ZVE32F-NEXT:  .LBB55_4: # %cond.load1
6225; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6226; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
6227; RV64ZVE32F-NEXT:    vmv.x.s a4, v10
6228; RV64ZVE32F-NEXT:    slli a4, a4, 3
6229; RV64ZVE32F-NEXT:    add a4, a1, a4
6230; RV64ZVE32F-NEXT:    ld a4, 0(a4)
6231; RV64ZVE32F-NEXT:  .LBB55_5: # %else2
6232; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
6233; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
6234; RV64ZVE32F-NEXT:    andi a6, a5, 4
6235; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
6236; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
6237; RV64ZVE32F-NEXT:    beqz a6, .LBB55_10
6238; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
6239; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
6240; RV64ZVE32F-NEXT:    slli a6, a6, 3
6241; RV64ZVE32F-NEXT:    add a6, a1, a6
6242; RV64ZVE32F-NEXT:    ld a6, 0(a6)
6243; RV64ZVE32F-NEXT:    andi a7, a5, 8
6244; RV64ZVE32F-NEXT:    bnez a7, .LBB55_11
6245; RV64ZVE32F-NEXT:  .LBB55_7:
6246; RV64ZVE32F-NEXT:    ld a7, 24(a2)
6247; RV64ZVE32F-NEXT:    andi t0, a5, 16
6248; RV64ZVE32F-NEXT:    bnez t0, .LBB55_12
6249; RV64ZVE32F-NEXT:  .LBB55_8:
6250; RV64ZVE32F-NEXT:    ld t0, 32(a2)
6251; RV64ZVE32F-NEXT:    andi t1, a5, 32
6252; RV64ZVE32F-NEXT:    bnez t1, .LBB55_13
6253; RV64ZVE32F-NEXT:  .LBB55_9:
6254; RV64ZVE32F-NEXT:    ld t1, 40(a2)
6255; RV64ZVE32F-NEXT:    j .LBB55_14
6256; RV64ZVE32F-NEXT:  .LBB55_10:
6257; RV64ZVE32F-NEXT:    ld a6, 16(a2)
6258; RV64ZVE32F-NEXT:    andi a7, a5, 8
6259; RV64ZVE32F-NEXT:    beqz a7, .LBB55_7
6260; RV64ZVE32F-NEXT:  .LBB55_11: # %cond.load7
6261; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
6262; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
6263; RV64ZVE32F-NEXT:    slli a7, a7, 3
6264; RV64ZVE32F-NEXT:    add a7, a1, a7
6265; RV64ZVE32F-NEXT:    ld a7, 0(a7)
6266; RV64ZVE32F-NEXT:    andi t0, a5, 16
6267; RV64ZVE32F-NEXT:    beqz t0, .LBB55_8
6268; RV64ZVE32F-NEXT:  .LBB55_12: # %cond.load10
6269; RV64ZVE32F-NEXT:    vmv.x.s t0, v10
6270; RV64ZVE32F-NEXT:    slli t0, t0, 3
6271; RV64ZVE32F-NEXT:    add t0, a1, t0
6272; RV64ZVE32F-NEXT:    ld t0, 0(t0)
6273; RV64ZVE32F-NEXT:    andi t1, a5, 32
6274; RV64ZVE32F-NEXT:    beqz t1, .LBB55_9
6275; RV64ZVE32F-NEXT:  .LBB55_13: # %cond.load13
6276; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
6277; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
6278; RV64ZVE32F-NEXT:    slli t1, t1, 3
6279; RV64ZVE32F-NEXT:    add t1, a1, t1
6280; RV64ZVE32F-NEXT:    ld t1, 0(t1)
6281; RV64ZVE32F-NEXT:  .LBB55_14: # %else14
6282; RV64ZVE32F-NEXT:    andi t2, a5, 64
6283; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
6284; RV64ZVE32F-NEXT:    beqz t2, .LBB55_17
6285; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
6286; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
6287; RV64ZVE32F-NEXT:    slli t2, t2, 3
6288; RV64ZVE32F-NEXT:    add t2, a1, t2
6289; RV64ZVE32F-NEXT:    ld t2, 0(t2)
6290; RV64ZVE32F-NEXT:    andi a5, a5, -128
6291; RV64ZVE32F-NEXT:    bnez a5, .LBB55_18
6292; RV64ZVE32F-NEXT:  .LBB55_16:
6293; RV64ZVE32F-NEXT:    ld a1, 56(a2)
6294; RV64ZVE32F-NEXT:    j .LBB55_19
6295; RV64ZVE32F-NEXT:  .LBB55_17:
6296; RV64ZVE32F-NEXT:    ld t2, 48(a2)
6297; RV64ZVE32F-NEXT:    andi a5, a5, -128
6298; RV64ZVE32F-NEXT:    beqz a5, .LBB55_16
6299; RV64ZVE32F-NEXT:  .LBB55_18: # %cond.load19
6300; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
6301; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
6302; RV64ZVE32F-NEXT:    slli a2, a2, 3
6303; RV64ZVE32F-NEXT:    add a1, a1, a2
6304; RV64ZVE32F-NEXT:    ld a1, 0(a1)
6305; RV64ZVE32F-NEXT:  .LBB55_19: # %else20
6306; RV64ZVE32F-NEXT:    sd a3, 0(a0)
6307; RV64ZVE32F-NEXT:    sd a4, 8(a0)
6308; RV64ZVE32F-NEXT:    sd a6, 16(a0)
6309; RV64ZVE32F-NEXT:    sd a7, 24(a0)
6310; RV64ZVE32F-NEXT:    sd t0, 32(a0)
6311; RV64ZVE32F-NEXT:    sd t1, 40(a0)
6312; RV64ZVE32F-NEXT:    sd t2, 48(a0)
6313; RV64ZVE32F-NEXT:    sd a1, 56(a0)
6314; RV64ZVE32F-NEXT:    ret
6315  %eidxs = sext <8 x i32> %idxs to <8 x i64>
6316  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
6317  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
6318  ret <8 x i64> %v
6319}
6320
6321define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6322; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6323; RV32V:       # %bb.0:
6324; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
6325; RV32V-NEXT:    vsll.vi v8, v8, 3
6326; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
6327; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
6328; RV32V-NEXT:    vmv.v.v v8, v12
6329; RV32V-NEXT:    ret
6330;
6331; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6332; RV64V:       # %bb.0:
6333; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
6334; RV64V-NEXT:    vzext.vf2 v16, v8
6335; RV64V-NEXT:    vsll.vi v8, v16, 3
6336; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
6337; RV64V-NEXT:    vmv.v.v v8, v12
6338; RV64V-NEXT:    ret
6339;
6340; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6341; RV32ZVE32F:       # %bb.0:
6342; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
6343; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
6344; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
6345; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
6346; RV32ZVE32F-NEXT:    andi a3, t0, 1
6347; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
6348; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
6349; RV32ZVE32F-NEXT:    beqz a3, .LBB56_7
6350; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
6351; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
6352; RV32ZVE32F-NEXT:    lw a1, 0(a3)
6353; RV32ZVE32F-NEXT:    lw a3, 4(a3)
6354; RV32ZVE32F-NEXT:    andi a4, t0, 2
6355; RV32ZVE32F-NEXT:    bnez a4, .LBB56_8
6356; RV32ZVE32F-NEXT:  .LBB56_2:
6357; RV32ZVE32F-NEXT:    lw a4, 8(a2)
6358; RV32ZVE32F-NEXT:    lw a5, 12(a2)
6359; RV32ZVE32F-NEXT:    andi a6, t0, 4
6360; RV32ZVE32F-NEXT:    bnez a6, .LBB56_9
6361; RV32ZVE32F-NEXT:  .LBB56_3:
6362; RV32ZVE32F-NEXT:    lw a6, 16(a2)
6363; RV32ZVE32F-NEXT:    lw a7, 20(a2)
6364; RV32ZVE32F-NEXT:    andi t1, t0, 8
6365; RV32ZVE32F-NEXT:    bnez t1, .LBB56_10
6366; RV32ZVE32F-NEXT:  .LBB56_4:
6367; RV32ZVE32F-NEXT:    lw t1, 24(a2)
6368; RV32ZVE32F-NEXT:    lw t2, 28(a2)
6369; RV32ZVE32F-NEXT:    andi t3, t0, 16
6370; RV32ZVE32F-NEXT:    bnez t3, .LBB56_11
6371; RV32ZVE32F-NEXT:  .LBB56_5:
6372; RV32ZVE32F-NEXT:    lw t3, 32(a2)
6373; RV32ZVE32F-NEXT:    lw t4, 36(a2)
6374; RV32ZVE32F-NEXT:    andi t5, t0, 32
6375; RV32ZVE32F-NEXT:    bnez t5, .LBB56_12
6376; RV32ZVE32F-NEXT:  .LBB56_6:
6377; RV32ZVE32F-NEXT:    lw t5, 40(a2)
6378; RV32ZVE32F-NEXT:    lw t6, 44(a2)
6379; RV32ZVE32F-NEXT:    j .LBB56_13
6380; RV32ZVE32F-NEXT:  .LBB56_7:
6381; RV32ZVE32F-NEXT:    lw a1, 0(a2)
6382; RV32ZVE32F-NEXT:    lw a3, 4(a2)
6383; RV32ZVE32F-NEXT:    andi a4, t0, 2
6384; RV32ZVE32F-NEXT:    beqz a4, .LBB56_2
6385; RV32ZVE32F-NEXT:  .LBB56_8: # %cond.load1
6386; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6387; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
6388; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
6389; RV32ZVE32F-NEXT:    lw a4, 0(a5)
6390; RV32ZVE32F-NEXT:    lw a5, 4(a5)
6391; RV32ZVE32F-NEXT:    andi a6, t0, 4
6392; RV32ZVE32F-NEXT:    beqz a6, .LBB56_3
6393; RV32ZVE32F-NEXT:  .LBB56_9: # %cond.load4
6394; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6395; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
6396; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
6397; RV32ZVE32F-NEXT:    lw a6, 0(a7)
6398; RV32ZVE32F-NEXT:    lw a7, 4(a7)
6399; RV32ZVE32F-NEXT:    andi t1, t0, 8
6400; RV32ZVE32F-NEXT:    beqz t1, .LBB56_4
6401; RV32ZVE32F-NEXT:  .LBB56_10: # %cond.load7
6402; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6403; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
6404; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
6405; RV32ZVE32F-NEXT:    lw t1, 0(t2)
6406; RV32ZVE32F-NEXT:    lw t2, 4(t2)
6407; RV32ZVE32F-NEXT:    andi t3, t0, 16
6408; RV32ZVE32F-NEXT:    beqz t3, .LBB56_5
6409; RV32ZVE32F-NEXT:  .LBB56_11: # %cond.load10
6410; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6411; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
6412; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
6413; RV32ZVE32F-NEXT:    lw t3, 0(t4)
6414; RV32ZVE32F-NEXT:    lw t4, 4(t4)
6415; RV32ZVE32F-NEXT:    andi t5, t0, 32
6416; RV32ZVE32F-NEXT:    beqz t5, .LBB56_6
6417; RV32ZVE32F-NEXT:  .LBB56_12: # %cond.load13
6418; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6419; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
6420; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
6421; RV32ZVE32F-NEXT:    lw t5, 0(t6)
6422; RV32ZVE32F-NEXT:    lw t6, 4(t6)
6423; RV32ZVE32F-NEXT:  .LBB56_13: # %else14
6424; RV32ZVE32F-NEXT:    addi sp, sp, -16
6425; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
6426; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
6427; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
6428; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
6429; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
6430; RV32ZVE32F-NEXT:    andi s0, t0, 64
6431; RV32ZVE32F-NEXT:    beqz s0, .LBB56_16
6432; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
6433; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6434; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
6435; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
6436; RV32ZVE32F-NEXT:    lw s0, 0(s1)
6437; RV32ZVE32F-NEXT:    lw s1, 4(s1)
6438; RV32ZVE32F-NEXT:    andi t0, t0, -128
6439; RV32ZVE32F-NEXT:    bnez t0, .LBB56_17
6440; RV32ZVE32F-NEXT:  .LBB56_15:
6441; RV32ZVE32F-NEXT:    lw t0, 56(a2)
6442; RV32ZVE32F-NEXT:    lw a2, 60(a2)
6443; RV32ZVE32F-NEXT:    j .LBB56_18
6444; RV32ZVE32F-NEXT:  .LBB56_16:
6445; RV32ZVE32F-NEXT:    lw s0, 48(a2)
6446; RV32ZVE32F-NEXT:    lw s1, 52(a2)
6447; RV32ZVE32F-NEXT:    andi t0, t0, -128
6448; RV32ZVE32F-NEXT:    beqz t0, .LBB56_15
6449; RV32ZVE32F-NEXT:  .LBB56_17: # %cond.load19
6450; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6451; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
6452; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
6453; RV32ZVE32F-NEXT:    lw t0, 0(a2)
6454; RV32ZVE32F-NEXT:    lw a2, 4(a2)
6455; RV32ZVE32F-NEXT:  .LBB56_18: # %else20
6456; RV32ZVE32F-NEXT:    sw a1, 0(a0)
6457; RV32ZVE32F-NEXT:    sw a3, 4(a0)
6458; RV32ZVE32F-NEXT:    sw a4, 8(a0)
6459; RV32ZVE32F-NEXT:    sw a5, 12(a0)
6460; RV32ZVE32F-NEXT:    sw a6, 16(a0)
6461; RV32ZVE32F-NEXT:    sw a7, 20(a0)
6462; RV32ZVE32F-NEXT:    sw t1, 24(a0)
6463; RV32ZVE32F-NEXT:    sw t2, 28(a0)
6464; RV32ZVE32F-NEXT:    sw t3, 32(a0)
6465; RV32ZVE32F-NEXT:    sw t4, 36(a0)
6466; RV32ZVE32F-NEXT:    sw t5, 40(a0)
6467; RV32ZVE32F-NEXT:    sw t6, 44(a0)
6468; RV32ZVE32F-NEXT:    sw s0, 48(a0)
6469; RV32ZVE32F-NEXT:    sw s1, 52(a0)
6470; RV32ZVE32F-NEXT:    sw t0, 56(a0)
6471; RV32ZVE32F-NEXT:    sw a2, 60(a0)
6472; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
6473; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
6474; RV32ZVE32F-NEXT:    .cfi_restore s0
6475; RV32ZVE32F-NEXT:    .cfi_restore s1
6476; RV32ZVE32F-NEXT:    addi sp, sp, 16
6477; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
6478; RV32ZVE32F-NEXT:    ret
6479;
6480; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6481; RV64ZVE32F:       # %bb.0:
6482; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
6483; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
6484; RV64ZVE32F-NEXT:    andi a3, a5, 1
6485; RV64ZVE32F-NEXT:    beqz a3, .LBB56_3
6486; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
6487; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
6488; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
6489; RV64ZVE32F-NEXT:    slli a3, a3, 32
6490; RV64ZVE32F-NEXT:    srli a3, a3, 29
6491; RV64ZVE32F-NEXT:    add a3, a1, a3
6492; RV64ZVE32F-NEXT:    ld a3, 0(a3)
6493; RV64ZVE32F-NEXT:    andi a4, a5, 2
6494; RV64ZVE32F-NEXT:    bnez a4, .LBB56_4
6495; RV64ZVE32F-NEXT:  .LBB56_2:
6496; RV64ZVE32F-NEXT:    ld a4, 8(a2)
6497; RV64ZVE32F-NEXT:    j .LBB56_5
6498; RV64ZVE32F-NEXT:  .LBB56_3:
6499; RV64ZVE32F-NEXT:    ld a3, 0(a2)
6500; RV64ZVE32F-NEXT:    andi a4, a5, 2
6501; RV64ZVE32F-NEXT:    beqz a4, .LBB56_2
6502; RV64ZVE32F-NEXT:  .LBB56_4: # %cond.load1
6503; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6504; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
6505; RV64ZVE32F-NEXT:    vmv.x.s a4, v10
6506; RV64ZVE32F-NEXT:    slli a4, a4, 32
6507; RV64ZVE32F-NEXT:    srli a4, a4, 29
6508; RV64ZVE32F-NEXT:    add a4, a1, a4
6509; RV64ZVE32F-NEXT:    ld a4, 0(a4)
6510; RV64ZVE32F-NEXT:  .LBB56_5: # %else2
6511; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
6512; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
6513; RV64ZVE32F-NEXT:    andi a6, a5, 4
6514; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
6515; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
6516; RV64ZVE32F-NEXT:    beqz a6, .LBB56_10
6517; RV64ZVE32F-NEXT:  # %bb.6: # %cond.load4
6518; RV64ZVE32F-NEXT:    vmv.x.s a6, v8
6519; RV64ZVE32F-NEXT:    slli a6, a6, 32
6520; RV64ZVE32F-NEXT:    srli a6, a6, 29
6521; RV64ZVE32F-NEXT:    add a6, a1, a6
6522; RV64ZVE32F-NEXT:    ld a6, 0(a6)
6523; RV64ZVE32F-NEXT:    andi a7, a5, 8
6524; RV64ZVE32F-NEXT:    bnez a7, .LBB56_11
6525; RV64ZVE32F-NEXT:  .LBB56_7:
6526; RV64ZVE32F-NEXT:    ld a7, 24(a2)
6527; RV64ZVE32F-NEXT:    andi t0, a5, 16
6528; RV64ZVE32F-NEXT:    bnez t0, .LBB56_12
6529; RV64ZVE32F-NEXT:  .LBB56_8:
6530; RV64ZVE32F-NEXT:    ld t0, 32(a2)
6531; RV64ZVE32F-NEXT:    andi t1, a5, 32
6532; RV64ZVE32F-NEXT:    bnez t1, .LBB56_13
6533; RV64ZVE32F-NEXT:  .LBB56_9:
6534; RV64ZVE32F-NEXT:    ld t1, 40(a2)
6535; RV64ZVE32F-NEXT:    j .LBB56_14
6536; RV64ZVE32F-NEXT:  .LBB56_10:
6537; RV64ZVE32F-NEXT:    ld a6, 16(a2)
6538; RV64ZVE32F-NEXT:    andi a7, a5, 8
6539; RV64ZVE32F-NEXT:    beqz a7, .LBB56_7
6540; RV64ZVE32F-NEXT:  .LBB56_11: # %cond.load7
6541; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
6542; RV64ZVE32F-NEXT:    vmv.x.s a7, v8
6543; RV64ZVE32F-NEXT:    slli a7, a7, 32
6544; RV64ZVE32F-NEXT:    srli a7, a7, 29
6545; RV64ZVE32F-NEXT:    add a7, a1, a7
6546; RV64ZVE32F-NEXT:    ld a7, 0(a7)
6547; RV64ZVE32F-NEXT:    andi t0, a5, 16
6548; RV64ZVE32F-NEXT:    beqz t0, .LBB56_8
6549; RV64ZVE32F-NEXT:  .LBB56_12: # %cond.load10
6550; RV64ZVE32F-NEXT:    vmv.x.s t0, v10
6551; RV64ZVE32F-NEXT:    slli t0, t0, 32
6552; RV64ZVE32F-NEXT:    srli t0, t0, 29
6553; RV64ZVE32F-NEXT:    add t0, a1, t0
6554; RV64ZVE32F-NEXT:    ld t0, 0(t0)
6555; RV64ZVE32F-NEXT:    andi t1, a5, 32
6556; RV64ZVE32F-NEXT:    beqz t1, .LBB56_9
6557; RV64ZVE32F-NEXT:  .LBB56_13: # %cond.load13
6558; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
6559; RV64ZVE32F-NEXT:    vmv.x.s t1, v8
6560; RV64ZVE32F-NEXT:    slli t1, t1, 32
6561; RV64ZVE32F-NEXT:    srli t1, t1, 29
6562; RV64ZVE32F-NEXT:    add t1, a1, t1
6563; RV64ZVE32F-NEXT:    ld t1, 0(t1)
6564; RV64ZVE32F-NEXT:  .LBB56_14: # %else14
6565; RV64ZVE32F-NEXT:    andi t2, a5, 64
6566; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
6567; RV64ZVE32F-NEXT:    beqz t2, .LBB56_17
6568; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load16
6569; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
6570; RV64ZVE32F-NEXT:    slli t2, t2, 32
6571; RV64ZVE32F-NEXT:    srli t2, t2, 29
6572; RV64ZVE32F-NEXT:    add t2, a1, t2
6573; RV64ZVE32F-NEXT:    ld t2, 0(t2)
6574; RV64ZVE32F-NEXT:    andi a5, a5, -128
6575; RV64ZVE32F-NEXT:    bnez a5, .LBB56_18
6576; RV64ZVE32F-NEXT:  .LBB56_16:
6577; RV64ZVE32F-NEXT:    ld a1, 56(a2)
6578; RV64ZVE32F-NEXT:    j .LBB56_19
6579; RV64ZVE32F-NEXT:  .LBB56_17:
6580; RV64ZVE32F-NEXT:    ld t2, 48(a2)
6581; RV64ZVE32F-NEXT:    andi a5, a5, -128
6582; RV64ZVE32F-NEXT:    beqz a5, .LBB56_16
6583; RV64ZVE32F-NEXT:  .LBB56_18: # %cond.load19
6584; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
6585; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
6586; RV64ZVE32F-NEXT:    slli a2, a2, 32
6587; RV64ZVE32F-NEXT:    srli a2, a2, 29
6588; RV64ZVE32F-NEXT:    add a1, a1, a2
6589; RV64ZVE32F-NEXT:    ld a1, 0(a1)
6590; RV64ZVE32F-NEXT:  .LBB56_19: # %else20
6591; RV64ZVE32F-NEXT:    sd a3, 0(a0)
6592; RV64ZVE32F-NEXT:    sd a4, 8(a0)
6593; RV64ZVE32F-NEXT:    sd a6, 16(a0)
6594; RV64ZVE32F-NEXT:    sd a7, 24(a0)
6595; RV64ZVE32F-NEXT:    sd t0, 32(a0)
6596; RV64ZVE32F-NEXT:    sd t1, 40(a0)
6597; RV64ZVE32F-NEXT:    sd t2, 48(a0)
6598; RV64ZVE32F-NEXT:    sd a1, 56(a0)
6599; RV64ZVE32F-NEXT:    ret
6600  %eidxs = zext <8 x i32> %idxs to <8 x i64>
6601  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
6602  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
6603  ret <8 x i64> %v
6604}
6605
6606define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6607; RV32V-LABEL: mgather_baseidx_v8i64:
6608; RV32V:       # %bb.0:
6609; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
6610; RV32V-NEXT:    vnsrl.wi v16, v8, 0
6611; RV32V-NEXT:    vsll.vi v8, v16, 3
6612; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
6613; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
6614; RV32V-NEXT:    vmv.v.v v8, v12
6615; RV32V-NEXT:    ret
6616;
6617; RV64V-LABEL: mgather_baseidx_v8i64:
6618; RV64V:       # %bb.0:
6619; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
6620; RV64V-NEXT:    vsll.vi v8, v8, 3
6621; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
6622; RV64V-NEXT:    vmv.v.v v8, v12
6623; RV64V-NEXT:    ret
6624;
6625; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
6626; RV32ZVE32F:       # %bb.0:
6627; RV32ZVE32F-NEXT:    lw a4, 32(a2)
6628; RV32ZVE32F-NEXT:    lw a5, 40(a2)
6629; RV32ZVE32F-NEXT:    lw a6, 48(a2)
6630; RV32ZVE32F-NEXT:    lw a7, 56(a2)
6631; RV32ZVE32F-NEXT:    lw t0, 0(a2)
6632; RV32ZVE32F-NEXT:    lw t1, 8(a2)
6633; RV32ZVE32F-NEXT:    lw t2, 16(a2)
6634; RV32ZVE32F-NEXT:    lw a2, 24(a2)
6635; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
6636; RV32ZVE32F-NEXT:    vmv.v.x v8, t0
6637; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
6638; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
6639; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
6640; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t1
6641; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t2
6642; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
6643; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
6644; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a5
6645; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a6
6646; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a7
6647; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
6648; RV32ZVE32F-NEXT:    andi a2, t0, 1
6649; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
6650; RV32ZVE32F-NEXT:    beqz a2, .LBB57_7
6651; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
6652; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
6653; RV32ZVE32F-NEXT:    lw a1, 0(a2)
6654; RV32ZVE32F-NEXT:    lw a2, 4(a2)
6655; RV32ZVE32F-NEXT:    andi a4, t0, 2
6656; RV32ZVE32F-NEXT:    bnez a4, .LBB57_8
6657; RV32ZVE32F-NEXT:  .LBB57_2:
6658; RV32ZVE32F-NEXT:    lw a4, 8(a3)
6659; RV32ZVE32F-NEXT:    lw a5, 12(a3)
6660; RV32ZVE32F-NEXT:    andi a6, t0, 4
6661; RV32ZVE32F-NEXT:    bnez a6, .LBB57_9
6662; RV32ZVE32F-NEXT:  .LBB57_3:
6663; RV32ZVE32F-NEXT:    lw a6, 16(a3)
6664; RV32ZVE32F-NEXT:    lw a7, 20(a3)
6665; RV32ZVE32F-NEXT:    andi t1, t0, 8
6666; RV32ZVE32F-NEXT:    bnez t1, .LBB57_10
6667; RV32ZVE32F-NEXT:  .LBB57_4:
6668; RV32ZVE32F-NEXT:    lw t1, 24(a3)
6669; RV32ZVE32F-NEXT:    lw t2, 28(a3)
6670; RV32ZVE32F-NEXT:    andi t3, t0, 16
6671; RV32ZVE32F-NEXT:    bnez t3, .LBB57_11
6672; RV32ZVE32F-NEXT:  .LBB57_5:
6673; RV32ZVE32F-NEXT:    lw t3, 32(a3)
6674; RV32ZVE32F-NEXT:    lw t4, 36(a3)
6675; RV32ZVE32F-NEXT:    andi t5, t0, 32
6676; RV32ZVE32F-NEXT:    bnez t5, .LBB57_12
6677; RV32ZVE32F-NEXT:  .LBB57_6:
6678; RV32ZVE32F-NEXT:    lw t5, 40(a3)
6679; RV32ZVE32F-NEXT:    lw t6, 44(a3)
6680; RV32ZVE32F-NEXT:    j .LBB57_13
6681; RV32ZVE32F-NEXT:  .LBB57_7:
6682; RV32ZVE32F-NEXT:    lw a1, 0(a3)
6683; RV32ZVE32F-NEXT:    lw a2, 4(a3)
6684; RV32ZVE32F-NEXT:    andi a4, t0, 2
6685; RV32ZVE32F-NEXT:    beqz a4, .LBB57_2
6686; RV32ZVE32F-NEXT:  .LBB57_8: # %cond.load1
6687; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6688; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
6689; RV32ZVE32F-NEXT:    vmv.x.s a5, v10
6690; RV32ZVE32F-NEXT:    lw a4, 0(a5)
6691; RV32ZVE32F-NEXT:    lw a5, 4(a5)
6692; RV32ZVE32F-NEXT:    andi a6, t0, 4
6693; RV32ZVE32F-NEXT:    beqz a6, .LBB57_3
6694; RV32ZVE32F-NEXT:  .LBB57_9: # %cond.load4
6695; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6696; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
6697; RV32ZVE32F-NEXT:    vmv.x.s a7, v10
6698; RV32ZVE32F-NEXT:    lw a6, 0(a7)
6699; RV32ZVE32F-NEXT:    lw a7, 4(a7)
6700; RV32ZVE32F-NEXT:    andi t1, t0, 8
6701; RV32ZVE32F-NEXT:    beqz t1, .LBB57_4
6702; RV32ZVE32F-NEXT:  .LBB57_10: # %cond.load7
6703; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
6704; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
6705; RV32ZVE32F-NEXT:    vmv.x.s t2, v10
6706; RV32ZVE32F-NEXT:    lw t1, 0(t2)
6707; RV32ZVE32F-NEXT:    lw t2, 4(t2)
6708; RV32ZVE32F-NEXT:    andi t3, t0, 16
6709; RV32ZVE32F-NEXT:    beqz t3, .LBB57_5
6710; RV32ZVE32F-NEXT:  .LBB57_11: # %cond.load10
6711; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6712; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
6713; RV32ZVE32F-NEXT:    vmv.x.s t4, v10
6714; RV32ZVE32F-NEXT:    lw t3, 0(t4)
6715; RV32ZVE32F-NEXT:    lw t4, 4(t4)
6716; RV32ZVE32F-NEXT:    andi t5, t0, 32
6717; RV32ZVE32F-NEXT:    beqz t5, .LBB57_6
6718; RV32ZVE32F-NEXT:  .LBB57_12: # %cond.load13
6719; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6720; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
6721; RV32ZVE32F-NEXT:    vmv.x.s t6, v10
6722; RV32ZVE32F-NEXT:    lw t5, 0(t6)
6723; RV32ZVE32F-NEXT:    lw t6, 4(t6)
6724; RV32ZVE32F-NEXT:  .LBB57_13: # %else14
6725; RV32ZVE32F-NEXT:    addi sp, sp, -16
6726; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
6727; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
6728; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
6729; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
6730; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
6731; RV32ZVE32F-NEXT:    andi s0, t0, 64
6732; RV32ZVE32F-NEXT:    beqz s0, .LBB57_16
6733; RV32ZVE32F-NEXT:  # %bb.14: # %cond.load16
6734; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6735; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
6736; RV32ZVE32F-NEXT:    vmv.x.s s1, v10
6737; RV32ZVE32F-NEXT:    lw s0, 0(s1)
6738; RV32ZVE32F-NEXT:    lw s1, 4(s1)
6739; RV32ZVE32F-NEXT:    andi t0, t0, -128
6740; RV32ZVE32F-NEXT:    bnez t0, .LBB57_17
6741; RV32ZVE32F-NEXT:  .LBB57_15:
6742; RV32ZVE32F-NEXT:    lw t0, 56(a3)
6743; RV32ZVE32F-NEXT:    lw a3, 60(a3)
6744; RV32ZVE32F-NEXT:    j .LBB57_18
6745; RV32ZVE32F-NEXT:  .LBB57_16:
6746; RV32ZVE32F-NEXT:    lw s0, 48(a3)
6747; RV32ZVE32F-NEXT:    lw s1, 52(a3)
6748; RV32ZVE32F-NEXT:    andi t0, t0, -128
6749; RV32ZVE32F-NEXT:    beqz t0, .LBB57_15
6750; RV32ZVE32F-NEXT:  .LBB57_17: # %cond.load19
6751; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
6752; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
6753; RV32ZVE32F-NEXT:    vmv.x.s a3, v8
6754; RV32ZVE32F-NEXT:    lw t0, 0(a3)
6755; RV32ZVE32F-NEXT:    lw a3, 4(a3)
6756; RV32ZVE32F-NEXT:  .LBB57_18: # %else20
6757; RV32ZVE32F-NEXT:    sw a1, 0(a0)
6758; RV32ZVE32F-NEXT:    sw a2, 4(a0)
6759; RV32ZVE32F-NEXT:    sw a4, 8(a0)
6760; RV32ZVE32F-NEXT:    sw a5, 12(a0)
6761; RV32ZVE32F-NEXT:    sw a6, 16(a0)
6762; RV32ZVE32F-NEXT:    sw a7, 20(a0)
6763; RV32ZVE32F-NEXT:    sw t1, 24(a0)
6764; RV32ZVE32F-NEXT:    sw t2, 28(a0)
6765; RV32ZVE32F-NEXT:    sw t3, 32(a0)
6766; RV32ZVE32F-NEXT:    sw t4, 36(a0)
6767; RV32ZVE32F-NEXT:    sw t5, 40(a0)
6768; RV32ZVE32F-NEXT:    sw t6, 44(a0)
6769; RV32ZVE32F-NEXT:    sw s0, 48(a0)
6770; RV32ZVE32F-NEXT:    sw s1, 52(a0)
6771; RV32ZVE32F-NEXT:    sw t0, 56(a0)
6772; RV32ZVE32F-NEXT:    sw a3, 60(a0)
6773; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
6774; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
6775; RV32ZVE32F-NEXT:    .cfi_restore s0
6776; RV32ZVE32F-NEXT:    .cfi_restore s1
6777; RV32ZVE32F-NEXT:    addi sp, sp, 16
6778; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
6779; RV32ZVE32F-NEXT:    ret
6780;
6781; RV64ZVE32F-LABEL: mgather_baseidx_v8i64:
6782; RV64ZVE32F:       # %bb.0:
6783; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
6784; RV64ZVE32F-NEXT:    vmv.x.s a7, v0
6785; RV64ZVE32F-NEXT:    andi a4, a7, 1
6786; RV64ZVE32F-NEXT:    beqz a4, .LBB57_9
6787; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
6788; RV64ZVE32F-NEXT:    ld a4, 0(a2)
6789; RV64ZVE32F-NEXT:    slli a4, a4, 3
6790; RV64ZVE32F-NEXT:    add a4, a1, a4
6791; RV64ZVE32F-NEXT:    ld a4, 0(a4)
6792; RV64ZVE32F-NEXT:    andi a5, a7, 2
6793; RV64ZVE32F-NEXT:    bnez a5, .LBB57_10
6794; RV64ZVE32F-NEXT:  .LBB57_2:
6795; RV64ZVE32F-NEXT:    ld a5, 8(a3)
6796; RV64ZVE32F-NEXT:    andi a6, a7, 4
6797; RV64ZVE32F-NEXT:    bnez a6, .LBB57_11
6798; RV64ZVE32F-NEXT:  .LBB57_3:
6799; RV64ZVE32F-NEXT:    ld a6, 16(a3)
6800; RV64ZVE32F-NEXT:    andi t0, a7, 8
6801; RV64ZVE32F-NEXT:    bnez t0, .LBB57_12
6802; RV64ZVE32F-NEXT:  .LBB57_4:
6803; RV64ZVE32F-NEXT:    ld t0, 24(a3)
6804; RV64ZVE32F-NEXT:    andi t1, a7, 16
6805; RV64ZVE32F-NEXT:    bnez t1, .LBB57_13
6806; RV64ZVE32F-NEXT:  .LBB57_5:
6807; RV64ZVE32F-NEXT:    ld t1, 32(a3)
6808; RV64ZVE32F-NEXT:    andi t2, a7, 32
6809; RV64ZVE32F-NEXT:    bnez t2, .LBB57_14
6810; RV64ZVE32F-NEXT:  .LBB57_6:
6811; RV64ZVE32F-NEXT:    ld t2, 40(a3)
6812; RV64ZVE32F-NEXT:    andi t3, a7, 64
6813; RV64ZVE32F-NEXT:    bnez t3, .LBB57_15
6814; RV64ZVE32F-NEXT:  .LBB57_7:
6815; RV64ZVE32F-NEXT:    ld t3, 48(a3)
6816; RV64ZVE32F-NEXT:    andi a7, a7, -128
6817; RV64ZVE32F-NEXT:    bnez a7, .LBB57_16
6818; RV64ZVE32F-NEXT:  .LBB57_8:
6819; RV64ZVE32F-NEXT:    ld a1, 56(a3)
6820; RV64ZVE32F-NEXT:    j .LBB57_17
6821; RV64ZVE32F-NEXT:  .LBB57_9:
6822; RV64ZVE32F-NEXT:    ld a4, 0(a3)
6823; RV64ZVE32F-NEXT:    andi a5, a7, 2
6824; RV64ZVE32F-NEXT:    beqz a5, .LBB57_2
6825; RV64ZVE32F-NEXT:  .LBB57_10: # %cond.load1
6826; RV64ZVE32F-NEXT:    ld a5, 8(a2)
6827; RV64ZVE32F-NEXT:    slli a5, a5, 3
6828; RV64ZVE32F-NEXT:    add a5, a1, a5
6829; RV64ZVE32F-NEXT:    ld a5, 0(a5)
6830; RV64ZVE32F-NEXT:    andi a6, a7, 4
6831; RV64ZVE32F-NEXT:    beqz a6, .LBB57_3
6832; RV64ZVE32F-NEXT:  .LBB57_11: # %cond.load4
6833; RV64ZVE32F-NEXT:    ld a6, 16(a2)
6834; RV64ZVE32F-NEXT:    slli a6, a6, 3
6835; RV64ZVE32F-NEXT:    add a6, a1, a6
6836; RV64ZVE32F-NEXT:    ld a6, 0(a6)
6837; RV64ZVE32F-NEXT:    andi t0, a7, 8
6838; RV64ZVE32F-NEXT:    beqz t0, .LBB57_4
6839; RV64ZVE32F-NEXT:  .LBB57_12: # %cond.load7
6840; RV64ZVE32F-NEXT:    ld t0, 24(a2)
6841; RV64ZVE32F-NEXT:    slli t0, t0, 3
6842; RV64ZVE32F-NEXT:    add t0, a1, t0
6843; RV64ZVE32F-NEXT:    ld t0, 0(t0)
6844; RV64ZVE32F-NEXT:    andi t1, a7, 16
6845; RV64ZVE32F-NEXT:    beqz t1, .LBB57_5
6846; RV64ZVE32F-NEXT:  .LBB57_13: # %cond.load10
6847; RV64ZVE32F-NEXT:    ld t1, 32(a2)
6848; RV64ZVE32F-NEXT:    slli t1, t1, 3
6849; RV64ZVE32F-NEXT:    add t1, a1, t1
6850; RV64ZVE32F-NEXT:    ld t1, 0(t1)
6851; RV64ZVE32F-NEXT:    andi t2, a7, 32
6852; RV64ZVE32F-NEXT:    beqz t2, .LBB57_6
6853; RV64ZVE32F-NEXT:  .LBB57_14: # %cond.load13
6854; RV64ZVE32F-NEXT:    ld t2, 40(a2)
6855; RV64ZVE32F-NEXT:    slli t2, t2, 3
6856; RV64ZVE32F-NEXT:    add t2, a1, t2
6857; RV64ZVE32F-NEXT:    ld t2, 0(t2)
6858; RV64ZVE32F-NEXT:    andi t3, a7, 64
6859; RV64ZVE32F-NEXT:    beqz t3, .LBB57_7
6860; RV64ZVE32F-NEXT:  .LBB57_15: # %cond.load16
6861; RV64ZVE32F-NEXT:    ld t3, 48(a2)
6862; RV64ZVE32F-NEXT:    slli t3, t3, 3
6863; RV64ZVE32F-NEXT:    add t3, a1, t3
6864; RV64ZVE32F-NEXT:    ld t3, 0(t3)
6865; RV64ZVE32F-NEXT:    andi a7, a7, -128
6866; RV64ZVE32F-NEXT:    beqz a7, .LBB57_8
6867; RV64ZVE32F-NEXT:  .LBB57_16: # %cond.load19
6868; RV64ZVE32F-NEXT:    ld a2, 56(a2)
6869; RV64ZVE32F-NEXT:    slli a2, a2, 3
6870; RV64ZVE32F-NEXT:    add a1, a1, a2
6871; RV64ZVE32F-NEXT:    ld a1, 0(a1)
6872; RV64ZVE32F-NEXT:  .LBB57_17: # %else20
6873; RV64ZVE32F-NEXT:    sd a4, 0(a0)
6874; RV64ZVE32F-NEXT:    sd a5, 8(a0)
6875; RV64ZVE32F-NEXT:    sd a6, 16(a0)
6876; RV64ZVE32F-NEXT:    sd t0, 24(a0)
6877; RV64ZVE32F-NEXT:    sd t1, 32(a0)
6878; RV64ZVE32F-NEXT:    sd t2, 40(a0)
6879; RV64ZVE32F-NEXT:    sd t3, 48(a0)
6880; RV64ZVE32F-NEXT:    sd a1, 56(a0)
6881; RV64ZVE32F-NEXT:    ret
6882  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
6883  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
6884  ret <8 x i64> %v
6885}
6886
6887declare <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x bfloat>)
6888
6889define <1 x bfloat> @mgather_v1bf16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x bfloat> %passthru) {
6890; RV32V-LABEL: mgather_v1bf16:
6891; RV32V:       # %bb.0:
6892; RV32V-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
6893; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
6894; RV32V-NEXT:    vmv1r.v v8, v9
6895; RV32V-NEXT:    ret
6896;
6897; RV64V-LABEL: mgather_v1bf16:
6898; RV64V:       # %bb.0:
6899; RV64V-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
6900; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
6901; RV64V-NEXT:    vmv1r.v v8, v9
6902; RV64V-NEXT:    ret
6903;
6904; RV32ZVE32F-LABEL: mgather_v1bf16:
6905; RV32ZVE32F:       # %bb.0:
6906; RV32ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
6907; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
6908; RV32ZVE32F-NEXT:    vmv1r.v v8, v9
6909; RV32ZVE32F-NEXT:    ret
6910;
6911; RV64ZVE32F-LABEL: mgather_v1bf16:
6912; RV64ZVE32F:       # %bb.0:
6913; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
6914; RV64ZVE32F-NEXT:    vfirst.m a1, v0
6915; RV64ZVE32F-NEXT:    bnez a1, .LBB58_2
6916; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
6917; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
6918; RV64ZVE32F-NEXT:    vle16.v v8, (a0)
6919; RV64ZVE32F-NEXT:  .LBB58_2: # %else
6920; RV64ZVE32F-NEXT:    ret
6921  %v = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x bfloat> %passthru)
6922  ret <1 x bfloat> %v
6923}
6924
6925declare <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x bfloat>)
6926
6927define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %passthru) {
6928; RV32V-LABEL: mgather_v2bf16:
6929; RV32V:       # %bb.0:
6930; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
6931; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
6932; RV32V-NEXT:    vmv1r.v v8, v9
6933; RV32V-NEXT:    ret
6934;
6935; RV64V-LABEL: mgather_v2bf16:
6936; RV64V:       # %bb.0:
6937; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
6938; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
6939; RV64V-NEXT:    vmv1r.v v8, v9
6940; RV64V-NEXT:    ret
6941;
6942; RV32ZVE32F-LABEL: mgather_v2bf16:
6943; RV32ZVE32F:       # %bb.0:
6944; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, mu
6945; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
6946; RV32ZVE32F-NEXT:    vmv1r.v v8, v9
6947; RV32ZVE32F-NEXT:    ret
6948;
6949; RV64ZVE32F-LABEL: mgather_v2bf16:
6950; RV64ZVE32F:       # %bb.0:
6951; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
6952; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
6953; RV64ZVE32F-NEXT:    andi a3, a2, 1
6954; RV64ZVE32F-NEXT:    bnez a3, .LBB59_3
6955; RV64ZVE32F-NEXT:  # %bb.1: # %else
6956; RV64ZVE32F-NEXT:    andi a2, a2, 2
6957; RV64ZVE32F-NEXT:    bnez a2, .LBB59_4
6958; RV64ZVE32F-NEXT:  .LBB59_2: # %else2
6959; RV64ZVE32F-NEXT:    ret
6960; RV64ZVE32F-NEXT:  .LBB59_3: # %cond.load
6961; RV64ZVE32F-NEXT:    lh a0, 0(a0)
6962; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
6963; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
6964; RV64ZVE32F-NEXT:    andi a2, a2, 2
6965; RV64ZVE32F-NEXT:    beqz a2, .LBB59_2
6966; RV64ZVE32F-NEXT:  .LBB59_4: # %cond.load1
6967; RV64ZVE32F-NEXT:    lh a0, 0(a1)
6968; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
6969; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
6970; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
6971; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
6972; RV64ZVE32F-NEXT:    ret
6973  %v = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x bfloat> %passthru)
6974  ret <2 x bfloat> %v
6975}
6976
6977declare <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x bfloat>)
6978
6979define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %passthru) {
6980; RV32-LABEL: mgather_v4bf16:
6981; RV32:       # %bb.0:
6982; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
6983; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
6984; RV32-NEXT:    vmv1r.v v8, v9
6985; RV32-NEXT:    ret
6986;
6987; RV64V-LABEL: mgather_v4bf16:
6988; RV64V:       # %bb.0:
6989; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
6990; RV64V-NEXT:    vluxei64.v v10, (zero), v8, v0.t
6991; RV64V-NEXT:    vmv1r.v v8, v10
6992; RV64V-NEXT:    ret
6993;
6994; RV64ZVE32F-LABEL: mgather_v4bf16:
6995; RV64ZVE32F:       # %bb.0:
6996; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
6997; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
6998; RV64ZVE32F-NEXT:    andi a2, a1, 1
6999; RV64ZVE32F-NEXT:    bnez a2, .LBB60_5
7000; RV64ZVE32F-NEXT:  # %bb.1: # %else
7001; RV64ZVE32F-NEXT:    andi a2, a1, 2
7002; RV64ZVE32F-NEXT:    bnez a2, .LBB60_6
7003; RV64ZVE32F-NEXT:  .LBB60_2: # %else2
7004; RV64ZVE32F-NEXT:    andi a2, a1, 4
7005; RV64ZVE32F-NEXT:    bnez a2, .LBB60_7
7006; RV64ZVE32F-NEXT:  .LBB60_3: # %else5
7007; RV64ZVE32F-NEXT:    andi a1, a1, 8
7008; RV64ZVE32F-NEXT:    bnez a1, .LBB60_8
7009; RV64ZVE32F-NEXT:  .LBB60_4: # %else8
7010; RV64ZVE32F-NEXT:    ret
7011; RV64ZVE32F-NEXT:  .LBB60_5: # %cond.load
7012; RV64ZVE32F-NEXT:    ld a2, 0(a0)
7013; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7014; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7015; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7016; RV64ZVE32F-NEXT:    andi a2, a1, 2
7017; RV64ZVE32F-NEXT:    beqz a2, .LBB60_2
7018; RV64ZVE32F-NEXT:  .LBB60_6: # %cond.load1
7019; RV64ZVE32F-NEXT:    ld a2, 8(a0)
7020; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7021; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
7022; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7023; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
7024; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
7025; RV64ZVE32F-NEXT:    andi a2, a1, 4
7026; RV64ZVE32F-NEXT:    beqz a2, .LBB60_3
7027; RV64ZVE32F-NEXT:  .LBB60_7: # %cond.load4
7028; RV64ZVE32F-NEXT:    ld a2, 16(a0)
7029; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7030; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
7031; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7032; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
7033; RV64ZVE32F-NEXT:    andi a1, a1, 8
7034; RV64ZVE32F-NEXT:    beqz a1, .LBB60_4
7035; RV64ZVE32F-NEXT:  .LBB60_8: # %cond.load7
7036; RV64ZVE32F-NEXT:    ld a0, 24(a0)
7037; RV64ZVE32F-NEXT:    lh a0, 0(a0)
7038; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
7039; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
7040; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
7041; RV64ZVE32F-NEXT:    ret
7042  %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x bfloat> %passthru)
7043  ret <4 x bfloat> %v
7044}
7045
7046define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
7047; RV32-LABEL: mgather_truemask_v4bf16:
7048; RV32:       # %bb.0:
7049; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
7050; RV32-NEXT:    vluxei32.v v9, (zero), v8
7051; RV32-NEXT:    vmv1r.v v8, v9
7052; RV32-NEXT:    ret
7053;
7054; RV64V-LABEL: mgather_truemask_v4bf16:
7055; RV64V:       # %bb.0:
7056; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
7057; RV64V-NEXT:    vluxei64.v v10, (zero), v8
7058; RV64V-NEXT:    vmv1r.v v8, v10
7059; RV64V-NEXT:    ret
7060;
7061; RV64ZVE32F-LABEL: mgather_truemask_v4bf16:
7062; RV64ZVE32F:       # %bb.0:
7063; RV64ZVE32F-NEXT:    ld a1, 0(a0)
7064; RV64ZVE32F-NEXT:    ld a2, 8(a0)
7065; RV64ZVE32F-NEXT:    ld a3, 16(a0)
7066; RV64ZVE32F-NEXT:    ld a0, 24(a0)
7067; RV64ZVE32F-NEXT:    lh a1, 0(a1)
7068; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7069; RV64ZVE32F-NEXT:    lh a3, 0(a3)
7070; RV64ZVE32F-NEXT:    lh a0, 0(a0)
7071; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
7072; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
7073; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
7074; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
7075; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
7076; RV64ZVE32F-NEXT:    ret
7077  %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x bfloat> %passthru)
7078  ret <4 x bfloat> %v
7079}
7080
7081define <4 x bfloat> @mgather_falsemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
7082; RV32-LABEL: mgather_falsemask_v4bf16:
7083; RV32:       # %bb.0:
7084; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7085; RV32-NEXT:    vmv1r.v v8, v9
7086; RV32-NEXT:    ret
7087;
7088; RV64V-LABEL: mgather_falsemask_v4bf16:
7089; RV64V:       # %bb.0:
7090; RV64V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7091; RV64V-NEXT:    vmv1r.v v8, v10
7092; RV64V-NEXT:    ret
7093;
7094; RV64ZVE32F-LABEL: mgather_falsemask_v4bf16:
7095; RV64ZVE32F:       # %bb.0:
7096; RV64ZVE32F-NEXT:    ret
7097  %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x bfloat> %passthru)
7098  ret <4 x bfloat> %v
7099}
7100
7101declare <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x bfloat>)
7102
7103define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %passthru) {
7104; RV32-LABEL: mgather_v8bf16:
7105; RV32:       # %bb.0:
7106; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
7107; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
7108; RV32-NEXT:    vmv.v.v v8, v10
7109; RV32-NEXT:    ret
7110;
7111; RV64V-LABEL: mgather_v8bf16:
7112; RV64V:       # %bb.0:
7113; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
7114; RV64V-NEXT:    vluxei64.v v12, (zero), v8, v0.t
7115; RV64V-NEXT:    vmv.v.v v8, v12
7116; RV64V-NEXT:    ret
7117;
7118; RV64ZVE32F-LABEL: mgather_v8bf16:
7119; RV64ZVE32F:       # %bb.0:
7120; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7121; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
7122; RV64ZVE32F-NEXT:    andi a2, a1, 1
7123; RV64ZVE32F-NEXT:    bnez a2, .LBB63_9
7124; RV64ZVE32F-NEXT:  # %bb.1: # %else
7125; RV64ZVE32F-NEXT:    andi a2, a1, 2
7126; RV64ZVE32F-NEXT:    bnez a2, .LBB63_10
7127; RV64ZVE32F-NEXT:  .LBB63_2: # %else2
7128; RV64ZVE32F-NEXT:    andi a2, a1, 4
7129; RV64ZVE32F-NEXT:    bnez a2, .LBB63_11
7130; RV64ZVE32F-NEXT:  .LBB63_3: # %else5
7131; RV64ZVE32F-NEXT:    andi a2, a1, 8
7132; RV64ZVE32F-NEXT:    bnez a2, .LBB63_12
7133; RV64ZVE32F-NEXT:  .LBB63_4: # %else8
7134; RV64ZVE32F-NEXT:    andi a2, a1, 16
7135; RV64ZVE32F-NEXT:    bnez a2, .LBB63_13
7136; RV64ZVE32F-NEXT:  .LBB63_5: # %else11
7137; RV64ZVE32F-NEXT:    andi a2, a1, 32
7138; RV64ZVE32F-NEXT:    bnez a2, .LBB63_14
7139; RV64ZVE32F-NEXT:  .LBB63_6: # %else14
7140; RV64ZVE32F-NEXT:    andi a2, a1, 64
7141; RV64ZVE32F-NEXT:    bnez a2, .LBB63_15
7142; RV64ZVE32F-NEXT:  .LBB63_7: # %else17
7143; RV64ZVE32F-NEXT:    andi a1, a1, -128
7144; RV64ZVE32F-NEXT:    bnez a1, .LBB63_16
7145; RV64ZVE32F-NEXT:  .LBB63_8: # %else20
7146; RV64ZVE32F-NEXT:    ret
7147; RV64ZVE32F-NEXT:  .LBB63_9: # %cond.load
7148; RV64ZVE32F-NEXT:    ld a2, 0(a0)
7149; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7150; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7151; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7152; RV64ZVE32F-NEXT:    andi a2, a1, 2
7153; RV64ZVE32F-NEXT:    beqz a2, .LBB63_2
7154; RV64ZVE32F-NEXT:  .LBB63_10: # %cond.load1
7155; RV64ZVE32F-NEXT:    ld a2, 8(a0)
7156; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7157; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
7158; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7159; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
7160; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
7161; RV64ZVE32F-NEXT:    andi a2, a1, 4
7162; RV64ZVE32F-NEXT:    beqz a2, .LBB63_3
7163; RV64ZVE32F-NEXT:  .LBB63_11: # %cond.load4
7164; RV64ZVE32F-NEXT:    ld a2, 16(a0)
7165; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7166; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
7167; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7168; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
7169; RV64ZVE32F-NEXT:    andi a2, a1, 8
7170; RV64ZVE32F-NEXT:    beqz a2, .LBB63_4
7171; RV64ZVE32F-NEXT:  .LBB63_12: # %cond.load7
7172; RV64ZVE32F-NEXT:    ld a2, 24(a0)
7173; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7174; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
7175; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7176; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
7177; RV64ZVE32F-NEXT:    andi a2, a1, 16
7178; RV64ZVE32F-NEXT:    beqz a2, .LBB63_5
7179; RV64ZVE32F-NEXT:  .LBB63_13: # %cond.load10
7180; RV64ZVE32F-NEXT:    ld a2, 32(a0)
7181; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7182; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
7183; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7184; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 4
7185; RV64ZVE32F-NEXT:    andi a2, a1, 32
7186; RV64ZVE32F-NEXT:    beqz a2, .LBB63_6
7187; RV64ZVE32F-NEXT:  .LBB63_14: # %cond.load13
7188; RV64ZVE32F-NEXT:    ld a2, 40(a0)
7189; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7190; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
7191; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7192; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 5
7193; RV64ZVE32F-NEXT:    andi a2, a1, 64
7194; RV64ZVE32F-NEXT:    beqz a2, .LBB63_7
7195; RV64ZVE32F-NEXT:  .LBB63_15: # %cond.load16
7196; RV64ZVE32F-NEXT:    ld a2, 48(a0)
7197; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7198; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
7199; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7200; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 6
7201; RV64ZVE32F-NEXT:    andi a1, a1, -128
7202; RV64ZVE32F-NEXT:    beqz a1, .LBB63_8
7203; RV64ZVE32F-NEXT:  .LBB63_16: # %cond.load19
7204; RV64ZVE32F-NEXT:    ld a0, 56(a0)
7205; RV64ZVE32F-NEXT:    lh a0, 0(a0)
7206; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
7207; RV64ZVE32F-NEXT:    vmv.s.x v9, a0
7208; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 7
7209; RV64ZVE32F-NEXT:    ret
7210  %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
7211  ret <8 x bfloat> %v
7212}
7213
7214define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
7215; RV32-LABEL: mgather_baseidx_v8i8_v8bf16:
7216; RV32:       # %bb.0:
7217; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
7218; RV32-NEXT:    vsext.vf4 v10, v8
7219; RV32-NEXT:    vadd.vv v10, v10, v10
7220; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
7221; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
7222; RV32-NEXT:    vmv.v.v v8, v9
7223; RV32-NEXT:    ret
7224;
7225; RV64V-LABEL: mgather_baseidx_v8i8_v8bf16:
7226; RV64V:       # %bb.0:
7227; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
7228; RV64V-NEXT:    vsext.vf8 v12, v8
7229; RV64V-NEXT:    vadd.vv v12, v12, v12
7230; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
7231; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
7232; RV64V-NEXT:    vmv.v.v v8, v9
7233; RV64V-NEXT:    ret
7234;
7235; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8bf16:
7236; RV64ZVE32F:       # %bb.0:
7237; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7238; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
7239; RV64ZVE32F-NEXT:    andi a2, a1, 1
7240; RV64ZVE32F-NEXT:    beqz a2, .LBB64_2
7241; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
7242; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7243; RV64ZVE32F-NEXT:    slli a2, a2, 1
7244; RV64ZVE32F-NEXT:    add a2, a0, a2
7245; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7246; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7247; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7248; RV64ZVE32F-NEXT:  .LBB64_2: # %else
7249; RV64ZVE32F-NEXT:    andi a2, a1, 2
7250; RV64ZVE32F-NEXT:    beqz a2, .LBB64_4
7251; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
7252; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7253; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
7254; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
7255; RV64ZVE32F-NEXT:    slli a2, a2, 1
7256; RV64ZVE32F-NEXT:    add a2, a0, a2
7257; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7258; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7259; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
7260; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
7261; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
7262; RV64ZVE32F-NEXT:  .LBB64_4: # %else2
7263; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
7264; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
7265; RV64ZVE32F-NEXT:    andi a2, a1, 4
7266; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
7267; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
7268; RV64ZVE32F-NEXT:    bnez a2, .LBB64_14
7269; RV64ZVE32F-NEXT:  # %bb.5: # %else5
7270; RV64ZVE32F-NEXT:    andi a2, a1, 8
7271; RV64ZVE32F-NEXT:    bnez a2, .LBB64_15
7272; RV64ZVE32F-NEXT:  .LBB64_6: # %else8
7273; RV64ZVE32F-NEXT:    andi a2, a1, 16
7274; RV64ZVE32F-NEXT:    bnez a2, .LBB64_16
7275; RV64ZVE32F-NEXT:  .LBB64_7: # %else11
7276; RV64ZVE32F-NEXT:    andi a2, a1, 32
7277; RV64ZVE32F-NEXT:    beqz a2, .LBB64_9
7278; RV64ZVE32F-NEXT:  .LBB64_8: # %cond.load13
7279; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7280; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
7281; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7282; RV64ZVE32F-NEXT:    slli a2, a2, 1
7283; RV64ZVE32F-NEXT:    add a2, a0, a2
7284; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7285; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7286; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7287; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
7288; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
7289; RV64ZVE32F-NEXT:  .LBB64_9: # %else14
7290; RV64ZVE32F-NEXT:    andi a2, a1, 64
7291; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
7292; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
7293; RV64ZVE32F-NEXT:    beqz a2, .LBB64_11
7294; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
7295; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7296; RV64ZVE32F-NEXT:    slli a2, a2, 1
7297; RV64ZVE32F-NEXT:    add a2, a0, a2
7298; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7299; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7300; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
7301; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
7302; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
7303; RV64ZVE32F-NEXT:  .LBB64_11: # %else17
7304; RV64ZVE32F-NEXT:    andi a1, a1, -128
7305; RV64ZVE32F-NEXT:    beqz a1, .LBB64_13
7306; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
7307; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7308; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
7309; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
7310; RV64ZVE32F-NEXT:    slli a1, a1, 1
7311; RV64ZVE32F-NEXT:    add a0, a0, a1
7312; RV64ZVE32F-NEXT:    lh a0, 0(a0)
7313; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7314; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
7315; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
7316; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
7317; RV64ZVE32F-NEXT:  .LBB64_13: # %else20
7318; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7319; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
7320; RV64ZVE32F-NEXT:    ret
7321; RV64ZVE32F-NEXT:  .LBB64_14: # %cond.load4
7322; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7323; RV64ZVE32F-NEXT:    slli a2, a2, 1
7324; RV64ZVE32F-NEXT:    add a2, a0, a2
7325; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7326; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7327; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
7328; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
7329; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
7330; RV64ZVE32F-NEXT:    andi a2, a1, 8
7331; RV64ZVE32F-NEXT:    beqz a2, .LBB64_6
7332; RV64ZVE32F-NEXT:  .LBB64_15: # %cond.load7
7333; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7334; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
7335; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7336; RV64ZVE32F-NEXT:    slli a2, a2, 1
7337; RV64ZVE32F-NEXT:    add a2, a0, a2
7338; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7339; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7340; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7341; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
7342; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
7343; RV64ZVE32F-NEXT:    andi a2, a1, 16
7344; RV64ZVE32F-NEXT:    beqz a2, .LBB64_7
7345; RV64ZVE32F-NEXT:  .LBB64_16: # %cond.load10
7346; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7347; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
7348; RV64ZVE32F-NEXT:    slli a2, a2, 1
7349; RV64ZVE32F-NEXT:    add a2, a0, a2
7350; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7351; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
7352; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7353; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
7354; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
7355; RV64ZVE32F-NEXT:    andi a2, a1, 32
7356; RV64ZVE32F-NEXT:    bnez a2, .LBB64_8
7357; RV64ZVE32F-NEXT:    j .LBB64_9
7358  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
7359  %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
7360  ret <8 x bfloat> %v
7361}
7362
7363define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
7364; RV32-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
7365; RV32:       # %bb.0:
7366; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
7367; RV32-NEXT:    vsext.vf4 v10, v8
7368; RV32-NEXT:    vadd.vv v10, v10, v10
7369; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
7370; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
7371; RV32-NEXT:    vmv.v.v v8, v9
7372; RV32-NEXT:    ret
7373;
7374; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
7375; RV64V:       # %bb.0:
7376; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
7377; RV64V-NEXT:    vsext.vf8 v12, v8
7378; RV64V-NEXT:    vadd.vv v12, v12, v12
7379; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
7380; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
7381; RV64V-NEXT:    vmv.v.v v8, v9
7382; RV64V-NEXT:    ret
7383;
7384; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
7385; RV64ZVE32F:       # %bb.0:
7386; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7387; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
7388; RV64ZVE32F-NEXT:    andi a2, a1, 1
7389; RV64ZVE32F-NEXT:    beqz a2, .LBB65_2
7390; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
7391; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7392; RV64ZVE32F-NEXT:    slli a2, a2, 1
7393; RV64ZVE32F-NEXT:    add a2, a0, a2
7394; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7395; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7396; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7397; RV64ZVE32F-NEXT:  .LBB65_2: # %else
7398; RV64ZVE32F-NEXT:    andi a2, a1, 2
7399; RV64ZVE32F-NEXT:    beqz a2, .LBB65_4
7400; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
7401; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7402; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
7403; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
7404; RV64ZVE32F-NEXT:    slli a2, a2, 1
7405; RV64ZVE32F-NEXT:    add a2, a0, a2
7406; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7407; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7408; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
7409; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
7410; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
7411; RV64ZVE32F-NEXT:  .LBB65_4: # %else2
7412; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
7413; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
7414; RV64ZVE32F-NEXT:    andi a2, a1, 4
7415; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
7416; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
7417; RV64ZVE32F-NEXT:    bnez a2, .LBB65_14
7418; RV64ZVE32F-NEXT:  # %bb.5: # %else5
7419; RV64ZVE32F-NEXT:    andi a2, a1, 8
7420; RV64ZVE32F-NEXT:    bnez a2, .LBB65_15
7421; RV64ZVE32F-NEXT:  .LBB65_6: # %else8
7422; RV64ZVE32F-NEXT:    andi a2, a1, 16
7423; RV64ZVE32F-NEXT:    bnez a2, .LBB65_16
7424; RV64ZVE32F-NEXT:  .LBB65_7: # %else11
7425; RV64ZVE32F-NEXT:    andi a2, a1, 32
7426; RV64ZVE32F-NEXT:    beqz a2, .LBB65_9
7427; RV64ZVE32F-NEXT:  .LBB65_8: # %cond.load13
7428; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7429; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
7430; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7431; RV64ZVE32F-NEXT:    slli a2, a2, 1
7432; RV64ZVE32F-NEXT:    add a2, a0, a2
7433; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7434; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7435; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7436; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
7437; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
7438; RV64ZVE32F-NEXT:  .LBB65_9: # %else14
7439; RV64ZVE32F-NEXT:    andi a2, a1, 64
7440; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
7441; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
7442; RV64ZVE32F-NEXT:    beqz a2, .LBB65_11
7443; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
7444; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7445; RV64ZVE32F-NEXT:    slli a2, a2, 1
7446; RV64ZVE32F-NEXT:    add a2, a0, a2
7447; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7448; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7449; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
7450; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
7451; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
7452; RV64ZVE32F-NEXT:  .LBB65_11: # %else17
7453; RV64ZVE32F-NEXT:    andi a1, a1, -128
7454; RV64ZVE32F-NEXT:    beqz a1, .LBB65_13
7455; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
7456; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7457; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
7458; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
7459; RV64ZVE32F-NEXT:    slli a1, a1, 1
7460; RV64ZVE32F-NEXT:    add a0, a0, a1
7461; RV64ZVE32F-NEXT:    lh a0, 0(a0)
7462; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7463; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
7464; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
7465; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
7466; RV64ZVE32F-NEXT:  .LBB65_13: # %else20
7467; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7468; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
7469; RV64ZVE32F-NEXT:    ret
7470; RV64ZVE32F-NEXT:  .LBB65_14: # %cond.load4
7471; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7472; RV64ZVE32F-NEXT:    slli a2, a2, 1
7473; RV64ZVE32F-NEXT:    add a2, a0, a2
7474; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7475; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7476; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
7477; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
7478; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
7479; RV64ZVE32F-NEXT:    andi a2, a1, 8
7480; RV64ZVE32F-NEXT:    beqz a2, .LBB65_6
7481; RV64ZVE32F-NEXT:  .LBB65_15: # %cond.load7
7482; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7483; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
7484; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7485; RV64ZVE32F-NEXT:    slli a2, a2, 1
7486; RV64ZVE32F-NEXT:    add a2, a0, a2
7487; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7488; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7489; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7490; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
7491; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
7492; RV64ZVE32F-NEXT:    andi a2, a1, 16
7493; RV64ZVE32F-NEXT:    beqz a2, .LBB65_7
7494; RV64ZVE32F-NEXT:  .LBB65_16: # %cond.load10
7495; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7496; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
7497; RV64ZVE32F-NEXT:    slli a2, a2, 1
7498; RV64ZVE32F-NEXT:    add a2, a0, a2
7499; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7500; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
7501; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7502; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
7503; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
7504; RV64ZVE32F-NEXT:    andi a2, a1, 32
7505; RV64ZVE32F-NEXT:    bnez a2, .LBB65_8
7506; RV64ZVE32F-NEXT:    j .LBB65_9
7507  %eidxs = sext <8 x i8> %idxs to <8 x i16>
7508  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
7509  %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
7510  ret <8 x bfloat> %v
7511}
7512
7513define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
7514; RV32-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
7515; RV32:       # %bb.0:
7516; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
7517; RV32-NEXT:    vwaddu.vv v10, v8, v8
7518; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
7519; RV32-NEXT:    vluxei16.v v9, (a0), v10, v0.t
7520; RV32-NEXT:    vmv.v.v v8, v9
7521; RV32-NEXT:    ret
7522;
7523; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
7524; RV64V:       # %bb.0:
7525; RV64V-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
7526; RV64V-NEXT:    vwaddu.vv v10, v8, v8
7527; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
7528; RV64V-NEXT:    vluxei16.v v9, (a0), v10, v0.t
7529; RV64V-NEXT:    vmv.v.v v8, v9
7530; RV64V-NEXT:    ret
7531;
7532; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
7533; RV64ZVE32F:       # %bb.0:
7534; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7535; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
7536; RV64ZVE32F-NEXT:    andi a2, a1, 1
7537; RV64ZVE32F-NEXT:    beqz a2, .LBB66_2
7538; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
7539; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7540; RV64ZVE32F-NEXT:    andi a2, a2, 255
7541; RV64ZVE32F-NEXT:    slli a2, a2, 1
7542; RV64ZVE32F-NEXT:    add a2, a0, a2
7543; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7544; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7545; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7546; RV64ZVE32F-NEXT:  .LBB66_2: # %else
7547; RV64ZVE32F-NEXT:    andi a2, a1, 2
7548; RV64ZVE32F-NEXT:    beqz a2, .LBB66_4
7549; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
7550; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7551; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
7552; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
7553; RV64ZVE32F-NEXT:    andi a2, a2, 255
7554; RV64ZVE32F-NEXT:    slli a2, a2, 1
7555; RV64ZVE32F-NEXT:    add a2, a0, a2
7556; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7557; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7558; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
7559; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
7560; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
7561; RV64ZVE32F-NEXT:  .LBB66_4: # %else2
7562; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
7563; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
7564; RV64ZVE32F-NEXT:    andi a2, a1, 4
7565; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
7566; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
7567; RV64ZVE32F-NEXT:    bnez a2, .LBB66_14
7568; RV64ZVE32F-NEXT:  # %bb.5: # %else5
7569; RV64ZVE32F-NEXT:    andi a2, a1, 8
7570; RV64ZVE32F-NEXT:    bnez a2, .LBB66_15
7571; RV64ZVE32F-NEXT:  .LBB66_6: # %else8
7572; RV64ZVE32F-NEXT:    andi a2, a1, 16
7573; RV64ZVE32F-NEXT:    bnez a2, .LBB66_16
7574; RV64ZVE32F-NEXT:  .LBB66_7: # %else11
7575; RV64ZVE32F-NEXT:    andi a2, a1, 32
7576; RV64ZVE32F-NEXT:    beqz a2, .LBB66_9
7577; RV64ZVE32F-NEXT:  .LBB66_8: # %cond.load13
7578; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7579; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
7580; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7581; RV64ZVE32F-NEXT:    andi a2, a2, 255
7582; RV64ZVE32F-NEXT:    slli a2, a2, 1
7583; RV64ZVE32F-NEXT:    add a2, a0, a2
7584; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7585; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7586; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7587; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
7588; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
7589; RV64ZVE32F-NEXT:  .LBB66_9: # %else14
7590; RV64ZVE32F-NEXT:    andi a2, a1, 64
7591; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
7592; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
7593; RV64ZVE32F-NEXT:    beqz a2, .LBB66_11
7594; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
7595; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7596; RV64ZVE32F-NEXT:    andi a2, a2, 255
7597; RV64ZVE32F-NEXT:    slli a2, a2, 1
7598; RV64ZVE32F-NEXT:    add a2, a0, a2
7599; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7600; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7601; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
7602; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
7603; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
7604; RV64ZVE32F-NEXT:  .LBB66_11: # %else17
7605; RV64ZVE32F-NEXT:    andi a1, a1, -128
7606; RV64ZVE32F-NEXT:    beqz a1, .LBB66_13
7607; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
7608; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7609; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
7610; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
7611; RV64ZVE32F-NEXT:    andi a1, a1, 255
7612; RV64ZVE32F-NEXT:    slli a1, a1, 1
7613; RV64ZVE32F-NEXT:    add a0, a0, a1
7614; RV64ZVE32F-NEXT:    lh a0, 0(a0)
7615; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7616; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
7617; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
7618; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
7619; RV64ZVE32F-NEXT:  .LBB66_13: # %else20
7620; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7621; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
7622; RV64ZVE32F-NEXT:    ret
7623; RV64ZVE32F-NEXT:  .LBB66_14: # %cond.load4
7624; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7625; RV64ZVE32F-NEXT:    andi a2, a2, 255
7626; RV64ZVE32F-NEXT:    slli a2, a2, 1
7627; RV64ZVE32F-NEXT:    add a2, a0, a2
7628; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7629; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7630; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
7631; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
7632; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
7633; RV64ZVE32F-NEXT:    andi a2, a1, 8
7634; RV64ZVE32F-NEXT:    beqz a2, .LBB66_6
7635; RV64ZVE32F-NEXT:  .LBB66_15: # %cond.load7
7636; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
7637; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
7638; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7639; RV64ZVE32F-NEXT:    andi a2, a2, 255
7640; RV64ZVE32F-NEXT:    slli a2, a2, 1
7641; RV64ZVE32F-NEXT:    add a2, a0, a2
7642; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7643; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
7644; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7645; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
7646; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
7647; RV64ZVE32F-NEXT:    andi a2, a1, 16
7648; RV64ZVE32F-NEXT:    beqz a2, .LBB66_7
7649; RV64ZVE32F-NEXT:  .LBB66_16: # %cond.load10
7650; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7651; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
7652; RV64ZVE32F-NEXT:    andi a2, a2, 255
7653; RV64ZVE32F-NEXT:    slli a2, a2, 1
7654; RV64ZVE32F-NEXT:    add a2, a0, a2
7655; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7656; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
7657; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7658; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
7659; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
7660; RV64ZVE32F-NEXT:    andi a2, a1, 32
7661; RV64ZVE32F-NEXT:    bnez a2, .LBB66_8
7662; RV64ZVE32F-NEXT:    j .LBB66_9
7663  %eidxs = zext <8 x i8> %idxs to <8 x i16>
7664  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
7665  %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
7666  ret <8 x bfloat> %v
7667}
7668
7669define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
7670; RV32-LABEL: mgather_baseidx_v8bf16:
7671; RV32:       # %bb.0:
7672; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
7673; RV32-NEXT:    vwadd.vv v10, v8, v8
7674; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
7675; RV32-NEXT:    vmv.v.v v8, v9
7676; RV32-NEXT:    ret
7677;
7678; RV64V-LABEL: mgather_baseidx_v8bf16:
7679; RV64V:       # %bb.0:
7680; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
7681; RV64V-NEXT:    vsext.vf4 v12, v8
7682; RV64V-NEXT:    vadd.vv v12, v12, v12
7683; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
7684; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
7685; RV64V-NEXT:    vmv.v.v v8, v9
7686; RV64V-NEXT:    ret
7687;
7688; RV64ZVE32F-LABEL: mgather_baseidx_v8bf16:
7689; RV64ZVE32F:       # %bb.0:
7690; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7691; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
7692; RV64ZVE32F-NEXT:    andi a2, a1, 1
7693; RV64ZVE32F-NEXT:    beqz a2, .LBB67_2
7694; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
7695; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7696; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7697; RV64ZVE32F-NEXT:    slli a2, a2, 1
7698; RV64ZVE32F-NEXT:    add a2, a0, a2
7699; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7700; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
7701; RV64ZVE32F-NEXT:  .LBB67_2: # %else
7702; RV64ZVE32F-NEXT:    andi a2, a1, 2
7703; RV64ZVE32F-NEXT:    beqz a2, .LBB67_4
7704; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
7705; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
7706; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
7707; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
7708; RV64ZVE32F-NEXT:    slli a2, a2, 1
7709; RV64ZVE32F-NEXT:    add a2, a0, a2
7710; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7711; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
7712; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
7713; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
7714; RV64ZVE32F-NEXT:  .LBB67_4: # %else2
7715; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
7716; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
7717; RV64ZVE32F-NEXT:    andi a2, a1, 4
7718; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
7719; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
7720; RV64ZVE32F-NEXT:    bnez a2, .LBB67_14
7721; RV64ZVE32F-NEXT:  # %bb.5: # %else5
7722; RV64ZVE32F-NEXT:    andi a2, a1, 8
7723; RV64ZVE32F-NEXT:    bnez a2, .LBB67_15
7724; RV64ZVE32F-NEXT:  .LBB67_6: # %else8
7725; RV64ZVE32F-NEXT:    andi a2, a1, 16
7726; RV64ZVE32F-NEXT:    bnez a2, .LBB67_16
7727; RV64ZVE32F-NEXT:  .LBB67_7: # %else11
7728; RV64ZVE32F-NEXT:    andi a2, a1, 32
7729; RV64ZVE32F-NEXT:    beqz a2, .LBB67_9
7730; RV64ZVE32F-NEXT:  .LBB67_8: # %cond.load13
7731; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
7732; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
7733; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7734; RV64ZVE32F-NEXT:    slli a2, a2, 1
7735; RV64ZVE32F-NEXT:    add a2, a0, a2
7736; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7737; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7738; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
7739; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 5
7740; RV64ZVE32F-NEXT:  .LBB67_9: # %else14
7741; RV64ZVE32F-NEXT:    andi a2, a1, 64
7742; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
7743; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
7744; RV64ZVE32F-NEXT:    beqz a2, .LBB67_11
7745; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
7746; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7747; RV64ZVE32F-NEXT:    slli a2, a2, 1
7748; RV64ZVE32F-NEXT:    add a2, a0, a2
7749; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7750; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
7751; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
7752; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
7753; RV64ZVE32F-NEXT:  .LBB67_11: # %else17
7754; RV64ZVE32F-NEXT:    andi a1, a1, -128
7755; RV64ZVE32F-NEXT:    beqz a1, .LBB67_13
7756; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
7757; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
7758; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
7759; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
7760; RV64ZVE32F-NEXT:    slli a1, a1, 1
7761; RV64ZVE32F-NEXT:    add a0, a0, a1
7762; RV64ZVE32F-NEXT:    lh a0, 0(a0)
7763; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
7764; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
7765; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
7766; RV64ZVE32F-NEXT:  .LBB67_13: # %else20
7767; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7768; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
7769; RV64ZVE32F-NEXT:    ret
7770; RV64ZVE32F-NEXT:  .LBB67_14: # %cond.load4
7771; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7772; RV64ZVE32F-NEXT:    slli a2, a2, 1
7773; RV64ZVE32F-NEXT:    add a2, a0, a2
7774; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7775; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
7776; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
7777; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
7778; RV64ZVE32F-NEXT:    andi a2, a1, 8
7779; RV64ZVE32F-NEXT:    beqz a2, .LBB67_6
7780; RV64ZVE32F-NEXT:  .LBB67_15: # %cond.load7
7781; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
7782; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
7783; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
7784; RV64ZVE32F-NEXT:    slli a2, a2, 1
7785; RV64ZVE32F-NEXT:    add a2, a0, a2
7786; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7787; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7788; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
7789; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 3
7790; RV64ZVE32F-NEXT:    andi a2, a1, 16
7791; RV64ZVE32F-NEXT:    beqz a2, .LBB67_7
7792; RV64ZVE32F-NEXT:  .LBB67_16: # %cond.load10
7793; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
7794; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
7795; RV64ZVE32F-NEXT:    slli a2, a2, 1
7796; RV64ZVE32F-NEXT:    add a2, a0, a2
7797; RV64ZVE32F-NEXT:    lh a2, 0(a2)
7798; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
7799; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 4
7800; RV64ZVE32F-NEXT:    andi a2, a1, 32
7801; RV64ZVE32F-NEXT:    bnez a2, .LBB67_8
7802; RV64ZVE32F-NEXT:    j .LBB67_9
7803  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
7804  %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
7805  ret <8 x bfloat> %v
7806}
7807
7808declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
7809
7810define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
7811; RV32V-LABEL: mgather_v1f16:
7812; RV32V:       # %bb.0:
7813; RV32V-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
7814; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
7815; RV32V-NEXT:    vmv1r.v v8, v9
7816; RV32V-NEXT:    ret
7817;
7818; RV64V-LABEL: mgather_v1f16:
7819; RV64V:       # %bb.0:
7820; RV64V-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
7821; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
7822; RV64V-NEXT:    vmv1r.v v8, v9
7823; RV64V-NEXT:    ret
7824;
7825; RV32ZVE32F-LABEL: mgather_v1f16:
7826; RV32ZVE32F:       # %bb.0:
7827; RV32ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
7828; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
7829; RV32ZVE32F-NEXT:    vmv1r.v v8, v9
7830; RV32ZVE32F-NEXT:    ret
7831;
7832; RV64ZVE32F-LABEL: mgather_v1f16:
7833; RV64ZVE32F:       # %bb.0:
7834; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
7835; RV64ZVE32F-NEXT:    vfirst.m a1, v0
7836; RV64ZVE32F-NEXT:    bnez a1, .LBB68_2
7837; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
7838; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
7839; RV64ZVE32F-NEXT:    vle16.v v8, (a0)
7840; RV64ZVE32F-NEXT:  .LBB68_2: # %else
7841; RV64ZVE32F-NEXT:    ret
7842  %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
7843  ret <1 x half> %v
7844}
7845
7846declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
7847
7848define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
7849; RV32V-LABEL: mgather_v2f16:
7850; RV32V:       # %bb.0:
7851; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
7852; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
7853; RV32V-NEXT:    vmv1r.v v8, v9
7854; RV32V-NEXT:    ret
7855;
7856; RV64V-LABEL: mgather_v2f16:
7857; RV64V:       # %bb.0:
7858; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
7859; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
7860; RV64V-NEXT:    vmv1r.v v8, v9
7861; RV64V-NEXT:    ret
7862;
7863; RV32ZVE32F-LABEL: mgather_v2f16:
7864; RV32ZVE32F:       # %bb.0:
7865; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, mu
7866; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
7867; RV32ZVE32F-NEXT:    vmv1r.v v8, v9
7868; RV32ZVE32F-NEXT:    ret
7869;
7870; RV64ZVE32F-ZVFH-LABEL: mgather_v2f16:
7871; RV64ZVE32F-ZVFH:       # %bb.0:
7872; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7873; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v0
7874; RV64ZVE32F-ZVFH-NEXT:    andi a3, a2, 1
7875; RV64ZVE32F-ZVFH-NEXT:    bnez a3, .LBB69_3
7876; RV64ZVE32F-ZVFH-NEXT:  # %bb.1: # %else
7877; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 2
7878; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB69_4
7879; RV64ZVE32F-ZVFH-NEXT:  .LBB69_2: # %else2
7880; RV64ZVE32F-ZVFH-NEXT:    ret
7881; RV64ZVE32F-ZVFH-NEXT:  .LBB69_3: # %cond.load
7882; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a0)
7883; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7884; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
7885; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 2
7886; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB69_2
7887; RV64ZVE32F-ZVFH-NEXT:  .LBB69_4: # %cond.load1
7888; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a1)
7889; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
7890; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
7891; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
7892; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 1
7893; RV64ZVE32F-ZVFH-NEXT:    ret
7894;
7895; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v2f16:
7896; RV64ZVE32F-ZVFHMIN:       # %bb.0:
7897; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7898; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v0
7899; RV64ZVE32F-ZVFHMIN-NEXT:    andi a3, a2, 1
7900; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a3, .LBB69_3
7901; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.1: # %else
7902; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 2
7903; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB69_4
7904; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB69_2: # %else2
7905; RV64ZVE32F-ZVFHMIN-NEXT:    ret
7906; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB69_3: # %cond.load
7907; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a0)
7908; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7909; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a0
7910; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 2
7911; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB69_2
7912; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB69_4: # %cond.load1
7913; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a1)
7914; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
7915; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a0
7916; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
7917; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 1
7918; RV64ZVE32F-ZVFHMIN-NEXT:    ret
7919  %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
7920  ret <2 x half> %v
7921}
7922
7923declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
7924
7925define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
7926; RV32-LABEL: mgather_v4f16:
7927; RV32:       # %bb.0:
7928; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
7929; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
7930; RV32-NEXT:    vmv1r.v v8, v9
7931; RV32-NEXT:    ret
7932;
7933; RV64V-LABEL: mgather_v4f16:
7934; RV64V:       # %bb.0:
7935; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
7936; RV64V-NEXT:    vluxei64.v v10, (zero), v8, v0.t
7937; RV64V-NEXT:    vmv1r.v v8, v10
7938; RV64V-NEXT:    ret
7939;
7940; RV64ZVE32F-ZVFH-LABEL: mgather_v4f16:
7941; RV64ZVE32F-ZVFH:       # %bb.0:
7942; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7943; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v0
7944; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 1
7945; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB70_5
7946; RV64ZVE32F-ZVFH-NEXT:  # %bb.1: # %else
7947; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 2
7948; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB70_6
7949; RV64ZVE32F-ZVFH-NEXT:  .LBB70_2: # %else2
7950; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 4
7951; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB70_7
7952; RV64ZVE32F-ZVFH-NEXT:  .LBB70_3: # %else5
7953; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, 8
7954; RV64ZVE32F-ZVFH-NEXT:    bnez a1, .LBB70_8
7955; RV64ZVE32F-ZVFH-NEXT:  .LBB70_4: # %else8
7956; RV64ZVE32F-ZVFH-NEXT:    ret
7957; RV64ZVE32F-ZVFH-NEXT:  .LBB70_5: # %cond.load
7958; RV64ZVE32F-ZVFH-NEXT:    ld a2, 0(a0)
7959; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
7960; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
7961; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
7962; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 2
7963; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB70_2
7964; RV64ZVE32F-ZVFH-NEXT:  .LBB70_6: # %cond.load1
7965; RV64ZVE32F-ZVFH-NEXT:    ld a2, 8(a0)
7966; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
7967; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
7968; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
7969; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
7970; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 1
7971; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 4
7972; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB70_3
7973; RV64ZVE32F-ZVFH-NEXT:  .LBB70_7: # %cond.load4
7974; RV64ZVE32F-ZVFH-NEXT:    ld a2, 16(a0)
7975; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
7976; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
7977; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
7978; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 2
7979; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, 8
7980; RV64ZVE32F-ZVFH-NEXT:    beqz a1, .LBB70_4
7981; RV64ZVE32F-ZVFH-NEXT:  .LBB70_8: # %cond.load7
7982; RV64ZVE32F-ZVFH-NEXT:    ld a0, 24(a0)
7983; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a0)
7984; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
7985; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
7986; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 3
7987; RV64ZVE32F-ZVFH-NEXT:    ret
7988;
7989; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v4f16:
7990; RV64ZVE32F-ZVFHMIN:       # %bb.0:
7991; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
7992; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v0
7993; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 1
7994; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB70_5
7995; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.1: # %else
7996; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 2
7997; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB70_6
7998; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB70_2: # %else2
7999; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 4
8000; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB70_7
8001; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB70_3: # %else5
8002; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, 8
8003; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a1, .LBB70_8
8004; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB70_4: # %else8
8005; RV64ZVE32F-ZVFHMIN-NEXT:    ret
8006; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB70_5: # %cond.load
8007; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 0(a0)
8008; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8009; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
8010; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
8011; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 2
8012; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB70_2
8013; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB70_6: # %cond.load1
8014; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 8(a0)
8015; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8016; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
8017; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8018; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
8019; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 1
8020; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 4
8021; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB70_3
8022; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB70_7: # %cond.load4
8023; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 16(a0)
8024; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8025; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
8026; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8027; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 2
8028; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, 8
8029; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a1, .LBB70_4
8030; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB70_8: # %cond.load7
8031; RV64ZVE32F-ZVFHMIN-NEXT:    ld a0, 24(a0)
8032; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a0)
8033; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
8034; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a0
8035; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
8036; RV64ZVE32F-ZVFHMIN-NEXT:    ret
8037  %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
8038  ret <4 x half> %v
8039}
8040
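; An all-ones mask should fold to an unmasked gather: a plain vluxei on the V
; configurations, unconditional scalar loads on RV64 ZVE32F.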
8041define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
8042; RV32-LABEL: mgather_truemask_v4f16:
8043; RV32:       # %bb.0:
8044; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
8045; RV32-NEXT:    vluxei32.v v9, (zero), v8
8046; RV32-NEXT:    vmv1r.v v8, v9
8047; RV32-NEXT:    ret
8048;
8049; RV64V-LABEL: mgather_truemask_v4f16:
8050; RV64V:       # %bb.0:
8051; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
8052; RV64V-NEXT:    vluxei64.v v10, (zero), v8
8053; RV64V-NEXT:    vmv1r.v v8, v10
8054; RV64V-NEXT:    ret
8055;
8056; RV64ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16:
8057; RV64ZVE32F-ZVFH:       # %bb.0:
8058; RV64ZVE32F-ZVFH-NEXT:    ld a1, 0(a0)
8059; RV64ZVE32F-ZVFH-NEXT:    ld a2, 8(a0)
8060; RV64ZVE32F-ZVFH-NEXT:    ld a3, 16(a0)
8061; RV64ZVE32F-ZVFH-NEXT:    ld a0, 24(a0)
8062; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a1)
8063; RV64ZVE32F-ZVFH-NEXT:    flh fa4, 0(a2)
8064; RV64ZVE32F-ZVFH-NEXT:    flh fa3, 0(a3)
8065; RV64ZVE32F-ZVFH-NEXT:    flh fa2, 0(a0)
8066; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
8067; RV64ZVE32F-ZVFH-NEXT:    vfmv.v.f v8, fa5
8068; RV64ZVE32F-ZVFH-NEXT:    vfslide1down.vf v8, v8, fa4
8069; RV64ZVE32F-ZVFH-NEXT:    vfslide1down.vf v8, v8, fa3
8070; RV64ZVE32F-ZVFH-NEXT:    vfslide1down.vf v8, v8, fa2
8071; RV64ZVE32F-ZVFH-NEXT:    ret
8072;
8073; RV64ZVE32F-ZVFHMIN-LABEL: mgather_truemask_v4f16:
8074; RV64ZVE32F-ZVFHMIN:       # %bb.0:
8075; RV64ZVE32F-ZVFHMIN-NEXT:    ld a1, 0(a0)
8076; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 8(a0)
8077; RV64ZVE32F-ZVFHMIN-NEXT:    ld a3, 16(a0)
8078; RV64ZVE32F-ZVFHMIN-NEXT:    ld a0, 24(a0)
8079; RV64ZVE32F-ZVFHMIN-NEXT:    lh a1, 0(a1)
8080; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8081; RV64ZVE32F-ZVFHMIN-NEXT:    lh a3, 0(a3)
8082; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a0)
8083; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
8084; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.v.x v8, a1
8085; RV64ZVE32F-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
8086; RV64ZVE32F-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a3
8087; RV64ZVE32F-ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a0
8088; RV64ZVE32F-ZVFHMIN-NEXT:    ret
8089  %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru)
8090  ret <4 x half> %v
8091}
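; An all-zeros mask should fold the gather away entirely, returning the
; passthru operand.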
8092
8093define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
8094; RV32-LABEL: mgather_falsemask_v4f16:
8095; RV32:       # %bb.0:
8096; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8097; RV32-NEXT:    vmv1r.v v8, v9
8098; RV32-NEXT:    ret
8099;
8100; RV64V-LABEL: mgather_falsemask_v4f16:
8101; RV64V:       # %bb.0:
8102; RV64V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8103; RV64V-NEXT:    vmv1r.v v8, v10
8104; RV64V-NEXT:    ret
8105;
8106; RV64ZVE32F-LABEL: mgather_falsemask_v4f16:
8107; RV64ZVE32F:       # %bb.0:
8108; RV64ZVE32F-NEXT:    ret
8109  %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru)
8110  ret <4 x half> %v
8111}
8112
8113declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
8114
8115define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
8116; RV32-LABEL: mgather_v8f16:
8117; RV32:       # %bb.0:
8118; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
8119; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
8120; RV32-NEXT:    vmv.v.v v8, v10
8121; RV32-NEXT:    ret
8122;
8123; RV64V-LABEL: mgather_v8f16:
8124; RV64V:       # %bb.0:
8125; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
8126; RV64V-NEXT:    vluxei64.v v12, (zero), v8, v0.t
8127; RV64V-NEXT:    vmv.v.v v8, v12
8128; RV64V-NEXT:    ret
8129;
8130; RV64ZVE32F-ZVFH-LABEL: mgather_v8f16:
8131; RV64ZVE32F-ZVFH:       # %bb.0:
8132; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8133; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v0
8134; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 1
8135; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB73_9
8136; RV64ZVE32F-ZVFH-NEXT:  # %bb.1: # %else
8137; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 2
8138; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB73_10
8139; RV64ZVE32F-ZVFH-NEXT:  .LBB73_2: # %else2
8140; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 4
8141; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB73_11
8142; RV64ZVE32F-ZVFH-NEXT:  .LBB73_3: # %else5
8143; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
8144; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB73_12
8145; RV64ZVE32F-ZVFH-NEXT:  .LBB73_4: # %else8
8146; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
8147; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB73_13
8148; RV64ZVE32F-ZVFH-NEXT:  .LBB73_5: # %else11
8149; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
8150; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB73_14
8151; RV64ZVE32F-ZVFH-NEXT:  .LBB73_6: # %else14
8152; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 64
8153; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB73_15
8154; RV64ZVE32F-ZVFH-NEXT:  .LBB73_7: # %else17
8155; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, -128
8156; RV64ZVE32F-ZVFH-NEXT:    bnez a1, .LBB73_16
8157; RV64ZVE32F-ZVFH-NEXT:  .LBB73_8: # %else20
8158; RV64ZVE32F-ZVFH-NEXT:    ret
8159; RV64ZVE32F-ZVFH-NEXT:  .LBB73_9: # %cond.load
8160; RV64ZVE32F-ZVFH-NEXT:    ld a2, 0(a0)
8161; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8162; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
8163; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8164; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 2
8165; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB73_2
8166; RV64ZVE32F-ZVFH-NEXT:  .LBB73_10: # %cond.load1
8167; RV64ZVE32F-ZVFH-NEXT:    ld a2, 8(a0)
8168; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8169; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
8170; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8171; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
8172; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 1
8173; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 4
8174; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB73_3
8175; RV64ZVE32F-ZVFH-NEXT:  .LBB73_11: # %cond.load4
8176; RV64ZVE32F-ZVFH-NEXT:    ld a2, 16(a0)
8177; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8178; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
8179; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8180; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 2
8181; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
8182; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB73_4
8183; RV64ZVE32F-ZVFH-NEXT:  .LBB73_12: # %cond.load7
8184; RV64ZVE32F-ZVFH-NEXT:    ld a2, 24(a0)
8185; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8186; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
8187; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8188; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 3
8189; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
8190; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB73_5
8191; RV64ZVE32F-ZVFH-NEXT:  .LBB73_13: # %cond.load10
8192; RV64ZVE32F-ZVFH-NEXT:    ld a2, 32(a0)
8193; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8194; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
8195; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8196; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 4
8197; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
8198; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB73_6
8199; RV64ZVE32F-ZVFH-NEXT:  .LBB73_14: # %cond.load13
8200; RV64ZVE32F-ZVFH-NEXT:    ld a2, 40(a0)
8201; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8202; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
8203; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8204; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 5
8205; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 64
8206; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB73_7
8207; RV64ZVE32F-ZVFH-NEXT:  .LBB73_15: # %cond.load16
8208; RV64ZVE32F-ZVFH-NEXT:    ld a2, 48(a0)
8209; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8210; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
8211; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8212; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 6
8213; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, -128
8214; RV64ZVE32F-ZVFH-NEXT:    beqz a1, .LBB73_8
8215; RV64ZVE32F-ZVFH-NEXT:  .LBB73_16: # %cond.load19
8216; RV64ZVE32F-ZVFH-NEXT:    ld a0, 56(a0)
8217; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a0)
8218; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
8219; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8220; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v8, v9, 7
8221; RV64ZVE32F-ZVFH-NEXT:    ret
8222;
8223; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v8f16:
8224; RV64ZVE32F-ZVFHMIN:       # %bb.0:
8225; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8226; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v0
8227; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 1
8228; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB73_9
8229; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.1: # %else
8230; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 2
8231; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB73_10
8232; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_2: # %else2
8233; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 4
8234; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB73_11
8235; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_3: # %else5
8236; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
8237; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB73_12
8238; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_4: # %else8
8239; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
8240; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB73_13
8241; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_5: # %else11
8242; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
8243; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB73_14
8244; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_6: # %else14
8245; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 64
8246; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB73_15
8247; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_7: # %else17
8248; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, -128
8249; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a1, .LBB73_16
8250; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_8: # %else20
8251; RV64ZVE32F-ZVFHMIN-NEXT:    ret
8252; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_9: # %cond.load
8253; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 0(a0)
8254; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8255; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
8256; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
8257; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 2
8258; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB73_2
8259; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_10: # %cond.load1
8260; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 8(a0)
8261; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8262; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
8263; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8264; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
8265; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 1
8266; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 4
8267; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB73_3
8268; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_11: # %cond.load4
8269; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 16(a0)
8270; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8271; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
8272; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8273; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 2
8274; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
8275; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB73_4
8276; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_12: # %cond.load7
8277; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 24(a0)
8278; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8279; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
8280; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8281; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
8282; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
8283; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB73_5
8284; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_13: # %cond.load10
8285; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 32(a0)
8286; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8287; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
8288; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8289; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 4
8290; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
8291; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB73_6
8292; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_14: # %cond.load13
8293; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 40(a0)
8294; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8295; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
8296; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8297; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 5
8298; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 64
8299; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB73_7
8300; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_15: # %cond.load16
8301; RV64ZVE32F-ZVFHMIN-NEXT:    ld a2, 48(a0)
8302; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8303; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
8304; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8305; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 6
8306; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, -128
8307; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a1, .LBB73_8
8308; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB73_16: # %cond.load19
8309; RV64ZVE32F-ZVFHMIN-NEXT:    ld a0, 56(a0)
8310; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a0)
8311; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
8312; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a0
8313; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v8, v9, 7
8314; RV64ZVE32F-ZVFHMIN-NEXT:    ret
8315  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
8316  ret <8 x half> %v
8317}
8318
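; The baseidx variants gather from %base indexed by i8 offsets: the V
; configurations extend the indices and scale them by the element size before
; the indexed load, while RV64 ZVE32F extracts each index and forms the
; address with scalar shift/add arithmetic.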
8319define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
8320; RV32-LABEL: mgather_baseidx_v8i8_v8f16:
8321; RV32:       # %bb.0:
8322; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
8323; RV32-NEXT:    vsext.vf4 v10, v8
8324; RV32-NEXT:    vadd.vv v10, v10, v10
8325; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
8326; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
8327; RV32-NEXT:    vmv.v.v v8, v9
8328; RV32-NEXT:    ret
8329;
8330; RV64V-LABEL: mgather_baseidx_v8i8_v8f16:
8331; RV64V:       # %bb.0:
8332; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
8333; RV64V-NEXT:    vsext.vf8 v12, v8
8334; RV64V-NEXT:    vadd.vv v12, v12, v12
8335; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
8336; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
8337; RV64V-NEXT:    vmv.v.v v8, v9
8338; RV64V-NEXT:    ret
8339;
8340; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
8341; RV64ZVE32F-ZVFH:       # %bb.0:
8342; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8343; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v0
8344; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 1
8345; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB74_2
8346; RV64ZVE32F-ZVFH-NEXT:  # %bb.1: # %cond.load
8347; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8348; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8349; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8350; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8351; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
8352; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8353; RV64ZVE32F-ZVFH-NEXT:  .LBB74_2: # %else
8354; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 2
8355; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB74_4
8356; RV64ZVE32F-ZVFH-NEXT:  # %bb.3: # %cond.load1
8357; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8358; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v10, v8, 1
8359; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v10
8360; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8361; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8362; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8363; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8364; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v10, fa5
8365; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
8366; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v10, 1
8367; RV64ZVE32F-ZVFH-NEXT:  .LBB74_4: # %else2
8368; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
8369; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v10, v8, 4
8370; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 4
8371; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8372; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 2
8373; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB74_14
8374; RV64ZVE32F-ZVFH-NEXT:  # %bb.5: # %else5
8375; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
8376; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB74_15
8377; RV64ZVE32F-ZVFH-NEXT:  .LBB74_6: # %else8
8378; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
8379; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB74_16
8380; RV64ZVE32F-ZVFH-NEXT:  .LBB74_7: # %else11
8381; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
8382; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB74_9
8383; RV64ZVE32F-ZVFH-NEXT:  .LBB74_8: # %cond.load13
8384; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8385; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v10, 1
8386; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8387; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8388; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8389; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8390; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8391; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8392; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
8393; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 5
8394; RV64ZVE32F-ZVFH-NEXT:  .LBB74_9: # %else14
8395; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 64
8396; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8397; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v10, 2
8398; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB74_11
8399; RV64ZVE32F-ZVFH-NEXT:  # %bb.10: # %cond.load16
8400; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8401; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8402; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8403; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8404; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8405; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v10, fa5
8406; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
8407; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v10, 6
8408; RV64ZVE32F-ZVFH-NEXT:  .LBB74_11: # %else17
8409; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, -128
8410; RV64ZVE32F-ZVFH-NEXT:    beqz a1, .LBB74_13
8411; RV64ZVE32F-ZVFH-NEXT:  # %bb.12: # %cond.load19
8412; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8413; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 1
8414; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v8
8415; RV64ZVE32F-ZVFH-NEXT:    slli a1, a1, 1
8416; RV64ZVE32F-ZVFH-NEXT:    add a0, a0, a1
8417; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a0)
8418; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8419; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8420; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
8421; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 7
8422; RV64ZVE32F-ZVFH-NEXT:  .LBB74_13: # %else20
8423; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8424; RV64ZVE32F-ZVFH-NEXT:    vmv1r.v v8, v9
8425; RV64ZVE32F-ZVFH-NEXT:    ret
8426; RV64ZVE32F-ZVFH-NEXT:  .LBB74_14: # %cond.load4
8427; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8428; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8429; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8430; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8431; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8432; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v11, fa5
8433; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
8434; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v11, 2
8435; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
8436; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB74_6
8437; RV64ZVE32F-ZVFH-NEXT:  .LBB74_15: # %cond.load7
8438; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8439; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 1
8440; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8441; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8442; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8443; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8444; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8445; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8446; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
8447; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 3
8448; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
8449; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB74_7
8450; RV64ZVE32F-ZVFH-NEXT:  .LBB74_16: # %cond.load10
8451; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8452; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v10
8453; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8454; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8455; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8456; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
8457; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8458; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
8459; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 4
8460; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
8461; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB74_8
8462; RV64ZVE32F-ZVFH-NEXT:    j .LBB74_9
8463;
8464; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
8465; RV64ZVE32F-ZVFHMIN:       # %bb.0:
8466; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8467; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v0
8468; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 1
8469; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB74_2
8470; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.1: # %cond.load
8471; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8472; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8473; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8474; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8475; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
8476; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8477; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_2: # %else
8478; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 2
8479; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB74_4
8480; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.3: # %cond.load1
8481; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8482; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 1
8483; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v10
8484; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8485; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8486; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8487; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8488; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v10, a2
8489; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
8490; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v10, 1
8491; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_4: # %else2
8492; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
8493; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 4
8494; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 4
8495; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8496; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
8497; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB74_14
8498; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.5: # %else5
8499; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
8500; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB74_15
8501; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_6: # %else8
8502; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
8503; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB74_16
8504; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_7: # %else11
8505; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
8506; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB74_9
8507; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_8: # %cond.load13
8508; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8509; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v10, 1
8510; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8511; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8512; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8513; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8514; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8515; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
8516; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
8517; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 5
8518; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_9: # %else14
8519; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 64
8520; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8521; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v10, 2
8522; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB74_11
8523; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.10: # %cond.load16
8524; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8525; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8526; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8527; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8528; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8529; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v10, a2
8530; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
8531; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v10, 6
8532; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_11: # %else17
8533; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, -128
8534; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a1, .LBB74_13
8535; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.12: # %cond.load19
8536; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8537; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
8538; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v8
8539; RV64ZVE32F-ZVFHMIN-NEXT:    slli a1, a1, 1
8540; RV64ZVE32F-ZVFHMIN-NEXT:    add a0, a0, a1
8541; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a0)
8542; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8543; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a0
8544; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
8545; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 7
8546; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_13: # %else20
8547; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8548; RV64ZVE32F-ZVFHMIN-NEXT:    vmv1r.v v8, v9
8549; RV64ZVE32F-ZVFHMIN-NEXT:    ret
8550; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_14: # %cond.load4
8551; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8552; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8553; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8554; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8555; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8556; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v11, a2
8557; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
8558; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v11, 2
8559; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
8560; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB74_6
8561; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_15: # %cond.load7
8562; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8563; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
8564; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8565; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8566; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8567; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8568; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8569; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
8570; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
8571; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 3
8572; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
8573; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB74_7
8574; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB74_16: # %cond.load10
8575; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8576; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v10
8577; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8578; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8579; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8580; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
8581; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
8582; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
8583; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 4
8584; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
8585; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB74_8
8586; RV64ZVE32F-ZVFHMIN-NEXT:    j .LBB74_9
8587  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
8588  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
8589  ret <8 x half> %v
8590}
8591
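; Same as the previous test, but with the i8 indices explicitly sign-extended
; to i16 in IR; the lowering should match the plain i8-index case.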
8592define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
8593; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16:
8594; RV32:       # %bb.0:
8595; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
8596; RV32-NEXT:    vsext.vf4 v10, v8
8597; RV32-NEXT:    vadd.vv v10, v10, v10
8598; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
8599; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
8600; RV32-NEXT:    vmv.v.v v8, v9
8601; RV32-NEXT:    ret
8602;
8603; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16:
8604; RV64V:       # %bb.0:
8605; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
8606; RV64V-NEXT:    vsext.vf8 v12, v8
8607; RV64V-NEXT:    vadd.vv v12, v12, v12
8608; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
8609; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
8610; RV64V-NEXT:    vmv.v.v v8, v9
8611; RV64V-NEXT:    ret
8612;
8613; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
8614; RV64ZVE32F-ZVFH:       # %bb.0:
8615; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8616; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v0
8617; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 1
8618; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB75_2
8619; RV64ZVE32F-ZVFH-NEXT:  # %bb.1: # %cond.load
8620; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8621; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8622; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8623; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8624; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
8625; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8626; RV64ZVE32F-ZVFH-NEXT:  .LBB75_2: # %else
8627; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 2
8628; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB75_4
8629; RV64ZVE32F-ZVFH-NEXT:  # %bb.3: # %cond.load1
8630; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8631; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v10, v8, 1
8632; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v10
8633; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8634; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8635; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8636; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8637; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v10, fa5
8638; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
8639; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v10, 1
8640; RV64ZVE32F-ZVFH-NEXT:  .LBB75_4: # %else2
8641; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
8642; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v10, v8, 4
8643; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 4
8644; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8645; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 2
8646; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB75_14
8647; RV64ZVE32F-ZVFH-NEXT:  # %bb.5: # %else5
8648; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
8649; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB75_15
8650; RV64ZVE32F-ZVFH-NEXT:  .LBB75_6: # %else8
8651; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
8652; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB75_16
8653; RV64ZVE32F-ZVFH-NEXT:  .LBB75_7: # %else11
8654; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
8655; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB75_9
8656; RV64ZVE32F-ZVFH-NEXT:  .LBB75_8: # %cond.load13
8657; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8658; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v10, 1
8659; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8660; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8661; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8662; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8663; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8664; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8665; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
8666; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 5
8667; RV64ZVE32F-ZVFH-NEXT:  .LBB75_9: # %else14
8668; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 64
8669; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8670; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v10, 2
8671; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB75_11
8672; RV64ZVE32F-ZVFH-NEXT:  # %bb.10: # %cond.load16
8673; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8674; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8675; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8676; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8677; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8678; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v10, fa5
8679; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
8680; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v10, 6
8681; RV64ZVE32F-ZVFH-NEXT:  .LBB75_11: # %else17
8682; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, -128
8683; RV64ZVE32F-ZVFH-NEXT:    beqz a1, .LBB75_13
8684; RV64ZVE32F-ZVFH-NEXT:  # %bb.12: # %cond.load19
8685; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8686; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 1
8687; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v8
8688; RV64ZVE32F-ZVFH-NEXT:    slli a1, a1, 1
8689; RV64ZVE32F-ZVFH-NEXT:    add a0, a0, a1
8690; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a0)
8691; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8692; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8693; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
8694; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 7
8695; RV64ZVE32F-ZVFH-NEXT:  .LBB75_13: # %else20
8696; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8697; RV64ZVE32F-ZVFH-NEXT:    vmv1r.v v8, v9
8698; RV64ZVE32F-ZVFH-NEXT:    ret
8699; RV64ZVE32F-ZVFH-NEXT:  .LBB75_14: # %cond.load4
8700; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8701; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8702; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8703; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8704; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8705; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v11, fa5
8706; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
8707; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v11, 2
8708; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
8709; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB75_6
8710; RV64ZVE32F-ZVFH-NEXT:  .LBB75_15: # %cond.load7
8711; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8712; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 1
8713; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8714; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8715; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8716; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8717; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8718; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8719; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
8720; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 3
8721; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
8722; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB75_7
8723; RV64ZVE32F-ZVFH-NEXT:  .LBB75_16: # %cond.load10
8724; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8725; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v10
8726; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8727; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8728; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8729; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
8730; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8731; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
8732; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 4
8733; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
8734; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB75_8
8735; RV64ZVE32F-ZVFH-NEXT:    j .LBB75_9
8736;
8737; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
8738; RV64ZVE32F-ZVFHMIN:       # %bb.0:
8739; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8740; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v0
8741; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 1
8742; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB75_2
8743; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.1: # %cond.load
8744; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8745; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8746; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8747; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8748; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
8749; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
8750; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_2: # %else
8751; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 2
8752; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB75_4
8753; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.3: # %cond.load1
8754; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8755; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 1
8756; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v10
8757; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8758; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8759; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8760; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8761; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v10, a2
8762; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
8763; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v10, 1
8764; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_4: # %else2
8765; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
8766; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 4
8767; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 4
8768; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8769; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
8770; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB75_14
8771; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.5: # %else5
8772; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
8773; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB75_15
8774; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_6: # %else8
8775; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
8776; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB75_16
8777; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_7: # %else11
8778; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
8779; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB75_9
8780; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_8: # %cond.load13
8781; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8782; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v10, 1
8783; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8784; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8785; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8786; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8787; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8788; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
8789; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
8790; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 5
8791; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_9: # %else14
8792; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 64
8793; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8794; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v10, 2
8795; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB75_11
8796; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.10: # %cond.load16
8797; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8798; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8799; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8800; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8801; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8802; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v10, a2
8803; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
8804; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v10, 6
8805; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_11: # %else17
8806; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, -128
8807; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a1, .LBB75_13
8808; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.12: # %cond.load19
8809; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8810; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
8811; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v8
8812; RV64ZVE32F-ZVFHMIN-NEXT:    slli a1, a1, 1
8813; RV64ZVE32F-ZVFHMIN-NEXT:    add a0, a0, a1
8814; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a0)
8815; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8816; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a0
8817; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
8818; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 7
8819; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_13: # %else20
8820; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8821; RV64ZVE32F-ZVFHMIN-NEXT:    vmv1r.v v8, v9
8822; RV64ZVE32F-ZVFHMIN-NEXT:    ret
8823; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_14: # %cond.load4
8824; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8825; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8826; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8827; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8828; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8829; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v11, a2
8830; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
8831; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v11, 2
8832; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
8833; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB75_6
8834; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_15: # %cond.load7
8835; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8836; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
8837; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
8838; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8839; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8840; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8841; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8842; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
8843; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
8844; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 3
8845; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
8846; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB75_7
8847; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB75_16: # %cond.load10
8848; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8849; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v10
8850; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
8851; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
8852; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
8853; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
8854; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
8855; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
8856; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 4
8857; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
8858; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB75_8
8859; RV64ZVE32F-ZVFHMIN-NEXT:    j .LBB75_9
8860  %eidxs = sext <8 x i8> %idxs to <8 x i16>
8861  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
8862  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
8863  ret <8 x half> %v
8864}
8865
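; With zero-extended indices the V configurations can widen via vwaddu and use
; a 16-bit index vector (vluxei16); RV64 ZVE32F masks each index with 255
; before scaling.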
8866define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
8867; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16:
8868; RV32:       # %bb.0:
8869; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
8870; RV32-NEXT:    vwaddu.vv v10, v8, v8
8871; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
8872; RV32-NEXT:    vluxei16.v v9, (a0), v10, v0.t
8873; RV32-NEXT:    vmv.v.v v8, v9
8874; RV32-NEXT:    ret
8875;
8876; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16:
8877; RV64V:       # %bb.0:
8878; RV64V-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
8879; RV64V-NEXT:    vwaddu.vv v10, v8, v8
8880; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
8881; RV64V-NEXT:    vluxei16.v v9, (a0), v10, v0.t
8882; RV64V-NEXT:    vmv.v.v v8, v9
8883; RV64V-NEXT:    ret
8884;
8885; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
8886; RV64ZVE32F-ZVFH:       # %bb.0:
8887; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8888; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v0
8889; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 1
8890; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB76_2
8891; RV64ZVE32F-ZVFH-NEXT:  # %bb.1: # %cond.load
8892; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8893; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 255
8894; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8895; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8896; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8897; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
8898; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
8899; RV64ZVE32F-ZVFH-NEXT:  .LBB76_2: # %else
8900; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 2
8901; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB76_4
8902; RV64ZVE32F-ZVFH-NEXT:  # %bb.3: # %cond.load1
8903; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8904; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v10, v8, 1
8905; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v10
8906; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 255
8907; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8908; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8909; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8910; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8911; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v10, fa5
8912; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
8913; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v10, 1
8914; RV64ZVE32F-ZVFH-NEXT:  .LBB76_4: # %else2
8915; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
8916; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v10, v8, 4
8917; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 4
8918; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8919; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 2
8920; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB76_14
8921; RV64ZVE32F-ZVFH-NEXT:  # %bb.5: # %else5
8922; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
8923; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB76_15
8924; RV64ZVE32F-ZVFH-NEXT:  .LBB76_6: # %else8
8925; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
8926; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB76_16
8927; RV64ZVE32F-ZVFH-NEXT:  .LBB76_7: # %else11
8928; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
8929; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB76_9
8930; RV64ZVE32F-ZVFH-NEXT:  .LBB76_8: # %cond.load13
8931; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8932; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v10, 1
8933; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8934; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 255
8935; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8936; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8937; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8938; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8939; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8940; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
8941; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 5
8942; RV64ZVE32F-ZVFH-NEXT:  .LBB76_9: # %else14
8943; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 64
8944; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
8945; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v10, 2
8946; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB76_11
8947; RV64ZVE32F-ZVFH-NEXT:  # %bb.10: # %cond.load16
8948; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8949; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 255
8950; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8951; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8952; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8953; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8954; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v10, fa5
8955; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
8956; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v10, 6
8957; RV64ZVE32F-ZVFH-NEXT:  .LBB76_11: # %else17
8958; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, -128
8959; RV64ZVE32F-ZVFH-NEXT:    beqz a1, .LBB76_13
8960; RV64ZVE32F-ZVFH-NEXT:  # %bb.12: # %cond.load19
8961; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8962; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 1
8963; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v8
8964; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, 255
8965; RV64ZVE32F-ZVFH-NEXT:    slli a1, a1, 1
8966; RV64ZVE32F-ZVFH-NEXT:    add a0, a0, a1
8967; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a0)
8968; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8969; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8970; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
8971; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 7
8972; RV64ZVE32F-ZVFH-NEXT:  .LBB76_13: # %else20
8973; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
8974; RV64ZVE32F-ZVFH-NEXT:    vmv1r.v v8, v9
8975; RV64ZVE32F-ZVFH-NEXT:    ret
8976; RV64ZVE32F-ZVFH-NEXT:  .LBB76_14: # %cond.load4
8977; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8978; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 255
8979; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8980; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8981; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8982; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8983; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v11, fa5
8984; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
8985; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v11, 2
8986; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
8987; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB76_6
8988; RV64ZVE32F-ZVFH-NEXT:  .LBB76_15: # %cond.load7
8989; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
8990; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 1
8991; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
8992; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 255
8993; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
8994; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
8995; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
8996; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
8997; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
8998; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
8999; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 3
9000; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
9001; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB76_7
9002; RV64ZVE32F-ZVFH-NEXT:  .LBB76_16: # %cond.load10
9003; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9004; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v10
9005; RV64ZVE32F-ZVFH-NEXT:    andi a2, a2, 255
9006; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
9007; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
9008; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
9009; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
9010; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
9011; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
9012; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 4
9013; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
9014; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB76_8
9015; RV64ZVE32F-ZVFH-NEXT:    j .LBB76_9
9016;
9017; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
9018; RV64ZVE32F-ZVFHMIN:       # %bb.0:
9019; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9020; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v0
9021; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 1
9022; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB76_2
9023; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.1: # %cond.load
9024; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9025; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 255
9026; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9027; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9028; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9029; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
9030; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
9031; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_2: # %else
9032; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 2
9033; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB76_4
9034; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.3: # %cond.load1
9035; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9036; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 1
9037; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v10
9038; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 255
9039; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9040; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9041; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9042; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
9043; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v10, a2
9044; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
9045; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v10, 1
9046; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_4: # %else2
9047; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
9048; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 4
9049; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 4
9050; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
9051; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
9052; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB76_14
9053; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.5: # %else5
9054; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
9055; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB76_15
9056; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_6: # %else8
9057; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
9058; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB76_16
9059; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_7: # %else11
9060; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
9061; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB76_9
9062; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_8: # %cond.load13
9063; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9064; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v10, 1
9065; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9066; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 255
9067; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9068; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9069; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9070; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
9071; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
9072; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
9073; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 5
9074; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_9: # %else14
9075; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 64
9076; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
9077; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v10, 2
9078; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB76_11
9079; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.10: # %cond.load16
9080; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9081; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 255
9082; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9083; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9084; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9085; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
9086; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v10, a2
9087; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
9088; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v10, 6
9089; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_11: # %else17
9090; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, -128
9091; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a1, .LBB76_13
9092; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.12: # %cond.load19
9093; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9094; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
9095; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v8
9096; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, 255
9097; RV64ZVE32F-ZVFHMIN-NEXT:    slli a1, a1, 1
9098; RV64ZVE32F-ZVFHMIN-NEXT:    add a0, a0, a1
9099; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a0)
9100; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
9101; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a0
9102; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
9103; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 7
9104; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_13: # %else20
9105; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9106; RV64ZVE32F-ZVFHMIN-NEXT:    vmv1r.v v8, v9
9107; RV64ZVE32F-ZVFHMIN-NEXT:    ret
9108; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_14: # %cond.load4
9109; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9110; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 255
9111; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9112; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9113; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9114; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
9115; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v11, a2
9116; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
9117; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v11, 2
9118; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
9119; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB76_6
9120; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_15: # %cond.load7
9121; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9122; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
9123; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9124; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 255
9125; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9126; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9127; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9128; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
9129; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
9130; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
9131; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 3
9132; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
9133; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB76_7
9134; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB76_16: # %cond.load10
9135; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9136; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v10
9137; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a2, 255
9138; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9139; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9140; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9141; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
9142; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
9143; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
9144; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 4
9145; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
9146; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB76_8
9147; RV64ZVE32F-ZVFHMIN-NEXT:    j .LBB76_9
9148  %eidxs = zext <8 x i8> %idxs to <8 x i16>
9149  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
9150  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
9151  ret <8 x half> %v
9152}
9153
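; The i16 indices below are used directly (no extension in the IR) to address half elements, so each index only needs to be scaled by 2.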
9154define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) {
9155; RV32-LABEL: mgather_baseidx_v8f16:
9156; RV32:       # %bb.0:
9157; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
9158; RV32-NEXT:    vwadd.vv v10, v8, v8
9159; RV32-NEXT:    vluxei32.v v9, (a0), v10, v0.t
9160; RV32-NEXT:    vmv.v.v v8, v9
9161; RV32-NEXT:    ret
9162;
9163; RV64V-LABEL: mgather_baseidx_v8f16:
9164; RV64V:       # %bb.0:
9165; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
9166; RV64V-NEXT:    vsext.vf4 v12, v8
9167; RV64V-NEXT:    vadd.vv v12, v12, v12
9168; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
9169; RV64V-NEXT:    vluxei64.v v9, (a0), v12, v0.t
9170; RV64V-NEXT:    vmv.v.v v8, v9
9171; RV64V-NEXT:    ret
9172;
9173; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8f16:
9174; RV64ZVE32F-ZVFH:       # %bb.0:
9175; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9176; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v0
9177; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 1
9178; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB77_2
9179; RV64ZVE32F-ZVFH-NEXT:  # %bb.1: # %cond.load
9180; RV64ZVE32F-ZVFH-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
9181; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
9182; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
9183; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
9184; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
9185; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v9, fa5
9186; RV64ZVE32F-ZVFH-NEXT:  .LBB77_2: # %else
9187; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 2
9188; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB77_4
9189; RV64ZVE32F-ZVFH-NEXT:  # %bb.3: # %cond.load1
9190; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
9191; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v10, v8, 1
9192; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v10
9193; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
9194; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
9195; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
9196; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v10, fa5
9197; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
9198; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v10, 1
9199; RV64ZVE32F-ZVFH-NEXT:  .LBB77_4: # %else2
9200; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
9201; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v10, v8, 4
9202; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 4
9203; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
9204; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 2
9205; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB77_14
9206; RV64ZVE32F-ZVFH-NEXT:  # %bb.5: # %else5
9207; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
9208; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB77_15
9209; RV64ZVE32F-ZVFH-NEXT:  .LBB77_6: # %else8
9210; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
9211; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB77_16
9212; RV64ZVE32F-ZVFH-NEXT:  .LBB77_7: # %else11
9213; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
9214; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB77_9
9215; RV64ZVE32F-ZVFH-NEXT:  .LBB77_8: # %cond.load13
9216; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
9217; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v10, 1
9218; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
9219; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
9220; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
9221; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
9222; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
9223; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
9224; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 5
9225; RV64ZVE32F-ZVFH-NEXT:  .LBB77_9: # %else14
9226; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 64
9227; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
9228; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v10, 2
9229; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB77_11
9230; RV64ZVE32F-ZVFH-NEXT:  # %bb.10: # %cond.load16
9231; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
9232; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
9233; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
9234; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
9235; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v10, fa5
9236; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
9237; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v10, 6
9238; RV64ZVE32F-ZVFH-NEXT:  .LBB77_11: # %else17
9239; RV64ZVE32F-ZVFH-NEXT:    andi a1, a1, -128
9240; RV64ZVE32F-ZVFH-NEXT:    beqz a1, .LBB77_13
9241; RV64ZVE32F-ZVFH-NEXT:  # %bb.12: # %cond.load19
9242; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
9243; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 1
9244; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a1, v8
9245; RV64ZVE32F-ZVFH-NEXT:    slli a1, a1, 1
9246; RV64ZVE32F-ZVFH-NEXT:    add a0, a0, a1
9247; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a0)
9248; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
9249; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
9250; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 7
9251; RV64ZVE32F-ZVFH-NEXT:  .LBB77_13: # %else20
9252; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9253; RV64ZVE32F-ZVFH-NEXT:    vmv1r.v v8, v9
9254; RV64ZVE32F-ZVFH-NEXT:    ret
9255; RV64ZVE32F-ZVFH-NEXT:  .LBB77_14: # %cond.load4
9256; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
9257; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
9258; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
9259; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
9260; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v11, fa5
9261; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
9262; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v11, 2
9263; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 8
9264; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB77_6
9265; RV64ZVE32F-ZVFH-NEXT:  .LBB77_15: # %cond.load7
9266; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
9267; RV64ZVE32F-ZVFH-NEXT:    vslidedown.vi v8, v8, 1
9268; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v8
9269; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
9270; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
9271; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
9272; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
9273; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
9274; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 3
9275; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 16
9276; RV64ZVE32F-ZVFH-NEXT:    beqz a2, .LBB77_7
9277; RV64ZVE32F-ZVFH-NEXT:  .LBB77_16: # %cond.load10
9278; RV64ZVE32F-ZVFH-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
9279; RV64ZVE32F-ZVFH-NEXT:    vmv.x.s a2, v10
9280; RV64ZVE32F-ZVFH-NEXT:    slli a2, a2, 1
9281; RV64ZVE32F-ZVFH-NEXT:    add a2, a0, a2
9282; RV64ZVE32F-ZVFH-NEXT:    flh fa5, 0(a2)
9283; RV64ZVE32F-ZVFH-NEXT:    vfmv.s.f v8, fa5
9284; RV64ZVE32F-ZVFH-NEXT:    vslideup.vi v9, v8, 4
9285; RV64ZVE32F-ZVFH-NEXT:    andi a2, a1, 32
9286; RV64ZVE32F-ZVFH-NEXT:    bnez a2, .LBB77_8
9287; RV64ZVE32F-ZVFH-NEXT:    j .LBB77_9
9288;
9289; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
9290; RV64ZVE32F-ZVFHMIN:       # %bb.0:
9291; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9292; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v0
9293; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 1
9294; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB77_2
9295; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.1: # %cond.load
9296; RV64ZVE32F-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
9297; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9298; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9299; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9300; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9301; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v9, a2
9302; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_2: # %else
9303; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 2
9304; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB77_4
9305; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.3: # %cond.load1
9306; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
9307; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 1
9308; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v10
9309; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9310; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9311; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9312; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v10, a2
9313; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
9314; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v10, 1
9315; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_4: # %else2
9316; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
9317; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 4
9318; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 4
9319; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
9320; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
9321; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB77_14
9322; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.5: # %else5
9323; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
9324; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB77_15
9325; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_6: # %else8
9326; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
9327; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB77_16
9328; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_7: # %else11
9329; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
9330; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB77_9
9331; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_8: # %cond.load13
9332; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
9333; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v10, 1
9334; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9335; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9336; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9337; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9338; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
9339; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
9340; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 5
9341; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_9: # %else14
9342; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 64
9343; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
9344; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v10, 2
9345; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB77_11
9346; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.10: # %cond.load16
9347; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9348; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9349; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9350; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9351; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v10, a2
9352; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
9353; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v10, 6
9354; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_11: # %else17
9355; RV64ZVE32F-ZVFHMIN-NEXT:    andi a1, a1, -128
9356; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a1, .LBB77_13
9357; RV64ZVE32F-ZVFHMIN-NEXT:  # %bb.12: # %cond.load19
9358; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
9359; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
9360; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a1, v8
9361; RV64ZVE32F-ZVFHMIN-NEXT:    slli a1, a1, 1
9362; RV64ZVE32F-ZVFHMIN-NEXT:    add a0, a0, a1
9363; RV64ZVE32F-ZVFHMIN-NEXT:    lh a0, 0(a0)
9364; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a0
9365; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
9366; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 7
9367; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_13: # %else20
9368; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9369; RV64ZVE32F-ZVFHMIN-NEXT:    vmv1r.v v8, v9
9370; RV64ZVE32F-ZVFHMIN-NEXT:    ret
9371; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_14: # %cond.load4
9372; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9373; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9374; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9375; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9376; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v11, a2
9377; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
9378; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v11, 2
9379; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 8
9380; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB77_6
9381; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_15: # %cond.load7
9382; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
9383; RV64ZVE32F-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
9384; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v8
9385; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9386; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9387; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9388; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
9389; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
9390; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 3
9391; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 16
9392; RV64ZVE32F-ZVFHMIN-NEXT:    beqz a2, .LBB77_7
9393; RV64ZVE32F-ZVFHMIN-NEXT:  .LBB77_16: # %cond.load10
9394; RV64ZVE32F-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
9395; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.x.s a2, v10
9396; RV64ZVE32F-ZVFHMIN-NEXT:    slli a2, a2, 1
9397; RV64ZVE32F-ZVFHMIN-NEXT:    add a2, a0, a2
9398; RV64ZVE32F-ZVFHMIN-NEXT:    lh a2, 0(a2)
9399; RV64ZVE32F-ZVFHMIN-NEXT:    vmv.s.x v8, a2
9400; RV64ZVE32F-ZVFHMIN-NEXT:    vslideup.vi v9, v8, 4
9401; RV64ZVE32F-ZVFHMIN-NEXT:    andi a2, a1, 32
9402; RV64ZVE32F-ZVFHMIN-NEXT:    bnez a2, .LBB77_8
9403; RV64ZVE32F-ZVFHMIN-NEXT:    j .LBB77_9
9404  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
9405  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
9406  ret <8 x half> %v
9407}
9408
9409declare <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x float>)
9410
9411define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %passthru) {
9412; RV32V-LABEL: mgather_v1f32:
9413; RV32V:       # %bb.0:
9414; RV32V-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
9415; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
9416; RV32V-NEXT:    vmv1r.v v8, v9
9417; RV32V-NEXT:    ret
9418;
9419; RV64V-LABEL: mgather_v1f32:
9420; RV64V:       # %bb.0:
9421; RV64V-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
9422; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
9423; RV64V-NEXT:    vmv1r.v v8, v9
9424; RV64V-NEXT:    ret
9425;
9426; RV32ZVE32F-LABEL: mgather_v1f32:
9427; RV32ZVE32F:       # %bb.0:
9428; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
9429; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
9430; RV32ZVE32F-NEXT:    vmv.v.v v8, v9
9431; RV32ZVE32F-NEXT:    ret
9432;
9433; RV64ZVE32F-LABEL: mgather_v1f32:
9434; RV64ZVE32F:       # %bb.0:
9435; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
9436; RV64ZVE32F-NEXT:    vfirst.m a1, v0
9437; RV64ZVE32F-NEXT:    bnez a1, .LBB78_2
9438; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
9439; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
9440; RV64ZVE32F-NEXT:    vle32.v v8, (a0)
9441; RV64ZVE32F-NEXT:  .LBB78_2: # %else
9442; RV64ZVE32F-NEXT:    ret
9443  %v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)
9444  ret <1 x float> %v
9445}
9446
9447declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
9448
9449define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %passthru) {
9450; RV32V-LABEL: mgather_v2f32:
9451; RV32V:       # %bb.0:
9452; RV32V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
9453; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
9454; RV32V-NEXT:    vmv1r.v v8, v9
9455; RV32V-NEXT:    ret
9456;
9457; RV64V-LABEL: mgather_v2f32:
9458; RV64V:       # %bb.0:
9459; RV64V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
9460; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
9461; RV64V-NEXT:    vmv1r.v v8, v9
9462; RV64V-NEXT:    ret
9463;
9464; RV32ZVE32F-LABEL: mgather_v2f32:
9465; RV32ZVE32F:       # %bb.0:
9466; RV32ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
9467; RV32ZVE32F-NEXT:    vluxei32.v v9, (zero), v8, v0.t
9468; RV32ZVE32F-NEXT:    vmv.v.v v8, v9
9469; RV32ZVE32F-NEXT:    ret
9470;
9471; RV64ZVE32F-LABEL: mgather_v2f32:
9472; RV64ZVE32F:       # %bb.0:
9473; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9474; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
9475; RV64ZVE32F-NEXT:    andi a3, a2, 1
9476; RV64ZVE32F-NEXT:    bnez a3, .LBB79_3
9477; RV64ZVE32F-NEXT:  # %bb.1: # %else
9478; RV64ZVE32F-NEXT:    andi a2, a2, 2
9479; RV64ZVE32F-NEXT:    bnez a2, .LBB79_4
9480; RV64ZVE32F-NEXT:  .LBB79_2: # %else2
9481; RV64ZVE32F-NEXT:    ret
9482; RV64ZVE32F-NEXT:  .LBB79_3: # %cond.load
9483; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
9484; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
9485; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
9486; RV64ZVE32F-NEXT:    andi a2, a2, 2
9487; RV64ZVE32F-NEXT:    beqz a2, .LBB79_2
9488; RV64ZVE32F-NEXT:  .LBB79_4: # %cond.load1
9489; RV64ZVE32F-NEXT:    flw fa5, 0(a1)
9490; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
9491; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
9492; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
9493; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
9494; RV64ZVE32F-NEXT:    ret
9495  %v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru)
9496  ret <2 x float> %v
9497}
9498
9499declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
9500
9501define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %passthru) {
9502; RV32-LABEL: mgather_v4f32:
9503; RV32:       # %bb.0:
9504; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
9505; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
9506; RV32-NEXT:    vmv.v.v v8, v9
9507; RV32-NEXT:    ret
9508;
9509; RV64V-LABEL: mgather_v4f32:
9510; RV64V:       # %bb.0:
9511; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
9512; RV64V-NEXT:    vluxei64.v v10, (zero), v8, v0.t
9513; RV64V-NEXT:    vmv.v.v v8, v10
9514; RV64V-NEXT:    ret
9515;
9516; RV64ZVE32F-LABEL: mgather_v4f32:
9517; RV64ZVE32F:       # %bb.0:
9518; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9519; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
9520; RV64ZVE32F-NEXT:    andi a2, a1, 1
9521; RV64ZVE32F-NEXT:    bnez a2, .LBB80_5
9522; RV64ZVE32F-NEXT:  # %bb.1: # %else
9523; RV64ZVE32F-NEXT:    andi a2, a1, 2
9524; RV64ZVE32F-NEXT:    bnez a2, .LBB80_6
9525; RV64ZVE32F-NEXT:  .LBB80_2: # %else2
9526; RV64ZVE32F-NEXT:    andi a2, a1, 4
9527; RV64ZVE32F-NEXT:    bnez a2, .LBB80_7
9528; RV64ZVE32F-NEXT:  .LBB80_3: # %else5
9529; RV64ZVE32F-NEXT:    andi a1, a1, 8
9530; RV64ZVE32F-NEXT:    bnez a1, .LBB80_8
9531; RV64ZVE32F-NEXT:  .LBB80_4: # %else8
9532; RV64ZVE32F-NEXT:    ret
9533; RV64ZVE32F-NEXT:  .LBB80_5: # %cond.load
9534; RV64ZVE32F-NEXT:    ld a2, 0(a0)
9535; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9536; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
9537; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
9538; RV64ZVE32F-NEXT:    andi a2, a1, 2
9539; RV64ZVE32F-NEXT:    beqz a2, .LBB80_2
9540; RV64ZVE32F-NEXT:  .LBB80_6: # %cond.load1
9541; RV64ZVE32F-NEXT:    ld a2, 8(a0)
9542; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9543; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
9544; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
9545; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
9546; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
9547; RV64ZVE32F-NEXT:    andi a2, a1, 4
9548; RV64ZVE32F-NEXT:    beqz a2, .LBB80_3
9549; RV64ZVE32F-NEXT:  .LBB80_7: # %cond.load4
9550; RV64ZVE32F-NEXT:    ld a2, 16(a0)
9551; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9552; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
9553; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
9554; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 2
9555; RV64ZVE32F-NEXT:    andi a1, a1, 8
9556; RV64ZVE32F-NEXT:    beqz a1, .LBB80_4
9557; RV64ZVE32F-NEXT:  .LBB80_8: # %cond.load7
9558; RV64ZVE32F-NEXT:    ld a0, 24(a0)
9559; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
9560; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
9561; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
9562; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 3
9563; RV64ZVE32F-NEXT:    ret
9564  %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x float> %passthru)
9565  ret <4 x float> %v
9566}
9567
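; An all-true mask needs no predication: every lane is loaded unconditionally and the passthru operand is dead.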
9568define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
9569; RV32-LABEL: mgather_truemask_v4f32:
9570; RV32:       # %bb.0:
9571; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
9572; RV32-NEXT:    vluxei32.v v8, (zero), v8
9573; RV32-NEXT:    ret
9574;
9575; RV64V-LABEL: mgather_truemask_v4f32:
9576; RV64V:       # %bb.0:
9577; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
9578; RV64V-NEXT:    vluxei64.v v10, (zero), v8
9579; RV64V-NEXT:    vmv.v.v v8, v10
9580; RV64V-NEXT:    ret
9581;
9582; RV64ZVE32F-LABEL: mgather_truemask_v4f32:
9583; RV64ZVE32F:       # %bb.0:
9584; RV64ZVE32F-NEXT:    ld a1, 0(a0)
9585; RV64ZVE32F-NEXT:    ld a2, 8(a0)
9586; RV64ZVE32F-NEXT:    ld a3, 16(a0)
9587; RV64ZVE32F-NEXT:    ld a0, 24(a0)
9588; RV64ZVE32F-NEXT:    flw fa5, 0(a1)
9589; RV64ZVE32F-NEXT:    flw fa4, 0(a2)
9590; RV64ZVE32F-NEXT:    flw fa3, 0(a3)
9591; RV64ZVE32F-NEXT:    flw fa2, 0(a0)
9592; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
9593; RV64ZVE32F-NEXT:    vfmv.v.f v8, fa5
9594; RV64ZVE32F-NEXT:    vfslide1down.vf v8, v8, fa4
9595; RV64ZVE32F-NEXT:    vfslide1down.vf v8, v8, fa3
9596; RV64ZVE32F-NEXT:    vfslide1down.vf v8, v8, fa2
9597; RV64ZVE32F-NEXT:    ret
9598  %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x float> %passthru)
9599  ret <4 x float> %v
9600}
9601
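; An all-false mask loads nothing; the gather folds to returning the passthru.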
9602define <4 x float> @mgather_falsemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
9603; RV32-LABEL: mgather_falsemask_v4f32:
9604; RV32:       # %bb.0:
9605; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9606; RV32-NEXT:    vmv1r.v v8, v9
9607; RV32-NEXT:    ret
9608;
9609; RV64V-LABEL: mgather_falsemask_v4f32:
9610; RV64V:       # %bb.0:
9611; RV64V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9612; RV64V-NEXT:    vmv1r.v v8, v10
9613; RV64V-NEXT:    ret
9614;
9615; RV64ZVE32F-LABEL: mgather_falsemask_v4f32:
9616; RV64ZVE32F:       # %bb.0:
9617; RV64ZVE32F-NEXT:    ret
9618  %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x float> %passthru)
9619  ret <4 x float> %v
9620}
9621
9622declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
9623
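; Gather of eight floats; targets without native indexed-load support scalarize this into one conditional load per set mask bit.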
9624define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %passthru) {
9625; RV32-LABEL: mgather_v8f32:
9626; RV32:       # %bb.0:
9627; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
9628; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
9629; RV32-NEXT:    vmv.v.v v8, v10
9630; RV32-NEXT:    ret
9631;
9632; RV64V-LABEL: mgather_v8f32:
9633; RV64V:       # %bb.0:
9634; RV64V-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
9635; RV64V-NEXT:    vluxei64.v v12, (zero), v8, v0.t
9636; RV64V-NEXT:    vmv.v.v v8, v12
9637; RV64V-NEXT:    ret
9638;
9639; RV64ZVE32F-LABEL: mgather_v8f32:
9640; RV64ZVE32F:       # %bb.0:
9641; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9642; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
9643; RV64ZVE32F-NEXT:    andi a2, a1, 1
9644; RV64ZVE32F-NEXT:    bnez a2, .LBB83_9
9645; RV64ZVE32F-NEXT:  # %bb.1: # %else
9646; RV64ZVE32F-NEXT:    andi a2, a1, 2
9647; RV64ZVE32F-NEXT:    bnez a2, .LBB83_10
9648; RV64ZVE32F-NEXT:  .LBB83_2: # %else2
9649; RV64ZVE32F-NEXT:    andi a2, a1, 4
9650; RV64ZVE32F-NEXT:    bnez a2, .LBB83_11
9651; RV64ZVE32F-NEXT:  .LBB83_3: # %else5
9652; RV64ZVE32F-NEXT:    andi a2, a1, 8
9653; RV64ZVE32F-NEXT:    bnez a2, .LBB83_12
9654; RV64ZVE32F-NEXT:  .LBB83_4: # %else8
9655; RV64ZVE32F-NEXT:    andi a2, a1, 16
9656; RV64ZVE32F-NEXT:    bnez a2, .LBB83_13
9657; RV64ZVE32F-NEXT:  .LBB83_5: # %else11
9658; RV64ZVE32F-NEXT:    andi a2, a1, 32
9659; RV64ZVE32F-NEXT:    bnez a2, .LBB83_14
9660; RV64ZVE32F-NEXT:  .LBB83_6: # %else14
9661; RV64ZVE32F-NEXT:    andi a2, a1, 64
9662; RV64ZVE32F-NEXT:    bnez a2, .LBB83_15
9663; RV64ZVE32F-NEXT:  .LBB83_7: # %else17
9664; RV64ZVE32F-NEXT:    andi a1, a1, -128
9665; RV64ZVE32F-NEXT:    bnez a1, .LBB83_16
9666; RV64ZVE32F-NEXT:  .LBB83_8: # %else20
9667; RV64ZVE32F-NEXT:    ret
9668; RV64ZVE32F-NEXT:  .LBB83_9: # %cond.load
9669; RV64ZVE32F-NEXT:    ld a2, 0(a0)
9670; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9671; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
9672; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
9673; RV64ZVE32F-NEXT:    andi a2, a1, 2
9674; RV64ZVE32F-NEXT:    beqz a2, .LBB83_2
9675; RV64ZVE32F-NEXT:  .LBB83_10: # %cond.load1
9676; RV64ZVE32F-NEXT:    ld a2, 8(a0)
9677; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9678; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
9679; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9680; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
9681; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 1
9682; RV64ZVE32F-NEXT:    andi a2, a1, 4
9683; RV64ZVE32F-NEXT:    beqz a2, .LBB83_3
9684; RV64ZVE32F-NEXT:  .LBB83_11: # %cond.load4
9685; RV64ZVE32F-NEXT:    ld a2, 16(a0)
9686; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9687; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
9688; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9689; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 2
9690; RV64ZVE32F-NEXT:    andi a2, a1, 8
9691; RV64ZVE32F-NEXT:    beqz a2, .LBB83_4
9692; RV64ZVE32F-NEXT:  .LBB83_12: # %cond.load7
9693; RV64ZVE32F-NEXT:    ld a2, 24(a0)
9694; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9695; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
9696; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9697; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 3
9698; RV64ZVE32F-NEXT:    andi a2, a1, 16
9699; RV64ZVE32F-NEXT:    beqz a2, .LBB83_5
9700; RV64ZVE32F-NEXT:  .LBB83_13: # %cond.load10
9701; RV64ZVE32F-NEXT:    ld a2, 32(a0)
9702; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9703; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
9704; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9705; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 4
9706; RV64ZVE32F-NEXT:    andi a2, a1, 32
9707; RV64ZVE32F-NEXT:    beqz a2, .LBB83_6
9708; RV64ZVE32F-NEXT:  .LBB83_14: # %cond.load13
9709; RV64ZVE32F-NEXT:    ld a2, 40(a0)
9710; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9711; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
9712; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9713; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 5
9714; RV64ZVE32F-NEXT:    andi a2, a1, 64
9715; RV64ZVE32F-NEXT:    beqz a2, .LBB83_7
9716; RV64ZVE32F-NEXT:  .LBB83_15: # %cond.load16
9717; RV64ZVE32F-NEXT:    ld a2, 48(a0)
9718; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9719; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
9720; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9721; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 6
9722; RV64ZVE32F-NEXT:    andi a1, a1, -128
9723; RV64ZVE32F-NEXT:    beqz a1, .LBB83_8
9724; RV64ZVE32F-NEXT:  .LBB83_16: # %cond.load19
9725; RV64ZVE32F-NEXT:    ld a0, 56(a0)
9726; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
9727; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
9728; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9729; RV64ZVE32F-NEXT:    vslideup.vi v8, v10, 7
9730; RV64ZVE32F-NEXT:    ret
9731  %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
9732  ret <8 x float> %v
9733}
9734
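; i8 indices with no explicit extension; the GEP sign-extends them before scaling by the f32 element size.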
9735define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
9736; RV32-LABEL: mgather_baseidx_v8i8_v8f32:
9737; RV32:       # %bb.0:
9738; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
9739; RV32-NEXT:    vsext.vf4 v12, v8
9740; RV32-NEXT:    vsll.vi v8, v12, 2
9741; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
9742; RV32-NEXT:    vmv.v.v v8, v10
9743; RV32-NEXT:    ret
9744;
9745; RV64V-LABEL: mgather_baseidx_v8i8_v8f32:
9746; RV64V:       # %bb.0:
9747; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
9748; RV64V-NEXT:    vsext.vf8 v12, v8
9749; RV64V-NEXT:    vsll.vi v12, v12, 2
9750; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
9751; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
9752; RV64V-NEXT:    vmv.v.v v8, v10
9753; RV64V-NEXT:    ret
9754;
9755; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f32:
9756; RV64ZVE32F:       # %bb.0:
9757; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9758; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
9759; RV64ZVE32F-NEXT:    andi a2, a1, 1
9760; RV64ZVE32F-NEXT:    beqz a2, .LBB84_2
9761; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
9762; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9763; RV64ZVE32F-NEXT:    slli a2, a2, 2
9764; RV64ZVE32F-NEXT:    add a2, a0, a2
9765; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9766; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
9767; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9768; RV64ZVE32F-NEXT:  .LBB84_2: # %else
9769; RV64ZVE32F-NEXT:    andi a2, a1, 2
9770; RV64ZVE32F-NEXT:    beqz a2, .LBB84_4
9771; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
9772; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9773; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
9774; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
9775; RV64ZVE32F-NEXT:    slli a2, a2, 2
9776; RV64ZVE32F-NEXT:    add a2, a0, a2
9777; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9778; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9779; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
9780; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
9781; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
9782; RV64ZVE32F-NEXT:  .LBB84_4: # %else2
9783; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
9784; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
9785; RV64ZVE32F-NEXT:    andi a2, a1, 4
9786; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
9787; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
9788; RV64ZVE32F-NEXT:    bnez a2, .LBB84_14
9789; RV64ZVE32F-NEXT:  # %bb.5: # %else5
9790; RV64ZVE32F-NEXT:    andi a2, a1, 8
9791; RV64ZVE32F-NEXT:    bnez a2, .LBB84_15
9792; RV64ZVE32F-NEXT:  .LBB84_6: # %else8
9793; RV64ZVE32F-NEXT:    andi a2, a1, 16
9794; RV64ZVE32F-NEXT:    bnez a2, .LBB84_16
9795; RV64ZVE32F-NEXT:  .LBB84_7: # %else11
9796; RV64ZVE32F-NEXT:    andi a2, a1, 32
9797; RV64ZVE32F-NEXT:    beqz a2, .LBB84_9
9798; RV64ZVE32F-NEXT:  .LBB84_8: # %cond.load13
9799; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9800; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
9801; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9802; RV64ZVE32F-NEXT:    slli a2, a2, 2
9803; RV64ZVE32F-NEXT:    add a2, a0, a2
9804; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9805; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9806; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
9807; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
9808; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
9809; RV64ZVE32F-NEXT:  .LBB84_9: # %else14
9810; RV64ZVE32F-NEXT:    andi a2, a1, 64
9811; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
9812; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
9813; RV64ZVE32F-NEXT:    beqz a2, .LBB84_11
9814; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
9815; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9816; RV64ZVE32F-NEXT:    slli a2, a2, 2
9817; RV64ZVE32F-NEXT:    add a2, a0, a2
9818; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9819; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9820; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
9821; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
9822; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
9823; RV64ZVE32F-NEXT:  .LBB84_11: # %else17
9824; RV64ZVE32F-NEXT:    andi a1, a1, -128
9825; RV64ZVE32F-NEXT:    beqz a1, .LBB84_13
9826; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
9827; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9828; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
9829; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
9830; RV64ZVE32F-NEXT:    slli a1, a1, 2
9831; RV64ZVE32F-NEXT:    add a0, a0, a1
9832; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
9833; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9834; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
9835; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
9836; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
9837; RV64ZVE32F-NEXT:  .LBB84_13: # %else20
9838; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9839; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
9840; RV64ZVE32F-NEXT:    ret
9841; RV64ZVE32F-NEXT:  .LBB84_14: # %cond.load4
9842; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9843; RV64ZVE32F-NEXT:    slli a2, a2, 2
9844; RV64ZVE32F-NEXT:    add a2, a0, a2
9845; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9846; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9847; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
9848; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
9849; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
9850; RV64ZVE32F-NEXT:    andi a2, a1, 8
9851; RV64ZVE32F-NEXT:    beqz a2, .LBB84_6
9852; RV64ZVE32F-NEXT:  .LBB84_15: # %cond.load7
9853; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9854; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
9855; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9856; RV64ZVE32F-NEXT:    slli a2, a2, 2
9857; RV64ZVE32F-NEXT:    add a2, a0, a2
9858; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9859; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9860; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
9861; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
9862; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
9863; RV64ZVE32F-NEXT:    andi a2, a1, 16
9864; RV64ZVE32F-NEXT:    beqz a2, .LBB84_7
9865; RV64ZVE32F-NEXT:  .LBB84_16: # %cond.load10
9866; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9867; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
9868; RV64ZVE32F-NEXT:    slli a2, a2, 2
9869; RV64ZVE32F-NEXT:    add a2, a0, a2
9870; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9871; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
9872; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
9873; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
9874; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
9875; RV64ZVE32F-NEXT:    andi a2, a1, 32
9876; RV64ZVE32F-NEXT:    bnez a2, .LBB84_8
9877; RV64ZVE32F-NEXT:    j .LBB84_9
9878  %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
9879  %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
9880  ret <8 x float> %v
9881}
9882
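; Same addressing as above, but the sign extension of the i8 indices to i32 is written explicitly in the IR.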
9883define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
9884; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f32:
9885; RV32:       # %bb.0:
9886; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
9887; RV32-NEXT:    vsext.vf4 v12, v8
9888; RV32-NEXT:    vsll.vi v8, v12, 2
9889; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
9890; RV32-NEXT:    vmv.v.v v8, v10
9891; RV32-NEXT:    ret
9892;
9893; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f32:
9894; RV64V:       # %bb.0:
9895; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
9896; RV64V-NEXT:    vsext.vf8 v12, v8
9897; RV64V-NEXT:    vsll.vi v12, v12, 2
9898; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
9899; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
9900; RV64V-NEXT:    vmv.v.v v8, v10
9901; RV64V-NEXT:    ret
9902;
9903; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f32:
9904; RV64ZVE32F:       # %bb.0:
9905; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9906; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
9907; RV64ZVE32F-NEXT:    andi a2, a1, 1
9908; RV64ZVE32F-NEXT:    beqz a2, .LBB85_2
9909; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
9910; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9911; RV64ZVE32F-NEXT:    slli a2, a2, 2
9912; RV64ZVE32F-NEXT:    add a2, a0, a2
9913; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9914; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
9915; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
9916; RV64ZVE32F-NEXT:  .LBB85_2: # %else
9917; RV64ZVE32F-NEXT:    andi a2, a1, 2
9918; RV64ZVE32F-NEXT:    beqz a2, .LBB85_4
9919; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
9920; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9921; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
9922; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
9923; RV64ZVE32F-NEXT:    slli a2, a2, 2
9924; RV64ZVE32F-NEXT:    add a2, a0, a2
9925; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9926; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9927; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
9928; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
9929; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
9930; RV64ZVE32F-NEXT:  .LBB85_4: # %else2
9931; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
9932; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
9933; RV64ZVE32F-NEXT:    andi a2, a1, 4
9934; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
9935; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
9936; RV64ZVE32F-NEXT:    bnez a2, .LBB85_14
9937; RV64ZVE32F-NEXT:  # %bb.5: # %else5
9938; RV64ZVE32F-NEXT:    andi a2, a1, 8
9939; RV64ZVE32F-NEXT:    bnez a2, .LBB85_15
9940; RV64ZVE32F-NEXT:  .LBB85_6: # %else8
9941; RV64ZVE32F-NEXT:    andi a2, a1, 16
9942; RV64ZVE32F-NEXT:    bnez a2, .LBB85_16
9943; RV64ZVE32F-NEXT:  .LBB85_7: # %else11
9944; RV64ZVE32F-NEXT:    andi a2, a1, 32
9945; RV64ZVE32F-NEXT:    beqz a2, .LBB85_9
9946; RV64ZVE32F-NEXT:  .LBB85_8: # %cond.load13
9947; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9948; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
9949; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9950; RV64ZVE32F-NEXT:    slli a2, a2, 2
9951; RV64ZVE32F-NEXT:    add a2, a0, a2
9952; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9953; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9954; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
9955; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
9956; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
9957; RV64ZVE32F-NEXT:  .LBB85_9: # %else14
9958; RV64ZVE32F-NEXT:    andi a2, a1, 64
9959; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
9960; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
9961; RV64ZVE32F-NEXT:    beqz a2, .LBB85_11
9962; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
9963; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9964; RV64ZVE32F-NEXT:    slli a2, a2, 2
9965; RV64ZVE32F-NEXT:    add a2, a0, a2
9966; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9967; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9968; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
9969; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
9970; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
9971; RV64ZVE32F-NEXT:  .LBB85_11: # %else17
9972; RV64ZVE32F-NEXT:    andi a1, a1, -128
9973; RV64ZVE32F-NEXT:    beqz a1, .LBB85_13
9974; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
9975; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
9976; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
9977; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
9978; RV64ZVE32F-NEXT:    slli a1, a1, 2
9979; RV64ZVE32F-NEXT:    add a0, a0, a1
9980; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
9981; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9982; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
9983; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
9984; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
9985; RV64ZVE32F-NEXT:  .LBB85_13: # %else20
9986; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
9987; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
9988; RV64ZVE32F-NEXT:    ret
9989; RV64ZVE32F-NEXT:  .LBB85_14: # %cond.load4
9990; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
9991; RV64ZVE32F-NEXT:    slli a2, a2, 2
9992; RV64ZVE32F-NEXT:    add a2, a0, a2
9993; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
9994; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
9995; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
9996; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
9997; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
9998; RV64ZVE32F-NEXT:    andi a2, a1, 8
9999; RV64ZVE32F-NEXT:    beqz a2, .LBB85_6
10000; RV64ZVE32F-NEXT:  .LBB85_15: # %cond.load7
10001; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
10002; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10003; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10004; RV64ZVE32F-NEXT:    slli a2, a2, 2
10005; RV64ZVE32F-NEXT:    add a2, a0, a2
10006; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10007; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10008; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10009; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
10010; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
10011; RV64ZVE32F-NEXT:    andi a2, a1, 16
10012; RV64ZVE32F-NEXT:    beqz a2, .LBB85_7
10013; RV64ZVE32F-NEXT:  .LBB85_16: # %cond.load10
10014; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10015; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10016; RV64ZVE32F-NEXT:    slli a2, a2, 2
10017; RV64ZVE32F-NEXT:    add a2, a0, a2
10018; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10019; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
10020; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10021; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
10022; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
10023; RV64ZVE32F-NEXT:    andi a2, a1, 32
10024; RV64ZVE32F-NEXT:    bnez a2, .LBB85_8
10025; RV64ZVE32F-NEXT:    j .LBB85_9
10026  %eidxs = sext <8 x i8> %idxs to <8 x i32>
10027  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
10028  %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10029  ret <8 x float> %v
10030}
10031
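; i8 indices zero-extended to i32: the unsigned values fit in 16 bits, so the RVV lowerings can use vluxei16 with a narrower index vector.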
10032define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10033; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32:
10034; RV32:       # %bb.0:
10035; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
10036; RV32-NEXT:    vzext.vf2 v9, v8
10037; RV32-NEXT:    vsll.vi v8, v9, 2
10038; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
10039; RV32-NEXT:    vluxei16.v v10, (a0), v8, v0.t
10040; RV32-NEXT:    vmv.v.v v8, v10
10041; RV32-NEXT:    ret
10042;
10043; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32:
10044; RV64V:       # %bb.0:
10045; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
10046; RV64V-NEXT:    vzext.vf2 v9, v8
10047; RV64V-NEXT:    vsll.vi v8, v9, 2
10048; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
10049; RV64V-NEXT:    vluxei16.v v10, (a0), v8, v0.t
10050; RV64V-NEXT:    vmv.v.v v8, v10
10051; RV64V-NEXT:    ret
10052;
10053; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f32:
10054; RV64ZVE32F:       # %bb.0:
10055; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10056; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
10057; RV64ZVE32F-NEXT:    andi a2, a1, 1
10058; RV64ZVE32F-NEXT:    beqz a2, .LBB86_2
10059; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
10060; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10061; RV64ZVE32F-NEXT:    andi a2, a2, 255
10062; RV64ZVE32F-NEXT:    slli a2, a2, 2
10063; RV64ZVE32F-NEXT:    add a2, a0, a2
10064; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10065; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
10066; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
10067; RV64ZVE32F-NEXT:  .LBB86_2: # %else
10068; RV64ZVE32F-NEXT:    andi a2, a1, 2
10069; RV64ZVE32F-NEXT:    beqz a2, .LBB86_4
10070; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
10071; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
10072; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
10073; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10074; RV64ZVE32F-NEXT:    andi a2, a2, 255
10075; RV64ZVE32F-NEXT:    slli a2, a2, 2
10076; RV64ZVE32F-NEXT:    add a2, a0, a2
10077; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10078; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10079; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
10080; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
10081; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
10082; RV64ZVE32F-NEXT:  .LBB86_4: # %else2
10083; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
10084; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
10085; RV64ZVE32F-NEXT:    andi a2, a1, 4
10086; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
10087; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
10088; RV64ZVE32F-NEXT:    bnez a2, .LBB86_14
10089; RV64ZVE32F-NEXT:  # %bb.5: # %else5
10090; RV64ZVE32F-NEXT:    andi a2, a1, 8
10091; RV64ZVE32F-NEXT:    bnez a2, .LBB86_15
10092; RV64ZVE32F-NEXT:  .LBB86_6: # %else8
10093; RV64ZVE32F-NEXT:    andi a2, a1, 16
10094; RV64ZVE32F-NEXT:    bnez a2, .LBB86_16
10095; RV64ZVE32F-NEXT:  .LBB86_7: # %else11
10096; RV64ZVE32F-NEXT:    andi a2, a1, 32
10097; RV64ZVE32F-NEXT:    beqz a2, .LBB86_9
10098; RV64ZVE32F-NEXT:  .LBB86_8: # %cond.load13
10099; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
10100; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
10101; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10102; RV64ZVE32F-NEXT:    andi a2, a2, 255
10103; RV64ZVE32F-NEXT:    slli a2, a2, 2
10104; RV64ZVE32F-NEXT:    add a2, a0, a2
10105; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10106; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10107; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10108; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
10109; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
10110; RV64ZVE32F-NEXT:  .LBB86_9: # %else14
10111; RV64ZVE32F-NEXT:    andi a2, a1, 64
10112; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
10113; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
10114; RV64ZVE32F-NEXT:    beqz a2, .LBB86_11
10115; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
10116; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10117; RV64ZVE32F-NEXT:    andi a2, a2, 255
10118; RV64ZVE32F-NEXT:    slli a2, a2, 2
10119; RV64ZVE32F-NEXT:    add a2, a0, a2
10120; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10121; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10122; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10123; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
10124; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
10125; RV64ZVE32F-NEXT:  .LBB86_11: # %else17
10126; RV64ZVE32F-NEXT:    andi a1, a1, -128
10127; RV64ZVE32F-NEXT:    beqz a1, .LBB86_13
10128; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
10129; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
10130; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10131; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
10132; RV64ZVE32F-NEXT:    andi a1, a1, 255
10133; RV64ZVE32F-NEXT:    slli a1, a1, 2
10134; RV64ZVE32F-NEXT:    add a0, a0, a1
10135; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
10136; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10137; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10138; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
10139; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
10140; RV64ZVE32F-NEXT:  .LBB86_13: # %else20
10141; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10142; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
10143; RV64ZVE32F-NEXT:    ret
10144; RV64ZVE32F-NEXT:  .LBB86_14: # %cond.load4
10145; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10146; RV64ZVE32F-NEXT:    andi a2, a2, 255
10147; RV64ZVE32F-NEXT:    slli a2, a2, 2
10148; RV64ZVE32F-NEXT:    add a2, a0, a2
10149; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10150; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10151; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10152; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
10153; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
10154; RV64ZVE32F-NEXT:    andi a2, a1, 8
10155; RV64ZVE32F-NEXT:    beqz a2, .LBB86_6
10156; RV64ZVE32F-NEXT:  .LBB86_15: # %cond.load7
10157; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
10158; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10159; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10160; RV64ZVE32F-NEXT:    andi a2, a2, 255
10161; RV64ZVE32F-NEXT:    slli a2, a2, 2
10162; RV64ZVE32F-NEXT:    add a2, a0, a2
10163; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10164; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10165; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10166; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
10167; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
10168; RV64ZVE32F-NEXT:    andi a2, a1, 16
10169; RV64ZVE32F-NEXT:    beqz a2, .LBB86_7
10170; RV64ZVE32F-NEXT:  .LBB86_16: # %cond.load10
10171; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10172; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10173; RV64ZVE32F-NEXT:    andi a2, a2, 255
10174; RV64ZVE32F-NEXT:    slli a2, a2, 2
10175; RV64ZVE32F-NEXT:    add a2, a0, a2
10176; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10177; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
10178; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10179; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
10180; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
10181; RV64ZVE32F-NEXT:    andi a2, a1, 32
10182; RV64ZVE32F-NEXT:    bnez a2, .LBB86_8
10183; RV64ZVE32F-NEXT:    j .LBB86_9
10184  %eidxs = zext <8 x i8> %idxs to <8 x i32>
10185  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
10186  %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10187  ret <8 x float> %v
10188}
10189
10190define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10191; RV32-LABEL: mgather_baseidx_v8i16_v8f32:
10192; RV32:       # %bb.0:
10193; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
10194; RV32-NEXT:    vsext.vf2 v12, v8
10195; RV32-NEXT:    vsll.vi v8, v12, 2
10196; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
10197; RV32-NEXT:    vmv.v.v v8, v10
10198; RV32-NEXT:    ret
10199;
10200; RV64V-LABEL: mgather_baseidx_v8i16_v8f32:
10201; RV64V:       # %bb.0:
10202; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
10203; RV64V-NEXT:    vsext.vf4 v12, v8
10204; RV64V-NEXT:    vsll.vi v12, v12, 2
10205; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
10206; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
10207; RV64V-NEXT:    vmv.v.v v8, v10
10208; RV64V-NEXT:    ret
10209;
10210; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f32:
10211; RV64ZVE32F:       # %bb.0:
10212; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10213; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
10214; RV64ZVE32F-NEXT:    andi a2, a1, 1
10215; RV64ZVE32F-NEXT:    beqz a2, .LBB87_2
10216; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
10217; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
10218; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10219; RV64ZVE32F-NEXT:    slli a2, a2, 2
10220; RV64ZVE32F-NEXT:    add a2, a0, a2
10221; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10222; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
10223; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
10224; RV64ZVE32F-NEXT:  .LBB87_2: # %else
10225; RV64ZVE32F-NEXT:    andi a2, a1, 2
10226; RV64ZVE32F-NEXT:    beqz a2, .LBB87_4
10227; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
10228; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10229; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
10230; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10231; RV64ZVE32F-NEXT:    slli a2, a2, 2
10232; RV64ZVE32F-NEXT:    add a2, a0, a2
10233; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10234; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10235; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
10236; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
10237; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
10238; RV64ZVE32F-NEXT:  .LBB87_4: # %else2
10239; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
10240; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
10241; RV64ZVE32F-NEXT:    andi a2, a1, 4
10242; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
10243; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
10244; RV64ZVE32F-NEXT:    bnez a2, .LBB87_14
10245; RV64ZVE32F-NEXT:  # %bb.5: # %else5
10246; RV64ZVE32F-NEXT:    andi a2, a1, 8
10247; RV64ZVE32F-NEXT:    bnez a2, .LBB87_15
10248; RV64ZVE32F-NEXT:  .LBB87_6: # %else8
10249; RV64ZVE32F-NEXT:    andi a2, a1, 16
10250; RV64ZVE32F-NEXT:    bnez a2, .LBB87_16
10251; RV64ZVE32F-NEXT:  .LBB87_7: # %else11
10252; RV64ZVE32F-NEXT:    andi a2, a1, 32
10253; RV64ZVE32F-NEXT:    beqz a2, .LBB87_9
10254; RV64ZVE32F-NEXT:  .LBB87_8: # %cond.load13
10255; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10256; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
10257; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10258; RV64ZVE32F-NEXT:    slli a2, a2, 2
10259; RV64ZVE32F-NEXT:    add a2, a0, a2
10260; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10261; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10262; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10263; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
10264; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
10265; RV64ZVE32F-NEXT:  .LBB87_9: # %else14
10266; RV64ZVE32F-NEXT:    andi a2, a1, 64
10267; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
10268; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
10269; RV64ZVE32F-NEXT:    beqz a2, .LBB87_11
10270; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
10271; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10272; RV64ZVE32F-NEXT:    slli a2, a2, 2
10273; RV64ZVE32F-NEXT:    add a2, a0, a2
10274; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10275; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10276; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10277; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
10278; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
10279; RV64ZVE32F-NEXT:  .LBB87_11: # %else17
10280; RV64ZVE32F-NEXT:    andi a1, a1, -128
10281; RV64ZVE32F-NEXT:    beqz a1, .LBB87_13
10282; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
10283; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10284; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10285; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
10286; RV64ZVE32F-NEXT:    slli a1, a1, 2
10287; RV64ZVE32F-NEXT:    add a0, a0, a1
10288; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
10289; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10290; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10291; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
10292; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
10293; RV64ZVE32F-NEXT:  .LBB87_13: # %else20
10294; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10295; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
10296; RV64ZVE32F-NEXT:    ret
10297; RV64ZVE32F-NEXT:  .LBB87_14: # %cond.load4
10298; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10299; RV64ZVE32F-NEXT:    slli a2, a2, 2
10300; RV64ZVE32F-NEXT:    add a2, a0, a2
10301; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10302; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10303; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10304; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
10305; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
10306; RV64ZVE32F-NEXT:    andi a2, a1, 8
10307; RV64ZVE32F-NEXT:    beqz a2, .LBB87_6
10308; RV64ZVE32F-NEXT:  .LBB87_15: # %cond.load7
10309; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10310; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10311; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10312; RV64ZVE32F-NEXT:    slli a2, a2, 2
10313; RV64ZVE32F-NEXT:    add a2, a0, a2
10314; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10315; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10316; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10317; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
10318; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
10319; RV64ZVE32F-NEXT:    andi a2, a1, 16
10320; RV64ZVE32F-NEXT:    beqz a2, .LBB87_7
10321; RV64ZVE32F-NEXT:  .LBB87_16: # %cond.load10
10322; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
10323; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10324; RV64ZVE32F-NEXT:    slli a2, a2, 2
10325; RV64ZVE32F-NEXT:    add a2, a0, a2
10326; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10327; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
10328; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10329; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
10330; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
10331; RV64ZVE32F-NEXT:    andi a2, a1, 32
10332; RV64ZVE32F-NEXT:    bnez a2, .LBB87_8
10333; RV64ZVE32F-NEXT:    j .LBB87_9
10334  %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
10335  %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10336  ret <8 x float> %v
10337}
10338
10339define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10340; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f32:
10341; RV32:       # %bb.0:
10342; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
10343; RV32-NEXT:    vsext.vf2 v12, v8
10344; RV32-NEXT:    vsll.vi v8, v12, 2
10345; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
10346; RV32-NEXT:    vmv.v.v v8, v10
10347; RV32-NEXT:    ret
10348;
10349; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f32:
10350; RV64V:       # %bb.0:
10351; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
10352; RV64V-NEXT:    vsext.vf4 v12, v8
10353; RV64V-NEXT:    vsll.vi v12, v12, 2
10354; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
10355; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
10356; RV64V-NEXT:    vmv.v.v v8, v10
10357; RV64V-NEXT:    ret
10358;
10359; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f32:
10360; RV64ZVE32F:       # %bb.0:
10361; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10362; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
10363; RV64ZVE32F-NEXT:    andi a2, a1, 1
10364; RV64ZVE32F-NEXT:    beqz a2, .LBB88_2
10365; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
10366; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
10367; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10368; RV64ZVE32F-NEXT:    slli a2, a2, 2
10369; RV64ZVE32F-NEXT:    add a2, a0, a2
10370; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10371; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
10372; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
10373; RV64ZVE32F-NEXT:  .LBB88_2: # %else
10374; RV64ZVE32F-NEXT:    andi a2, a1, 2
10375; RV64ZVE32F-NEXT:    beqz a2, .LBB88_4
10376; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
10377; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10378; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
10379; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10380; RV64ZVE32F-NEXT:    slli a2, a2, 2
10381; RV64ZVE32F-NEXT:    add a2, a0, a2
10382; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10383; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10384; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
10385; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
10386; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
10387; RV64ZVE32F-NEXT:  .LBB88_4: # %else2
10388; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
10389; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
10390; RV64ZVE32F-NEXT:    andi a2, a1, 4
10391; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
10392; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
10393; RV64ZVE32F-NEXT:    bnez a2, .LBB88_14
10394; RV64ZVE32F-NEXT:  # %bb.5: # %else5
10395; RV64ZVE32F-NEXT:    andi a2, a1, 8
10396; RV64ZVE32F-NEXT:    bnez a2, .LBB88_15
10397; RV64ZVE32F-NEXT:  .LBB88_6: # %else8
10398; RV64ZVE32F-NEXT:    andi a2, a1, 16
10399; RV64ZVE32F-NEXT:    bnez a2, .LBB88_16
10400; RV64ZVE32F-NEXT:  .LBB88_7: # %else11
10401; RV64ZVE32F-NEXT:    andi a2, a1, 32
10402; RV64ZVE32F-NEXT:    beqz a2, .LBB88_9
10403; RV64ZVE32F-NEXT:  .LBB88_8: # %cond.load13
10404; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10405; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
10406; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10407; RV64ZVE32F-NEXT:    slli a2, a2, 2
10408; RV64ZVE32F-NEXT:    add a2, a0, a2
10409; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10410; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10411; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10412; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
10413; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
10414; RV64ZVE32F-NEXT:  .LBB88_9: # %else14
10415; RV64ZVE32F-NEXT:    andi a2, a1, 64
10416; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
10417; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
10418; RV64ZVE32F-NEXT:    beqz a2, .LBB88_11
10419; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
10420; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10421; RV64ZVE32F-NEXT:    slli a2, a2, 2
10422; RV64ZVE32F-NEXT:    add a2, a0, a2
10423; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10424; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10425; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10426; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
10427; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
10428; RV64ZVE32F-NEXT:  .LBB88_11: # %else17
10429; RV64ZVE32F-NEXT:    andi a1, a1, -128
10430; RV64ZVE32F-NEXT:    beqz a1, .LBB88_13
10431; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
10432; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10433; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10434; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
10435; RV64ZVE32F-NEXT:    slli a1, a1, 2
10436; RV64ZVE32F-NEXT:    add a0, a0, a1
10437; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
10438; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10439; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10440; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
10441; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
10442; RV64ZVE32F-NEXT:  .LBB88_13: # %else20
10443; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10444; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
10445; RV64ZVE32F-NEXT:    ret
10446; RV64ZVE32F-NEXT:  .LBB88_14: # %cond.load4
10447; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10448; RV64ZVE32F-NEXT:    slli a2, a2, 2
10449; RV64ZVE32F-NEXT:    add a2, a0, a2
10450; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10451; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10452; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10453; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
10454; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
10455; RV64ZVE32F-NEXT:    andi a2, a1, 8
10456; RV64ZVE32F-NEXT:    beqz a2, .LBB88_6
10457; RV64ZVE32F-NEXT:  .LBB88_15: # %cond.load7
10458; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10459; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10460; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10461; RV64ZVE32F-NEXT:    slli a2, a2, 2
10462; RV64ZVE32F-NEXT:    add a2, a0, a2
10463; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10464; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10465; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10466; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
10467; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
10468; RV64ZVE32F-NEXT:    andi a2, a1, 16
10469; RV64ZVE32F-NEXT:    beqz a2, .LBB88_7
10470; RV64ZVE32F-NEXT:  .LBB88_16: # %cond.load10
10471; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
10472; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10473; RV64ZVE32F-NEXT:    slli a2, a2, 2
10474; RV64ZVE32F-NEXT:    add a2, a0, a2
10475; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10476; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
10477; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10478; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
10479; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
10480; RV64ZVE32F-NEXT:    andi a2, a1, 32
10481; RV64ZVE32F-NEXT:    bnez a2, .LBB88_8
10482; RV64ZVE32F-NEXT:    j .LBB88_9
10483  %eidxs = sext <8 x i16> %idxs to <8 x i32>
10484  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
10485  %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10486  ret <8 x float> %v
10487}
10488
10489define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10490; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f32:
10491; RV32:       # %bb.0:
10492; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
10493; RV32-NEXT:    vzext.vf2 v12, v8
10494; RV32-NEXT:    vsll.vi v8, v12, 2
10495; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
10496; RV32-NEXT:    vmv.v.v v8, v10
10497; RV32-NEXT:    ret
10498;
10499; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32:
10500; RV64V:       # %bb.0:
10501; RV64V-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
10502; RV64V-NEXT:    vzext.vf2 v12, v8
10503; RV64V-NEXT:    vsll.vi v8, v12, 2
10504; RV64V-NEXT:    vluxei32.v v10, (a0), v8, v0.t
10505; RV64V-NEXT:    vmv.v.v v8, v10
10506; RV64V-NEXT:    ret
10507;
10508; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f32:
10509; RV64ZVE32F:       # %bb.0:
10510; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10511; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
10512; RV64ZVE32F-NEXT:    andi a2, a1, 1
10513; RV64ZVE32F-NEXT:    beqz a2, .LBB89_2
10514; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
10515; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
10516; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10517; RV64ZVE32F-NEXT:    slli a2, a2, 48
10518; RV64ZVE32F-NEXT:    srli a2, a2, 46
10519; RV64ZVE32F-NEXT:    add a2, a0, a2
10520; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10521; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
10522; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
10523; RV64ZVE32F-NEXT:  .LBB89_2: # %else
10524; RV64ZVE32F-NEXT:    andi a2, a1, 2
10525; RV64ZVE32F-NEXT:    beqz a2, .LBB89_4
10526; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
10527; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10528; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
10529; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10530; RV64ZVE32F-NEXT:    slli a2, a2, 48
10531; RV64ZVE32F-NEXT:    srli a2, a2, 46
10532; RV64ZVE32F-NEXT:    add a2, a0, a2
10533; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10534; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10535; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
10536; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
10537; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
10538; RV64ZVE32F-NEXT:  .LBB89_4: # %else2
10539; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
10540; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
10541; RV64ZVE32F-NEXT:    andi a2, a1, 4
10542; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
10543; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
10544; RV64ZVE32F-NEXT:    bnez a2, .LBB89_14
10545; RV64ZVE32F-NEXT:  # %bb.5: # %else5
10546; RV64ZVE32F-NEXT:    andi a2, a1, 8
10547; RV64ZVE32F-NEXT:    bnez a2, .LBB89_15
10548; RV64ZVE32F-NEXT:  .LBB89_6: # %else8
10549; RV64ZVE32F-NEXT:    andi a2, a1, 16
10550; RV64ZVE32F-NEXT:    bnez a2, .LBB89_16
10551; RV64ZVE32F-NEXT:  .LBB89_7: # %else11
10552; RV64ZVE32F-NEXT:    andi a2, a1, 32
10553; RV64ZVE32F-NEXT:    beqz a2, .LBB89_9
10554; RV64ZVE32F-NEXT:  .LBB89_8: # %cond.load13
10555; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10556; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
10557; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10558; RV64ZVE32F-NEXT:    slli a2, a2, 48
10559; RV64ZVE32F-NEXT:    srli a2, a2, 46
10560; RV64ZVE32F-NEXT:    add a2, a0, a2
10561; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10562; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10563; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10564; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
10565; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
10566; RV64ZVE32F-NEXT:  .LBB89_9: # %else14
10567; RV64ZVE32F-NEXT:    andi a2, a1, 64
10568; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
10569; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
10570; RV64ZVE32F-NEXT:    beqz a2, .LBB89_11
10571; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
10572; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10573; RV64ZVE32F-NEXT:    slli a2, a2, 48
10574; RV64ZVE32F-NEXT:    srli a2, a2, 46
10575; RV64ZVE32F-NEXT:    add a2, a0, a2
10576; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10577; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10578; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10579; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
10580; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
10581; RV64ZVE32F-NEXT:  .LBB89_11: # %else17
10582; RV64ZVE32F-NEXT:    andi a1, a1, -128
10583; RV64ZVE32F-NEXT:    beqz a1, .LBB89_13
10584; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
10585; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10586; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10587; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
10588; RV64ZVE32F-NEXT:    slli a1, a1, 48
10589; RV64ZVE32F-NEXT:    srli a1, a1, 46
10590; RV64ZVE32F-NEXT:    add a0, a0, a1
10591; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
10592; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10593; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10594; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
10595; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
10596; RV64ZVE32F-NEXT:  .LBB89_13: # %else20
10597; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10598; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
10599; RV64ZVE32F-NEXT:    ret
10600; RV64ZVE32F-NEXT:  .LBB89_14: # %cond.load4
10601; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10602; RV64ZVE32F-NEXT:    slli a2, a2, 48
10603; RV64ZVE32F-NEXT:    srli a2, a2, 46
10604; RV64ZVE32F-NEXT:    add a2, a0, a2
10605; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10606; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10607; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10608; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
10609; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
10610; RV64ZVE32F-NEXT:    andi a2, a1, 8
10611; RV64ZVE32F-NEXT:    beqz a2, .LBB89_6
10612; RV64ZVE32F-NEXT:  .LBB89_15: # %cond.load7
10613; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
10614; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10615; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10616; RV64ZVE32F-NEXT:    slli a2, a2, 48
10617; RV64ZVE32F-NEXT:    srli a2, a2, 46
10618; RV64ZVE32F-NEXT:    add a2, a0, a2
10619; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10620; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10621; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10622; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
10623; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
10624; RV64ZVE32F-NEXT:    andi a2, a1, 16
10625; RV64ZVE32F-NEXT:    beqz a2, .LBB89_7
10626; RV64ZVE32F-NEXT:  .LBB89_16: # %cond.load10
10627; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
10628; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
10629; RV64ZVE32F-NEXT:    slli a2, a2, 48
10630; RV64ZVE32F-NEXT:    srli a2, a2, 46
10631; RV64ZVE32F-NEXT:    add a2, a0, a2
10632; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10633; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
10634; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10635; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
10636; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
10637; RV64ZVE32F-NEXT:    andi a2, a1, 32
10638; RV64ZVE32F-NEXT:    bnez a2, .LBB89_8
10639; RV64ZVE32F-NEXT:    j .LBB89_9
10640  %eidxs = zext <8 x i16> %idxs to <8 x i32>
10641  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
10642  %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10643  ret <8 x float> %v
10644}
10645
10646define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10647; RV32-LABEL: mgather_baseidx_v8f32:
10648; RV32:       # %bb.0:
10649; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
10650; RV32-NEXT:    vsll.vi v8, v8, 2
10651; RV32-NEXT:    vluxei32.v v10, (a0), v8, v0.t
10652; RV32-NEXT:    vmv.v.v v8, v10
10653; RV32-NEXT:    ret
10654;
10655; RV64V-LABEL: mgather_baseidx_v8f32:
10656; RV64V:       # %bb.0:
10657; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
10658; RV64V-NEXT:    vsext.vf2 v12, v8
10659; RV64V-NEXT:    vsll.vi v12, v12, 2
10660; RV64V-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
10661; RV64V-NEXT:    vluxei64.v v10, (a0), v12, v0.t
10662; RV64V-NEXT:    vmv.v.v v8, v10
10663; RV64V-NEXT:    ret
10664;
10665; RV64ZVE32F-LABEL: mgather_baseidx_v8f32:
10666; RV64ZVE32F:       # %bb.0:
10667; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10668; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
10669; RV64ZVE32F-NEXT:    andi a2, a1, 1
10670; RV64ZVE32F-NEXT:    beqz a2, .LBB90_2
10671; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
10672; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
10673; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10674; RV64ZVE32F-NEXT:    slli a2, a2, 2
10675; RV64ZVE32F-NEXT:    add a2, a0, a2
10676; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10677; RV64ZVE32F-NEXT:    vfmv.s.f v10, fa5
10678; RV64ZVE32F-NEXT:  .LBB90_2: # %else
10679; RV64ZVE32F-NEXT:    andi a2, a1, 2
10680; RV64ZVE32F-NEXT:    beqz a2, .LBB90_4
10681; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
10682; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
10683; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 1
10684; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
10685; RV64ZVE32F-NEXT:    slli a2, a2, 2
10686; RV64ZVE32F-NEXT:    add a2, a0, a2
10687; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10688; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10689; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 1
10690; RV64ZVE32F-NEXT:  .LBB90_4: # %else2
10691; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
10692; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
10693; RV64ZVE32F-NEXT:    andi a2, a1, 4
10694; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
10695; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
10696; RV64ZVE32F-NEXT:    bnez a2, .LBB90_14
10697; RV64ZVE32F-NEXT:  # %bb.5: # %else5
10698; RV64ZVE32F-NEXT:    andi a2, a1, 8
10699; RV64ZVE32F-NEXT:    bnez a2, .LBB90_15
10700; RV64ZVE32F-NEXT:  .LBB90_6: # %else8
10701; RV64ZVE32F-NEXT:    andi a2, a1, 16
10702; RV64ZVE32F-NEXT:    bnez a2, .LBB90_16
10703; RV64ZVE32F-NEXT:  .LBB90_7: # %else11
10704; RV64ZVE32F-NEXT:    andi a2, a1, 32
10705; RV64ZVE32F-NEXT:    beqz a2, .LBB90_9
10706; RV64ZVE32F-NEXT:  .LBB90_8: # %cond.load13
10707; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
10708; RV64ZVE32F-NEXT:    vslidedown.vi v8, v12, 1
10709; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10710; RV64ZVE32F-NEXT:    slli a2, a2, 2
10711; RV64ZVE32F-NEXT:    add a2, a0, a2
10712; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10713; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10714; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
10715; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
10716; RV64ZVE32F-NEXT:  .LBB90_9: # %else14
10717; RV64ZVE32F-NEXT:    andi a2, a1, 64
10718; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
10719; RV64ZVE32F-NEXT:    vslidedown.vi v8, v12, 2
10720; RV64ZVE32F-NEXT:    beqz a2, .LBB90_11
10721; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
10722; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10723; RV64ZVE32F-NEXT:    slli a2, a2, 2
10724; RV64ZVE32F-NEXT:    add a2, a0, a2
10725; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10726; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
10727; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
10728; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
10729; RV64ZVE32F-NEXT:  .LBB90_11: # %else17
10730; RV64ZVE32F-NEXT:    andi a1, a1, -128
10731; RV64ZVE32F-NEXT:    beqz a1, .LBB90_13
10732; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
10733; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
10734; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10735; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
10736; RV64ZVE32F-NEXT:    slli a1, a1, 2
10737; RV64ZVE32F-NEXT:    add a0, a0, a1
10738; RV64ZVE32F-NEXT:    flw fa5, 0(a0)
10739; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10740; RV64ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
10741; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 7
10742; RV64ZVE32F-NEXT:  .LBB90_13: # %else20
10743; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10744; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
10745; RV64ZVE32F-NEXT:    ret
10746; RV64ZVE32F-NEXT:  .LBB90_14: # %cond.load4
10747; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10748; RV64ZVE32F-NEXT:    slli a2, a2, 2
10749; RV64ZVE32F-NEXT:    add a2, a0, a2
10750; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10751; RV64ZVE32F-NEXT:    vfmv.s.f v9, fa5
10752; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
10753; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 2
10754; RV64ZVE32F-NEXT:    andi a2, a1, 8
10755; RV64ZVE32F-NEXT:    beqz a2, .LBB90_6
10756; RV64ZVE32F-NEXT:  .LBB90_15: # %cond.load7
10757; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
10758; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10759; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
10760; RV64ZVE32F-NEXT:    slli a2, a2, 2
10761; RV64ZVE32F-NEXT:    add a2, a0, a2
10762; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10763; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10764; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 3
10765; RV64ZVE32F-NEXT:    andi a2, a1, 16
10766; RV64ZVE32F-NEXT:    beqz a2, .LBB90_7
10767; RV64ZVE32F-NEXT:  .LBB90_16: # %cond.load10
10768; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
10769; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
10770; RV64ZVE32F-NEXT:    slli a2, a2, 2
10771; RV64ZVE32F-NEXT:    add a2, a0, a2
10772; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
10773; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
10774; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
10775; RV64ZVE32F-NEXT:    andi a2, a1, 32
10776; RV64ZVE32F-NEXT:    bnez a2, .LBB90_8
10777; RV64ZVE32F-NEXT:    j .LBB90_9
10778  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
10779  %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10780  ret <8 x float> %v
10781}
10782
10783declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
10784
10785define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %passthru) {
10786; RV32V-LABEL: mgather_v1f64:
10787; RV32V:       # %bb.0:
10788; RV32V-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
10789; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
10790; RV32V-NEXT:    vmv.v.v v8, v9
10791; RV32V-NEXT:    ret
10792;
10793; RV64V-LABEL: mgather_v1f64:
10794; RV64V:       # %bb.0:
10795; RV64V-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
10796; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
10797; RV64V-NEXT:    vmv.v.v v8, v9
10798; RV64V-NEXT:    ret
10799;
10800; RV32ZVE32F-LABEL: mgather_v1f64:
10801; RV32ZVE32F:       # %bb.0:
10802; RV32ZVE32F-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
10803; RV32ZVE32F-NEXT:    vfirst.m a0, v0
10804; RV32ZVE32F-NEXT:    bnez a0, .LBB91_2
10805; RV32ZVE32F-NEXT:  # %bb.1: # %cond.load
10806; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
10807; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
10808; RV32ZVE32F-NEXT:    fld fa0, 0(a0)
10809; RV32ZVE32F-NEXT:  .LBB91_2: # %else
10810; RV32ZVE32F-NEXT:    ret
10811;
10812; RV64ZVE32F-LABEL: mgather_v1f64:
10813; RV64ZVE32F:       # %bb.0:
10814; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
10815; RV64ZVE32F-NEXT:    vfirst.m a1, v0
10816; RV64ZVE32F-NEXT:    bnez a1, .LBB91_2
10817; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
10818; RV64ZVE32F-NEXT:    fld fa0, 0(a0)
10819; RV64ZVE32F-NEXT:  .LBB91_2: # %else
10820; RV64ZVE32F-NEXT:    ret
10821  %v = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x double> %passthru)
10822  ret <1 x double> %v
10823}
10824
10825declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
10826
10827define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %passthru) {
10828; RV32V-LABEL: mgather_v2f64:
10829; RV32V:       # %bb.0:
10830; RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
10831; RV32V-NEXT:    vluxei32.v v9, (zero), v8, v0.t
10832; RV32V-NEXT:    vmv.v.v v8, v9
10833; RV32V-NEXT:    ret
10834;
10835; RV64V-LABEL: mgather_v2f64:
10836; RV64V:       # %bb.0:
10837; RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
10838; RV64V-NEXT:    vluxei64.v v9, (zero), v8, v0.t
10839; RV64V-NEXT:    vmv.v.v v8, v9
10840; RV64V-NEXT:    ret
10841;
10842; RV32ZVE32F-LABEL: mgather_v2f64:
10843; RV32ZVE32F:       # %bb.0:
10844; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10845; RV32ZVE32F-NEXT:    vmv.x.s a0, v0
10846; RV32ZVE32F-NEXT:    andi a1, a0, 1
10847; RV32ZVE32F-NEXT:    bnez a1, .LBB92_3
10848; RV32ZVE32F-NEXT:  # %bb.1: # %else
10849; RV32ZVE32F-NEXT:    andi a0, a0, 2
10850; RV32ZVE32F-NEXT:    bnez a0, .LBB92_4
10851; RV32ZVE32F-NEXT:  .LBB92_2: # %else2
10852; RV32ZVE32F-NEXT:    ret
10853; RV32ZVE32F-NEXT:  .LBB92_3: # %cond.load
10854; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
10855; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
10856; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
10857; RV32ZVE32F-NEXT:    andi a0, a0, 2
10858; RV32ZVE32F-NEXT:    beqz a0, .LBB92_2
10859; RV32ZVE32F-NEXT:  .LBB92_4: # %cond.load1
10860; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
10861; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
10862; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
10863; RV32ZVE32F-NEXT:    fld fa1, 0(a0)
10864; RV32ZVE32F-NEXT:    ret
10865;
10866; RV64ZVE32F-LABEL: mgather_v2f64:
10867; RV64ZVE32F:       # %bb.0:
10868; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10869; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
10870; RV64ZVE32F-NEXT:    andi a3, a2, 1
10871; RV64ZVE32F-NEXT:    bnez a3, .LBB92_3
10872; RV64ZVE32F-NEXT:  # %bb.1: # %else
10873; RV64ZVE32F-NEXT:    andi a2, a2, 2
10874; RV64ZVE32F-NEXT:    bnez a2, .LBB92_4
10875; RV64ZVE32F-NEXT:  .LBB92_2: # %else2
10876; RV64ZVE32F-NEXT:    ret
10877; RV64ZVE32F-NEXT:  .LBB92_3: # %cond.load
10878; RV64ZVE32F-NEXT:    fld fa0, 0(a0)
10879; RV64ZVE32F-NEXT:    andi a2, a2, 2
10880; RV64ZVE32F-NEXT:    beqz a2, .LBB92_2
10881; RV64ZVE32F-NEXT:  .LBB92_4: # %cond.load1
10882; RV64ZVE32F-NEXT:    fld fa1, 0(a1)
10883; RV64ZVE32F-NEXT:    ret
10884  %v = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x double> %passthru)
10885  ret <2 x double> %v
10886}
10887
10888declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
10889
10890define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %passthru) {
10891; RV32V-LABEL: mgather_v4f64:
10892; RV32V:       # %bb.0:
10893; RV32V-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
10894; RV32V-NEXT:    vluxei32.v v10, (zero), v8, v0.t
10895; RV32V-NEXT:    vmv.v.v v8, v10
10896; RV32V-NEXT:    ret
10897;
10898; RV64V-LABEL: mgather_v4f64:
10899; RV64V:       # %bb.0:
10900; RV64V-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
10901; RV64V-NEXT:    vluxei64.v v10, (zero), v8, v0.t
10902; RV64V-NEXT:    vmv.v.v v8, v10
10903; RV64V-NEXT:    ret
10904;
10905; RV32ZVE32F-LABEL: mgather_v4f64:
10906; RV32ZVE32F:       # %bb.0:
10907; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10908; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
10909; RV32ZVE32F-NEXT:    andi a2, a1, 1
10910; RV32ZVE32F-NEXT:    bnez a2, .LBB93_6
10911; RV32ZVE32F-NEXT:  # %bb.1: # %else
10912; RV32ZVE32F-NEXT:    andi a2, a1, 2
10913; RV32ZVE32F-NEXT:    bnez a2, .LBB93_7
10914; RV32ZVE32F-NEXT:  .LBB93_2: # %else2
10915; RV32ZVE32F-NEXT:    andi a2, a1, 4
10916; RV32ZVE32F-NEXT:    bnez a2, .LBB93_8
10917; RV32ZVE32F-NEXT:  .LBB93_3: # %else5
10918; RV32ZVE32F-NEXT:    andi a1, a1, 8
10919; RV32ZVE32F-NEXT:    beqz a1, .LBB93_5
10920; RV32ZVE32F-NEXT:  .LBB93_4: # %cond.load7
10921; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
10922; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
10923; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
10924; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
10925; RV32ZVE32F-NEXT:  .LBB93_5: # %else8
10926; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
10927; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
10928; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
10929; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
10930; RV32ZVE32F-NEXT:    ret
10931; RV32ZVE32F-NEXT:  .LBB93_6: # %cond.load
10932; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
10933; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
10934; RV32ZVE32F-NEXT:    fld fa0, 0(a2)
10935; RV32ZVE32F-NEXT:    andi a2, a1, 2
10936; RV32ZVE32F-NEXT:    beqz a2, .LBB93_2
10937; RV32ZVE32F-NEXT:  .LBB93_7: # %cond.load1
10938; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
10939; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
10940; RV32ZVE32F-NEXT:    vmv.x.s a2, v9
10941; RV32ZVE32F-NEXT:    fld fa1, 0(a2)
10942; RV32ZVE32F-NEXT:    andi a2, a1, 4
10943; RV32ZVE32F-NEXT:    beqz a2, .LBB93_3
10944; RV32ZVE32F-NEXT:  .LBB93_8: # %cond.load4
10945; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
10946; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
10947; RV32ZVE32F-NEXT:    vmv.x.s a2, v9
10948; RV32ZVE32F-NEXT:    fld fa2, 0(a2)
10949; RV32ZVE32F-NEXT:    andi a1, a1, 8
10950; RV32ZVE32F-NEXT:    bnez a1, .LBB93_4
10951; RV32ZVE32F-NEXT:    j .LBB93_5
10952;
10953; RV64ZVE32F-LABEL: mgather_v4f64:
10954; RV64ZVE32F:       # %bb.0:
10955; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
10956; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
10957; RV64ZVE32F-NEXT:    andi a3, a2, 1
10958; RV64ZVE32F-NEXT:    bnez a3, .LBB93_6
10959; RV64ZVE32F-NEXT:  # %bb.1: # %else
10960; RV64ZVE32F-NEXT:    andi a3, a2, 2
10961; RV64ZVE32F-NEXT:    bnez a3, .LBB93_7
10962; RV64ZVE32F-NEXT:  .LBB93_2: # %else2
10963; RV64ZVE32F-NEXT:    andi a3, a2, 4
10964; RV64ZVE32F-NEXT:    bnez a3, .LBB93_8
10965; RV64ZVE32F-NEXT:  .LBB93_3: # %else5
10966; RV64ZVE32F-NEXT:    andi a2, a2, 8
10967; RV64ZVE32F-NEXT:    beqz a2, .LBB93_5
10968; RV64ZVE32F-NEXT:  .LBB93_4: # %cond.load7
10969; RV64ZVE32F-NEXT:    ld a1, 24(a1)
10970; RV64ZVE32F-NEXT:    fld fa3, 0(a1)
10971; RV64ZVE32F-NEXT:  .LBB93_5: # %else8
10972; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
10973; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
10974; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
10975; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
10976; RV64ZVE32F-NEXT:    ret
10977; RV64ZVE32F-NEXT:  .LBB93_6: # %cond.load
10978; RV64ZVE32F-NEXT:    ld a3, 0(a1)
10979; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
10980; RV64ZVE32F-NEXT:    andi a3, a2, 2
10981; RV64ZVE32F-NEXT:    beqz a3, .LBB93_2
10982; RV64ZVE32F-NEXT:  .LBB93_7: # %cond.load1
10983; RV64ZVE32F-NEXT:    ld a3, 8(a1)
10984; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
10985; RV64ZVE32F-NEXT:    andi a3, a2, 4
10986; RV64ZVE32F-NEXT:    beqz a3, .LBB93_3
10987; RV64ZVE32F-NEXT:  .LBB93_8: # %cond.load4
10988; RV64ZVE32F-NEXT:    ld a3, 16(a1)
10989; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
10990; RV64ZVE32F-NEXT:    andi a2, a2, 8
10991; RV64ZVE32F-NEXT:    bnez a2, .LBB93_4
10992; RV64ZVE32F-NEXT:    j .LBB93_5
10993  %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru)
10994  ret <4 x double> %v
10995}
10996
10997define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
10998; RV32V-LABEL: mgather_truemask_v4f64:
10999; RV32V:       # %bb.0:
11000; RV32V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
11001; RV32V-NEXT:    vluxei32.v v10, (zero), v8
11002; RV32V-NEXT:    vmv.v.v v8, v10
11003; RV32V-NEXT:    ret
11004;
11005; RV64V-LABEL: mgather_truemask_v4f64:
11006; RV64V:       # %bb.0:
11007; RV64V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
11008; RV64V-NEXT:    vluxei64.v v8, (zero), v8
11009; RV64V-NEXT:    ret
11010;
11011; RV32ZVE32F-LABEL: mgather_truemask_v4f64:
11012; RV32ZVE32F:       # %bb.0:
11013; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11014; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11015; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
11016; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
11017; RV32ZVE32F-NEXT:    vmv.x.s a1, v9
11018; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
11019; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
11020; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
11021; RV32ZVE32F-NEXT:    vmv.x.s a1, v9
11022; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
11023; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11024; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
11025; RV32ZVE32F-NEXT:    fsd fa5, 0(a0)
11026; RV32ZVE32F-NEXT:    fsd fa4, 8(a0)
11027; RV32ZVE32F-NEXT:    fsd fa3, 16(a0)
11028; RV32ZVE32F-NEXT:    fsd fa2, 24(a0)
11029; RV32ZVE32F-NEXT:    ret
11030;
11031; RV64ZVE32F-LABEL: mgather_truemask_v4f64:
11032; RV64ZVE32F:       # %bb.0:
11033; RV64ZVE32F-NEXT:    ld a2, 0(a1)
11034; RV64ZVE32F-NEXT:    ld a3, 8(a1)
11035; RV64ZVE32F-NEXT:    ld a4, 16(a1)
11036; RV64ZVE32F-NEXT:    ld a1, 24(a1)
11037; RV64ZVE32F-NEXT:    fld fa5, 0(a2)
11038; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
11039; RV64ZVE32F-NEXT:    fld fa3, 0(a4)
11040; RV64ZVE32F-NEXT:    fld fa2, 0(a1)
11041; RV64ZVE32F-NEXT:    fsd fa5, 0(a0)
11042; RV64ZVE32F-NEXT:    fsd fa4, 8(a0)
11043; RV64ZVE32F-NEXT:    fsd fa3, 16(a0)
11044; RV64ZVE32F-NEXT:    fsd fa2, 24(a0)
11045; RV64ZVE32F-NEXT:    ret
11046  %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x double> %passthru)
11047  ret <4 x double> %v
11048}
11049
11050define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
11051; RV32V-LABEL: mgather_falsemask_v4f64:
11052; RV32V:       # %bb.0:
11053; RV32V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
11054; RV32V-NEXT:    vmv2r.v v8, v10
11055; RV32V-NEXT:    ret
11056;
11057; RV64V-LABEL: mgather_falsemask_v4f64:
11058; RV64V:       # %bb.0:
11059; RV64V-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
11060; RV64V-NEXT:    vmv2r.v v8, v10
11061; RV64V-NEXT:    ret
11062;
11063; RV32ZVE32F-LABEL: mgather_falsemask_v4f64:
11064; RV32ZVE32F:       # %bb.0:
11065; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
11066; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
11067; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
11068; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
11069; RV32ZVE32F-NEXT:    ret
11070;
11071; RV64ZVE32F-LABEL: mgather_falsemask_v4f64:
11072; RV64ZVE32F:       # %bb.0:
11073; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
11074; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
11075; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
11076; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
11077; RV64ZVE32F-NEXT:    ret
11078  %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x double> %passthru)
11079  ret <4 x double> %v
11080}
11081
11082declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
11083
11084define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %passthru) {
11085; RV32V-LABEL: mgather_v8f64:
11086; RV32V:       # %bb.0:
11087; RV32V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
11088; RV32V-NEXT:    vluxei32.v v12, (zero), v8, v0.t
11089; RV32V-NEXT:    vmv.v.v v8, v12
11090; RV32V-NEXT:    ret
11091;
11092; RV64V-LABEL: mgather_v8f64:
11093; RV64V:       # %bb.0:
11094; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
11095; RV64V-NEXT:    vluxei64.v v12, (zero), v8, v0.t
11096; RV64V-NEXT:    vmv.v.v v8, v12
11097; RV64V-NEXT:    ret
11098;
11099; RV32ZVE32F-LABEL: mgather_v8f64:
11100; RV32ZVE32F:       # %bb.0:
11101; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
11102; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
11103; RV32ZVE32F-NEXT:    andi a2, a1, 1
11104; RV32ZVE32F-NEXT:    bnez a2, .LBB96_10
11105; RV32ZVE32F-NEXT:  # %bb.1: # %else
11106; RV32ZVE32F-NEXT:    andi a2, a1, 2
11107; RV32ZVE32F-NEXT:    bnez a2, .LBB96_11
11108; RV32ZVE32F-NEXT:  .LBB96_2: # %else2
11109; RV32ZVE32F-NEXT:    andi a2, a1, 4
11110; RV32ZVE32F-NEXT:    bnez a2, .LBB96_12
11111; RV32ZVE32F-NEXT:  .LBB96_3: # %else5
11112; RV32ZVE32F-NEXT:    andi a2, a1, 8
11113; RV32ZVE32F-NEXT:    bnez a2, .LBB96_13
11114; RV32ZVE32F-NEXT:  .LBB96_4: # %else8
11115; RV32ZVE32F-NEXT:    andi a2, a1, 16
11116; RV32ZVE32F-NEXT:    bnez a2, .LBB96_14
11117; RV32ZVE32F-NEXT:  .LBB96_5: # %else11
11118; RV32ZVE32F-NEXT:    andi a2, a1, 32
11119; RV32ZVE32F-NEXT:    bnez a2, .LBB96_15
11120; RV32ZVE32F-NEXT:  .LBB96_6: # %else14
11121; RV32ZVE32F-NEXT:    andi a2, a1, 64
11122; RV32ZVE32F-NEXT:    bnez a2, .LBB96_16
11123; RV32ZVE32F-NEXT:  .LBB96_7: # %else17
11124; RV32ZVE32F-NEXT:    andi a1, a1, -128
11125; RV32ZVE32F-NEXT:    beqz a1, .LBB96_9
11126; RV32ZVE32F-NEXT:  .LBB96_8: # %cond.load19
11127; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11128; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
11129; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11130; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
11131; RV32ZVE32F-NEXT:  .LBB96_9: # %else20
11132; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
11133; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
11134; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
11135; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
11136; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
11137; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
11138; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
11139; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
11140; RV32ZVE32F-NEXT:    ret
11141; RV32ZVE32F-NEXT:  .LBB96_10: # %cond.load
11142; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
11143; RV32ZVE32F-NEXT:    vmv.x.s a2, v8
11144; RV32ZVE32F-NEXT:    fld fa0, 0(a2)
11145; RV32ZVE32F-NEXT:    andi a2, a1, 2
11146; RV32ZVE32F-NEXT:    beqz a2, .LBB96_2
11147; RV32ZVE32F-NEXT:  .LBB96_11: # %cond.load1
11148; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11149; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
11150; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
11151; RV32ZVE32F-NEXT:    fld fa1, 0(a2)
11152; RV32ZVE32F-NEXT:    andi a2, a1, 4
11153; RV32ZVE32F-NEXT:    beqz a2, .LBB96_3
11154; RV32ZVE32F-NEXT:  .LBB96_12: # %cond.load4
11155; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11156; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
11157; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
11158; RV32ZVE32F-NEXT:    fld fa2, 0(a2)
11159; RV32ZVE32F-NEXT:    andi a2, a1, 8
11160; RV32ZVE32F-NEXT:    beqz a2, .LBB96_4
11161; RV32ZVE32F-NEXT:  .LBB96_13: # %cond.load7
11162; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11163; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
11164; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
11165; RV32ZVE32F-NEXT:    fld fa3, 0(a2)
11166; RV32ZVE32F-NEXT:    andi a2, a1, 16
11167; RV32ZVE32F-NEXT:    beqz a2, .LBB96_5
11168; RV32ZVE32F-NEXT:  .LBB96_14: # %cond.load10
11169; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11170; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
11171; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
11172; RV32ZVE32F-NEXT:    fld fa4, 0(a2)
11173; RV32ZVE32F-NEXT:    andi a2, a1, 32
11174; RV32ZVE32F-NEXT:    beqz a2, .LBB96_6
11175; RV32ZVE32F-NEXT:  .LBB96_15: # %cond.load13
11176; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11177; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
11178; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
11179; RV32ZVE32F-NEXT:    fld fa5, 0(a2)
11180; RV32ZVE32F-NEXT:    andi a2, a1, 64
11181; RV32ZVE32F-NEXT:    beqz a2, .LBB96_7
11182; RV32ZVE32F-NEXT:  .LBB96_16: # %cond.load16
11183; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11184; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
11185; RV32ZVE32F-NEXT:    vmv.x.s a2, v10
11186; RV32ZVE32F-NEXT:    fld fa6, 0(a2)
11187; RV32ZVE32F-NEXT:    andi a1, a1, -128
11188; RV32ZVE32F-NEXT:    bnez a1, .LBB96_8
11189; RV32ZVE32F-NEXT:    j .LBB96_9
11190;
11191; RV64ZVE32F-LABEL: mgather_v8f64:
11192; RV64ZVE32F:       # %bb.0:
11193; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
11194; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
11195; RV64ZVE32F-NEXT:    andi a3, a2, 1
11196; RV64ZVE32F-NEXT:    bnez a3, .LBB96_10
11197; RV64ZVE32F-NEXT:  # %bb.1: # %else
11198; RV64ZVE32F-NEXT:    andi a3, a2, 2
11199; RV64ZVE32F-NEXT:    bnez a3, .LBB96_11
11200; RV64ZVE32F-NEXT:  .LBB96_2: # %else2
11201; RV64ZVE32F-NEXT:    andi a3, a2, 4
11202; RV64ZVE32F-NEXT:    bnez a3, .LBB96_12
11203; RV64ZVE32F-NEXT:  .LBB96_3: # %else5
11204; RV64ZVE32F-NEXT:    andi a3, a2, 8
11205; RV64ZVE32F-NEXT:    bnez a3, .LBB96_13
11206; RV64ZVE32F-NEXT:  .LBB96_4: # %else8
11207; RV64ZVE32F-NEXT:    andi a3, a2, 16
11208; RV64ZVE32F-NEXT:    bnez a3, .LBB96_14
11209; RV64ZVE32F-NEXT:  .LBB96_5: # %else11
11210; RV64ZVE32F-NEXT:    andi a3, a2, 32
11211; RV64ZVE32F-NEXT:    bnez a3, .LBB96_15
11212; RV64ZVE32F-NEXT:  .LBB96_6: # %else14
11213; RV64ZVE32F-NEXT:    andi a3, a2, 64
11214; RV64ZVE32F-NEXT:    bnez a3, .LBB96_16
11215; RV64ZVE32F-NEXT:  .LBB96_7: # %else17
11216; RV64ZVE32F-NEXT:    andi a2, a2, -128
11217; RV64ZVE32F-NEXT:    beqz a2, .LBB96_9
11218; RV64ZVE32F-NEXT:  .LBB96_8: # %cond.load19
11219; RV64ZVE32F-NEXT:    ld a1, 56(a1)
11220; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
11221; RV64ZVE32F-NEXT:  .LBB96_9: # %else20
11222; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
11223; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
11224; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
11225; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
11226; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
11227; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
11228; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
11229; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
11230; RV64ZVE32F-NEXT:    ret
11231; RV64ZVE32F-NEXT:  .LBB96_10: # %cond.load
11232; RV64ZVE32F-NEXT:    ld a3, 0(a1)
11233; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
11234; RV64ZVE32F-NEXT:    andi a3, a2, 2
11235; RV64ZVE32F-NEXT:    beqz a3, .LBB96_2
11236; RV64ZVE32F-NEXT:  .LBB96_11: # %cond.load1
11237; RV64ZVE32F-NEXT:    ld a3, 8(a1)
11238; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
11239; RV64ZVE32F-NEXT:    andi a3, a2, 4
11240; RV64ZVE32F-NEXT:    beqz a3, .LBB96_3
11241; RV64ZVE32F-NEXT:  .LBB96_12: # %cond.load4
11242; RV64ZVE32F-NEXT:    ld a3, 16(a1)
11243; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
11244; RV64ZVE32F-NEXT:    andi a3, a2, 8
11245; RV64ZVE32F-NEXT:    beqz a3, .LBB96_4
11246; RV64ZVE32F-NEXT:  .LBB96_13: # %cond.load7
11247; RV64ZVE32F-NEXT:    ld a3, 24(a1)
11248; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
11249; RV64ZVE32F-NEXT:    andi a3, a2, 16
11250; RV64ZVE32F-NEXT:    beqz a3, .LBB96_5
11251; RV64ZVE32F-NEXT:  .LBB96_14: # %cond.load10
11252; RV64ZVE32F-NEXT:    ld a3, 32(a1)
11253; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
11254; RV64ZVE32F-NEXT:    andi a3, a2, 32
11255; RV64ZVE32F-NEXT:    beqz a3, .LBB96_6
11256; RV64ZVE32F-NEXT:  .LBB96_15: # %cond.load13
11257; RV64ZVE32F-NEXT:    ld a3, 40(a1)
11258; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
11259; RV64ZVE32F-NEXT:    andi a3, a2, 64
11260; RV64ZVE32F-NEXT:    beqz a3, .LBB96_7
11261; RV64ZVE32F-NEXT:  .LBB96_16: # %cond.load16
11262; RV64ZVE32F-NEXT:    ld a3, 48(a1)
11263; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
11264; RV64ZVE32F-NEXT:    andi a2, a2, -128
11265; RV64ZVE32F-NEXT:    bnez a2, .LBB96_8
11266; RV64ZVE32F-NEXT:    j .LBB96_9
11267  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11268  ret <8 x double> %v
11269}
11270
11271define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11272; RV32V-LABEL: mgather_baseidx_v8i8_v8f64:
11273; RV32V:       # %bb.0:
11274; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
11275; RV32V-NEXT:    vsext.vf4 v10, v8
11276; RV32V-NEXT:    vsll.vi v8, v10, 3
11277; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
11278; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
11279; RV32V-NEXT:    vmv.v.v v8, v12
11280; RV32V-NEXT:    ret
11281;
11282; RV64V-LABEL: mgather_baseidx_v8i8_v8f64:
11283; RV64V:       # %bb.0:
11284; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
11285; RV64V-NEXT:    vsext.vf8 v16, v8
11286; RV64V-NEXT:    vsll.vi v8, v16, 3
11287; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
11288; RV64V-NEXT:    vmv.v.v v8, v12
11289; RV64V-NEXT:    ret
11290;
11291; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
11292; RV32ZVE32F:       # %bb.0:
11293; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
11294; RV32ZVE32F-NEXT:    vsext.vf4 v10, v8
11295; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
11296; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
11297; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
11298; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
11299; RV32ZVE32F-NEXT:    andi a3, a2, 1
11300; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
11301; RV32ZVE32F-NEXT:    bnez a3, .LBB97_10
11302; RV32ZVE32F-NEXT:  # %bb.1: # %else
11303; RV32ZVE32F-NEXT:    andi a1, a2, 2
11304; RV32ZVE32F-NEXT:    bnez a1, .LBB97_11
11305; RV32ZVE32F-NEXT:  .LBB97_2: # %else2
11306; RV32ZVE32F-NEXT:    andi a1, a2, 4
11307; RV32ZVE32F-NEXT:    bnez a1, .LBB97_12
11308; RV32ZVE32F-NEXT:  .LBB97_3: # %else5
11309; RV32ZVE32F-NEXT:    andi a1, a2, 8
11310; RV32ZVE32F-NEXT:    bnez a1, .LBB97_13
11311; RV32ZVE32F-NEXT:  .LBB97_4: # %else8
11312; RV32ZVE32F-NEXT:    andi a1, a2, 16
11313; RV32ZVE32F-NEXT:    bnez a1, .LBB97_14
11314; RV32ZVE32F-NEXT:  .LBB97_5: # %else11
11315; RV32ZVE32F-NEXT:    andi a1, a2, 32
11316; RV32ZVE32F-NEXT:    bnez a1, .LBB97_15
11317; RV32ZVE32F-NEXT:  .LBB97_6: # %else14
11318; RV32ZVE32F-NEXT:    andi a1, a2, 64
11319; RV32ZVE32F-NEXT:    bnez a1, .LBB97_16
11320; RV32ZVE32F-NEXT:  .LBB97_7: # %else17
11321; RV32ZVE32F-NEXT:    andi a1, a2, -128
11322; RV32ZVE32F-NEXT:    beqz a1, .LBB97_9
11323; RV32ZVE32F-NEXT:  .LBB97_8: # %cond.load19
11324; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11325; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
11326; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11327; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
11328; RV32ZVE32F-NEXT:  .LBB97_9: # %else20
11329; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
11330; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
11331; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
11332; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
11333; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
11334; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
11335; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
11336; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
11337; RV32ZVE32F-NEXT:    ret
11338; RV32ZVE32F-NEXT:  .LBB97_10: # %cond.load
11339; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11340; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
11341; RV32ZVE32F-NEXT:    andi a1, a2, 2
11342; RV32ZVE32F-NEXT:    beqz a1, .LBB97_2
11343; RV32ZVE32F-NEXT:  .LBB97_11: # %cond.load1
11344; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11345; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
11346; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11347; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
11348; RV32ZVE32F-NEXT:    andi a1, a2, 4
11349; RV32ZVE32F-NEXT:    beqz a1, .LBB97_3
11350; RV32ZVE32F-NEXT:  .LBB97_12: # %cond.load4
11351; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11352; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
11353; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11354; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
11355; RV32ZVE32F-NEXT:    andi a1, a2, 8
11356; RV32ZVE32F-NEXT:    beqz a1, .LBB97_4
11357; RV32ZVE32F-NEXT:  .LBB97_13: # %cond.load7
11358; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11359; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
11360; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11361; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
11362; RV32ZVE32F-NEXT:    andi a1, a2, 16
11363; RV32ZVE32F-NEXT:    beqz a1, .LBB97_5
11364; RV32ZVE32F-NEXT:  .LBB97_14: # %cond.load10
11365; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11366; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
11367; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11368; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
11369; RV32ZVE32F-NEXT:    andi a1, a2, 32
11370; RV32ZVE32F-NEXT:    beqz a1, .LBB97_6
11371; RV32ZVE32F-NEXT:  .LBB97_15: # %cond.load13
11372; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11373; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
11374; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11375; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
11376; RV32ZVE32F-NEXT:    andi a1, a2, 64
11377; RV32ZVE32F-NEXT:    beqz a1, .LBB97_7
11378; RV32ZVE32F-NEXT:  .LBB97_16: # %cond.load16
11379; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11380; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
11381; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11382; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
11383; RV32ZVE32F-NEXT:    andi a1, a2, -128
11384; RV32ZVE32F-NEXT:    bnez a1, .LBB97_8
11385; RV32ZVE32F-NEXT:    j .LBB97_9
11386;
11387; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
11388; RV64ZVE32F:       # %bb.0:
11389; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
11390; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
11391; RV64ZVE32F-NEXT:    andi a3, a2, 1
11392; RV64ZVE32F-NEXT:    beqz a3, .LBB97_2
11393; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
11394; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11395; RV64ZVE32F-NEXT:    slli a3, a3, 3
11396; RV64ZVE32F-NEXT:    add a3, a1, a3
11397; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
11398; RV64ZVE32F-NEXT:  .LBB97_2: # %else
11399; RV64ZVE32F-NEXT:    andi a3, a2, 2
11400; RV64ZVE32F-NEXT:    beqz a3, .LBB97_4
11401; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
11402; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
11403; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
11404; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
11405; RV64ZVE32F-NEXT:    slli a3, a3, 3
11406; RV64ZVE32F-NEXT:    add a3, a1, a3
11407; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
11408; RV64ZVE32F-NEXT:  .LBB97_4: # %else2
11409; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
11410; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
11411; RV64ZVE32F-NEXT:    andi a3, a2, 4
11412; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
11413; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
11414; RV64ZVE32F-NEXT:    bnez a3, .LBB97_14
11415; RV64ZVE32F-NEXT:  # %bb.5: # %else5
11416; RV64ZVE32F-NEXT:    andi a3, a2, 8
11417; RV64ZVE32F-NEXT:    bnez a3, .LBB97_15
11418; RV64ZVE32F-NEXT:  .LBB97_6: # %else8
11419; RV64ZVE32F-NEXT:    andi a3, a2, 16
11420; RV64ZVE32F-NEXT:    bnez a3, .LBB97_16
11421; RV64ZVE32F-NEXT:  .LBB97_7: # %else11
11422; RV64ZVE32F-NEXT:    andi a3, a2, 32
11423; RV64ZVE32F-NEXT:    beqz a3, .LBB97_9
11424; RV64ZVE32F-NEXT:  .LBB97_8: # %cond.load13
11425; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
11426; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11427; RV64ZVE32F-NEXT:    slli a3, a3, 3
11428; RV64ZVE32F-NEXT:    add a3, a1, a3
11429; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
11430; RV64ZVE32F-NEXT:  .LBB97_9: # %else14
11431; RV64ZVE32F-NEXT:    andi a3, a2, 64
11432; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
11433; RV64ZVE32F-NEXT:    beqz a3, .LBB97_11
11434; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
11435; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11436; RV64ZVE32F-NEXT:    slli a3, a3, 3
11437; RV64ZVE32F-NEXT:    add a3, a1, a3
11438; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
11439; RV64ZVE32F-NEXT:  .LBB97_11: # %else17
11440; RV64ZVE32F-NEXT:    andi a2, a2, -128
11441; RV64ZVE32F-NEXT:    beqz a2, .LBB97_13
11442; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
11443; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
11444; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
11445; RV64ZVE32F-NEXT:    slli a2, a2, 3
11446; RV64ZVE32F-NEXT:    add a1, a1, a2
11447; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
11448; RV64ZVE32F-NEXT:  .LBB97_13: # %else20
11449; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
11450; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
11451; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
11452; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
11453; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
11454; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
11455; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
11456; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
11457; RV64ZVE32F-NEXT:    ret
11458; RV64ZVE32F-NEXT:  .LBB97_14: # %cond.load4
11459; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11460; RV64ZVE32F-NEXT:    slli a3, a3, 3
11461; RV64ZVE32F-NEXT:    add a3, a1, a3
11462; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
11463; RV64ZVE32F-NEXT:    andi a3, a2, 8
11464; RV64ZVE32F-NEXT:    beqz a3, .LBB97_6
11465; RV64ZVE32F-NEXT:  .LBB97_15: # %cond.load7
11466; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
11467; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11468; RV64ZVE32F-NEXT:    slli a3, a3, 3
11469; RV64ZVE32F-NEXT:    add a3, a1, a3
11470; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
11471; RV64ZVE32F-NEXT:    andi a3, a2, 16
11472; RV64ZVE32F-NEXT:    beqz a3, .LBB97_7
11473; RV64ZVE32F-NEXT:  .LBB97_16: # %cond.load10
11474; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
11475; RV64ZVE32F-NEXT:    slli a3, a3, 3
11476; RV64ZVE32F-NEXT:    add a3, a1, a3
11477; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
11478; RV64ZVE32F-NEXT:    andi a3, a2, 32
11479; RV64ZVE32F-NEXT:    bnez a3, .LBB97_8
11480; RV64ZVE32F-NEXT:    j .LBB97_9
11481  %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
11482  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11483  ret <8 x double> %v
11484}
11485
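; Same gather of <8 x double>, but the i8 indices are sign-extended to i64 before being scaled into the double base.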
11486define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11487; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
11488; RV32V:       # %bb.0:
11489; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
11490; RV32V-NEXT:    vsext.vf4 v10, v8
11491; RV32V-NEXT:    vsll.vi v8, v10, 3
11492; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
11493; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
11494; RV32V-NEXT:    vmv.v.v v8, v12
11495; RV32V-NEXT:    ret
11496;
11497; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
11498; RV64V:       # %bb.0:
11499; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
11500; RV64V-NEXT:    vsext.vf8 v16, v8
11501; RV64V-NEXT:    vsll.vi v8, v16, 3
11502; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
11503; RV64V-NEXT:    vmv.v.v v8, v12
11504; RV64V-NEXT:    ret
11505;
11506; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
11507; RV32ZVE32F:       # %bb.0:
11508; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
11509; RV32ZVE32F-NEXT:    vsext.vf4 v10, v8
11510; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
11511; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
11512; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
11513; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
11514; RV32ZVE32F-NEXT:    andi a3, a2, 1
11515; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
11516; RV32ZVE32F-NEXT:    bnez a3, .LBB98_10
11517; RV32ZVE32F-NEXT:  # %bb.1: # %else
11518; RV32ZVE32F-NEXT:    andi a1, a2, 2
11519; RV32ZVE32F-NEXT:    bnez a1, .LBB98_11
11520; RV32ZVE32F-NEXT:  .LBB98_2: # %else2
11521; RV32ZVE32F-NEXT:    andi a1, a2, 4
11522; RV32ZVE32F-NEXT:    bnez a1, .LBB98_12
11523; RV32ZVE32F-NEXT:  .LBB98_3: # %else5
11524; RV32ZVE32F-NEXT:    andi a1, a2, 8
11525; RV32ZVE32F-NEXT:    bnez a1, .LBB98_13
11526; RV32ZVE32F-NEXT:  .LBB98_4: # %else8
11527; RV32ZVE32F-NEXT:    andi a1, a2, 16
11528; RV32ZVE32F-NEXT:    bnez a1, .LBB98_14
11529; RV32ZVE32F-NEXT:  .LBB98_5: # %else11
11530; RV32ZVE32F-NEXT:    andi a1, a2, 32
11531; RV32ZVE32F-NEXT:    bnez a1, .LBB98_15
11532; RV32ZVE32F-NEXT:  .LBB98_6: # %else14
11533; RV32ZVE32F-NEXT:    andi a1, a2, 64
11534; RV32ZVE32F-NEXT:    bnez a1, .LBB98_16
11535; RV32ZVE32F-NEXT:  .LBB98_7: # %else17
11536; RV32ZVE32F-NEXT:    andi a1, a2, -128
11537; RV32ZVE32F-NEXT:    beqz a1, .LBB98_9
11538; RV32ZVE32F-NEXT:  .LBB98_8: # %cond.load19
11539; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11540; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
11541; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11542; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
11543; RV32ZVE32F-NEXT:  .LBB98_9: # %else20
11544; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
11545; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
11546; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
11547; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
11548; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
11549; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
11550; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
11551; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
11552; RV32ZVE32F-NEXT:    ret
11553; RV32ZVE32F-NEXT:  .LBB98_10: # %cond.load
11554; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11555; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
11556; RV32ZVE32F-NEXT:    andi a1, a2, 2
11557; RV32ZVE32F-NEXT:    beqz a1, .LBB98_2
11558; RV32ZVE32F-NEXT:  .LBB98_11: # %cond.load1
11559; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11560; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
11561; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11562; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
11563; RV32ZVE32F-NEXT:    andi a1, a2, 4
11564; RV32ZVE32F-NEXT:    beqz a1, .LBB98_3
11565; RV32ZVE32F-NEXT:  .LBB98_12: # %cond.load4
11566; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11567; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
11568; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11569; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
11570; RV32ZVE32F-NEXT:    andi a1, a2, 8
11571; RV32ZVE32F-NEXT:    beqz a1, .LBB98_4
11572; RV32ZVE32F-NEXT:  .LBB98_13: # %cond.load7
11573; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11574; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
11575; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11576; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
11577; RV32ZVE32F-NEXT:    andi a1, a2, 16
11578; RV32ZVE32F-NEXT:    beqz a1, .LBB98_5
11579; RV32ZVE32F-NEXT:  .LBB98_14: # %cond.load10
11580; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11581; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
11582; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11583; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
11584; RV32ZVE32F-NEXT:    andi a1, a2, 32
11585; RV32ZVE32F-NEXT:    beqz a1, .LBB98_6
11586; RV32ZVE32F-NEXT:  .LBB98_15: # %cond.load13
11587; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11588; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
11589; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11590; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
11591; RV32ZVE32F-NEXT:    andi a1, a2, 64
11592; RV32ZVE32F-NEXT:    beqz a1, .LBB98_7
11593; RV32ZVE32F-NEXT:  .LBB98_16: # %cond.load16
11594; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11595; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
11596; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11597; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
11598; RV32ZVE32F-NEXT:    andi a1, a2, -128
11599; RV32ZVE32F-NEXT:    bnez a1, .LBB98_8
11600; RV32ZVE32F-NEXT:    j .LBB98_9
11601;
11602; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
11603; RV64ZVE32F:       # %bb.0:
11604; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
11605; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
11606; RV64ZVE32F-NEXT:    andi a3, a2, 1
11607; RV64ZVE32F-NEXT:    beqz a3, .LBB98_2
11608; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
11609; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11610; RV64ZVE32F-NEXT:    slli a3, a3, 3
11611; RV64ZVE32F-NEXT:    add a3, a1, a3
11612; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
11613; RV64ZVE32F-NEXT:  .LBB98_2: # %else
11614; RV64ZVE32F-NEXT:    andi a3, a2, 2
11615; RV64ZVE32F-NEXT:    beqz a3, .LBB98_4
11616; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
11617; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
11618; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
11619; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
11620; RV64ZVE32F-NEXT:    slli a3, a3, 3
11621; RV64ZVE32F-NEXT:    add a3, a1, a3
11622; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
11623; RV64ZVE32F-NEXT:  .LBB98_4: # %else2
11624; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
11625; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
11626; RV64ZVE32F-NEXT:    andi a3, a2, 4
11627; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
11628; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
11629; RV64ZVE32F-NEXT:    bnez a3, .LBB98_14
11630; RV64ZVE32F-NEXT:  # %bb.5: # %else5
11631; RV64ZVE32F-NEXT:    andi a3, a2, 8
11632; RV64ZVE32F-NEXT:    bnez a3, .LBB98_15
11633; RV64ZVE32F-NEXT:  .LBB98_6: # %else8
11634; RV64ZVE32F-NEXT:    andi a3, a2, 16
11635; RV64ZVE32F-NEXT:    bnez a3, .LBB98_16
11636; RV64ZVE32F-NEXT:  .LBB98_7: # %else11
11637; RV64ZVE32F-NEXT:    andi a3, a2, 32
11638; RV64ZVE32F-NEXT:    beqz a3, .LBB98_9
11639; RV64ZVE32F-NEXT:  .LBB98_8: # %cond.load13
11640; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
11641; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11642; RV64ZVE32F-NEXT:    slli a3, a3, 3
11643; RV64ZVE32F-NEXT:    add a3, a1, a3
11644; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
11645; RV64ZVE32F-NEXT:  .LBB98_9: # %else14
11646; RV64ZVE32F-NEXT:    andi a3, a2, 64
11647; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
11648; RV64ZVE32F-NEXT:    beqz a3, .LBB98_11
11649; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
11650; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11651; RV64ZVE32F-NEXT:    slli a3, a3, 3
11652; RV64ZVE32F-NEXT:    add a3, a1, a3
11653; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
11654; RV64ZVE32F-NEXT:  .LBB98_11: # %else17
11655; RV64ZVE32F-NEXT:    andi a2, a2, -128
11656; RV64ZVE32F-NEXT:    beqz a2, .LBB98_13
11657; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
11658; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
11659; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
11660; RV64ZVE32F-NEXT:    slli a2, a2, 3
11661; RV64ZVE32F-NEXT:    add a1, a1, a2
11662; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
11663; RV64ZVE32F-NEXT:  .LBB98_13: # %else20
11664; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
11665; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
11666; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
11667; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
11668; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
11669; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
11670; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
11671; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
11672; RV64ZVE32F-NEXT:    ret
11673; RV64ZVE32F-NEXT:  .LBB98_14: # %cond.load4
11674; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11675; RV64ZVE32F-NEXT:    slli a3, a3, 3
11676; RV64ZVE32F-NEXT:    add a3, a1, a3
11677; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
11678; RV64ZVE32F-NEXT:    andi a3, a2, 8
11679; RV64ZVE32F-NEXT:    beqz a3, .LBB98_6
11680; RV64ZVE32F-NEXT:  .LBB98_15: # %cond.load7
11681; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
11682; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11683; RV64ZVE32F-NEXT:    slli a3, a3, 3
11684; RV64ZVE32F-NEXT:    add a3, a1, a3
11685; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
11686; RV64ZVE32F-NEXT:    andi a3, a2, 16
11687; RV64ZVE32F-NEXT:    beqz a3, .LBB98_7
11688; RV64ZVE32F-NEXT:  .LBB98_16: # %cond.load10
11689; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
11690; RV64ZVE32F-NEXT:    slli a3, a3, 3
11691; RV64ZVE32F-NEXT:    add a3, a1, a3
11692; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
11693; RV64ZVE32F-NEXT:    andi a3, a2, 32
11694; RV64ZVE32F-NEXT:    bnez a3, .LBB98_8
11695; RV64ZVE32F-NEXT:    j .LBB98_9
11696  %eidxs = sext <8 x i8> %idxs to <8 x i64>
11697  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
11698  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11699  ret <8 x double> %v
11700}
11701
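; As above, but with the i8 indices zero-extended to i64; the RV64ZVE32F scalarized lowering masks each index with 255 instead of sign-extending.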
11702define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11703; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
11704; RV32V:       # %bb.0:
11705; RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
11706; RV32V-NEXT:    vzext.vf2 v9, v8
11707; RV32V-NEXT:    vsll.vi v8, v9, 3
11708; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
11709; RV32V-NEXT:    vluxei16.v v12, (a0), v8, v0.t
11710; RV32V-NEXT:    vmv.v.v v8, v12
11711; RV32V-NEXT:    ret
11712;
11713; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
11714; RV64V:       # %bb.0:
11715; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
11716; RV64V-NEXT:    vzext.vf2 v9, v8
11717; RV64V-NEXT:    vsll.vi v8, v9, 3
11718; RV64V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
11719; RV64V-NEXT:    vluxei16.v v12, (a0), v8, v0.t
11720; RV64V-NEXT:    vmv.v.v v8, v12
11721; RV64V-NEXT:    ret
11722;
11723; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
11724; RV32ZVE32F:       # %bb.0:
11725; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
11726; RV32ZVE32F-NEXT:    vzext.vf4 v10, v8
11727; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
11728; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
11729; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
11730; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
11731; RV32ZVE32F-NEXT:    andi a3, a2, 1
11732; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
11733; RV32ZVE32F-NEXT:    bnez a3, .LBB99_10
11734; RV32ZVE32F-NEXT:  # %bb.1: # %else
11735; RV32ZVE32F-NEXT:    andi a1, a2, 2
11736; RV32ZVE32F-NEXT:    bnez a1, .LBB99_11
11737; RV32ZVE32F-NEXT:  .LBB99_2: # %else2
11738; RV32ZVE32F-NEXT:    andi a1, a2, 4
11739; RV32ZVE32F-NEXT:    bnez a1, .LBB99_12
11740; RV32ZVE32F-NEXT:  .LBB99_3: # %else5
11741; RV32ZVE32F-NEXT:    andi a1, a2, 8
11742; RV32ZVE32F-NEXT:    bnez a1, .LBB99_13
11743; RV32ZVE32F-NEXT:  .LBB99_4: # %else8
11744; RV32ZVE32F-NEXT:    andi a1, a2, 16
11745; RV32ZVE32F-NEXT:    bnez a1, .LBB99_14
11746; RV32ZVE32F-NEXT:  .LBB99_5: # %else11
11747; RV32ZVE32F-NEXT:    andi a1, a2, 32
11748; RV32ZVE32F-NEXT:    bnez a1, .LBB99_15
11749; RV32ZVE32F-NEXT:  .LBB99_6: # %else14
11750; RV32ZVE32F-NEXT:    andi a1, a2, 64
11751; RV32ZVE32F-NEXT:    bnez a1, .LBB99_16
11752; RV32ZVE32F-NEXT:  .LBB99_7: # %else17
11753; RV32ZVE32F-NEXT:    andi a1, a2, -128
11754; RV32ZVE32F-NEXT:    beqz a1, .LBB99_9
11755; RV32ZVE32F-NEXT:  .LBB99_8: # %cond.load19
11756; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11757; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
11758; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11759; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
11760; RV32ZVE32F-NEXT:  .LBB99_9: # %else20
11761; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
11762; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
11763; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
11764; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
11765; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
11766; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
11767; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
11768; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
11769; RV32ZVE32F-NEXT:    ret
11770; RV32ZVE32F-NEXT:  .LBB99_10: # %cond.load
11771; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11772; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
11773; RV32ZVE32F-NEXT:    andi a1, a2, 2
11774; RV32ZVE32F-NEXT:    beqz a1, .LBB99_2
11775; RV32ZVE32F-NEXT:  .LBB99_11: # %cond.load1
11776; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11777; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
11778; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11779; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
11780; RV32ZVE32F-NEXT:    andi a1, a2, 4
11781; RV32ZVE32F-NEXT:    beqz a1, .LBB99_3
11782; RV32ZVE32F-NEXT:  .LBB99_12: # %cond.load4
11783; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11784; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
11785; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11786; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
11787; RV32ZVE32F-NEXT:    andi a1, a2, 8
11788; RV32ZVE32F-NEXT:    beqz a1, .LBB99_4
11789; RV32ZVE32F-NEXT:  .LBB99_13: # %cond.load7
11790; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
11791; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
11792; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11793; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
11794; RV32ZVE32F-NEXT:    andi a1, a2, 16
11795; RV32ZVE32F-NEXT:    beqz a1, .LBB99_5
11796; RV32ZVE32F-NEXT:  .LBB99_14: # %cond.load10
11797; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11798; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
11799; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11800; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
11801; RV32ZVE32F-NEXT:    andi a1, a2, 32
11802; RV32ZVE32F-NEXT:    beqz a1, .LBB99_6
11803; RV32ZVE32F-NEXT:  .LBB99_15: # %cond.load13
11804; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11805; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
11806; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11807; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
11808; RV32ZVE32F-NEXT:    andi a1, a2, 64
11809; RV32ZVE32F-NEXT:    beqz a1, .LBB99_7
11810; RV32ZVE32F-NEXT:  .LBB99_16: # %cond.load16
11811; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11812; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
11813; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
11814; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
11815; RV32ZVE32F-NEXT:    andi a1, a2, -128
11816; RV32ZVE32F-NEXT:    bnez a1, .LBB99_8
11817; RV32ZVE32F-NEXT:    j .LBB99_9
11818;
11819; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
11820; RV64ZVE32F:       # %bb.0:
11821; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
11822; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
11823; RV64ZVE32F-NEXT:    andi a3, a2, 1
11824; RV64ZVE32F-NEXT:    beqz a3, .LBB99_2
11825; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
11826; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11827; RV64ZVE32F-NEXT:    andi a3, a3, 255
11828; RV64ZVE32F-NEXT:    slli a3, a3, 3
11829; RV64ZVE32F-NEXT:    add a3, a1, a3
11830; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
11831; RV64ZVE32F-NEXT:  .LBB99_2: # %else
11832; RV64ZVE32F-NEXT:    andi a3, a2, 2
11833; RV64ZVE32F-NEXT:    beqz a3, .LBB99_4
11834; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
11835; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
11836; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
11837; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
11838; RV64ZVE32F-NEXT:    andi a3, a3, 255
11839; RV64ZVE32F-NEXT:    slli a3, a3, 3
11840; RV64ZVE32F-NEXT:    add a3, a1, a3
11841; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
11842; RV64ZVE32F-NEXT:  .LBB99_4: # %else2
11843; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
11844; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
11845; RV64ZVE32F-NEXT:    andi a3, a2, 4
11846; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
11847; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
11848; RV64ZVE32F-NEXT:    bnez a3, .LBB99_14
11849; RV64ZVE32F-NEXT:  # %bb.5: # %else5
11850; RV64ZVE32F-NEXT:    andi a3, a2, 8
11851; RV64ZVE32F-NEXT:    bnez a3, .LBB99_15
11852; RV64ZVE32F-NEXT:  .LBB99_6: # %else8
11853; RV64ZVE32F-NEXT:    andi a3, a2, 16
11854; RV64ZVE32F-NEXT:    bnez a3, .LBB99_16
11855; RV64ZVE32F-NEXT:  .LBB99_7: # %else11
11856; RV64ZVE32F-NEXT:    andi a3, a2, 32
11857; RV64ZVE32F-NEXT:    beqz a3, .LBB99_9
11858; RV64ZVE32F-NEXT:  .LBB99_8: # %cond.load13
11859; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
11860; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11861; RV64ZVE32F-NEXT:    andi a3, a3, 255
11862; RV64ZVE32F-NEXT:    slli a3, a3, 3
11863; RV64ZVE32F-NEXT:    add a3, a1, a3
11864; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
11865; RV64ZVE32F-NEXT:  .LBB99_9: # %else14
11866; RV64ZVE32F-NEXT:    andi a3, a2, 64
11867; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
11868; RV64ZVE32F-NEXT:    beqz a3, .LBB99_11
11869; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
11870; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11871; RV64ZVE32F-NEXT:    andi a3, a3, 255
11872; RV64ZVE32F-NEXT:    slli a3, a3, 3
11873; RV64ZVE32F-NEXT:    add a3, a1, a3
11874; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
11875; RV64ZVE32F-NEXT:  .LBB99_11: # %else17
11876; RV64ZVE32F-NEXT:    andi a2, a2, -128
11877; RV64ZVE32F-NEXT:    beqz a2, .LBB99_13
11878; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
11879; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
11880; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
11881; RV64ZVE32F-NEXT:    andi a2, a2, 255
11882; RV64ZVE32F-NEXT:    slli a2, a2, 3
11883; RV64ZVE32F-NEXT:    add a1, a1, a2
11884; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
11885; RV64ZVE32F-NEXT:  .LBB99_13: # %else20
11886; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
11887; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
11888; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
11889; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
11890; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
11891; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
11892; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
11893; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
11894; RV64ZVE32F-NEXT:    ret
11895; RV64ZVE32F-NEXT:  .LBB99_14: # %cond.load4
11896; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11897; RV64ZVE32F-NEXT:    andi a3, a3, 255
11898; RV64ZVE32F-NEXT:    slli a3, a3, 3
11899; RV64ZVE32F-NEXT:    add a3, a1, a3
11900; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
11901; RV64ZVE32F-NEXT:    andi a3, a2, 8
11902; RV64ZVE32F-NEXT:    beqz a3, .LBB99_6
11903; RV64ZVE32F-NEXT:  .LBB99_15: # %cond.load7
11904; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
11905; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
11906; RV64ZVE32F-NEXT:    andi a3, a3, 255
11907; RV64ZVE32F-NEXT:    slli a3, a3, 3
11908; RV64ZVE32F-NEXT:    add a3, a1, a3
11909; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
11910; RV64ZVE32F-NEXT:    andi a3, a2, 16
11911; RV64ZVE32F-NEXT:    beqz a3, .LBB99_7
11912; RV64ZVE32F-NEXT:  .LBB99_16: # %cond.load10
11913; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
11914; RV64ZVE32F-NEXT:    andi a3, a3, 255
11915; RV64ZVE32F-NEXT:    slli a3, a3, 3
11916; RV64ZVE32F-NEXT:    add a3, a1, a3
11917; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
11918; RV64ZVE32F-NEXT:    andi a3, a2, 32
11919; RV64ZVE32F-NEXT:    bnez a3, .LBB99_8
11920; RV64ZVE32F-NEXT:    j .LBB99_9
11921  %eidxs = zext <8 x i8> %idxs to <8 x i64>
11922  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
11923  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11924  ret <8 x double> %v
11925}
11926
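; Gather of <8 x double> addressed by i16 indices scaled by the element size.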
11927define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11928; RV32V-LABEL: mgather_baseidx_v8i16_v8f64:
11929; RV32V:       # %bb.0:
11930; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
11931; RV32V-NEXT:    vsext.vf2 v10, v8
11932; RV32V-NEXT:    vsll.vi v8, v10, 3
11933; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
11934; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
11935; RV32V-NEXT:    vmv.v.v v8, v12
11936; RV32V-NEXT:    ret
11937;
11938; RV64V-LABEL: mgather_baseidx_v8i16_v8f64:
11939; RV64V:       # %bb.0:
11940; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
11941; RV64V-NEXT:    vsext.vf4 v16, v8
11942; RV64V-NEXT:    vsll.vi v8, v16, 3
11943; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
11944; RV64V-NEXT:    vmv.v.v v8, v12
11945; RV64V-NEXT:    ret
11946;
11947; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
11948; RV32ZVE32F:       # %bb.0:
11949; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
11950; RV32ZVE32F-NEXT:    vsext.vf2 v10, v8
11951; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
11952; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
11953; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
11954; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
11955; RV32ZVE32F-NEXT:    andi a3, a2, 1
11956; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
11957; RV32ZVE32F-NEXT:    bnez a3, .LBB100_10
11958; RV32ZVE32F-NEXT:  # %bb.1: # %else
11959; RV32ZVE32F-NEXT:    andi a1, a2, 2
11960; RV32ZVE32F-NEXT:    bnez a1, .LBB100_11
11961; RV32ZVE32F-NEXT:  .LBB100_2: # %else2
11962; RV32ZVE32F-NEXT:    andi a1, a2, 4
11963; RV32ZVE32F-NEXT:    bnez a1, .LBB100_12
11964; RV32ZVE32F-NEXT:  .LBB100_3: # %else5
11965; RV32ZVE32F-NEXT:    andi a1, a2, 8
11966; RV32ZVE32F-NEXT:    bnez a1, .LBB100_13
11967; RV32ZVE32F-NEXT:  .LBB100_4: # %else8
11968; RV32ZVE32F-NEXT:    andi a1, a2, 16
11969; RV32ZVE32F-NEXT:    bnez a1, .LBB100_14
11970; RV32ZVE32F-NEXT:  .LBB100_5: # %else11
11971; RV32ZVE32F-NEXT:    andi a1, a2, 32
11972; RV32ZVE32F-NEXT:    bnez a1, .LBB100_15
11973; RV32ZVE32F-NEXT:  .LBB100_6: # %else14
11974; RV32ZVE32F-NEXT:    andi a1, a2, 64
11975; RV32ZVE32F-NEXT:    bnez a1, .LBB100_16
11976; RV32ZVE32F-NEXT:  .LBB100_7: # %else17
11977; RV32ZVE32F-NEXT:    andi a1, a2, -128
11978; RV32ZVE32F-NEXT:    beqz a1, .LBB100_9
11979; RV32ZVE32F-NEXT:  .LBB100_8: # %cond.load19
11980; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
11981; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
11982; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11983; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
11984; RV32ZVE32F-NEXT:  .LBB100_9: # %else20
11985; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
11986; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
11987; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
11988; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
11989; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
11990; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
11991; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
11992; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
11993; RV32ZVE32F-NEXT:    ret
11994; RV32ZVE32F-NEXT:  .LBB100_10: # %cond.load
11995; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
11996; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
11997; RV32ZVE32F-NEXT:    andi a1, a2, 2
11998; RV32ZVE32F-NEXT:    beqz a1, .LBB100_2
11999; RV32ZVE32F-NEXT:  .LBB100_11: # %cond.load1
12000; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12001; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
12002; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12003; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
12004; RV32ZVE32F-NEXT:    andi a1, a2, 4
12005; RV32ZVE32F-NEXT:    beqz a1, .LBB100_3
12006; RV32ZVE32F-NEXT:  .LBB100_12: # %cond.load4
12007; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12008; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
12009; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12010; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
12011; RV32ZVE32F-NEXT:    andi a1, a2, 8
12012; RV32ZVE32F-NEXT:    beqz a1, .LBB100_4
12013; RV32ZVE32F-NEXT:  .LBB100_13: # %cond.load7
12014; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12015; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
12016; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12017; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
12018; RV32ZVE32F-NEXT:    andi a1, a2, 16
12019; RV32ZVE32F-NEXT:    beqz a1, .LBB100_5
12020; RV32ZVE32F-NEXT:  .LBB100_14: # %cond.load10
12021; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12022; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
12023; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12024; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
12025; RV32ZVE32F-NEXT:    andi a1, a2, 32
12026; RV32ZVE32F-NEXT:    beqz a1, .LBB100_6
12027; RV32ZVE32F-NEXT:  .LBB100_15: # %cond.load13
12028; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12029; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
12030; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12031; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
12032; RV32ZVE32F-NEXT:    andi a1, a2, 64
12033; RV32ZVE32F-NEXT:    beqz a1, .LBB100_7
12034; RV32ZVE32F-NEXT:  .LBB100_16: # %cond.load16
12035; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12036; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
12037; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12038; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
12039; RV32ZVE32F-NEXT:    andi a1, a2, -128
12040; RV32ZVE32F-NEXT:    bnez a1, .LBB100_8
12041; RV32ZVE32F-NEXT:    j .LBB100_9
12042;
12043; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
12044; RV64ZVE32F:       # %bb.0:
12045; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
12046; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
12047; RV64ZVE32F-NEXT:    andi a3, a2, 1
12048; RV64ZVE32F-NEXT:    beqz a3, .LBB100_2
12049; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
12050; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
12051; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12052; RV64ZVE32F-NEXT:    slli a3, a3, 3
12053; RV64ZVE32F-NEXT:    add a3, a1, a3
12054; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
12055; RV64ZVE32F-NEXT:  .LBB100_2: # %else
12056; RV64ZVE32F-NEXT:    andi a3, a2, 2
12057; RV64ZVE32F-NEXT:    beqz a3, .LBB100_4
12058; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
12059; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
12060; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
12061; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
12062; RV64ZVE32F-NEXT:    slli a3, a3, 3
12063; RV64ZVE32F-NEXT:    add a3, a1, a3
12064; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
12065; RV64ZVE32F-NEXT:  .LBB100_4: # %else2
12066; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
12067; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
12068; RV64ZVE32F-NEXT:    andi a3, a2, 4
12069; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
12070; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
12071; RV64ZVE32F-NEXT:    bnez a3, .LBB100_14
12072; RV64ZVE32F-NEXT:  # %bb.5: # %else5
12073; RV64ZVE32F-NEXT:    andi a3, a2, 8
12074; RV64ZVE32F-NEXT:    bnez a3, .LBB100_15
12075; RV64ZVE32F-NEXT:  .LBB100_6: # %else8
12076; RV64ZVE32F-NEXT:    andi a3, a2, 16
12077; RV64ZVE32F-NEXT:    bnez a3, .LBB100_16
12078; RV64ZVE32F-NEXT:  .LBB100_7: # %else11
12079; RV64ZVE32F-NEXT:    andi a3, a2, 32
12080; RV64ZVE32F-NEXT:    beqz a3, .LBB100_9
12081; RV64ZVE32F-NEXT:  .LBB100_8: # %cond.load13
12082; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
12083; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12084; RV64ZVE32F-NEXT:    slli a3, a3, 3
12085; RV64ZVE32F-NEXT:    add a3, a1, a3
12086; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
12087; RV64ZVE32F-NEXT:  .LBB100_9: # %else14
12088; RV64ZVE32F-NEXT:    andi a3, a2, 64
12089; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
12090; RV64ZVE32F-NEXT:    beqz a3, .LBB100_11
12091; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
12092; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12093; RV64ZVE32F-NEXT:    slli a3, a3, 3
12094; RV64ZVE32F-NEXT:    add a3, a1, a3
12095; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
12096; RV64ZVE32F-NEXT:  .LBB100_11: # %else17
12097; RV64ZVE32F-NEXT:    andi a2, a2, -128
12098; RV64ZVE32F-NEXT:    beqz a2, .LBB100_13
12099; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
12100; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12101; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
12102; RV64ZVE32F-NEXT:    slli a2, a2, 3
12103; RV64ZVE32F-NEXT:    add a1, a1, a2
12104; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
12105; RV64ZVE32F-NEXT:  .LBB100_13: # %else20
12106; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
12107; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
12108; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
12109; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
12110; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
12111; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
12112; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
12113; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
12114; RV64ZVE32F-NEXT:    ret
12115; RV64ZVE32F-NEXT:  .LBB100_14: # %cond.load4
12116; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12117; RV64ZVE32F-NEXT:    slli a3, a3, 3
12118; RV64ZVE32F-NEXT:    add a3, a1, a3
12119; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
12120; RV64ZVE32F-NEXT:    andi a3, a2, 8
12121; RV64ZVE32F-NEXT:    beqz a3, .LBB100_6
12122; RV64ZVE32F-NEXT:  .LBB100_15: # %cond.load7
12123; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12124; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12125; RV64ZVE32F-NEXT:    slli a3, a3, 3
12126; RV64ZVE32F-NEXT:    add a3, a1, a3
12127; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
12128; RV64ZVE32F-NEXT:    andi a3, a2, 16
12129; RV64ZVE32F-NEXT:    beqz a3, .LBB100_7
12130; RV64ZVE32F-NEXT:  .LBB100_16: # %cond.load10
12131; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
12132; RV64ZVE32F-NEXT:    slli a3, a3, 3
12133; RV64ZVE32F-NEXT:    add a3, a1, a3
12134; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
12135; RV64ZVE32F-NEXT:    andi a3, a2, 32
12136; RV64ZVE32F-NEXT:    bnez a3, .LBB100_8
12137; RV64ZVE32F-NEXT:    j .LBB100_9
12138  %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
12139  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12140  ret <8 x double> %v
12141}
12142
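; i16 indices sign-extended to i64 before scaling into the double base.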
12143define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
12144; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
12145; RV32V:       # %bb.0:
12146; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12147; RV32V-NEXT:    vsext.vf2 v10, v8
12148; RV32V-NEXT:    vsll.vi v8, v10, 3
12149; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
12150; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
12151; RV32V-NEXT:    vmv.v.v v8, v12
12152; RV32V-NEXT:    ret
12153;
12154; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
12155; RV64V:       # %bb.0:
12156; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
12157; RV64V-NEXT:    vsext.vf4 v16, v8
12158; RV64V-NEXT:    vsll.vi v8, v16, 3
12159; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
12160; RV64V-NEXT:    vmv.v.v v8, v12
12161; RV64V-NEXT:    ret
12162;
12163; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
12164; RV32ZVE32F:       # %bb.0:
12165; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12166; RV32ZVE32F-NEXT:    vsext.vf2 v10, v8
12167; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
12168; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
12169; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
12170; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
12171; RV32ZVE32F-NEXT:    andi a3, a2, 1
12172; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
12173; RV32ZVE32F-NEXT:    bnez a3, .LBB101_10
12174; RV32ZVE32F-NEXT:  # %bb.1: # %else
12175; RV32ZVE32F-NEXT:    andi a1, a2, 2
12176; RV32ZVE32F-NEXT:    bnez a1, .LBB101_11
12177; RV32ZVE32F-NEXT:  .LBB101_2: # %else2
12178; RV32ZVE32F-NEXT:    andi a1, a2, 4
12179; RV32ZVE32F-NEXT:    bnez a1, .LBB101_12
12180; RV32ZVE32F-NEXT:  .LBB101_3: # %else5
12181; RV32ZVE32F-NEXT:    andi a1, a2, 8
12182; RV32ZVE32F-NEXT:    bnez a1, .LBB101_13
12183; RV32ZVE32F-NEXT:  .LBB101_4: # %else8
12184; RV32ZVE32F-NEXT:    andi a1, a2, 16
12185; RV32ZVE32F-NEXT:    bnez a1, .LBB101_14
12186; RV32ZVE32F-NEXT:  .LBB101_5: # %else11
12187; RV32ZVE32F-NEXT:    andi a1, a2, 32
12188; RV32ZVE32F-NEXT:    bnez a1, .LBB101_15
12189; RV32ZVE32F-NEXT:  .LBB101_6: # %else14
12190; RV32ZVE32F-NEXT:    andi a1, a2, 64
12191; RV32ZVE32F-NEXT:    bnez a1, .LBB101_16
12192; RV32ZVE32F-NEXT:  .LBB101_7: # %else17
12193; RV32ZVE32F-NEXT:    andi a1, a2, -128
12194; RV32ZVE32F-NEXT:    beqz a1, .LBB101_9
12195; RV32ZVE32F-NEXT:  .LBB101_8: # %cond.load19
12196; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12197; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
12198; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
12199; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
12200; RV32ZVE32F-NEXT:  .LBB101_9: # %else20
12201; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
12202; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
12203; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
12204; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
12205; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
12206; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
12207; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
12208; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
12209; RV32ZVE32F-NEXT:    ret
12210; RV32ZVE32F-NEXT:  .LBB101_10: # %cond.load
12211; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
12212; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
12213; RV32ZVE32F-NEXT:    andi a1, a2, 2
12214; RV32ZVE32F-NEXT:    beqz a1, .LBB101_2
12215; RV32ZVE32F-NEXT:  .LBB101_11: # %cond.load1
12216; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12217; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
12218; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12219; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
12220; RV32ZVE32F-NEXT:    andi a1, a2, 4
12221; RV32ZVE32F-NEXT:    beqz a1, .LBB101_3
12222; RV32ZVE32F-NEXT:  .LBB101_12: # %cond.load4
12223; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12224; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
12225; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12226; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
12227; RV32ZVE32F-NEXT:    andi a1, a2, 8
12228; RV32ZVE32F-NEXT:    beqz a1, .LBB101_4
12229; RV32ZVE32F-NEXT:  .LBB101_13: # %cond.load7
12230; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12231; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
12232; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12233; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
12234; RV32ZVE32F-NEXT:    andi a1, a2, 16
12235; RV32ZVE32F-NEXT:    beqz a1, .LBB101_5
12236; RV32ZVE32F-NEXT:  .LBB101_14: # %cond.load10
12237; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12238; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
12239; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12240; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
12241; RV32ZVE32F-NEXT:    andi a1, a2, 32
12242; RV32ZVE32F-NEXT:    beqz a1, .LBB101_6
12243; RV32ZVE32F-NEXT:  .LBB101_15: # %cond.load13
12244; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12245; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
12246; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12247; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
12248; RV32ZVE32F-NEXT:    andi a1, a2, 64
12249; RV32ZVE32F-NEXT:    beqz a1, .LBB101_7
12250; RV32ZVE32F-NEXT:  .LBB101_16: # %cond.load16
12251; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12252; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
12253; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12254; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
12255; RV32ZVE32F-NEXT:    andi a1, a2, -128
12256; RV32ZVE32F-NEXT:    bnez a1, .LBB101_8
12257; RV32ZVE32F-NEXT:    j .LBB101_9
12258;
12259; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
12260; RV64ZVE32F:       # %bb.0:
12261; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
12262; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
12263; RV64ZVE32F-NEXT:    andi a3, a2, 1
12264; RV64ZVE32F-NEXT:    beqz a3, .LBB101_2
12265; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
12266; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
12267; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12268; RV64ZVE32F-NEXT:    slli a3, a3, 3
12269; RV64ZVE32F-NEXT:    add a3, a1, a3
12270; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
12271; RV64ZVE32F-NEXT:  .LBB101_2: # %else
12272; RV64ZVE32F-NEXT:    andi a3, a2, 2
12273; RV64ZVE32F-NEXT:    beqz a3, .LBB101_4
12274; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
12275; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
12276; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
12277; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
12278; RV64ZVE32F-NEXT:    slli a3, a3, 3
12279; RV64ZVE32F-NEXT:    add a3, a1, a3
12280; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
12281; RV64ZVE32F-NEXT:  .LBB101_4: # %else2
12282; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
12283; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
12284; RV64ZVE32F-NEXT:    andi a3, a2, 4
12285; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
12286; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
12287; RV64ZVE32F-NEXT:    bnez a3, .LBB101_14
12288; RV64ZVE32F-NEXT:  # %bb.5: # %else5
12289; RV64ZVE32F-NEXT:    andi a3, a2, 8
12290; RV64ZVE32F-NEXT:    bnez a3, .LBB101_15
12291; RV64ZVE32F-NEXT:  .LBB101_6: # %else8
12292; RV64ZVE32F-NEXT:    andi a3, a2, 16
12293; RV64ZVE32F-NEXT:    bnez a3, .LBB101_16
12294; RV64ZVE32F-NEXT:  .LBB101_7: # %else11
12295; RV64ZVE32F-NEXT:    andi a3, a2, 32
12296; RV64ZVE32F-NEXT:    beqz a3, .LBB101_9
12297; RV64ZVE32F-NEXT:  .LBB101_8: # %cond.load13
12298; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
12299; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12300; RV64ZVE32F-NEXT:    slli a3, a3, 3
12301; RV64ZVE32F-NEXT:    add a3, a1, a3
12302; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
12303; RV64ZVE32F-NEXT:  .LBB101_9: # %else14
12304; RV64ZVE32F-NEXT:    andi a3, a2, 64
12305; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
12306; RV64ZVE32F-NEXT:    beqz a3, .LBB101_11
12307; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
12308; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12309; RV64ZVE32F-NEXT:    slli a3, a3, 3
12310; RV64ZVE32F-NEXT:    add a3, a1, a3
12311; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
12312; RV64ZVE32F-NEXT:  .LBB101_11: # %else17
12313; RV64ZVE32F-NEXT:    andi a2, a2, -128
12314; RV64ZVE32F-NEXT:    beqz a2, .LBB101_13
12315; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
12316; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12317; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
12318; RV64ZVE32F-NEXT:    slli a2, a2, 3
12319; RV64ZVE32F-NEXT:    add a1, a1, a2
12320; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
12321; RV64ZVE32F-NEXT:  .LBB101_13: # %else20
12322; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
12323; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
12324; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
12325; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
12326; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
12327; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
12328; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
12329; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
12330; RV64ZVE32F-NEXT:    ret
12331; RV64ZVE32F-NEXT:  .LBB101_14: # %cond.load4
12332; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12333; RV64ZVE32F-NEXT:    slli a3, a3, 3
12334; RV64ZVE32F-NEXT:    add a3, a1, a3
12335; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
12336; RV64ZVE32F-NEXT:    andi a3, a2, 8
12337; RV64ZVE32F-NEXT:    beqz a3, .LBB101_6
12338; RV64ZVE32F-NEXT:  .LBB101_15: # %cond.load7
12339; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12340; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12341; RV64ZVE32F-NEXT:    slli a3, a3, 3
12342; RV64ZVE32F-NEXT:    add a3, a1, a3
12343; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
12344; RV64ZVE32F-NEXT:    andi a3, a2, 16
12345; RV64ZVE32F-NEXT:    beqz a3, .LBB101_7
12346; RV64ZVE32F-NEXT:  .LBB101_16: # %cond.load10
12347; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
12348; RV64ZVE32F-NEXT:    slli a3, a3, 3
12349; RV64ZVE32F-NEXT:    add a3, a1, a3
12350; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
12351; RV64ZVE32F-NEXT:    andi a3, a2, 32
12352; RV64ZVE32F-NEXT:    bnez a3, .LBB101_8
12353; RV64ZVE32F-NEXT:    j .LBB101_9
12354  %eidxs = sext <8 x i16> %idxs to <8 x i64>
12355  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
12356  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12357  ret <8 x double> %v
12358}
12359
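; i16 indices zero-extended to i64; RV64ZVE32F clears the upper bits with a shift pair (slli 48 / srli 45), which also folds in the scale by 8.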
12360define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
12361; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
12362; RV32V:       # %bb.0:
12363; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12364; RV32V-NEXT:    vzext.vf2 v10, v8
12365; RV32V-NEXT:    vsll.vi v8, v10, 3
12366; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
12367; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
12368; RV32V-NEXT:    vmv.v.v v8, v12
12369; RV32V-NEXT:    ret
12370;
12371; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
12372; RV64V:       # %bb.0:
12373; RV64V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12374; RV64V-NEXT:    vzext.vf2 v10, v8
12375; RV64V-NEXT:    vsll.vi v8, v10, 3
12376; RV64V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
12377; RV64V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
12378; RV64V-NEXT:    vmv.v.v v8, v12
12379; RV64V-NEXT:    ret
12380;
12381; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
12382; RV32ZVE32F:       # %bb.0:
12383; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12384; RV32ZVE32F-NEXT:    vzext.vf2 v10, v8
12385; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
12386; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
12387; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
12388; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
12389; RV32ZVE32F-NEXT:    andi a3, a2, 1
12390; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
12391; RV32ZVE32F-NEXT:    bnez a3, .LBB102_10
12392; RV32ZVE32F-NEXT:  # %bb.1: # %else
12393; RV32ZVE32F-NEXT:    andi a1, a2, 2
12394; RV32ZVE32F-NEXT:    bnez a1, .LBB102_11
12395; RV32ZVE32F-NEXT:  .LBB102_2: # %else2
12396; RV32ZVE32F-NEXT:    andi a1, a2, 4
12397; RV32ZVE32F-NEXT:    bnez a1, .LBB102_12
12398; RV32ZVE32F-NEXT:  .LBB102_3: # %else5
12399; RV32ZVE32F-NEXT:    andi a1, a2, 8
12400; RV32ZVE32F-NEXT:    bnez a1, .LBB102_13
12401; RV32ZVE32F-NEXT:  .LBB102_4: # %else8
12402; RV32ZVE32F-NEXT:    andi a1, a2, 16
12403; RV32ZVE32F-NEXT:    bnez a1, .LBB102_14
12404; RV32ZVE32F-NEXT:  .LBB102_5: # %else11
12405; RV32ZVE32F-NEXT:    andi a1, a2, 32
12406; RV32ZVE32F-NEXT:    bnez a1, .LBB102_15
12407; RV32ZVE32F-NEXT:  .LBB102_6: # %else14
12408; RV32ZVE32F-NEXT:    andi a1, a2, 64
12409; RV32ZVE32F-NEXT:    bnez a1, .LBB102_16
12410; RV32ZVE32F-NEXT:  .LBB102_7: # %else17
12411; RV32ZVE32F-NEXT:    andi a1, a2, -128
12412; RV32ZVE32F-NEXT:    beqz a1, .LBB102_9
12413; RV32ZVE32F-NEXT:  .LBB102_8: # %cond.load19
12414; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12415; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
12416; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
12417; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
12418; RV32ZVE32F-NEXT:  .LBB102_9: # %else20
12419; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
12420; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
12421; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
12422; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
12423; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
12424; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
12425; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
12426; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
12427; RV32ZVE32F-NEXT:    ret
12428; RV32ZVE32F-NEXT:  .LBB102_10: # %cond.load
12429; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
12430; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
12431; RV32ZVE32F-NEXT:    andi a1, a2, 2
12432; RV32ZVE32F-NEXT:    beqz a1, .LBB102_2
12433; RV32ZVE32F-NEXT:  .LBB102_11: # %cond.load1
12434; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12435; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
12436; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12437; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
12438; RV32ZVE32F-NEXT:    andi a1, a2, 4
12439; RV32ZVE32F-NEXT:    beqz a1, .LBB102_3
12440; RV32ZVE32F-NEXT:  .LBB102_12: # %cond.load4
12441; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12442; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
12443; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12444; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
12445; RV32ZVE32F-NEXT:    andi a1, a2, 8
12446; RV32ZVE32F-NEXT:    beqz a1, .LBB102_4
12447; RV32ZVE32F-NEXT:  .LBB102_13: # %cond.load7
12448; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12449; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
12450; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12451; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
12452; RV32ZVE32F-NEXT:    andi a1, a2, 16
12453; RV32ZVE32F-NEXT:    beqz a1, .LBB102_5
12454; RV32ZVE32F-NEXT:  .LBB102_14: # %cond.load10
12455; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12456; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
12457; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12458; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
12459; RV32ZVE32F-NEXT:    andi a1, a2, 32
12460; RV32ZVE32F-NEXT:    beqz a1, .LBB102_6
12461; RV32ZVE32F-NEXT:  .LBB102_15: # %cond.load13
12462; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12463; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
12464; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12465; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
12466; RV32ZVE32F-NEXT:    andi a1, a2, 64
12467; RV32ZVE32F-NEXT:    beqz a1, .LBB102_7
12468; RV32ZVE32F-NEXT:  .LBB102_16: # %cond.load16
12469; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12470; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
12471; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12472; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
12473; RV32ZVE32F-NEXT:    andi a1, a2, -128
12474; RV32ZVE32F-NEXT:    bnez a1, .LBB102_8
12475; RV32ZVE32F-NEXT:    j .LBB102_9
12476;
12477; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
12478; RV64ZVE32F:       # %bb.0:
12479; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
12480; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
12481; RV64ZVE32F-NEXT:    andi a3, a2, 1
12482; RV64ZVE32F-NEXT:    beqz a3, .LBB102_2
12483; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
12484; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
12485; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12486; RV64ZVE32F-NEXT:    slli a3, a3, 48
12487; RV64ZVE32F-NEXT:    srli a3, a3, 45
12488; RV64ZVE32F-NEXT:    add a3, a1, a3
12489; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
12490; RV64ZVE32F-NEXT:  .LBB102_2: # %else
12491; RV64ZVE32F-NEXT:    andi a3, a2, 2
12492; RV64ZVE32F-NEXT:    beqz a3, .LBB102_4
12493; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
12494; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
12495; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
12496; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
12497; RV64ZVE32F-NEXT:    slli a3, a3, 48
12498; RV64ZVE32F-NEXT:    srli a3, a3, 45
12499; RV64ZVE32F-NEXT:    add a3, a1, a3
12500; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
12501; RV64ZVE32F-NEXT:  .LBB102_4: # %else2
12502; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
12503; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
12504; RV64ZVE32F-NEXT:    andi a3, a2, 4
12505; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
12506; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
12507; RV64ZVE32F-NEXT:    bnez a3, .LBB102_14
12508; RV64ZVE32F-NEXT:  # %bb.5: # %else5
12509; RV64ZVE32F-NEXT:    andi a3, a2, 8
12510; RV64ZVE32F-NEXT:    bnez a3, .LBB102_15
12511; RV64ZVE32F-NEXT:  .LBB102_6: # %else8
12512; RV64ZVE32F-NEXT:    andi a3, a2, 16
12513; RV64ZVE32F-NEXT:    bnez a3, .LBB102_16
12514; RV64ZVE32F-NEXT:  .LBB102_7: # %else11
12515; RV64ZVE32F-NEXT:    andi a3, a2, 32
12516; RV64ZVE32F-NEXT:    beqz a3, .LBB102_9
12517; RV64ZVE32F-NEXT:  .LBB102_8: # %cond.load13
12518; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
12519; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12520; RV64ZVE32F-NEXT:    slli a3, a3, 48
12521; RV64ZVE32F-NEXT:    srli a3, a3, 45
12522; RV64ZVE32F-NEXT:    add a3, a1, a3
12523; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
12524; RV64ZVE32F-NEXT:  .LBB102_9: # %else14
12525; RV64ZVE32F-NEXT:    andi a3, a2, 64
12526; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
12527; RV64ZVE32F-NEXT:    beqz a3, .LBB102_11
12528; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
12529; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12530; RV64ZVE32F-NEXT:    slli a3, a3, 48
12531; RV64ZVE32F-NEXT:    srli a3, a3, 45
12532; RV64ZVE32F-NEXT:    add a3, a1, a3
12533; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
12534; RV64ZVE32F-NEXT:  .LBB102_11: # %else17
12535; RV64ZVE32F-NEXT:    andi a2, a2, -128
12536; RV64ZVE32F-NEXT:    beqz a2, .LBB102_13
12537; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
12538; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12539; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
12540; RV64ZVE32F-NEXT:    slli a2, a2, 48
12541; RV64ZVE32F-NEXT:    srli a2, a2, 45
12542; RV64ZVE32F-NEXT:    add a1, a1, a2
12543; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
12544; RV64ZVE32F-NEXT:  .LBB102_13: # %else20
12545; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
12546; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
12547; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
12548; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
12549; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
12550; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
12551; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
12552; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
12553; RV64ZVE32F-NEXT:    ret
12554; RV64ZVE32F-NEXT:  .LBB102_14: # %cond.load4
12555; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12556; RV64ZVE32F-NEXT:    slli a3, a3, 48
12557; RV64ZVE32F-NEXT:    srli a3, a3, 45
12558; RV64ZVE32F-NEXT:    add a3, a1, a3
12559; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
12560; RV64ZVE32F-NEXT:    andi a3, a2, 8
12561; RV64ZVE32F-NEXT:    beqz a3, .LBB102_6
12562; RV64ZVE32F-NEXT:  .LBB102_15: # %cond.load7
12563; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12564; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12565; RV64ZVE32F-NEXT:    slli a3, a3, 48
12566; RV64ZVE32F-NEXT:    srli a3, a3, 45
12567; RV64ZVE32F-NEXT:    add a3, a1, a3
12568; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
12569; RV64ZVE32F-NEXT:    andi a3, a2, 16
12570; RV64ZVE32F-NEXT:    beqz a3, .LBB102_7
12571; RV64ZVE32F-NEXT:  .LBB102_16: # %cond.load10
12572; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
12573; RV64ZVE32F-NEXT:    slli a3, a3, 48
12574; RV64ZVE32F-NEXT:    srli a3, a3, 45
12575; RV64ZVE32F-NEXT:    add a3, a1, a3
12576; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
12577; RV64ZVE32F-NEXT:    andi a3, a2, 32
12578; RV64ZVE32F-NEXT:    bnez a3, .LBB102_8
12579; RV64ZVE32F-NEXT:    j .LBB102_9
12580  %eidxs = zext <8 x i16> %idxs to <8 x i64>
12581  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
12582  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12583  ret <8 x double> %v
12584}
12585
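; Gather of <8 x double> addressed by i32 indices; RV32 uses them directly for the indexed load while RV64V sign-extends them to i64 first.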
12586define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
12587; RV32V-LABEL: mgather_baseidx_v8i32_v8f64:
12588; RV32V:       # %bb.0:
12589; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12590; RV32V-NEXT:    vsll.vi v8, v8, 3
12591; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
12592; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
12593; RV32V-NEXT:    vmv.v.v v8, v12
12594; RV32V-NEXT:    ret
12595;
12596; RV64V-LABEL: mgather_baseidx_v8i32_v8f64:
12597; RV64V:       # %bb.0:
12598; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
12599; RV64V-NEXT:    vsext.vf2 v16, v8
12600; RV64V-NEXT:    vsll.vi v8, v16, 3
12601; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
12602; RV64V-NEXT:    vmv.v.v v8, v12
12603; RV64V-NEXT:    ret
12604;
12605; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
12606; RV32ZVE32F:       # %bb.0:
12607; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12608; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
12609; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
12610; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
12611; RV32ZVE32F-NEXT:    andi a3, a2, 1
12612; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
12613; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
12614; RV32ZVE32F-NEXT:    bnez a3, .LBB103_10
12615; RV32ZVE32F-NEXT:  # %bb.1: # %else
12616; RV32ZVE32F-NEXT:    andi a1, a2, 2
12617; RV32ZVE32F-NEXT:    bnez a1, .LBB103_11
12618; RV32ZVE32F-NEXT:  .LBB103_2: # %else2
12619; RV32ZVE32F-NEXT:    andi a1, a2, 4
12620; RV32ZVE32F-NEXT:    bnez a1, .LBB103_12
12621; RV32ZVE32F-NEXT:  .LBB103_3: # %else5
12622; RV32ZVE32F-NEXT:    andi a1, a2, 8
12623; RV32ZVE32F-NEXT:    bnez a1, .LBB103_13
12624; RV32ZVE32F-NEXT:  .LBB103_4: # %else8
12625; RV32ZVE32F-NEXT:    andi a1, a2, 16
12626; RV32ZVE32F-NEXT:    bnez a1, .LBB103_14
12627; RV32ZVE32F-NEXT:  .LBB103_5: # %else11
12628; RV32ZVE32F-NEXT:    andi a1, a2, 32
12629; RV32ZVE32F-NEXT:    bnez a1, .LBB103_15
12630; RV32ZVE32F-NEXT:  .LBB103_6: # %else14
12631; RV32ZVE32F-NEXT:    andi a1, a2, 64
12632; RV32ZVE32F-NEXT:    bnez a1, .LBB103_16
12633; RV32ZVE32F-NEXT:  .LBB103_7: # %else17
12634; RV32ZVE32F-NEXT:    andi a1, a2, -128
12635; RV32ZVE32F-NEXT:    beqz a1, .LBB103_9
12636; RV32ZVE32F-NEXT:  .LBB103_8: # %cond.load19
12637; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12638; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
12639; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
12640; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
12641; RV32ZVE32F-NEXT:  .LBB103_9: # %else20
12642; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
12643; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
12644; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
12645; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
12646; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
12647; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
12648; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
12649; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
12650; RV32ZVE32F-NEXT:    ret
12651; RV32ZVE32F-NEXT:  .LBB103_10: # %cond.load
12652; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
12653; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
12654; RV32ZVE32F-NEXT:    andi a1, a2, 2
12655; RV32ZVE32F-NEXT:    beqz a1, .LBB103_2
12656; RV32ZVE32F-NEXT:  .LBB103_11: # %cond.load1
12657; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12658; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
12659; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12660; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
12661; RV32ZVE32F-NEXT:    andi a1, a2, 4
12662; RV32ZVE32F-NEXT:    beqz a1, .LBB103_3
12663; RV32ZVE32F-NEXT:  .LBB103_12: # %cond.load4
12664; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12665; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
12666; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12667; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
12668; RV32ZVE32F-NEXT:    andi a1, a2, 8
12669; RV32ZVE32F-NEXT:    beqz a1, .LBB103_4
12670; RV32ZVE32F-NEXT:  .LBB103_13: # %cond.load7
12671; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12672; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
12673; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12674; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
12675; RV32ZVE32F-NEXT:    andi a1, a2, 16
12676; RV32ZVE32F-NEXT:    beqz a1, .LBB103_5
12677; RV32ZVE32F-NEXT:  .LBB103_14: # %cond.load10
12678; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12679; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
12680; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12681; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
12682; RV32ZVE32F-NEXT:    andi a1, a2, 32
12683; RV32ZVE32F-NEXT:    beqz a1, .LBB103_6
12684; RV32ZVE32F-NEXT:  .LBB103_15: # %cond.load13
12685; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12686; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
12687; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12688; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
12689; RV32ZVE32F-NEXT:    andi a1, a2, 64
12690; RV32ZVE32F-NEXT:    beqz a1, .LBB103_7
12691; RV32ZVE32F-NEXT:  .LBB103_16: # %cond.load16
12692; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12693; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
12694; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12695; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
12696; RV32ZVE32F-NEXT:    andi a1, a2, -128
12697; RV32ZVE32F-NEXT:    bnez a1, .LBB103_8
12698; RV32ZVE32F-NEXT:    j .LBB103_9
12699;
12700; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
12701; RV64ZVE32F:       # %bb.0:
12702; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
12703; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
12704; RV64ZVE32F-NEXT:    andi a3, a2, 1
12705; RV64ZVE32F-NEXT:    beqz a3, .LBB103_2
12706; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
12707; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
12708; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12709; RV64ZVE32F-NEXT:    slli a3, a3, 3
12710; RV64ZVE32F-NEXT:    add a3, a1, a3
12711; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
12712; RV64ZVE32F-NEXT:  .LBB103_2: # %else
12713; RV64ZVE32F-NEXT:    andi a3, a2, 2
12714; RV64ZVE32F-NEXT:    beqz a3, .LBB103_4
12715; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
12716; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12717; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
12718; RV64ZVE32F-NEXT:    vmv.x.s a3, v10
12719; RV64ZVE32F-NEXT:    slli a3, a3, 3
12720; RV64ZVE32F-NEXT:    add a3, a1, a3
12721; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
12722; RV64ZVE32F-NEXT:  .LBB103_4: # %else2
12723; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
12724; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
12725; RV64ZVE32F-NEXT:    andi a3, a2, 4
12726; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
12727; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
12728; RV64ZVE32F-NEXT:    bnez a3, .LBB103_14
12729; RV64ZVE32F-NEXT:  # %bb.5: # %else5
12730; RV64ZVE32F-NEXT:    andi a3, a2, 8
12731; RV64ZVE32F-NEXT:    bnez a3, .LBB103_15
12732; RV64ZVE32F-NEXT:  .LBB103_6: # %else8
12733; RV64ZVE32F-NEXT:    andi a3, a2, 16
12734; RV64ZVE32F-NEXT:    bnez a3, .LBB103_16
12735; RV64ZVE32F-NEXT:  .LBB103_7: # %else11
12736; RV64ZVE32F-NEXT:    andi a3, a2, 32
12737; RV64ZVE32F-NEXT:    beqz a3, .LBB103_9
12738; RV64ZVE32F-NEXT:  .LBB103_8: # %cond.load13
12739; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
12740; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12741; RV64ZVE32F-NEXT:    slli a3, a3, 3
12742; RV64ZVE32F-NEXT:    add a3, a1, a3
12743; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
12744; RV64ZVE32F-NEXT:  .LBB103_9: # %else14
12745; RV64ZVE32F-NEXT:    andi a3, a2, 64
12746; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
12747; RV64ZVE32F-NEXT:    beqz a3, .LBB103_11
12748; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
12749; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12750; RV64ZVE32F-NEXT:    slli a3, a3, 3
12751; RV64ZVE32F-NEXT:    add a3, a1, a3
12752; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
12753; RV64ZVE32F-NEXT:  .LBB103_11: # %else17
12754; RV64ZVE32F-NEXT:    andi a2, a2, -128
12755; RV64ZVE32F-NEXT:    beqz a2, .LBB103_13
12756; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
12757; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12758; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
12759; RV64ZVE32F-NEXT:    slli a2, a2, 3
12760; RV64ZVE32F-NEXT:    add a1, a1, a2
12761; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
12762; RV64ZVE32F-NEXT:  .LBB103_13: # %else20
12763; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
12764; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
12765; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
12766; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
12767; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
12768; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
12769; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
12770; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
12771; RV64ZVE32F-NEXT:    ret
12772; RV64ZVE32F-NEXT:  .LBB103_14: # %cond.load4
12773; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12774; RV64ZVE32F-NEXT:    slli a3, a3, 3
12775; RV64ZVE32F-NEXT:    add a3, a1, a3
12776; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
12777; RV64ZVE32F-NEXT:    andi a3, a2, 8
12778; RV64ZVE32F-NEXT:    beqz a3, .LBB103_6
12779; RV64ZVE32F-NEXT:  .LBB103_15: # %cond.load7
12780; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12781; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12782; RV64ZVE32F-NEXT:    slli a3, a3, 3
12783; RV64ZVE32F-NEXT:    add a3, a1, a3
12784; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
12785; RV64ZVE32F-NEXT:    andi a3, a2, 16
12786; RV64ZVE32F-NEXT:    beqz a3, .LBB103_7
12787; RV64ZVE32F-NEXT:  .LBB103_16: # %cond.load10
12788; RV64ZVE32F-NEXT:    vmv.x.s a3, v10
12789; RV64ZVE32F-NEXT:    slli a3, a3, 3
12790; RV64ZVE32F-NEXT:    add a3, a1, a3
12791; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
12792; RV64ZVE32F-NEXT:    andi a3, a2, 32
12793; RV64ZVE32F-NEXT:    bnez a3, .LBB103_8
12794; RV64ZVE32F-NEXT:    j .LBB103_9
12795  %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
12796  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12797  ret <8 x double> %v
12798}
12799
12800define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
12801; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
12802; RV32V:       # %bb.0:
12803; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12804; RV32V-NEXT:    vsll.vi v8, v8, 3
12805; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
12806; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
12807; RV32V-NEXT:    vmv.v.v v8, v12
12808; RV32V-NEXT:    ret
12809;
12810; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
12811; RV64V:       # %bb.0:
12812; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
12813; RV64V-NEXT:    vsext.vf2 v16, v8
12814; RV64V-NEXT:    vsll.vi v8, v16, 3
12815; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
12816; RV64V-NEXT:    vmv.v.v v8, v12
12817; RV64V-NEXT:    ret
12818;
12819; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
12820; RV32ZVE32F:       # %bb.0:
12821; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12822; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
12823; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
12824; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
12825; RV32ZVE32F-NEXT:    andi a3, a2, 1
12826; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
12827; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
12828; RV32ZVE32F-NEXT:    bnez a3, .LBB104_10
12829; RV32ZVE32F-NEXT:  # %bb.1: # %else
12830; RV32ZVE32F-NEXT:    andi a1, a2, 2
12831; RV32ZVE32F-NEXT:    bnez a1, .LBB104_11
12832; RV32ZVE32F-NEXT:  .LBB104_2: # %else2
12833; RV32ZVE32F-NEXT:    andi a1, a2, 4
12834; RV32ZVE32F-NEXT:    bnez a1, .LBB104_12
12835; RV32ZVE32F-NEXT:  .LBB104_3: # %else5
12836; RV32ZVE32F-NEXT:    andi a1, a2, 8
12837; RV32ZVE32F-NEXT:    bnez a1, .LBB104_13
12838; RV32ZVE32F-NEXT:  .LBB104_4: # %else8
12839; RV32ZVE32F-NEXT:    andi a1, a2, 16
12840; RV32ZVE32F-NEXT:    bnez a1, .LBB104_14
12841; RV32ZVE32F-NEXT:  .LBB104_5: # %else11
12842; RV32ZVE32F-NEXT:    andi a1, a2, 32
12843; RV32ZVE32F-NEXT:    bnez a1, .LBB104_15
12844; RV32ZVE32F-NEXT:  .LBB104_6: # %else14
12845; RV32ZVE32F-NEXT:    andi a1, a2, 64
12846; RV32ZVE32F-NEXT:    bnez a1, .LBB104_16
12847; RV32ZVE32F-NEXT:  .LBB104_7: # %else17
12848; RV32ZVE32F-NEXT:    andi a1, a2, -128
12849; RV32ZVE32F-NEXT:    beqz a1, .LBB104_9
12850; RV32ZVE32F-NEXT:  .LBB104_8: # %cond.load19
12851; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12852; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
12853; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
12854; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
12855; RV32ZVE32F-NEXT:  .LBB104_9: # %else20
12856; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
12857; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
12858; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
12859; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
12860; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
12861; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
12862; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
12863; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
12864; RV32ZVE32F-NEXT:    ret
12865; RV32ZVE32F-NEXT:  .LBB104_10: # %cond.load
12866; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
12867; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
12868; RV32ZVE32F-NEXT:    andi a1, a2, 2
12869; RV32ZVE32F-NEXT:    beqz a1, .LBB104_2
12870; RV32ZVE32F-NEXT:  .LBB104_11: # %cond.load1
12871; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12872; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
12873; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12874; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
12875; RV32ZVE32F-NEXT:    andi a1, a2, 4
12876; RV32ZVE32F-NEXT:    beqz a1, .LBB104_3
12877; RV32ZVE32F-NEXT:  .LBB104_12: # %cond.load4
12878; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12879; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
12880; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12881; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
12882; RV32ZVE32F-NEXT:    andi a1, a2, 8
12883; RV32ZVE32F-NEXT:    beqz a1, .LBB104_4
12884; RV32ZVE32F-NEXT:  .LBB104_13: # %cond.load7
12885; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12886; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
12887; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12888; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
12889; RV32ZVE32F-NEXT:    andi a1, a2, 16
12890; RV32ZVE32F-NEXT:    beqz a1, .LBB104_5
12891; RV32ZVE32F-NEXT:  .LBB104_14: # %cond.load10
12892; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12893; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
12894; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12895; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
12896; RV32ZVE32F-NEXT:    andi a1, a2, 32
12897; RV32ZVE32F-NEXT:    beqz a1, .LBB104_6
12898; RV32ZVE32F-NEXT:  .LBB104_15: # %cond.load13
12899; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12900; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
12901; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12902; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
12903; RV32ZVE32F-NEXT:    andi a1, a2, 64
12904; RV32ZVE32F-NEXT:    beqz a1, .LBB104_7
12905; RV32ZVE32F-NEXT:  .LBB104_16: # %cond.load16
12906; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
12907; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
12908; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
12909; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
12910; RV32ZVE32F-NEXT:    andi a1, a2, -128
12911; RV32ZVE32F-NEXT:    bnez a1, .LBB104_8
12912; RV32ZVE32F-NEXT:    j .LBB104_9
12913;
12914; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
12915; RV64ZVE32F:       # %bb.0:
12916; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
12917; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
12918; RV64ZVE32F-NEXT:    andi a3, a2, 1
12919; RV64ZVE32F-NEXT:    beqz a3, .LBB104_2
12920; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
12921; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
12922; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12923; RV64ZVE32F-NEXT:    slli a3, a3, 3
12924; RV64ZVE32F-NEXT:    add a3, a1, a3
12925; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
12926; RV64ZVE32F-NEXT:  .LBB104_2: # %else
12927; RV64ZVE32F-NEXT:    andi a3, a2, 2
12928; RV64ZVE32F-NEXT:    beqz a3, .LBB104_4
12929; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
12930; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
12931; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
12932; RV64ZVE32F-NEXT:    vmv.x.s a3, v10
12933; RV64ZVE32F-NEXT:    slli a3, a3, 3
12934; RV64ZVE32F-NEXT:    add a3, a1, a3
12935; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
12936; RV64ZVE32F-NEXT:  .LBB104_4: # %else2
12937; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
12938; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
12939; RV64ZVE32F-NEXT:    andi a3, a2, 4
12940; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
12941; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
12942; RV64ZVE32F-NEXT:    bnez a3, .LBB104_14
12943; RV64ZVE32F-NEXT:  # %bb.5: # %else5
12944; RV64ZVE32F-NEXT:    andi a3, a2, 8
12945; RV64ZVE32F-NEXT:    bnez a3, .LBB104_15
12946; RV64ZVE32F-NEXT:  .LBB104_6: # %else8
12947; RV64ZVE32F-NEXT:    andi a3, a2, 16
12948; RV64ZVE32F-NEXT:    bnez a3, .LBB104_16
12949; RV64ZVE32F-NEXT:  .LBB104_7: # %else11
12950; RV64ZVE32F-NEXT:    andi a3, a2, 32
12951; RV64ZVE32F-NEXT:    beqz a3, .LBB104_9
12952; RV64ZVE32F-NEXT:  .LBB104_8: # %cond.load13
12953; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
12954; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12955; RV64ZVE32F-NEXT:    slli a3, a3, 3
12956; RV64ZVE32F-NEXT:    add a3, a1, a3
12957; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
12958; RV64ZVE32F-NEXT:  .LBB104_9: # %else14
12959; RV64ZVE32F-NEXT:    andi a3, a2, 64
12960; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
12961; RV64ZVE32F-NEXT:    beqz a3, .LBB104_11
12962; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
12963; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12964; RV64ZVE32F-NEXT:    slli a3, a3, 3
12965; RV64ZVE32F-NEXT:    add a3, a1, a3
12966; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
12967; RV64ZVE32F-NEXT:  .LBB104_11: # %else17
12968; RV64ZVE32F-NEXT:    andi a2, a2, -128
12969; RV64ZVE32F-NEXT:    beqz a2, .LBB104_13
12970; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
12971; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12972; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
12973; RV64ZVE32F-NEXT:    slli a2, a2, 3
12974; RV64ZVE32F-NEXT:    add a1, a1, a2
12975; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
12976; RV64ZVE32F-NEXT:  .LBB104_13: # %else20
12977; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
12978; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
12979; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
12980; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
12981; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
12982; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
12983; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
12984; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
12985; RV64ZVE32F-NEXT:    ret
12986; RV64ZVE32F-NEXT:  .LBB104_14: # %cond.load4
12987; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12988; RV64ZVE32F-NEXT:    slli a3, a3, 3
12989; RV64ZVE32F-NEXT:    add a3, a1, a3
12990; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
12991; RV64ZVE32F-NEXT:    andi a3, a2, 8
12992; RV64ZVE32F-NEXT:    beqz a3, .LBB104_6
12993; RV64ZVE32F-NEXT:  .LBB104_15: # %cond.load7
12994; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
12995; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
12996; RV64ZVE32F-NEXT:    slli a3, a3, 3
12997; RV64ZVE32F-NEXT:    add a3, a1, a3
12998; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
12999; RV64ZVE32F-NEXT:    andi a3, a2, 16
13000; RV64ZVE32F-NEXT:    beqz a3, .LBB104_7
13001; RV64ZVE32F-NEXT:  .LBB104_16: # %cond.load10
13002; RV64ZVE32F-NEXT:    vmv.x.s a3, v10
13003; RV64ZVE32F-NEXT:    slli a3, a3, 3
13004; RV64ZVE32F-NEXT:    add a3, a1, a3
13005; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
13006; RV64ZVE32F-NEXT:    andi a3, a2, 32
13007; RV64ZVE32F-NEXT:    bnez a3, .LBB104_8
13008; RV64ZVE32F-NEXT:    j .LBB104_9
13009  %eidxs = sext <8 x i32> %idxs to <8 x i64>
13010  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
13011  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
13012  ret <8 x double> %v
13013}
13014
13015define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
13016; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
13017; RV32V:       # %bb.0:
13018; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
13019; RV32V-NEXT:    vsll.vi v8, v8, 3
13020; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
13021; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
13022; RV32V-NEXT:    vmv.v.v v8, v12
13023; RV32V-NEXT:    ret
13024;
13025; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
13026; RV64V:       # %bb.0:
13027; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
13028; RV64V-NEXT:    vzext.vf2 v16, v8
13029; RV64V-NEXT:    vsll.vi v8, v16, 3
13030; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
13031; RV64V-NEXT:    vmv.v.v v8, v12
13032; RV64V-NEXT:    ret
13033;
13034; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
13035; RV32ZVE32F:       # %bb.0:
13036; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
13037; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
13038; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
13039; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
13040; RV32ZVE32F-NEXT:    andi a3, a2, 1
13041; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
13042; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
13043; RV32ZVE32F-NEXT:    bnez a3, .LBB105_10
13044; RV32ZVE32F-NEXT:  # %bb.1: # %else
13045; RV32ZVE32F-NEXT:    andi a1, a2, 2
13046; RV32ZVE32F-NEXT:    bnez a1, .LBB105_11
13047; RV32ZVE32F-NEXT:  .LBB105_2: # %else2
13048; RV32ZVE32F-NEXT:    andi a1, a2, 4
13049; RV32ZVE32F-NEXT:    bnez a1, .LBB105_12
13050; RV32ZVE32F-NEXT:  .LBB105_3: # %else5
13051; RV32ZVE32F-NEXT:    andi a1, a2, 8
13052; RV32ZVE32F-NEXT:    bnez a1, .LBB105_13
13053; RV32ZVE32F-NEXT:  .LBB105_4: # %else8
13054; RV32ZVE32F-NEXT:    andi a1, a2, 16
13055; RV32ZVE32F-NEXT:    bnez a1, .LBB105_14
13056; RV32ZVE32F-NEXT:  .LBB105_5: # %else11
13057; RV32ZVE32F-NEXT:    andi a1, a2, 32
13058; RV32ZVE32F-NEXT:    bnez a1, .LBB105_15
13059; RV32ZVE32F-NEXT:  .LBB105_6: # %else14
13060; RV32ZVE32F-NEXT:    andi a1, a2, 64
13061; RV32ZVE32F-NEXT:    bnez a1, .LBB105_16
13062; RV32ZVE32F-NEXT:  .LBB105_7: # %else17
13063; RV32ZVE32F-NEXT:    andi a1, a2, -128
13064; RV32ZVE32F-NEXT:    beqz a1, .LBB105_9
13065; RV32ZVE32F-NEXT:  .LBB105_8: # %cond.load19
13066; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
13067; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
13068; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
13069; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
13070; RV32ZVE32F-NEXT:  .LBB105_9: # %else20
13071; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
13072; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
13073; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
13074; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
13075; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
13076; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
13077; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
13078; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
13079; RV32ZVE32F-NEXT:    ret
13080; RV32ZVE32F-NEXT:  .LBB105_10: # %cond.load
13081; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
13082; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
13083; RV32ZVE32F-NEXT:    andi a1, a2, 2
13084; RV32ZVE32F-NEXT:    beqz a1, .LBB105_2
13085; RV32ZVE32F-NEXT:  .LBB105_11: # %cond.load1
13086; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
13087; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
13088; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13089; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
13090; RV32ZVE32F-NEXT:    andi a1, a2, 4
13091; RV32ZVE32F-NEXT:    beqz a1, .LBB105_3
13092; RV32ZVE32F-NEXT:  .LBB105_12: # %cond.load4
13093; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
13094; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
13095; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13096; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
13097; RV32ZVE32F-NEXT:    andi a1, a2, 8
13098; RV32ZVE32F-NEXT:    beqz a1, .LBB105_4
13099; RV32ZVE32F-NEXT:  .LBB105_13: # %cond.load7
13100; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
13101; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
13102; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13103; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
13104; RV32ZVE32F-NEXT:    andi a1, a2, 16
13105; RV32ZVE32F-NEXT:    beqz a1, .LBB105_5
13106; RV32ZVE32F-NEXT:  .LBB105_14: # %cond.load10
13107; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
13108; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
13109; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13110; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
13111; RV32ZVE32F-NEXT:    andi a1, a2, 32
13112; RV32ZVE32F-NEXT:    beqz a1, .LBB105_6
13113; RV32ZVE32F-NEXT:  .LBB105_15: # %cond.load13
13114; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
13115; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
13116; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13117; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
13118; RV32ZVE32F-NEXT:    andi a1, a2, 64
13119; RV32ZVE32F-NEXT:    beqz a1, .LBB105_7
13120; RV32ZVE32F-NEXT:  .LBB105_16: # %cond.load16
13121; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
13122; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
13123; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13124; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
13125; RV32ZVE32F-NEXT:    andi a1, a2, -128
13126; RV32ZVE32F-NEXT:    bnez a1, .LBB105_8
13127; RV32ZVE32F-NEXT:    j .LBB105_9
13128;
13129; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
13130; RV64ZVE32F:       # %bb.0:
13131; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
13132; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
13133; RV64ZVE32F-NEXT:    andi a3, a2, 1
13134; RV64ZVE32F-NEXT:    beqz a3, .LBB105_2
13135; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
13136; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
13137; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
13138; RV64ZVE32F-NEXT:    slli a3, a3, 32
13139; RV64ZVE32F-NEXT:    srli a3, a3, 29
13140; RV64ZVE32F-NEXT:    add a3, a1, a3
13141; RV64ZVE32F-NEXT:    fld fa0, 0(a3)
13142; RV64ZVE32F-NEXT:  .LBB105_2: # %else
13143; RV64ZVE32F-NEXT:    andi a3, a2, 2
13144; RV64ZVE32F-NEXT:    beqz a3, .LBB105_4
13145; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
13146; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
13147; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
13148; RV64ZVE32F-NEXT:    vmv.x.s a3, v10
13149; RV64ZVE32F-NEXT:    slli a3, a3, 32
13150; RV64ZVE32F-NEXT:    srli a3, a3, 29
13151; RV64ZVE32F-NEXT:    add a3, a1, a3
13152; RV64ZVE32F-NEXT:    fld fa1, 0(a3)
13153; RV64ZVE32F-NEXT:  .LBB105_4: # %else2
13154; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
13155; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
13156; RV64ZVE32F-NEXT:    andi a3, a2, 4
13157; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
13158; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
13159; RV64ZVE32F-NEXT:    bnez a3, .LBB105_14
13160; RV64ZVE32F-NEXT:  # %bb.5: # %else5
13161; RV64ZVE32F-NEXT:    andi a3, a2, 8
13162; RV64ZVE32F-NEXT:    bnez a3, .LBB105_15
13163; RV64ZVE32F-NEXT:  .LBB105_6: # %else8
13164; RV64ZVE32F-NEXT:    andi a3, a2, 16
13165; RV64ZVE32F-NEXT:    bnez a3, .LBB105_16
13166; RV64ZVE32F-NEXT:  .LBB105_7: # %else11
13167; RV64ZVE32F-NEXT:    andi a3, a2, 32
13168; RV64ZVE32F-NEXT:    beqz a3, .LBB105_9
13169; RV64ZVE32F-NEXT:  .LBB105_8: # %cond.load13
13170; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
13171; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
13172; RV64ZVE32F-NEXT:    slli a3, a3, 32
13173; RV64ZVE32F-NEXT:    srli a3, a3, 29
13174; RV64ZVE32F-NEXT:    add a3, a1, a3
13175; RV64ZVE32F-NEXT:    fld fa5, 0(a3)
13176; RV64ZVE32F-NEXT:  .LBB105_9: # %else14
13177; RV64ZVE32F-NEXT:    andi a3, a2, 64
13178; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
13179; RV64ZVE32F-NEXT:    beqz a3, .LBB105_11
13180; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
13181; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
13182; RV64ZVE32F-NEXT:    slli a3, a3, 32
13183; RV64ZVE32F-NEXT:    srli a3, a3, 29
13184; RV64ZVE32F-NEXT:    add a3, a1, a3
13185; RV64ZVE32F-NEXT:    fld fa6, 0(a3)
13186; RV64ZVE32F-NEXT:  .LBB105_11: # %else17
13187; RV64ZVE32F-NEXT:    andi a2, a2, -128
13188; RV64ZVE32F-NEXT:    beqz a2, .LBB105_13
13189; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
13190; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
13191; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13192; RV64ZVE32F-NEXT:    slli a2, a2, 32
13193; RV64ZVE32F-NEXT:    srli a2, a2, 29
13194; RV64ZVE32F-NEXT:    add a1, a1, a2
13195; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
13196; RV64ZVE32F-NEXT:  .LBB105_13: # %else20
13197; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
13198; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
13199; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
13200; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
13201; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
13202; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
13203; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
13204; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
13205; RV64ZVE32F-NEXT:    ret
13206; RV64ZVE32F-NEXT:  .LBB105_14: # %cond.load4
13207; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
13208; RV64ZVE32F-NEXT:    slli a3, a3, 32
13209; RV64ZVE32F-NEXT:    srli a3, a3, 29
13210; RV64ZVE32F-NEXT:    add a3, a1, a3
13211; RV64ZVE32F-NEXT:    fld fa2, 0(a3)
13212; RV64ZVE32F-NEXT:    andi a3, a2, 8
13213; RV64ZVE32F-NEXT:    beqz a3, .LBB105_6
13214; RV64ZVE32F-NEXT:  .LBB105_15: # %cond.load7
13215; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
13216; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
13217; RV64ZVE32F-NEXT:    slli a3, a3, 32
13218; RV64ZVE32F-NEXT:    srli a3, a3, 29
13219; RV64ZVE32F-NEXT:    add a3, a1, a3
13220; RV64ZVE32F-NEXT:    fld fa3, 0(a3)
13221; RV64ZVE32F-NEXT:    andi a3, a2, 16
13222; RV64ZVE32F-NEXT:    beqz a3, .LBB105_7
13223; RV64ZVE32F-NEXT:  .LBB105_16: # %cond.load10
13224; RV64ZVE32F-NEXT:    vmv.x.s a3, v10
13225; RV64ZVE32F-NEXT:    slli a3, a3, 32
13226; RV64ZVE32F-NEXT:    srli a3, a3, 29
13227; RV64ZVE32F-NEXT:    add a3, a1, a3
13228; RV64ZVE32F-NEXT:    fld fa4, 0(a3)
13229; RV64ZVE32F-NEXT:    andi a3, a2, 32
13230; RV64ZVE32F-NEXT:    bnez a3, .LBB105_8
13231; RV64ZVE32F-NEXT:    j .LBB105_9
13232  %eidxs = zext <8 x i32> %idxs to <8 x i64>
13233  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
13234  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
13235  ret <8 x double> %v
13236}
13237
13238define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) {
13239; RV32V-LABEL: mgather_baseidx_v8f64:
13240; RV32V:       # %bb.0:
13241; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
13242; RV32V-NEXT:    vnsrl.wi v16, v8, 0
13243; RV32V-NEXT:    vsll.vi v8, v16, 3
13244; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
13245; RV32V-NEXT:    vluxei32.v v12, (a0), v8, v0.t
13246; RV32V-NEXT:    vmv.v.v v8, v12
13247; RV32V-NEXT:    ret
13248;
13249; RV64V-LABEL: mgather_baseidx_v8f64:
13250; RV64V:       # %bb.0:
13251; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
13252; RV64V-NEXT:    vsll.vi v8, v8, 3
13253; RV64V-NEXT:    vluxei64.v v12, (a0), v8, v0.t
13254; RV64V-NEXT:    vmv.v.v v8, v12
13255; RV64V-NEXT:    ret
13256;
13257; RV32ZVE32F-LABEL: mgather_baseidx_v8f64:
13258; RV32ZVE32F:       # %bb.0:
13259; RV32ZVE32F-NEXT:    lw a3, 32(a2)
13260; RV32ZVE32F-NEXT:    lw a4, 40(a2)
13261; RV32ZVE32F-NEXT:    lw a5, 48(a2)
13262; RV32ZVE32F-NEXT:    lw a6, 56(a2)
13263; RV32ZVE32F-NEXT:    lw a7, 0(a2)
13264; RV32ZVE32F-NEXT:    lw t0, 8(a2)
13265; RV32ZVE32F-NEXT:    lw t1, 16(a2)
13266; RV32ZVE32F-NEXT:    lw t2, 24(a2)
13267; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
13268; RV32ZVE32F-NEXT:    vmv.v.x v8, a7
13269; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
13270; RV32ZVE32F-NEXT:    vmv.x.s a2, v0
13271; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
13272; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t0
13273; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t1
13274; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, t2
13275; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
13276; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
13277; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a5
13278; RV32ZVE32F-NEXT:    vslide1down.vx v8, v8, a6
13279; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 3
13280; RV32ZVE32F-NEXT:    andi a3, a2, 1
13281; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
13282; RV32ZVE32F-NEXT:    bnez a3, .LBB106_10
13283; RV32ZVE32F-NEXT:  # %bb.1: # %else
13284; RV32ZVE32F-NEXT:    andi a1, a2, 2
13285; RV32ZVE32F-NEXT:    bnez a1, .LBB106_11
13286; RV32ZVE32F-NEXT:  .LBB106_2: # %else2
13287; RV32ZVE32F-NEXT:    andi a1, a2, 4
13288; RV32ZVE32F-NEXT:    bnez a1, .LBB106_12
13289; RV32ZVE32F-NEXT:  .LBB106_3: # %else5
13290; RV32ZVE32F-NEXT:    andi a1, a2, 8
13291; RV32ZVE32F-NEXT:    bnez a1, .LBB106_13
13292; RV32ZVE32F-NEXT:  .LBB106_4: # %else8
13293; RV32ZVE32F-NEXT:    andi a1, a2, 16
13294; RV32ZVE32F-NEXT:    bnez a1, .LBB106_14
13295; RV32ZVE32F-NEXT:  .LBB106_5: # %else11
13296; RV32ZVE32F-NEXT:    andi a1, a2, 32
13297; RV32ZVE32F-NEXT:    bnez a1, .LBB106_15
13298; RV32ZVE32F-NEXT:  .LBB106_6: # %else14
13299; RV32ZVE32F-NEXT:    andi a1, a2, 64
13300; RV32ZVE32F-NEXT:    bnez a1, .LBB106_16
13301; RV32ZVE32F-NEXT:  .LBB106_7: # %else17
13302; RV32ZVE32F-NEXT:    andi a1, a2, -128
13303; RV32ZVE32F-NEXT:    beqz a1, .LBB106_9
13304; RV32ZVE32F-NEXT:  .LBB106_8: # %cond.load19
13305; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
13306; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
13307; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
13308; RV32ZVE32F-NEXT:    fld fa7, 0(a1)
13309; RV32ZVE32F-NEXT:  .LBB106_9: # %else20
13310; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
13311; RV32ZVE32F-NEXT:    fsd fa1, 8(a0)
13312; RV32ZVE32F-NEXT:    fsd fa2, 16(a0)
13313; RV32ZVE32F-NEXT:    fsd fa3, 24(a0)
13314; RV32ZVE32F-NEXT:    fsd fa4, 32(a0)
13315; RV32ZVE32F-NEXT:    fsd fa5, 40(a0)
13316; RV32ZVE32F-NEXT:    fsd fa6, 48(a0)
13317; RV32ZVE32F-NEXT:    fsd fa7, 56(a0)
13318; RV32ZVE32F-NEXT:    ret
13319; RV32ZVE32F-NEXT:  .LBB106_10: # %cond.load
13320; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
13321; RV32ZVE32F-NEXT:    fld fa0, 0(a1)
13322; RV32ZVE32F-NEXT:    andi a1, a2, 2
13323; RV32ZVE32F-NEXT:    beqz a1, .LBB106_2
13324; RV32ZVE32F-NEXT:  .LBB106_11: # %cond.load1
13325; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
13326; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
13327; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13328; RV32ZVE32F-NEXT:    fld fa1, 0(a1)
13329; RV32ZVE32F-NEXT:    andi a1, a2, 4
13330; RV32ZVE32F-NEXT:    beqz a1, .LBB106_3
13331; RV32ZVE32F-NEXT:  .LBB106_12: # %cond.load4
13332; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
13333; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
13334; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13335; RV32ZVE32F-NEXT:    fld fa2, 0(a1)
13336; RV32ZVE32F-NEXT:    andi a1, a2, 8
13337; RV32ZVE32F-NEXT:    beqz a1, .LBB106_4
13338; RV32ZVE32F-NEXT:  .LBB106_13: # %cond.load7
13339; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
13340; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
13341; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13342; RV32ZVE32F-NEXT:    fld fa3, 0(a1)
13343; RV32ZVE32F-NEXT:    andi a1, a2, 16
13344; RV32ZVE32F-NEXT:    beqz a1, .LBB106_5
13345; RV32ZVE32F-NEXT:  .LBB106_14: # %cond.load10
13346; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
13347; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
13348; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13349; RV32ZVE32F-NEXT:    fld fa4, 0(a1)
13350; RV32ZVE32F-NEXT:    andi a1, a2, 32
13351; RV32ZVE32F-NEXT:    beqz a1, .LBB106_6
13352; RV32ZVE32F-NEXT:  .LBB106_15: # %cond.load13
13353; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
13354; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
13355; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13356; RV32ZVE32F-NEXT:    fld fa5, 0(a1)
13357; RV32ZVE32F-NEXT:    andi a1, a2, 64
13358; RV32ZVE32F-NEXT:    beqz a1, .LBB106_7
13359; RV32ZVE32F-NEXT:  .LBB106_16: # %cond.load16
13360; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
13361; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
13362; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
13363; RV32ZVE32F-NEXT:    fld fa6, 0(a1)
13364; RV32ZVE32F-NEXT:    andi a1, a2, -128
13365; RV32ZVE32F-NEXT:    bnez a1, .LBB106_8
13366; RV32ZVE32F-NEXT:    j .LBB106_9
13367;
13368; RV64ZVE32F-LABEL: mgather_baseidx_v8f64:
13369; RV64ZVE32F:       # %bb.0:
13370; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
13371; RV64ZVE32F-NEXT:    vmv.x.s a3, v0
13372; RV64ZVE32F-NEXT:    andi a4, a3, 1
13373; RV64ZVE32F-NEXT:    bnez a4, .LBB106_10
13374; RV64ZVE32F-NEXT:  # %bb.1: # %else
13375; RV64ZVE32F-NEXT:    andi a4, a3, 2
13376; RV64ZVE32F-NEXT:    bnez a4, .LBB106_11
13377; RV64ZVE32F-NEXT:  .LBB106_2: # %else2
13378; RV64ZVE32F-NEXT:    andi a4, a3, 4
13379; RV64ZVE32F-NEXT:    bnez a4, .LBB106_12
13380; RV64ZVE32F-NEXT:  .LBB106_3: # %else5
13381; RV64ZVE32F-NEXT:    andi a4, a3, 8
13382; RV64ZVE32F-NEXT:    bnez a4, .LBB106_13
13383; RV64ZVE32F-NEXT:  .LBB106_4: # %else8
13384; RV64ZVE32F-NEXT:    andi a4, a3, 16
13385; RV64ZVE32F-NEXT:    bnez a4, .LBB106_14
13386; RV64ZVE32F-NEXT:  .LBB106_5: # %else11
13387; RV64ZVE32F-NEXT:    andi a4, a3, 32
13388; RV64ZVE32F-NEXT:    bnez a4, .LBB106_15
13389; RV64ZVE32F-NEXT:  .LBB106_6: # %else14
13390; RV64ZVE32F-NEXT:    andi a4, a3, 64
13391; RV64ZVE32F-NEXT:    bnez a4, .LBB106_16
13392; RV64ZVE32F-NEXT:  .LBB106_7: # %else17
13393; RV64ZVE32F-NEXT:    andi a3, a3, -128
13394; RV64ZVE32F-NEXT:    beqz a3, .LBB106_9
13395; RV64ZVE32F-NEXT:  .LBB106_8: # %cond.load19
13396; RV64ZVE32F-NEXT:    ld a2, 56(a2)
13397; RV64ZVE32F-NEXT:    slli a2, a2, 3
13398; RV64ZVE32F-NEXT:    add a1, a1, a2
13399; RV64ZVE32F-NEXT:    fld fa7, 0(a1)
13400; RV64ZVE32F-NEXT:  .LBB106_9: # %else20
13401; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
13402; RV64ZVE32F-NEXT:    fsd fa1, 8(a0)
13403; RV64ZVE32F-NEXT:    fsd fa2, 16(a0)
13404; RV64ZVE32F-NEXT:    fsd fa3, 24(a0)
13405; RV64ZVE32F-NEXT:    fsd fa4, 32(a0)
13406; RV64ZVE32F-NEXT:    fsd fa5, 40(a0)
13407; RV64ZVE32F-NEXT:    fsd fa6, 48(a0)
13408; RV64ZVE32F-NEXT:    fsd fa7, 56(a0)
13409; RV64ZVE32F-NEXT:    ret
13410; RV64ZVE32F-NEXT:  .LBB106_10: # %cond.load
13411; RV64ZVE32F-NEXT:    ld a4, 0(a2)
13412; RV64ZVE32F-NEXT:    slli a4, a4, 3
13413; RV64ZVE32F-NEXT:    add a4, a1, a4
13414; RV64ZVE32F-NEXT:    fld fa0, 0(a4)
13415; RV64ZVE32F-NEXT:    andi a4, a3, 2
13416; RV64ZVE32F-NEXT:    beqz a4, .LBB106_2
13417; RV64ZVE32F-NEXT:  .LBB106_11: # %cond.load1
13418; RV64ZVE32F-NEXT:    ld a4, 8(a2)
13419; RV64ZVE32F-NEXT:    slli a4, a4, 3
13420; RV64ZVE32F-NEXT:    add a4, a1, a4
13421; RV64ZVE32F-NEXT:    fld fa1, 0(a4)
13422; RV64ZVE32F-NEXT:    andi a4, a3, 4
13423; RV64ZVE32F-NEXT:    beqz a4, .LBB106_3
13424; RV64ZVE32F-NEXT:  .LBB106_12: # %cond.load4
13425; RV64ZVE32F-NEXT:    ld a4, 16(a2)
13426; RV64ZVE32F-NEXT:    slli a4, a4, 3
13427; RV64ZVE32F-NEXT:    add a4, a1, a4
13428; RV64ZVE32F-NEXT:    fld fa2, 0(a4)
13429; RV64ZVE32F-NEXT:    andi a4, a3, 8
13430; RV64ZVE32F-NEXT:    beqz a4, .LBB106_4
13431; RV64ZVE32F-NEXT:  .LBB106_13: # %cond.load7
13432; RV64ZVE32F-NEXT:    ld a4, 24(a2)
13433; RV64ZVE32F-NEXT:    slli a4, a4, 3
13434; RV64ZVE32F-NEXT:    add a4, a1, a4
13435; RV64ZVE32F-NEXT:    fld fa3, 0(a4)
13436; RV64ZVE32F-NEXT:    andi a4, a3, 16
13437; RV64ZVE32F-NEXT:    beqz a4, .LBB106_5
13438; RV64ZVE32F-NEXT:  .LBB106_14: # %cond.load10
13439; RV64ZVE32F-NEXT:    ld a4, 32(a2)
13440; RV64ZVE32F-NEXT:    slli a4, a4, 3
13441; RV64ZVE32F-NEXT:    add a4, a1, a4
13442; RV64ZVE32F-NEXT:    fld fa4, 0(a4)
13443; RV64ZVE32F-NEXT:    andi a4, a3, 32
13444; RV64ZVE32F-NEXT:    beqz a4, .LBB106_6
13445; RV64ZVE32F-NEXT:  .LBB106_15: # %cond.load13
13446; RV64ZVE32F-NEXT:    ld a4, 40(a2)
13447; RV64ZVE32F-NEXT:    slli a4, a4, 3
13448; RV64ZVE32F-NEXT:    add a4, a1, a4
13449; RV64ZVE32F-NEXT:    fld fa5, 0(a4)
13450; RV64ZVE32F-NEXT:    andi a4, a3, 64
13451; RV64ZVE32F-NEXT:    beqz a4, .LBB106_7
13452; RV64ZVE32F-NEXT:  .LBB106_16: # %cond.load16
13453; RV64ZVE32F-NEXT:    ld a4, 48(a2)
13454; RV64ZVE32F-NEXT:    slli a4, a4, 3
13455; RV64ZVE32F-NEXT:    add a4, a1, a4
13456; RV64ZVE32F-NEXT:    fld fa6, 0(a4)
13457; RV64ZVE32F-NEXT:    andi a3, a3, -128
13458; RV64ZVE32F-NEXT:    bnez a3, .LBB106_8
13459; RV64ZVE32F-NEXT:    j .LBB106_9
13460  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
13461  %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
13462  ret <8 x double> %v
13463}
13464
13465declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
13466
13467define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m, <16 x i8> %passthru) {
13468; RV32-LABEL: mgather_baseidx_v16i8:
13469; RV32:       # %bb.0:
13470; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
13471; RV32-NEXT:    vsext.vf4 v12, v8
13472; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
13473; RV32-NEXT:    vluxei32.v v9, (a0), v12, v0.t
13474; RV32-NEXT:    vmv.v.v v8, v9
13475; RV32-NEXT:    ret
13476;
13477; RV64V-LABEL: mgather_baseidx_v16i8:
13478; RV64V:       # %bb.0:
13479; RV64V-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
13480; RV64V-NEXT:    vsext.vf8 v16, v8
13481; RV64V-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
13482; RV64V-NEXT:    vluxei64.v v9, (a0), v16, v0.t
13483; RV64V-NEXT:    vmv.v.v v8, v9
13484; RV64V-NEXT:    ret
13485;
13486; RV64ZVE32F-LABEL: mgather_baseidx_v16i8:
13487; RV64ZVE32F:       # %bb.0:
13488; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
13489; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
13490; RV64ZVE32F-NEXT:    andi a2, a1, 1
13491; RV64ZVE32F-NEXT:    beqz a2, .LBB107_2
13492; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
13493; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
13494; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13495; RV64ZVE32F-NEXT:    add a2, a0, a2
13496; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13497; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
13498; RV64ZVE32F-NEXT:  .LBB107_2: # %else
13499; RV64ZVE32F-NEXT:    andi a2, a1, 2
13500; RV64ZVE32F-NEXT:    beqz a2, .LBB107_4
13501; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
13502; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13503; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
13504; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
13505; RV64ZVE32F-NEXT:    add a2, a0, a2
13506; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13507; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
13508; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, m1, tu, ma
13509; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
13510; RV64ZVE32F-NEXT:  .LBB107_4: # %else2
13511; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
13512; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
13513; RV64ZVE32F-NEXT:    andi a2, a1, 4
13514; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13515; RV64ZVE32F-NEXT:    vslidedown.vi v11, v8, 2
13516; RV64ZVE32F-NEXT:    bnez a2, .LBB107_25
13517; RV64ZVE32F-NEXT:  # %bb.5: # %else5
13518; RV64ZVE32F-NEXT:    andi a2, a1, 8
13519; RV64ZVE32F-NEXT:    bnez a2, .LBB107_26
13520; RV64ZVE32F-NEXT:  .LBB107_6: # %else8
13521; RV64ZVE32F-NEXT:    andi a2, a1, 16
13522; RV64ZVE32F-NEXT:    beqz a2, .LBB107_8
13523; RV64ZVE32F-NEXT:  .LBB107_7: # %cond.load10
13524; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
13525; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
13526; RV64ZVE32F-NEXT:    add a2, a0, a2
13527; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13528; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
13529; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 4
13530; RV64ZVE32F-NEXT:  .LBB107_8: # %else11
13531; RV64ZVE32F-NEXT:    andi a2, a1, 32
13532; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
13533; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 8
13534; RV64ZVE32F-NEXT:    beqz a2, .LBB107_10
13535; RV64ZVE32F-NEXT:  # %bb.9: # %cond.load13
13536; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13537; RV64ZVE32F-NEXT:    vslidedown.vi v11, v10, 1
13538; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
13539; RV64ZVE32F-NEXT:    add a2, a0, a2
13540; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13541; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
13542; RV64ZVE32F-NEXT:    vsetivli zero, 6, e8, m1, tu, ma
13543; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 5
13544; RV64ZVE32F-NEXT:  .LBB107_10: # %else14
13545; RV64ZVE32F-NEXT:    andi a2, a1, 64
13546; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13547; RV64ZVE32F-NEXT:    vslidedown.vi v10, v10, 2
13548; RV64ZVE32F-NEXT:    bnez a2, .LBB107_27
13549; RV64ZVE32F-NEXT:  # %bb.11: # %else17
13550; RV64ZVE32F-NEXT:    andi a2, a1, 128
13551; RV64ZVE32F-NEXT:    bnez a2, .LBB107_28
13552; RV64ZVE32F-NEXT:  .LBB107_12: # %else20
13553; RV64ZVE32F-NEXT:    andi a2, a1, 256
13554; RV64ZVE32F-NEXT:    bnez a2, .LBB107_29
13555; RV64ZVE32F-NEXT:  .LBB107_13: # %else23
13556; RV64ZVE32F-NEXT:    andi a2, a1, 512
13557; RV64ZVE32F-NEXT:    beqz a2, .LBB107_15
13558; RV64ZVE32F-NEXT:  .LBB107_14: # %cond.load25
13559; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13560; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
13561; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
13562; RV64ZVE32F-NEXT:    add a2, a0, a2
13563; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13564; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
13565; RV64ZVE32F-NEXT:    vsetivli zero, 10, e8, m1, tu, ma
13566; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 9
13567; RV64ZVE32F-NEXT:  .LBB107_15: # %else26
13568; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
13569; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
13570; RV64ZVE32F-NEXT:    andi a2, a1, 1024
13571; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13572; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
13573; RV64ZVE32F-NEXT:    bnez a2, .LBB107_30
13574; RV64ZVE32F-NEXT:  # %bb.16: # %else29
13575; RV64ZVE32F-NEXT:    slli a2, a1, 52
13576; RV64ZVE32F-NEXT:    bltz a2, .LBB107_31
13577; RV64ZVE32F-NEXT:  .LBB107_17: # %else32
13578; RV64ZVE32F-NEXT:    slli a2, a1, 51
13579; RV64ZVE32F-NEXT:    bltz a2, .LBB107_32
13580; RV64ZVE32F-NEXT:  .LBB107_18: # %else35
13581; RV64ZVE32F-NEXT:    slli a2, a1, 50
13582; RV64ZVE32F-NEXT:    bgez a2, .LBB107_20
13583; RV64ZVE32F-NEXT:  .LBB107_19: # %cond.load37
13584; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13585; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 1
13586; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13587; RV64ZVE32F-NEXT:    add a2, a0, a2
13588; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13589; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
13590; RV64ZVE32F-NEXT:    vsetivli zero, 14, e8, m1, tu, ma
13591; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 13
13592; RV64ZVE32F-NEXT:  .LBB107_20: # %else38
13593; RV64ZVE32F-NEXT:    slli a2, a1, 49
13594; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13595; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 2
13596; RV64ZVE32F-NEXT:    bgez a2, .LBB107_22
13597; RV64ZVE32F-NEXT:  # %bb.21: # %cond.load40
13598; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13599; RV64ZVE32F-NEXT:    add a2, a0, a2
13600; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13601; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
13602; RV64ZVE32F-NEXT:    vsetivli zero, 15, e8, m1, tu, ma
13603; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 14
13604; RV64ZVE32F-NEXT:  .LBB107_22: # %else41
13605; RV64ZVE32F-NEXT:    lui a2, 1048568
13606; RV64ZVE32F-NEXT:    and a1, a1, a2
13607; RV64ZVE32F-NEXT:    beqz a1, .LBB107_24
13608; RV64ZVE32F-NEXT:  # %bb.23: # %cond.load43
13609; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13610; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
13611; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
13612; RV64ZVE32F-NEXT:    add a0, a0, a1
13613; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
13614; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
13615; RV64ZVE32F-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
13616; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 15
13617; RV64ZVE32F-NEXT:  .LBB107_24: # %else44
13618; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
13619; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
13620; RV64ZVE32F-NEXT:    ret
13621; RV64ZVE32F-NEXT:  .LBB107_25: # %cond.load4
13622; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
13623; RV64ZVE32F-NEXT:    add a2, a0, a2
13624; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13625; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13626; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, m1, tu, ma
13627; RV64ZVE32F-NEXT:    vslideup.vi v9, v12, 2
13628; RV64ZVE32F-NEXT:    andi a2, a1, 8
13629; RV64ZVE32F-NEXT:    beqz a2, .LBB107_6
13630; RV64ZVE32F-NEXT:  .LBB107_26: # %cond.load7
13631; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13632; RV64ZVE32F-NEXT:    vslidedown.vi v11, v11, 1
13633; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
13634; RV64ZVE32F-NEXT:    add a2, a0, a2
13635; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13636; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
13637; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
13638; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 3
13639; RV64ZVE32F-NEXT:    andi a2, a1, 16
13640; RV64ZVE32F-NEXT:    bnez a2, .LBB107_7
13641; RV64ZVE32F-NEXT:    j .LBB107_8
13642; RV64ZVE32F-NEXT:  .LBB107_27: # %cond.load16
13643; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
13644; RV64ZVE32F-NEXT:    add a2, a0, a2
13645; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13646; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
13647; RV64ZVE32F-NEXT:    vsetivli zero, 7, e8, m1, tu, ma
13648; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 6
13649; RV64ZVE32F-NEXT:    andi a2, a1, 128
13650; RV64ZVE32F-NEXT:    beqz a2, .LBB107_12
13651; RV64ZVE32F-NEXT:  .LBB107_28: # %cond.load19
13652; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13653; RV64ZVE32F-NEXT:    vslidedown.vi v10, v10, 1
13654; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
13655; RV64ZVE32F-NEXT:    add a2, a0, a2
13656; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13657; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
13658; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
13659; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 7
13660; RV64ZVE32F-NEXT:    andi a2, a1, 256
13661; RV64ZVE32F-NEXT:    beqz a2, .LBB107_13
13662; RV64ZVE32F-NEXT:  .LBB107_29: # %cond.load22
13663; RV64ZVE32F-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
13664; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13665; RV64ZVE32F-NEXT:    add a2, a0, a2
13666; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13667; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
13668; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 8
13669; RV64ZVE32F-NEXT:    andi a2, a1, 512
13670; RV64ZVE32F-NEXT:    bnez a2, .LBB107_14
13671; RV64ZVE32F-NEXT:    j .LBB107_15
13672; RV64ZVE32F-NEXT:  .LBB107_30: # %cond.load28
13673; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13674; RV64ZVE32F-NEXT:    add a2, a0, a2
13675; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13676; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
13677; RV64ZVE32F-NEXT:    vsetivli zero, 11, e8, m1, tu, ma
13678; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 10
13679; RV64ZVE32F-NEXT:    slli a2, a1, 52
13680; RV64ZVE32F-NEXT:    bgez a2, .LBB107_17
13681; RV64ZVE32F-NEXT:  .LBB107_31: # %cond.load31
13682; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13683; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
13684; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13685; RV64ZVE32F-NEXT:    add a2, a0, a2
13686; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13687; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
13688; RV64ZVE32F-NEXT:    vsetivli zero, 12, e8, m1, tu, ma
13689; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 11
13690; RV64ZVE32F-NEXT:    slli a2, a1, 51
13691; RV64ZVE32F-NEXT:    bgez a2, .LBB107_18
13692; RV64ZVE32F-NEXT:  .LBB107_32: # %cond.load34
13693; RV64ZVE32F-NEXT:    vsetivli zero, 13, e8, m1, tu, ma
13694; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
13695; RV64ZVE32F-NEXT:    add a2, a0, a2
13696; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13697; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
13698; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 12
13699; RV64ZVE32F-NEXT:    slli a2, a1, 50
13700; RV64ZVE32F-NEXT:    bltz a2, .LBB107_19
13701; RV64ZVE32F-NEXT:    j .LBB107_20
13702  %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
13703  %v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
13704  ret <16 x i8> %v
13705}
13706
13707declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
13708
13709define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, <32 x i8> %passthru) {
13710; RV32-LABEL: mgather_baseidx_v32i8:
13711; RV32:       # %bb.0:
13712; RV32-NEXT:    li a1, 32
13713; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
13714; RV32-NEXT:    vsext.vf4 v16, v8
13715; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
13716; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
13717; RV32-NEXT:    vmv.v.v v8, v10
13718; RV32-NEXT:    ret
13719;
13720; RV64V-LABEL: mgather_baseidx_v32i8:
13721; RV64V:       # %bb.0:
13722; RV64V-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
13723; RV64V-NEXT:    vsext.vf8 v16, v8
13724; RV64V-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
13725; RV64V-NEXT:    vslidedown.vi v12, v10, 16
13726; RV64V-NEXT:    vslidedown.vi v14, v8, 16
13727; RV64V-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13728; RV64V-NEXT:    vslidedown.vi v8, v0, 2
13729; RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
13730; RV64V-NEXT:    vluxei64.v v10, (a0), v16, v0.t
13731; RV64V-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
13732; RV64V-NEXT:    vsext.vf8 v16, v14
13733; RV64V-NEXT:    vmv1r.v v0, v8
13734; RV64V-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
13735; RV64V-NEXT:    vluxei64.v v12, (a0), v16, v0.t
13736; RV64V-NEXT:    li a0, 32
13737; RV64V-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
13738; RV64V-NEXT:    vslideup.vi v10, v12, 16
13739; RV64V-NEXT:    vmv.v.v v8, v10
13740; RV64V-NEXT:    ret
13741;
13742; RV64ZVE32F-LABEL: mgather_baseidx_v32i8:
13743; RV64ZVE32F:       # %bb.0:
13744; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
13745; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
13746; RV64ZVE32F-NEXT:    andi a2, a1, 1
13747; RV64ZVE32F-NEXT:    beqz a2, .LBB108_2
13748; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
13749; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
13750; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13751; RV64ZVE32F-NEXT:    add a2, a0, a2
13752; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13753; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
13754; RV64ZVE32F-NEXT:  .LBB108_2: # %else
13755; RV64ZVE32F-NEXT:    andi a2, a1, 2
13756; RV64ZVE32F-NEXT:    beqz a2, .LBB108_4
13757; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
13758; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
13759; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 1
13760; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
13761; RV64ZVE32F-NEXT:    add a2, a0, a2
13762; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13763; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13764; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, m1, tu, ma
13765; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 1
13766; RV64ZVE32F-NEXT:  .LBB108_4: # %else2
13767; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
13768; RV64ZVE32F-NEXT:    vslidedown.vi v13, v8, 4
13769; RV64ZVE32F-NEXT:    andi a2, a1, 4
13770; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13771; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 2
13772; RV64ZVE32F-NEXT:    bnez a2, .LBB108_49
13773; RV64ZVE32F-NEXT:  # %bb.5: # %else5
13774; RV64ZVE32F-NEXT:    andi a2, a1, 8
13775; RV64ZVE32F-NEXT:    bnez a2, .LBB108_50
13776; RV64ZVE32F-NEXT:  .LBB108_6: # %else8
13777; RV64ZVE32F-NEXT:    andi a2, a1, 16
13778; RV64ZVE32F-NEXT:    beqz a2, .LBB108_8
13779; RV64ZVE32F-NEXT:  .LBB108_7: # %cond.load10
13780; RV64ZVE32F-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
13781; RV64ZVE32F-NEXT:    vmv.x.s a2, v13
13782; RV64ZVE32F-NEXT:    add a2, a0, a2
13783; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13784; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13785; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
13786; RV64ZVE32F-NEXT:  .LBB108_8: # %else11
13787; RV64ZVE32F-NEXT:    andi a2, a1, 32
13788; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
13789; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 8
13790; RV64ZVE32F-NEXT:    beqz a2, .LBB108_10
13791; RV64ZVE32F-NEXT:  # %bb.9: # %cond.load13
13792; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13793; RV64ZVE32F-NEXT:    vslidedown.vi v14, v13, 1
13794; RV64ZVE32F-NEXT:    vmv.x.s a2, v14
13795; RV64ZVE32F-NEXT:    add a2, a0, a2
13796; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13797; RV64ZVE32F-NEXT:    vmv.s.x v14, a2
13798; RV64ZVE32F-NEXT:    vsetivli zero, 6, e8, m1, tu, ma
13799; RV64ZVE32F-NEXT:    vslideup.vi v10, v14, 5
13800; RV64ZVE32F-NEXT:  .LBB108_10: # %else14
13801; RV64ZVE32F-NEXT:    andi a2, a1, 64
13802; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13803; RV64ZVE32F-NEXT:    vslidedown.vi v13, v13, 2
13804; RV64ZVE32F-NEXT:    bnez a2, .LBB108_51
13805; RV64ZVE32F-NEXT:  # %bb.11: # %else17
13806; RV64ZVE32F-NEXT:    andi a2, a1, 128
13807; RV64ZVE32F-NEXT:    bnez a2, .LBB108_52
13808; RV64ZVE32F-NEXT:  .LBB108_12: # %else20
13809; RV64ZVE32F-NEXT:    andi a2, a1, 256
13810; RV64ZVE32F-NEXT:    bnez a2, .LBB108_53
13811; RV64ZVE32F-NEXT:  .LBB108_13: # %else23
13812; RV64ZVE32F-NEXT:    andi a2, a1, 512
13813; RV64ZVE32F-NEXT:    beqz a2, .LBB108_15
13814; RV64ZVE32F-NEXT:  .LBB108_14: # %cond.load25
13815; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13816; RV64ZVE32F-NEXT:    vslidedown.vi v13, v12, 1
13817; RV64ZVE32F-NEXT:    vmv.x.s a2, v13
13818; RV64ZVE32F-NEXT:    add a2, a0, a2
13819; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13820; RV64ZVE32F-NEXT:    vmv.s.x v13, a2
13821; RV64ZVE32F-NEXT:    vsetivli zero, 10, e8, m1, tu, ma
13822; RV64ZVE32F-NEXT:    vslideup.vi v10, v13, 9
13823; RV64ZVE32F-NEXT:  .LBB108_15: # %else26
13824; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
13825; RV64ZVE32F-NEXT:    vslidedown.vi v13, v12, 4
13826; RV64ZVE32F-NEXT:    andi a2, a1, 1024
13827; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13828; RV64ZVE32F-NEXT:    vslidedown.vi v12, v12, 2
13829; RV64ZVE32F-NEXT:    beqz a2, .LBB108_17
13830; RV64ZVE32F-NEXT:  # %bb.16: # %cond.load28
13831; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
13832; RV64ZVE32F-NEXT:    add a2, a0, a2
13833; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13834; RV64ZVE32F-NEXT:    vmv.s.x v14, a2
13835; RV64ZVE32F-NEXT:    vsetivli zero, 11, e8, m1, tu, ma
13836; RV64ZVE32F-NEXT:    vslideup.vi v10, v14, 10
13837; RV64ZVE32F-NEXT:  .LBB108_17: # %else29
13838; RV64ZVE32F-NEXT:    slli a2, a1, 52
13839; RV64ZVE32F-NEXT:    bgez a2, .LBB108_19
13840; RV64ZVE32F-NEXT:  # %bb.18: # %cond.load31
13841; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13842; RV64ZVE32F-NEXT:    vslidedown.vi v12, v12, 1
13843; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
13844; RV64ZVE32F-NEXT:    add a2, a0, a2
13845; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13846; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13847; RV64ZVE32F-NEXT:    vsetivli zero, 12, e8, m1, tu, ma
13848; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 11
13849; RV64ZVE32F-NEXT:  .LBB108_19: # %else32
13850; RV64ZVE32F-NEXT:    slli a2, a1, 51
13851; RV64ZVE32F-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
13852; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 16
13853; RV64ZVE32F-NEXT:    bgez a2, .LBB108_21
13854; RV64ZVE32F-NEXT:  # %bb.20: # %cond.load34
13855; RV64ZVE32F-NEXT:    vmv.x.s a2, v13
13856; RV64ZVE32F-NEXT:    add a2, a0, a2
13857; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13858; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
13859; RV64ZVE32F-NEXT:    vsetivli zero, 13, e8, m1, tu, ma
13860; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 12
13861; RV64ZVE32F-NEXT:  .LBB108_21: # %else35
13862; RV64ZVE32F-NEXT:    slli a2, a1, 50
13863; RV64ZVE32F-NEXT:    bgez a2, .LBB108_23
13864; RV64ZVE32F-NEXT:  # %bb.22: # %cond.load37
13865; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13866; RV64ZVE32F-NEXT:    vslidedown.vi v9, v13, 1
13867; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
13868; RV64ZVE32F-NEXT:    add a2, a0, a2
13869; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13870; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
13871; RV64ZVE32F-NEXT:    vsetivli zero, 14, e8, m1, tu, ma
13872; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 13
13873; RV64ZVE32F-NEXT:  .LBB108_23: # %else38
13874; RV64ZVE32F-NEXT:    slli a2, a1, 49
13875; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13876; RV64ZVE32F-NEXT:    vslidedown.vi v9, v13, 2
13877; RV64ZVE32F-NEXT:    bltz a2, .LBB108_54
13878; RV64ZVE32F-NEXT:  # %bb.24: # %else41
13879; RV64ZVE32F-NEXT:    slli a2, a1, 48
13880; RV64ZVE32F-NEXT:    bltz a2, .LBB108_55
13881; RV64ZVE32F-NEXT:  .LBB108_25: # %else44
13882; RV64ZVE32F-NEXT:    slli a2, a1, 47
13883; RV64ZVE32F-NEXT:    bltz a2, .LBB108_56
13884; RV64ZVE32F-NEXT:  .LBB108_26: # %else47
13885; RV64ZVE32F-NEXT:    slli a2, a1, 46
13886; RV64ZVE32F-NEXT:    bgez a2, .LBB108_28
13887; RV64ZVE32F-NEXT:  .LBB108_27: # %cond.load49
13888; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13889; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
13890; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
13891; RV64ZVE32F-NEXT:    add a2, a0, a2
13892; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13893; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13894; RV64ZVE32F-NEXT:    vsetivli zero, 18, e8, m2, tu, ma
13895; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 17
13896; RV64ZVE32F-NEXT:  .LBB108_28: # %else50
13897; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
13898; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
13899; RV64ZVE32F-NEXT:    slli a2, a1, 45
13900; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13901; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 2
13902; RV64ZVE32F-NEXT:    bltz a2, .LBB108_57
13903; RV64ZVE32F-NEXT:  # %bb.29: # %else53
13904; RV64ZVE32F-NEXT:    slli a2, a1, 44
13905; RV64ZVE32F-NEXT:    bltz a2, .LBB108_58
13906; RV64ZVE32F-NEXT:  .LBB108_30: # %else56
13907; RV64ZVE32F-NEXT:    slli a2, a1, 43
13908; RV64ZVE32F-NEXT:    bgez a2, .LBB108_32
13909; RV64ZVE32F-NEXT:  .LBB108_31: # %cond.load58
13910; RV64ZVE32F-NEXT:    vsetivli zero, 21, e8, m2, tu, ma
13911; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
13912; RV64ZVE32F-NEXT:    add a2, a0, a2
13913; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13914; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13915; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 20
13916; RV64ZVE32F-NEXT:  .LBB108_32: # %else59
13917; RV64ZVE32F-NEXT:    slli a2, a1, 42
13918; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
13919; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 8
13920; RV64ZVE32F-NEXT:    bgez a2, .LBB108_34
13921; RV64ZVE32F-NEXT:  # %bb.33: # %cond.load61
13922; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13923; RV64ZVE32F-NEXT:    vslidedown.vi v12, v9, 1
13924; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
13925; RV64ZVE32F-NEXT:    add a2, a0, a2
13926; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13927; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13928; RV64ZVE32F-NEXT:    vsetivli zero, 22, e8, m2, tu, ma
13929; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 21
13930; RV64ZVE32F-NEXT:  .LBB108_34: # %else62
13931; RV64ZVE32F-NEXT:    slli a2, a1, 41
13932; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13933; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 2
13934; RV64ZVE32F-NEXT:    bltz a2, .LBB108_59
13935; RV64ZVE32F-NEXT:  # %bb.35: # %else65
13936; RV64ZVE32F-NEXT:    slli a2, a1, 40
13937; RV64ZVE32F-NEXT:    bltz a2, .LBB108_60
13938; RV64ZVE32F-NEXT:  .LBB108_36: # %else68
13939; RV64ZVE32F-NEXT:    slli a2, a1, 39
13940; RV64ZVE32F-NEXT:    bltz a2, .LBB108_61
13941; RV64ZVE32F-NEXT:  .LBB108_37: # %else71
13942; RV64ZVE32F-NEXT:    slli a2, a1, 38
13943; RV64ZVE32F-NEXT:    bgez a2, .LBB108_39
13944; RV64ZVE32F-NEXT:  .LBB108_38: # %cond.load73
13945; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13946; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
13947; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
13948; RV64ZVE32F-NEXT:    add a2, a0, a2
13949; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13950; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13951; RV64ZVE32F-NEXT:    vsetivli zero, 26, e8, m2, tu, ma
13952; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 25
13953; RV64ZVE32F-NEXT:  .LBB108_39: # %else74
13954; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
13955; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
13956; RV64ZVE32F-NEXT:    slli a2, a1, 37
13957; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13958; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
13959; RV64ZVE32F-NEXT:    bltz a2, .LBB108_62
13960; RV64ZVE32F-NEXT:  # %bb.40: # %else77
13961; RV64ZVE32F-NEXT:    slli a2, a1, 36
13962; RV64ZVE32F-NEXT:    bltz a2, .LBB108_63
13963; RV64ZVE32F-NEXT:  .LBB108_41: # %else80
13964; RV64ZVE32F-NEXT:    slli a2, a1, 35
13965; RV64ZVE32F-NEXT:    bltz a2, .LBB108_64
13966; RV64ZVE32F-NEXT:  .LBB108_42: # %else83
13967; RV64ZVE32F-NEXT:    slli a2, a1, 34
13968; RV64ZVE32F-NEXT:    bgez a2, .LBB108_44
13969; RV64ZVE32F-NEXT:  .LBB108_43: # %cond.load85
13970; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13971; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
13972; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13973; RV64ZVE32F-NEXT:    add a2, a0, a2
13974; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13975; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13976; RV64ZVE32F-NEXT:    vsetivli zero, 30, e8, m2, tu, ma
13977; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 29
13978; RV64ZVE32F-NEXT:  .LBB108_44: # %else86
13979; RV64ZVE32F-NEXT:    slli a2, a1, 33
13980; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
13981; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
13982; RV64ZVE32F-NEXT:    bgez a2, .LBB108_46
13983; RV64ZVE32F-NEXT:  # %bb.45: # %cond.load88
13984; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
13985; RV64ZVE32F-NEXT:    add a2, a0, a2
13986; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
13987; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
13988; RV64ZVE32F-NEXT:    vsetivli zero, 31, e8, m2, tu, ma
13989; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 30
13990; RV64ZVE32F-NEXT:  .LBB108_46: # %else89
13991; RV64ZVE32F-NEXT:    lui a2, 524288
13992; RV64ZVE32F-NEXT:    and a1, a1, a2
13993; RV64ZVE32F-NEXT:    beqz a1, .LBB108_48
13994; RV64ZVE32F-NEXT:  # %bb.47: # %cond.load91
13995; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
13996; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
13997; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
13998; RV64ZVE32F-NEXT:    add a0, a0, a1
13999; RV64ZVE32F-NEXT:    lbu a0, 0(a0)
14000; RV64ZVE32F-NEXT:    li a1, 32
14001; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
14002; RV64ZVE32F-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
14003; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 31
14004; RV64ZVE32F-NEXT:  .LBB108_48: # %else92
14005; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
14006; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
14007; RV64ZVE32F-NEXT:    ret
14008; RV64ZVE32F-NEXT:  .LBB108_49: # %cond.load4
14009; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
14010; RV64ZVE32F-NEXT:    add a2, a0, a2
14011; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14012; RV64ZVE32F-NEXT:    vmv.s.x v14, a2
14013; RV64ZVE32F-NEXT:    vsetivli zero, 3, e8, m1, tu, ma
14014; RV64ZVE32F-NEXT:    vslideup.vi v10, v14, 2
14015; RV64ZVE32F-NEXT:    andi a2, a1, 8
14016; RV64ZVE32F-NEXT:    beqz a2, .LBB108_6
14017; RV64ZVE32F-NEXT:  .LBB108_50: # %cond.load7
14018; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
14019; RV64ZVE32F-NEXT:    vslidedown.vi v12, v12, 1
14020; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
14021; RV64ZVE32F-NEXT:    add a2, a0, a2
14022; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14023; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14024; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
14025; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 3
14026; RV64ZVE32F-NEXT:    andi a2, a1, 16
14027; RV64ZVE32F-NEXT:    bnez a2, .LBB108_7
14028; RV64ZVE32F-NEXT:    j .LBB108_8
14029; RV64ZVE32F-NEXT:  .LBB108_51: # %cond.load16
14030; RV64ZVE32F-NEXT:    vmv.x.s a2, v13
14031; RV64ZVE32F-NEXT:    add a2, a0, a2
14032; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14033; RV64ZVE32F-NEXT:    vmv.s.x v14, a2
14034; RV64ZVE32F-NEXT:    vsetivli zero, 7, e8, m1, tu, ma
14035; RV64ZVE32F-NEXT:    vslideup.vi v10, v14, 6
14036; RV64ZVE32F-NEXT:    andi a2, a1, 128
14037; RV64ZVE32F-NEXT:    beqz a2, .LBB108_12
14038; RV64ZVE32F-NEXT:  .LBB108_52: # %cond.load19
14039; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
14040; RV64ZVE32F-NEXT:    vslidedown.vi v13, v13, 1
14041; RV64ZVE32F-NEXT:    vmv.x.s a2, v13
14042; RV64ZVE32F-NEXT:    add a2, a0, a2
14043; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14044; RV64ZVE32F-NEXT:    vmv.s.x v13, a2
14045; RV64ZVE32F-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
14046; RV64ZVE32F-NEXT:    vslideup.vi v10, v13, 7
14047; RV64ZVE32F-NEXT:    andi a2, a1, 256
14048; RV64ZVE32F-NEXT:    beqz a2, .LBB108_13
14049; RV64ZVE32F-NEXT:  .LBB108_53: # %cond.load22
14050; RV64ZVE32F-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
14051; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
14052; RV64ZVE32F-NEXT:    add a2, a0, a2
14053; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14054; RV64ZVE32F-NEXT:    vmv.s.x v13, a2
14055; RV64ZVE32F-NEXT:    vslideup.vi v10, v13, 8
14056; RV64ZVE32F-NEXT:    andi a2, a1, 512
14057; RV64ZVE32F-NEXT:    bnez a2, .LBB108_14
14058; RV64ZVE32F-NEXT:    j .LBB108_15
14059; RV64ZVE32F-NEXT:  .LBB108_54: # %cond.load40
14060; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
14061; RV64ZVE32F-NEXT:    add a2, a0, a2
14062; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14063; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14064; RV64ZVE32F-NEXT:    vsetivli zero, 15, e8, m1, tu, ma
14065; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 14
14066; RV64ZVE32F-NEXT:    slli a2, a1, 48
14067; RV64ZVE32F-NEXT:    bgez a2, .LBB108_25
14068; RV64ZVE32F-NEXT:  .LBB108_55: # %cond.load43
14069; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
14070; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
14071; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
14072; RV64ZVE32F-NEXT:    add a2, a0, a2
14073; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14074; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
14075; RV64ZVE32F-NEXT:    vsetivli zero, 16, e8, m1, tu, ma
14076; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 15
14077; RV64ZVE32F-NEXT:    slli a2, a1, 47
14078; RV64ZVE32F-NEXT:    bgez a2, .LBB108_26
14079; RV64ZVE32F-NEXT:  .LBB108_56: # %cond.load46
14080; RV64ZVE32F-NEXT:    vsetivli zero, 17, e8, m2, tu, ma
14081; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
14082; RV64ZVE32F-NEXT:    add a2, a0, a2
14083; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14084; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14085; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 16
14086; RV64ZVE32F-NEXT:    slli a2, a1, 46
14087; RV64ZVE32F-NEXT:    bltz a2, .LBB108_27
14088; RV64ZVE32F-NEXT:    j .LBB108_28
14089; RV64ZVE32F-NEXT:  .LBB108_57: # %cond.load52
14090; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
14091; RV64ZVE32F-NEXT:    add a2, a0, a2
14092; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14093; RV64ZVE32F-NEXT:    vmv.s.x v14, a2
14094; RV64ZVE32F-NEXT:    vsetivli zero, 19, e8, m2, tu, ma
14095; RV64ZVE32F-NEXT:    vslideup.vi v10, v14, 18
14096; RV64ZVE32F-NEXT:    slli a2, a1, 44
14097; RV64ZVE32F-NEXT:    bgez a2, .LBB108_30
14098; RV64ZVE32F-NEXT:  .LBB108_58: # %cond.load55
14099; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
14100; RV64ZVE32F-NEXT:    vslidedown.vi v12, v12, 1
14101; RV64ZVE32F-NEXT:    vmv.x.s a2, v12
14102; RV64ZVE32F-NEXT:    add a2, a0, a2
14103; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14104; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14105; RV64ZVE32F-NEXT:    vsetivli zero, 20, e8, m2, tu, ma
14106; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 19
14107; RV64ZVE32F-NEXT:    slli a2, a1, 43
14108; RV64ZVE32F-NEXT:    bltz a2, .LBB108_31
14109; RV64ZVE32F-NEXT:    j .LBB108_32
14110; RV64ZVE32F-NEXT:  .LBB108_59: # %cond.load64
14111; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
14112; RV64ZVE32F-NEXT:    add a2, a0, a2
14113; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14114; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14115; RV64ZVE32F-NEXT:    vsetivli zero, 23, e8, m2, tu, ma
14116; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 22
14117; RV64ZVE32F-NEXT:    slli a2, a1, 40
14118; RV64ZVE32F-NEXT:    bgez a2, .LBB108_36
14119; RV64ZVE32F-NEXT:  .LBB108_60: # %cond.load67
14120; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
14121; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
14122; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
14123; RV64ZVE32F-NEXT:    add a2, a0, a2
14124; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14125; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14126; RV64ZVE32F-NEXT:    vsetivli zero, 24, e8, m2, tu, ma
14127; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 23
14128; RV64ZVE32F-NEXT:    slli a2, a1, 39
14129; RV64ZVE32F-NEXT:    bgez a2, .LBB108_37
14130; RV64ZVE32F-NEXT:  .LBB108_61: # %cond.load70
14131; RV64ZVE32F-NEXT:    vsetivli zero, 25, e8, m2, tu, ma
14132; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
14133; RV64ZVE32F-NEXT:    add a2, a0, a2
14134; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14135; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14136; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 24
14137; RV64ZVE32F-NEXT:    slli a2, a1, 38
14138; RV64ZVE32F-NEXT:    bltz a2, .LBB108_38
14139; RV64ZVE32F-NEXT:    j .LBB108_39
14140; RV64ZVE32F-NEXT:  .LBB108_62: # %cond.load76
14141; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
14142; RV64ZVE32F-NEXT:    add a2, a0, a2
14143; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14144; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14145; RV64ZVE32F-NEXT:    vsetivli zero, 27, e8, m2, tu, ma
14146; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 26
14147; RV64ZVE32F-NEXT:    slli a2, a1, 36
14148; RV64ZVE32F-NEXT:    bgez a2, .LBB108_41
14149; RV64ZVE32F-NEXT:  .LBB108_63: # %cond.load79
14150; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
14151; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
14152; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
14153; RV64ZVE32F-NEXT:    add a2, a0, a2
14154; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14155; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14156; RV64ZVE32F-NEXT:    vsetivli zero, 28, e8, m2, tu, ma
14157; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 27
14158; RV64ZVE32F-NEXT:    slli a2, a1, 35
14159; RV64ZVE32F-NEXT:    bgez a2, .LBB108_42
14160; RV64ZVE32F-NEXT:  .LBB108_64: # %cond.load82
14161; RV64ZVE32F-NEXT:    vsetivli zero, 29, e8, m2, tu, ma
14162; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
14163; RV64ZVE32F-NEXT:    add a2, a0, a2
14164; RV64ZVE32F-NEXT:    lbu a2, 0(a2)
14165; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
14166; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 28
14167; RV64ZVE32F-NEXT:    slli a2, a1, 34
14168; RV64ZVE32F-NEXT:    bltz a2, .LBB108_43
14169; RV64ZVE32F-NEXT:    j .LBB108_44
14170  %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
14171  %v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
14172  ret <32 x i8> %v
14173}
14174
14175
14176define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
14177; CHECK-LABEL: mgather_broadcast_load_unmasked:
14178; CHECK:       # %bb.0:
14179; CHECK-NEXT:    lw a0, 0(a0)
14180; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14181; CHECK-NEXT:    vmv.v.x v8, a0
14182; CHECK-NEXT:    ret
14183  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
14184  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14185  ret <4 x i32> %v
14186}
14187
14188; Same as previous, but use an explicit splat instead of splat-via-gep
14189define <4 x i32> @mgather_broadcast_load_unmasked2(ptr %base) {
14190; CHECK-LABEL: mgather_broadcast_load_unmasked2:
14191; CHECK:       # %bb.0:
14192; CHECK-NEXT:    lw a0, 0(a0)
14193; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14194; CHECK-NEXT:    vmv.v.x v8, a0
14195; CHECK-NEXT:    ret
14196  %ptrhead = insertelement <4 x ptr> poison, ptr %base, i32 0
14197  %ptrs = shufflevector <4 x ptr> %ptrhead, <4 x ptr> poison, <4 x i32> zeroinitializer
14198  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14199  ret <4 x i32> %v
14200}
14201
14202define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
14203; CHECK-LABEL: mgather_broadcast_load_masked:
14204; CHECK:       # %bb.0:
14205; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14206; CHECK-NEXT:    vlse32.v v8, (a0), zero, v0.t
14207; CHECK-NEXT:    ret
14208  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
14209  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> poison)
14210  ret <4 x i32> %v
14211}
14212
14213define <4 x i32> @mgather_unit_stride_load(ptr %base) {
14214; CHECK-LABEL: mgather_unit_stride_load:
14215; CHECK:       # %bb.0:
14216; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14217; CHECK-NEXT:    vle32.v v8, (a0)
14218; CHECK-NEXT:    ret
14219  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32>  <i32 0, i32 1, i32 2, i32 3>
14220  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14221  ret <4 x i32> %v
14222}
14223
14224define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
14225; CHECK-LABEL: mgather_unit_stride_load_with_offset:
14226; CHECK:       # %bb.0:
14227; CHECK-NEXT:    addi a0, a0, 16
14228; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14229; CHECK-NEXT:    vle32.v v8, (a0)
14230; CHECK-NEXT:    ret
14231  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32>  <i32 4, i32 5, i32 6, i32 7>
14232  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14233  ret <4 x i32> %v
14234}
14235
14236define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
14237; CHECK-LABEL: mgather_unit_stride_load_narrow_idx:
14238; CHECK:       # %bb.0:
14239; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14240; CHECK-NEXT:    vle32.v v8, (a0)
14241; CHECK-NEXT:    ret
14242  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8>  <i8 0, i8 1, i8 2, i8 3>
14243  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14244  ret <4 x i32> %v
14245}
14246
14247define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
14248; CHECK-LABEL: mgather_unit_stride_load_wide_idx:
14249; CHECK:       # %bb.0:
14250; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14251; CHECK-NEXT:    vle32.v v8, (a0)
14252; CHECK-NEXT:    ret
14253  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i128>  <i128 0, i128 1, i128 2, i128 3>
14254  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14255  ret <4 x i32> %v
14256}
14257
14258; This looks like a strided load (at i8), but isn't at the index type.
14259define <4 x i32> @mgather_narrow_edge_case(ptr %base) {
14260; RV32-LABEL: mgather_narrow_edge_case:
14261; RV32:       # %bb.0:
14262; RV32-NEXT:    li a1, -512
14263; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14264; RV32-NEXT:    vmv.v.i v0, 5
14265; RV32-NEXT:    vmv.v.x v8, a1
14266; RV32-NEXT:    vmerge.vim v8, v8, 0, v0
14267; RV32-NEXT:    vluxei32.v v8, (a0), v8
14268; RV32-NEXT:    ret
14269;
14270; RV64V-LABEL: mgather_narrow_edge_case:
14271; RV64V:       # %bb.0:
14272; RV64V-NEXT:    li a1, -512
14273; RV64V-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
14274; RV64V-NEXT:    vmv.v.i v0, 5
14275; RV64V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
14276; RV64V-NEXT:    vmv.v.x v8, a1
14277; RV64V-NEXT:    vmerge.vim v10, v8, 0, v0
14278; RV64V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
14279; RV64V-NEXT:    vluxei64.v v8, (a0), v10
14280; RV64V-NEXT:    ret
14281;
14282; RV64ZVE32F-LABEL: mgather_narrow_edge_case:
14283; RV64ZVE32F:       # %bb.0:
14284; RV64ZVE32F-NEXT:    lw a1, -512(a0)
14285; RV64ZVE32F-NEXT:    lw a0, 0(a0)
14286; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14287; RV64ZVE32F-NEXT:    vmv.v.i v0, 5
14288; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
14289; RV64ZVE32F-NEXT:    vmerge.vxm v8, v8, a0, v0
14290; RV64ZVE32F-NEXT:    ret
14291  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8>  <i8 0, i8 128, i8 0, i8 128>
14292  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14293  ret <4 x i32> %v
14294}
14295
14296define <8 x i16> @mgather_strided_unaligned(ptr %base) {
14297; RV32-LABEL: mgather_strided_unaligned:
14298; RV32:       # %bb.0:
14299; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
14300; RV32-NEXT:    vid.v v8
14301; RV32-NEXT:    vsll.vi v8, v8, 2
14302; RV32-NEXT:    vadd.vx v8, v8, a0
14303; RV32-NEXT:    vmv.x.s a0, v8
14304; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
14305; RV32-NEXT:    vslidedown.vi v10, v8, 1
14306; RV32-NEXT:    vslidedown.vi v11, v8, 2
14307; RV32-NEXT:    vmv.x.s a1, v10
14308; RV32-NEXT:    vslidedown.vi v10, v8, 3
14309; RV32-NEXT:    vmv.x.s a2, v11
14310; RV32-NEXT:    vmv.x.s a3, v10
14311; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
14312; RV32-NEXT:    vslidedown.vi v10, v8, 4
14313; RV32-NEXT:    vmv.x.s a4, v10
14314; RV32-NEXT:    vslidedown.vi v10, v8, 5
14315; RV32-NEXT:    vmv.x.s a5, v10
14316; RV32-NEXT:    vslidedown.vi v10, v8, 6
14317; RV32-NEXT:    vslidedown.vi v8, v8, 7
14318; RV32-NEXT:    lbu a6, 0(a0)
14319; RV32-NEXT:    lbu a0, 1(a0)
14320; RV32-NEXT:    vmv.x.s a7, v10
14321; RV32-NEXT:    vmv.x.s t0, v8
14322; RV32-NEXT:    lbu t1, 0(a1)
14323; RV32-NEXT:    lbu a1, 1(a1)
14324; RV32-NEXT:    lbu t2, 0(a2)
14325; RV32-NEXT:    lbu a2, 1(a2)
14326; RV32-NEXT:    slli a0, a0, 8
14327; RV32-NEXT:    or a0, a0, a6
14328; RV32-NEXT:    lbu a6, 0(a3)
14329; RV32-NEXT:    lbu a3, 1(a3)
14330; RV32-NEXT:    slli a1, a1, 8
14331; RV32-NEXT:    or a1, a1, t1
14332; RV32-NEXT:    lbu t1, 0(a4)
14333; RV32-NEXT:    lbu a4, 1(a4)
14334; RV32-NEXT:    slli a2, a2, 8
14335; RV32-NEXT:    or a2, a2, t2
14336; RV32-NEXT:    lbu t2, 0(a5)
14337; RV32-NEXT:    lbu a5, 1(a5)
14338; RV32-NEXT:    slli a3, a3, 8
14339; RV32-NEXT:    or a3, a3, a6
14340; RV32-NEXT:    lbu a6, 0(a7)
14341; RV32-NEXT:    lbu a7, 1(a7)
14342; RV32-NEXT:    slli a4, a4, 8
14343; RV32-NEXT:    or a4, a4, t1
14344; RV32-NEXT:    lbu t1, 0(t0)
14345; RV32-NEXT:    lbu t0, 1(t0)
14346; RV32-NEXT:    slli a5, a5, 8
14347; RV32-NEXT:    or a5, a5, t2
14348; RV32-NEXT:    slli a7, a7, 8
14349; RV32-NEXT:    or a6, a7, a6
14350; RV32-NEXT:    slli t0, t0, 8
14351; RV32-NEXT:    or a7, t0, t1
14352; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14353; RV32-NEXT:    vmv.v.x v8, a0
14354; RV32-NEXT:    vslide1down.vx v8, v8, a1
14355; RV32-NEXT:    vmv.v.x v9, a4
14356; RV32-NEXT:    vslide1down.vx v8, v8, a2
14357; RV32-NEXT:    vslide1down.vx v9, v9, a5
14358; RV32-NEXT:    vslide1down.vx v10, v8, a3
14359; RV32-NEXT:    vslide1down.vx v8, v9, a6
14360; RV32-NEXT:    vmv.v.i v0, 15
14361; RV32-NEXT:    vslide1down.vx v8, v8, a7
14362; RV32-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14363; RV32-NEXT:    ret
14364;
14365; RV64V-LABEL: mgather_strided_unaligned:
14366; RV64V:       # %bb.0:
14367; RV64V-NEXT:    addi sp, sp, -128
14368; RV64V-NEXT:    .cfi_def_cfa_offset 128
14369; RV64V-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
14370; RV64V-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
14371; RV64V-NEXT:    .cfi_offset ra, -8
14372; RV64V-NEXT:    .cfi_offset s0, -16
14373; RV64V-NEXT:    addi s0, sp, 128
14374; RV64V-NEXT:    .cfi_def_cfa s0, 0
14375; RV64V-NEXT:    andi sp, sp, -64
14376; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
14377; RV64V-NEXT:    vid.v v8
14378; RV64V-NEXT:    mv a1, sp
14379; RV64V-NEXT:    vsll.vi v8, v8, 2
14380; RV64V-NEXT:    vadd.vx v8, v8, a0
14381; RV64V-NEXT:    vmv.x.s a0, v8
14382; RV64V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
14383; RV64V-NEXT:    vslidedown.vi v12, v8, 1
14384; RV64V-NEXT:    vmv.x.s a2, v12
14385; RV64V-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
14386; RV64V-NEXT:    vslidedown.vi v12, v8, 2
14387; RV64V-NEXT:    vmv.x.s a3, v12
14388; RV64V-NEXT:    vslidedown.vi v12, v8, 3
14389; RV64V-NEXT:    lbu a4, 0(a0)
14390; RV64V-NEXT:    lbu a0, 1(a0)
14391; RV64V-NEXT:    vmv.x.s a5, v12
14392; RV64V-NEXT:    lbu a6, 0(a2)
14393; RV64V-NEXT:    lbu a2, 1(a2)
14394; RV64V-NEXT:    lbu a7, 0(a3)
14395; RV64V-NEXT:    lbu a3, 1(a3)
14396; RV64V-NEXT:    lbu t0, 0(a5)
14397; RV64V-NEXT:    lbu a5, 1(a5)
14398; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14399; RV64V-NEXT:    vse64.v v8, (a1)
14400; RV64V-NEXT:    slli a0, a0, 8
14401; RV64V-NEXT:    or a0, a0, a4
14402; RV64V-NEXT:    slli a2, a2, 8
14403; RV64V-NEXT:    slli a3, a3, 8
14404; RV64V-NEXT:    or a1, a2, a6
14405; RV64V-NEXT:    or a2, a3, a7
14406; RV64V-NEXT:    ld a3, 32(sp)
14407; RV64V-NEXT:    ld a4, 40(sp)
14408; RV64V-NEXT:    ld a6, 48(sp)
14409; RV64V-NEXT:    ld a7, 56(sp)
14410; RV64V-NEXT:    slli a5, a5, 8
14411; RV64V-NEXT:    or a5, a5, t0
14412; RV64V-NEXT:    lbu t0, 0(a3)
14413; RV64V-NEXT:    lbu a3, 1(a3)
14414; RV64V-NEXT:    vmv.v.x v8, a0
14415; RV64V-NEXT:    lbu a0, 0(a4)
14416; RV64V-NEXT:    lbu a4, 1(a4)
14417; RV64V-NEXT:    vslide1down.vx v8, v8, a1
14418; RV64V-NEXT:    lbu a1, 0(a6)
14419; RV64V-NEXT:    lbu a6, 1(a6)
14420; RV64V-NEXT:    vslide1down.vx v8, v8, a2
14421; RV64V-NEXT:    lbu a2, 0(a7)
14422; RV64V-NEXT:    lbu a7, 1(a7)
14423; RV64V-NEXT:    vslide1down.vx v9, v8, a5
14424; RV64V-NEXT:    slli a3, a3, 8
14425; RV64V-NEXT:    slli a4, a4, 8
14426; RV64V-NEXT:    slli a6, a6, 8
14427; RV64V-NEXT:    slli a7, a7, 8
14428; RV64V-NEXT:    or a3, a3, t0
14429; RV64V-NEXT:    or a0, a4, a0
14430; RV64V-NEXT:    or a1, a6, a1
14431; RV64V-NEXT:    or a2, a7, a2
14432; RV64V-NEXT:    vmv.v.x v8, a3
14433; RV64V-NEXT:    vslide1down.vx v8, v8, a0
14434; RV64V-NEXT:    vslide1down.vx v8, v8, a1
14435; RV64V-NEXT:    vmv.v.i v0, 15
14436; RV64V-NEXT:    vslide1down.vx v8, v8, a2
14437; RV64V-NEXT:    vslidedown.vi v8, v9, 4, v0.t
14438; RV64V-NEXT:    addi sp, s0, -128
14439; RV64V-NEXT:    .cfi_def_cfa sp, 128
14440; RV64V-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
14441; RV64V-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
14442; RV64V-NEXT:    .cfi_restore ra
14443; RV64V-NEXT:    .cfi_restore s0
14444; RV64V-NEXT:    addi sp, sp, 128
14445; RV64V-NEXT:    .cfi_def_cfa_offset 0
14446; RV64V-NEXT:    ret
14447;
14448; RV64ZVE32F-LABEL: mgather_strided_unaligned:
14449; RV64ZVE32F:       # %bb.0:
14450; RV64ZVE32F-NEXT:    lbu a1, 0(a0)
14451; RV64ZVE32F-NEXT:    lbu a2, 1(a0)
14452; RV64ZVE32F-NEXT:    lbu a3, 4(a0)
14453; RV64ZVE32F-NEXT:    lbu a4, 5(a0)
14454; RV64ZVE32F-NEXT:    lbu a5, 8(a0)
14455; RV64ZVE32F-NEXT:    lbu a6, 9(a0)
14456; RV64ZVE32F-NEXT:    lbu a7, 12(a0)
14457; RV64ZVE32F-NEXT:    lbu t0, 13(a0)
14458; RV64ZVE32F-NEXT:    slli a2, a2, 8
14459; RV64ZVE32F-NEXT:    slli a4, a4, 8
14460; RV64ZVE32F-NEXT:    or a1, a2, a1
14461; RV64ZVE32F-NEXT:    or a3, a4, a3
14462; RV64ZVE32F-NEXT:    lbu a2, 16(a0)
14463; RV64ZVE32F-NEXT:    lbu a4, 17(a0)
14464; RV64ZVE32F-NEXT:    lbu t1, 20(a0)
14465; RV64ZVE32F-NEXT:    lbu t2, 21(a0)
14466; RV64ZVE32F-NEXT:    slli a6, a6, 8
14467; RV64ZVE32F-NEXT:    or a5, a6, a5
14468; RV64ZVE32F-NEXT:    slli t0, t0, 8
14469; RV64ZVE32F-NEXT:    slli a4, a4, 8
14470; RV64ZVE32F-NEXT:    slli t2, t2, 8
14471; RV64ZVE32F-NEXT:    or a6, t0, a7
14472; RV64ZVE32F-NEXT:    or a2, a4, a2
14473; RV64ZVE32F-NEXT:    lbu a4, 24(a0)
14474; RV64ZVE32F-NEXT:    lbu a7, 25(a0)
14475; RV64ZVE32F-NEXT:    or t0, t2, t1
14476; RV64ZVE32F-NEXT:    lbu t1, 28(a0)
14477; RV64ZVE32F-NEXT:    lbu a0, 29(a0)
14478; RV64ZVE32F-NEXT:    slli a7, a7, 8
14479; RV64ZVE32F-NEXT:    or a4, a7, a4
14480; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14481; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14482; RV64ZVE32F-NEXT:    slli a0, a0, 8
14483; RV64ZVE32F-NEXT:    or a0, a0, t1
14484; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
14485; RV64ZVE32F-NEXT:    vmv.v.x v9, a2
14486; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
14487; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, t0
14488; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a5
14489; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a4
14490; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a6
14491; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
14492; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14493; RV64ZVE32F-NEXT:    ret
14494  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
14495  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> splat (i1 true), <8 x i16> poison)
14496  ret <8 x i16> %v
14497}
14498
14499; TODO: Recognize as strided load with SEW=32
14500define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
14501; RV32-LABEL: mgather_strided_2xSEW:
14502; RV32:       # %bb.0:
14503; RV32-NEXT:    li a1, 8
14504; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14505; RV32-NEXT:    vlse32.v v8, (a0), a1
14506; RV32-NEXT:    ret
14507;
14508; RV64V-LABEL: mgather_strided_2xSEW:
14509; RV64V:       # %bb.0:
14510; RV64V-NEXT:    li a1, 8
14511; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14512; RV64V-NEXT:    vlse32.v v8, (a0), a1
14513; RV64V-NEXT:    ret
14514;
14515; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
14516; RV64ZVE32F:       # %bb.0:
14517; RV64ZVE32F-NEXT:    lh a1, 0(a0)
14518; RV64ZVE32F-NEXT:    lh a2, 2(a0)
14519; RV64ZVE32F-NEXT:    lh a3, 8(a0)
14520; RV64ZVE32F-NEXT:    lh a4, 10(a0)
14521; RV64ZVE32F-NEXT:    lh a5, 16(a0)
14522; RV64ZVE32F-NEXT:    lh a6, 18(a0)
14523; RV64ZVE32F-NEXT:    lh a7, 24(a0)
14524; RV64ZVE32F-NEXT:    lh a0, 26(a0)
14525; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14526; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14527; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
14528; RV64ZVE32F-NEXT:    vmv.v.x v9, a5
14529; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
14530; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a6
14531; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
14532; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a7
14533; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a4
14534; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
14535; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14536; RV64ZVE32F-NEXT:    ret
14537  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
14538  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14539  ret <8 x i16> %v
14540}
14541
14542; TODO: Recognize as strided load with SEW=32
14543define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
14544; RV32-LABEL: mgather_strided_2xSEW_with_offset:
14545; RV32:       # %bb.0:
14546; RV32-NEXT:    addi a0, a0, 4
14547; RV32-NEXT:    li a1, 8
14548; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14549; RV32-NEXT:    vlse32.v v8, (a0), a1
14550; RV32-NEXT:    ret
14551;
14552; RV64V-LABEL: mgather_strided_2xSEW_with_offset:
14553; RV64V:       # %bb.0:
14554; RV64V-NEXT:    addi a0, a0, 4
14555; RV64V-NEXT:    li a1, 8
14556; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14557; RV64V-NEXT:    vlse32.v v8, (a0), a1
14558; RV64V-NEXT:    ret
14559;
14560; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset:
14561; RV64ZVE32F:       # %bb.0:
14562; RV64ZVE32F-NEXT:    lh a1, 4(a0)
14563; RV64ZVE32F-NEXT:    lh a2, 6(a0)
14564; RV64ZVE32F-NEXT:    lh a3, 12(a0)
14565; RV64ZVE32F-NEXT:    lh a4, 14(a0)
14566; RV64ZVE32F-NEXT:    lh a5, 20(a0)
14567; RV64ZVE32F-NEXT:    lh a6, 22(a0)
14568; RV64ZVE32F-NEXT:    lh a7, 28(a0)
14569; RV64ZVE32F-NEXT:    lh a0, 30(a0)
14570; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14571; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14572; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
14573; RV64ZVE32F-NEXT:    vmv.v.x v9, a5
14574; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
14575; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a6
14576; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
14577; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a7
14578; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a4
14579; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
14580; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14581; RV64ZVE32F-NEXT:    ret
14582  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64>  <i64 2, i64 3, i64 6, i64 7, i64 10, i64 11, i64 14, i64 15>
14583  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14584  ret <8 x i16> %v
14585}
14586
14587; TODO: Recognize as strided load with SEW=32
14588define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
14589; RV32-LABEL: mgather_reverse_unit_strided_2xSEW:
14590; RV32:       # %bb.0:
14591; RV32-NEXT:    addi a0, a0, 28
14592; RV32-NEXT:    li a1, -4
14593; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14594; RV32-NEXT:    vlse32.v v8, (a0), a1
14595; RV32-NEXT:    ret
14596;
14597; RV64V-LABEL: mgather_reverse_unit_strided_2xSEW:
14598; RV64V:       # %bb.0:
14599; RV64V-NEXT:    addi a0, a0, 28
14600; RV64V-NEXT:    li a1, -4
14601; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14602; RV64V-NEXT:    vlse32.v v8, (a0), a1
14603; RV64V-NEXT:    ret
14604;
14605; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW:
14606; RV64ZVE32F:       # %bb.0:
14607; RV64ZVE32F-NEXT:    lh a1, 24(a0)
14608; RV64ZVE32F-NEXT:    lh a2, 26(a0)
14609; RV64ZVE32F-NEXT:    lh a3, 28(a0)
14610; RV64ZVE32F-NEXT:    lh a4, 30(a0)
14611; RV64ZVE32F-NEXT:    lh a5, 16(a0)
14612; RV64ZVE32F-NEXT:    lh a6, 18(a0)
14613; RV64ZVE32F-NEXT:    lh a7, 20(a0)
14614; RV64ZVE32F-NEXT:    lh a0, 22(a0)
14615; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14616; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14617; RV64ZVE32F-NEXT:    vmv.v.x v8, a3
14618; RV64ZVE32F-NEXT:    vmv.v.x v9, a7
14619; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
14620; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a0
14621; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
14622; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a5
14623; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a2
14624; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a6
14625; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14626; RV64ZVE32F-NEXT:    ret
14627  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64>  <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9>
14628  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14629  ret <8 x i16> %v
14630}
14631
14632; TODO: Recognize as strided load with SEW=32
14633define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
14634; RV32-LABEL: mgather_reverse_strided_2xSEW:
14635; RV32:       # %bb.0:
14636; RV32-NEXT:    addi a0, a0, 28
14637; RV32-NEXT:    li a1, -8
14638; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14639; RV32-NEXT:    vlse32.v v8, (a0), a1
14640; RV32-NEXT:    ret
14641;
14642; RV64V-LABEL: mgather_reverse_strided_2xSEW:
14643; RV64V:       # %bb.0:
14644; RV64V-NEXT:    addi a0, a0, 28
14645; RV64V-NEXT:    li a1, -8
14646; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14647; RV64V-NEXT:    vlse32.v v8, (a0), a1
14648; RV64V-NEXT:    ret
14649;
14650; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW:
14651; RV64ZVE32F:       # %bb.0:
14652; RV64ZVE32F-NEXT:    lh a1, 20(a0)
14653; RV64ZVE32F-NEXT:    lh a2, 22(a0)
14654; RV64ZVE32F-NEXT:    lh a3, 28(a0)
14655; RV64ZVE32F-NEXT:    lh a4, 30(a0)
14656; RV64ZVE32F-NEXT:    lh a5, 4(a0)
14657; RV64ZVE32F-NEXT:    lh a6, 6(a0)
14658; RV64ZVE32F-NEXT:    lh a7, 12(a0)
14659; RV64ZVE32F-NEXT:    lh a0, 14(a0)
14660; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14661; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14662; RV64ZVE32F-NEXT:    vmv.v.x v8, a3
14663; RV64ZVE32F-NEXT:    vmv.v.x v9, a7
14664; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
14665; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a0
14666; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
14667; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a5
14668; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a2
14669; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a6
14670; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14671; RV64ZVE32F-NEXT:    ret
14672  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64>  <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3>
14673  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14674  ret <8 x i16> %v
14675}
14676
14677define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
14678; RV32-LABEL: mgather_gather_2xSEW:
14679; RV32:       # %bb.0:
14680; RV32-NEXT:    lui a1, 16513
14681; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14682; RV32-NEXT:    vmv.s.x v9, a1
14683; RV32-NEXT:    vluxei8.v v8, (a0), v9
14684; RV32-NEXT:    ret
14685;
14686; RV64V-LABEL: mgather_gather_2xSEW:
14687; RV64V:       # %bb.0:
14688; RV64V-NEXT:    lui a1, 16513
14689; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14690; RV64V-NEXT:    vmv.s.x v9, a1
14691; RV64V-NEXT:    vluxei8.v v8, (a0), v9
14692; RV64V-NEXT:    ret
14693;
14694; RV64ZVE32F-LABEL: mgather_gather_2xSEW:
14695; RV64ZVE32F:       # %bb.0:
14696; RV64ZVE32F-NEXT:    lh a1, 8(a0)
14697; RV64ZVE32F-NEXT:    lh a2, 10(a0)
14698; RV64ZVE32F-NEXT:    lh a3, 16(a0)
14699; RV64ZVE32F-NEXT:    lh a4, 18(a0)
14700; RV64ZVE32F-NEXT:    lh a5, 0(a0)
14701; RV64ZVE32F-NEXT:    lh a6, 2(a0)
14702; RV64ZVE32F-NEXT:    lh a7, 4(a0)
14703; RV64ZVE32F-NEXT:    lh a0, 6(a0)
14704; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14705; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14706; RV64ZVE32F-NEXT:    vmv.v.x v8, a5
14707; RV64ZVE32F-NEXT:    vmv.v.x v9, a1
14708; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a6
14709; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a2
14710; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
14711; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a7
14712; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a4
14713; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
14714; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14715; RV64ZVE32F-NEXT:    ret
14716  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
14717  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14718  ret <8 x i16> %v
14719}
14720
14721; Base pointer isn't sufficiently aligned to form gather with e32
14722define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
14723; RV32-LABEL: mgather_gather_2xSEW_unaligned:
14724; RV32:       # %bb.0:
14725; RV32-NEXT:    lui a1, %hi(.LCPI123_0)
14726; RV32-NEXT:    addi a1, a1, %lo(.LCPI123_0)
14727; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14728; RV32-NEXT:    vle8.v v9, (a1)
14729; RV32-NEXT:    vluxei8.v v8, (a0), v9
14730; RV32-NEXT:    ret
14731;
14732; RV64V-LABEL: mgather_gather_2xSEW_unaligned:
14733; RV64V:       # %bb.0:
14734; RV64V-NEXT:    lui a1, %hi(.LCPI123_0)
14735; RV64V-NEXT:    addi a1, a1, %lo(.LCPI123_0)
14736; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14737; RV64V-NEXT:    vle8.v v9, (a1)
14738; RV64V-NEXT:    vluxei8.v v8, (a0), v9
14739; RV64V-NEXT:    ret
14740;
14741; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned:
14742; RV64ZVE32F:       # %bb.0:
14743; RV64ZVE32F-NEXT:    lh a1, 8(a0)
14744; RV64ZVE32F-NEXT:    lh a2, 10(a0)
14745; RV64ZVE32F-NEXT:    lh a3, 18(a0)
14746; RV64ZVE32F-NEXT:    lh a4, 20(a0)
14747; RV64ZVE32F-NEXT:    lh a5, 0(a0)
14748; RV64ZVE32F-NEXT:    lh a6, 2(a0)
14749; RV64ZVE32F-NEXT:    lh a7, 4(a0)
14750; RV64ZVE32F-NEXT:    lh a0, 6(a0)
14751; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14752; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14753; RV64ZVE32F-NEXT:    vmv.v.x v8, a5
14754; RV64ZVE32F-NEXT:    vmv.v.x v9, a1
14755; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a6
14756; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a2
14757; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
14758; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a7
14759; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a4
14760; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
14761; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14762; RV64ZVE32F-NEXT:    ret
14763  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
14764  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true), <8 x i16> poison)
14765  ret <8 x i16> %v
14766}
14767
14768; Despite sufficient starting alignment, the index values aren't properly
14769; aligned for e32.
14770define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
14771; RV32-LABEL: mgather_gather_2xSEW_unaligned2:
14772; RV32:       # %bb.0:
14773; RV32-NEXT:    lui a1, %hi(.LCPI124_0)
14774; RV32-NEXT:    addi a1, a1, %lo(.LCPI124_0)
14775; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14776; RV32-NEXT:    vle8.v v9, (a1)
14777; RV32-NEXT:    vluxei8.v v8, (a0), v9
14778; RV32-NEXT:    ret
14779;
14780; RV64V-LABEL: mgather_gather_2xSEW_unaligned2:
14781; RV64V:       # %bb.0:
14782; RV64V-NEXT:    lui a1, %hi(.LCPI124_0)
14783; RV64V-NEXT:    addi a1, a1, %lo(.LCPI124_0)
14784; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14785; RV64V-NEXT:    vle8.v v9, (a1)
14786; RV64V-NEXT:    vluxei8.v v8, (a0), v9
14787; RV64V-NEXT:    ret
14788;
14789; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2:
14790; RV64ZVE32F:       # %bb.0:
14791; RV64ZVE32F-NEXT:    lh a1, 2(a0)
14792; RV64ZVE32F-NEXT:    lh a2, 4(a0)
14793; RV64ZVE32F-NEXT:    lh a3, 6(a0)
14794; RV64ZVE32F-NEXT:    lh a4, 8(a0)
14795; RV64ZVE32F-NEXT:    lh a5, 10(a0)
14796; RV64ZVE32F-NEXT:    lh a6, 18(a0)
14797; RV64ZVE32F-NEXT:    lh a0, 20(a0)
14798; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14799; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14800; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
14801; RV64ZVE32F-NEXT:    vmv.v.x v9, a4
14802; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
14803; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a5
14804; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a6
14805; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a2
14806; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a0
14807; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a3
14808; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14809; RV64ZVE32F-NEXT:    ret
14810  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
14811  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14812  ret <8 x i16> %v
14813}
14814
14815define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
14816; RV32V-LABEL: mgather_gather_4xSEW:
14817; RV32V:       # %bb.0:
14818; RV32V-NEXT:    li a1, 16
14819; RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
14820; RV32V-NEXT:    vlse64.v v8, (a0), a1
14821; RV32V-NEXT:    ret
14822;
14823; RV64V-LABEL: mgather_gather_4xSEW:
14824; RV64V:       # %bb.0:
14825; RV64V-NEXT:    li a1, 16
14826; RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
14827; RV64V-NEXT:    vlse64.v v8, (a0), a1
14828; RV64V-NEXT:    ret
14829;
14830; RV32ZVE32F-LABEL: mgather_gather_4xSEW:
14831; RV32ZVE32F:       # %bb.0:
14832; RV32ZVE32F-NEXT:    lui a1, 82176
14833; RV32ZVE32F-NEXT:    addi a1, a1, 1024
14834; RV32ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14835; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
14836; RV32ZVE32F-NEXT:    vluxei8.v v8, (a0), v9
14837; RV32ZVE32F-NEXT:    ret
14838;
14839; RV64ZVE32F-LABEL: mgather_gather_4xSEW:
14840; RV64ZVE32F:       # %bb.0:
14841; RV64ZVE32F-NEXT:    lh a1, 0(a0)
14842; RV64ZVE32F-NEXT:    lh a2, 2(a0)
14843; RV64ZVE32F-NEXT:    lh a3, 4(a0)
14844; RV64ZVE32F-NEXT:    lh a4, 6(a0)
14845; RV64ZVE32F-NEXT:    lh a5, 16(a0)
14846; RV64ZVE32F-NEXT:    lh a6, 18(a0)
14847; RV64ZVE32F-NEXT:    lh a7, 20(a0)
14848; RV64ZVE32F-NEXT:    lh a0, 22(a0)
14849; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14850; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14851; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
14852; RV64ZVE32F-NEXT:    vmv.v.x v9, a5
14853; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
14854; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a6
14855; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
14856; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a7
14857; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a4
14858; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
14859; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14860; RV64ZVE32F-NEXT:    ret
14861  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
14862  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> splat (i1 true), <8 x i16> poison)
14863  ret <8 x i16> %v
14864}
14865
14866; This is a case where we'd be able to do 4xSEW if we had proper alignment,
14867; but we only have sufficient alignment for 2xSEW.
14868define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
14869; RV32-LABEL: mgather_gather_4xSEW_partial_align:
14870; RV32:       # %bb.0:
14871; RV32-NEXT:    lui a1, 82176
14872; RV32-NEXT:    addi a1, a1, 1024
14873; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14874; RV32-NEXT:    vmv.s.x v9, a1
14875; RV32-NEXT:    vluxei8.v v8, (a0), v9
14876; RV32-NEXT:    ret
14877;
14878; RV64V-LABEL: mgather_gather_4xSEW_partial_align:
14879; RV64V:       # %bb.0:
14880; RV64V-NEXT:    lui a1, 82176
14881; RV64V-NEXT:    addi a1, a1, 1024
14882; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
14883; RV64V-NEXT:    vmv.s.x v9, a1
14884; RV64V-NEXT:    vluxei8.v v8, (a0), v9
14885; RV64V-NEXT:    ret
14886;
14887; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align:
14888; RV64ZVE32F:       # %bb.0:
14889; RV64ZVE32F-NEXT:    lh a1, 0(a0)
14890; RV64ZVE32F-NEXT:    lh a2, 2(a0)
14891; RV64ZVE32F-NEXT:    lh a3, 4(a0)
14892; RV64ZVE32F-NEXT:    lh a4, 6(a0)
14893; RV64ZVE32F-NEXT:    lh a5, 16(a0)
14894; RV64ZVE32F-NEXT:    lh a6, 18(a0)
14895; RV64ZVE32F-NEXT:    lh a7, 20(a0)
14896; RV64ZVE32F-NEXT:    lh a0, 22(a0)
14897; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14898; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14899; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
14900; RV64ZVE32F-NEXT:    vmv.v.x v9, a5
14901; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
14902; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a6
14903; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
14904; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a7
14905; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a4
14906; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
14907; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14908; RV64ZVE32F-NEXT:    ret
14909  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
14910  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14911  ret <8 x i16> %v
14912}
14913
14914define <8 x i16> @mgather_shuffle_reverse(ptr %base) {
14915; CHECK-LABEL: mgather_shuffle_reverse:
14916; CHECK:       # %bb.0:
14917; CHECK-NEXT:    addi a0, a0, 14
14918; CHECK-NEXT:    li a1, -2
14919; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14920; CHECK-NEXT:    vlse16.v v8, (a0), a1
14921; CHECK-NEXT:    ret
14922  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64>  <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
14923  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14924  ret <8 x i16> %v
14925}
14926
14927define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
14928; RV32-LABEL: mgather_shuffle_rotate:
14929; RV32:       # %bb.0:
14930; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14931; RV32-NEXT:    vle16.v v9, (a0)
14932; RV32-NEXT:    vslidedown.vi v8, v9, 4
14933; RV32-NEXT:    vslideup.vi v8, v9, 4
14934; RV32-NEXT:    ret
14935;
14936; RV64V-LABEL: mgather_shuffle_rotate:
14937; RV64V:       # %bb.0:
14938; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14939; RV64V-NEXT:    vle16.v v9, (a0)
14940; RV64V-NEXT:    vslidedown.vi v8, v9, 4
14941; RV64V-NEXT:    vslideup.vi v8, v9, 4
14942; RV64V-NEXT:    ret
14943;
14944; RV64ZVE32F-LABEL: mgather_shuffle_rotate:
14945; RV64ZVE32F:       # %bb.0:
14946; RV64ZVE32F-NEXT:    lh a1, 8(a0)
14947; RV64ZVE32F-NEXT:    lh a2, 10(a0)
14948; RV64ZVE32F-NEXT:    lh a3, 12(a0)
14949; RV64ZVE32F-NEXT:    lh a4, 14(a0)
14950; RV64ZVE32F-NEXT:    lh a5, 0(a0)
14951; RV64ZVE32F-NEXT:    lh a6, 2(a0)
14952; RV64ZVE32F-NEXT:    lh a7, 4(a0)
14953; RV64ZVE32F-NEXT:    lh a0, 6(a0)
14954; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
14955; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
14956; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
14957; RV64ZVE32F-NEXT:    vmv.v.x v9, a5
14958; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
14959; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a6
14960; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
14961; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a7
14962; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a4
14963; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
14964; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
14965; RV64ZVE32F-NEXT:    ret
14966  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64>  <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
14967  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14968  ret <8 x i16> %v
14969}
14970
14971define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
14972; RV32-LABEL: mgather_shuffle_vrgather:
14973; RV32:       # %bb.0:
14974; RV32-NEXT:    lui a1, %hi(.LCPI129_0)
14975; RV32-NEXT:    addi a1, a1, %lo(.LCPI129_0)
14976; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14977; RV32-NEXT:    vle16.v v9, (a1)
14978; RV32-NEXT:    vle16.v v10, (a0)
14979; RV32-NEXT:    vrgather.vv v8, v10, v9
14980; RV32-NEXT:    ret
14981;
14982; RV64V-LABEL: mgather_shuffle_vrgather:
14983; RV64V:       # %bb.0:
14984; RV64V-NEXT:    lui a1, %hi(.LCPI129_0)
14985; RV64V-NEXT:    addi a1, a1, %lo(.LCPI129_0)
14986; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
14987; RV64V-NEXT:    vle16.v v9, (a1)
14988; RV64V-NEXT:    vle16.v v10, (a0)
14989; RV64V-NEXT:    vrgather.vv v8, v10, v9
14990; RV64V-NEXT:    ret
14991;
14992; RV64ZVE32F-LABEL: mgather_shuffle_vrgather:
14993; RV64ZVE32F:       # %bb.0:
14994; RV64ZVE32F-NEXT:    lh a1, 0(a0)
14995; RV64ZVE32F-NEXT:    lh a2, 2(a0)
14996; RV64ZVE32F-NEXT:    lh a3, 4(a0)
14997; RV64ZVE32F-NEXT:    lh a4, 6(a0)
14998; RV64ZVE32F-NEXT:    lh a5, 8(a0)
14999; RV64ZVE32F-NEXT:    lh a6, 10(a0)
15000; RV64ZVE32F-NEXT:    lh a7, 12(a0)
15001; RV64ZVE32F-NEXT:    lh a0, 14(a0)
15002; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
15003; RV64ZVE32F-NEXT:    vmv.v.i v0, 15
15004; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
15005; RV64ZVE32F-NEXT:    vmv.v.x v9, a5
15006; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
15007; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a6
15008; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a4
15009; RV64ZVE32F-NEXT:    vslide1down.vx v9, v9, a7
15010; RV64ZVE32F-NEXT:    vslide1down.vx v10, v8, a2
15011; RV64ZVE32F-NEXT:    vslide1down.vx v8, v9, a0
15012; RV64ZVE32F-NEXT:    vslidedown.vi v8, v10, 4, v0.t
15013; RV64ZVE32F-NEXT:    ret
15014  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64>  <i64 0, i64 2, i64 3, i64 1, i64 4, i64 5, i64 6, i64 7>
15015  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
15016  ret <8 x i16> %v
15017}
15018
15019; v32i64 is not a legal type, so make sure we don't try to combine the mgather
15020; to a vlse intrinsic until it is legalized and split.
15021define <32 x i64> @mgather_strided_split(ptr %base) {
15022; RV32V-LABEL: mgather_strided_split:
15023; RV32V:       # %bb.0:
15024; RV32V-NEXT:    li a1, 16
15025; RV32V-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
15026; RV32V-NEXT:    vlse64.v v8, (a0), a1
15027; RV32V-NEXT:    addi a0, a0, 256
15028; RV32V-NEXT:    vlse64.v v16, (a0), a1
15029; RV32V-NEXT:    ret
15030;
15031; RV64V-LABEL: mgather_strided_split:
15032; RV64V:       # %bb.0:
15033; RV64V-NEXT:    li a1, 16
15034; RV64V-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
15035; RV64V-NEXT:    vlse64.v v8, (a0), a1
15036; RV64V-NEXT:    addi a0, a0, 256
15037; RV64V-NEXT:    vlse64.v v16, (a0), a1
15038; RV64V-NEXT:    ret
15039;
15040; RV32ZVE32F-LABEL: mgather_strided_split:
15041; RV32ZVE32F:       # %bb.0:
15042; RV32ZVE32F-NEXT:    addi sp, sp, -512
15043; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 512
15044; RV32ZVE32F-NEXT:    sw ra, 508(sp) # 4-byte Folded Spill
15045; RV32ZVE32F-NEXT:    sw s0, 504(sp) # 4-byte Folded Spill
15046; RV32ZVE32F-NEXT:    sw s2, 500(sp) # 4-byte Folded Spill
15047; RV32ZVE32F-NEXT:    sw s3, 496(sp) # 4-byte Folded Spill
15048; RV32ZVE32F-NEXT:    sw s4, 492(sp) # 4-byte Folded Spill
15049; RV32ZVE32F-NEXT:    sw s5, 488(sp) # 4-byte Folded Spill
15050; RV32ZVE32F-NEXT:    sw s6, 484(sp) # 4-byte Folded Spill
15051; RV32ZVE32F-NEXT:    sw s7, 480(sp) # 4-byte Folded Spill
15052; RV32ZVE32F-NEXT:    sw s8, 476(sp) # 4-byte Folded Spill
15053; RV32ZVE32F-NEXT:    sw s9, 472(sp) # 4-byte Folded Spill
15054; RV32ZVE32F-NEXT:    sw s10, 468(sp) # 4-byte Folded Spill
15055; RV32ZVE32F-NEXT:    sw s11, 464(sp) # 4-byte Folded Spill
15056; RV32ZVE32F-NEXT:    .cfi_offset ra, -4
15057; RV32ZVE32F-NEXT:    .cfi_offset s0, -8
15058; RV32ZVE32F-NEXT:    .cfi_offset s2, -12
15059; RV32ZVE32F-NEXT:    .cfi_offset s3, -16
15060; RV32ZVE32F-NEXT:    .cfi_offset s4, -20
15061; RV32ZVE32F-NEXT:    .cfi_offset s5, -24
15062; RV32ZVE32F-NEXT:    .cfi_offset s6, -28
15063; RV32ZVE32F-NEXT:    .cfi_offset s7, -32
15064; RV32ZVE32F-NEXT:    .cfi_offset s8, -36
15065; RV32ZVE32F-NEXT:    .cfi_offset s9, -40
15066; RV32ZVE32F-NEXT:    .cfi_offset s10, -44
15067; RV32ZVE32F-NEXT:    .cfi_offset s11, -48
15068; RV32ZVE32F-NEXT:    addi s0, sp, 512
15069; RV32ZVE32F-NEXT:    .cfi_def_cfa s0, 0
15070; RV32ZVE32F-NEXT:    andi sp, sp, -128
15071; RV32ZVE32F-NEXT:    li a2, 32
15072; RV32ZVE32F-NEXT:    lw a3, 0(a1)
15073; RV32ZVE32F-NEXT:    sw a3, 236(sp) # 4-byte Folded Spill
15074; RV32ZVE32F-NEXT:    lw a3, 4(a1)
15075; RV32ZVE32F-NEXT:    sw a3, 232(sp) # 4-byte Folded Spill
15076; RV32ZVE32F-NEXT:    addi a3, sp, 256
15077; RV32ZVE32F-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
15078; RV32ZVE32F-NEXT:    vid.v v8
15079; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 4
15080; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
15081; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
15082; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 1
15083; RV32ZVE32F-NEXT:    vslidedown.vi v17, v8, 2
15084; RV32ZVE32F-NEXT:    vmv.x.s a1, v16
15085; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 3
15086; RV32ZVE32F-NEXT:    vmv.x.s a4, v17
15087; RV32ZVE32F-NEXT:    vmv.x.s a5, v16
15088; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
15089; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 4
15090; RV32ZVE32F-NEXT:    vmv.x.s a6, v16
15091; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 5
15092; RV32ZVE32F-NEXT:    vmv.x.s a7, v16
15093; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 6
15094; RV32ZVE32F-NEXT:    vmv.x.s t0, v16
15095; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 7
15096; RV32ZVE32F-NEXT:    vmv.x.s t1, v16
15097; RV32ZVE32F-NEXT:    lw t2, 0(a1)
15098; RV32ZVE32F-NEXT:    sw t2, 196(sp) # 4-byte Folded Spill
15099; RV32ZVE32F-NEXT:    lw a1, 4(a1)
15100; RV32ZVE32F-NEXT:    sw a1, 192(sp) # 4-byte Folded Spill
15101; RV32ZVE32F-NEXT:    lw ra, 0(a4)
15102; RV32ZVE32F-NEXT:    lw a1, 4(a4)
15103; RV32ZVE32F-NEXT:    sw a1, 172(sp) # 4-byte Folded Spill
15104; RV32ZVE32F-NEXT:    lw a1, 0(a5)
15105; RV32ZVE32F-NEXT:    sw a1, 168(sp) # 4-byte Folded Spill
15106; RV32ZVE32F-NEXT:    lw a1, 4(a5)
15107; RV32ZVE32F-NEXT:    sw a1, 164(sp) # 4-byte Folded Spill
15108; RV32ZVE32F-NEXT:    lw a1, 0(a6)
15109; RV32ZVE32F-NEXT:    sw a1, 252(sp) # 4-byte Folded Spill
15110; RV32ZVE32F-NEXT:    lw a1, 4(a6)
15111; RV32ZVE32F-NEXT:    sw a1, 248(sp) # 4-byte Folded Spill
15112; RV32ZVE32F-NEXT:    lw a1, 0(a7)
15113; RV32ZVE32F-NEXT:    sw a1, 244(sp) # 4-byte Folded Spill
15114; RV32ZVE32F-NEXT:    lw a1, 4(a7)
15115; RV32ZVE32F-NEXT:    sw a1, 240(sp) # 4-byte Folded Spill
15116; RV32ZVE32F-NEXT:    lw a1, 0(t0)
15117; RV32ZVE32F-NEXT:    sw a1, 188(sp) # 4-byte Folded Spill
15118; RV32ZVE32F-NEXT:    lw a1, 4(t0)
15119; RV32ZVE32F-NEXT:    sw a1, 184(sp) # 4-byte Folded Spill
15120; RV32ZVE32F-NEXT:    lw a1, 0(t1)
15121; RV32ZVE32F-NEXT:    sw a1, 180(sp) # 4-byte Folded Spill
15122; RV32ZVE32F-NEXT:    lw a1, 4(t1)
15123; RV32ZVE32F-NEXT:    sw a1, 176(sp) # 4-byte Folded Spill
15124; RV32ZVE32F-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
15125; RV32ZVE32F-NEXT:    vse32.v v8, (a3)
15126; RV32ZVE32F-NEXT:    lw a1, 288(sp)
15127; RV32ZVE32F-NEXT:    lw a2, 292(sp)
15128; RV32ZVE32F-NEXT:    lw a3, 296(sp)
15129; RV32ZVE32F-NEXT:    lw a4, 300(sp)
15130; RV32ZVE32F-NEXT:    lw a5, 0(a1)
15131; RV32ZVE32F-NEXT:    sw a5, 228(sp) # 4-byte Folded Spill
15132; RV32ZVE32F-NEXT:    lw a1, 4(a1)
15133; RV32ZVE32F-NEXT:    sw a1, 224(sp) # 4-byte Folded Spill
15134; RV32ZVE32F-NEXT:    lw a1, 0(a2)
15135; RV32ZVE32F-NEXT:    sw a1, 220(sp) # 4-byte Folded Spill
15136; RV32ZVE32F-NEXT:    lw a1, 4(a2)
15137; RV32ZVE32F-NEXT:    sw a1, 216(sp) # 4-byte Folded Spill
15138; RV32ZVE32F-NEXT:    lw a1, 0(a3)
15139; RV32ZVE32F-NEXT:    sw a1, 212(sp) # 4-byte Folded Spill
15140; RV32ZVE32F-NEXT:    lw a1, 4(a3)
15141; RV32ZVE32F-NEXT:    sw a1, 208(sp) # 4-byte Folded Spill
15142; RV32ZVE32F-NEXT:    lw a1, 0(a4)
15143; RV32ZVE32F-NEXT:    sw a1, 204(sp) # 4-byte Folded Spill
15144; RV32ZVE32F-NEXT:    lw a1, 4(a4)
15145; RV32ZVE32F-NEXT:    sw a1, 200(sp) # 4-byte Folded Spill
15146; RV32ZVE32F-NEXT:    lw a1, 304(sp)
15147; RV32ZVE32F-NEXT:    lw a2, 308(sp)
15148; RV32ZVE32F-NEXT:    lw a3, 312(sp)
15149; RV32ZVE32F-NEXT:    lw a4, 316(sp)
15150; RV32ZVE32F-NEXT:    lw a5, 0(a1)
15151; RV32ZVE32F-NEXT:    sw a5, 160(sp) # 4-byte Folded Spill
15152; RV32ZVE32F-NEXT:    lw a1, 4(a1)
15153; RV32ZVE32F-NEXT:    sw a1, 156(sp) # 4-byte Folded Spill
15154; RV32ZVE32F-NEXT:    lw a1, 0(a2)
15155; RV32ZVE32F-NEXT:    sw a1, 152(sp) # 4-byte Folded Spill
15156; RV32ZVE32F-NEXT:    lw a1, 4(a2)
15157; RV32ZVE32F-NEXT:    sw a1, 148(sp) # 4-byte Folded Spill
15158; RV32ZVE32F-NEXT:    lw a1, 0(a3)
15159; RV32ZVE32F-NEXT:    sw a1, 144(sp) # 4-byte Folded Spill
15160; RV32ZVE32F-NEXT:    lw a1, 4(a3)
15161; RV32ZVE32F-NEXT:    sw a1, 140(sp) # 4-byte Folded Spill
15162; RV32ZVE32F-NEXT:    lw a1, 0(a4)
15163; RV32ZVE32F-NEXT:    sw a1, 136(sp) # 4-byte Folded Spill
15164; RV32ZVE32F-NEXT:    lw a1, 4(a4)
15165; RV32ZVE32F-NEXT:    sw a1, 132(sp) # 4-byte Folded Spill
15166; RV32ZVE32F-NEXT:    lw a1, 320(sp)
15167; RV32ZVE32F-NEXT:    lw a2, 324(sp)
15168; RV32ZVE32F-NEXT:    lw a3, 328(sp)
15169; RV32ZVE32F-NEXT:    lw a4, 332(sp)
15170; RV32ZVE32F-NEXT:    lw a5, 0(a1)
15171; RV32ZVE32F-NEXT:    sw a5, 128(sp) # 4-byte Folded Spill
15172; RV32ZVE32F-NEXT:    lw a1, 4(a1)
15173; RV32ZVE32F-NEXT:    sw a1, 124(sp) # 4-byte Folded Spill
15174; RV32ZVE32F-NEXT:    lw a1, 0(a2)
15175; RV32ZVE32F-NEXT:    sw a1, 120(sp) # 4-byte Folded Spill
15176; RV32ZVE32F-NEXT:    lw a1, 4(a2)
15177; RV32ZVE32F-NEXT:    sw a1, 116(sp) # 4-byte Folded Spill
15178; RV32ZVE32F-NEXT:    lw s8, 0(a3)
15179; RV32ZVE32F-NEXT:    lw s9, 4(a3)
15180; RV32ZVE32F-NEXT:    lw s10, 0(a4)
15181; RV32ZVE32F-NEXT:    lw s11, 4(a4)
15182; RV32ZVE32F-NEXT:    lw a1, 336(sp)
15183; RV32ZVE32F-NEXT:    lw a2, 340(sp)
15184; RV32ZVE32F-NEXT:    lw a3, 344(sp)
15185; RV32ZVE32F-NEXT:    lw a4, 348(sp)
15186; RV32ZVE32F-NEXT:    lw t5, 0(a1)
15187; RV32ZVE32F-NEXT:    lw t6, 4(a1)
15188; RV32ZVE32F-NEXT:    lw s2, 0(a2)
15189; RV32ZVE32F-NEXT:    lw s3, 4(a2)
15190; RV32ZVE32F-NEXT:    lw a5, 0(a3)
15191; RV32ZVE32F-NEXT:    lw a6, 4(a3)
15192; RV32ZVE32F-NEXT:    lw a7, 0(a4)
15193; RV32ZVE32F-NEXT:    lw t0, 4(a4)
15194; RV32ZVE32F-NEXT:    lw a1, 352(sp)
15195; RV32ZVE32F-NEXT:    lw a2, 356(sp)
15196; RV32ZVE32F-NEXT:    lw a3, 360(sp)
15197; RV32ZVE32F-NEXT:    lw a4, 364(sp)
15198; RV32ZVE32F-NEXT:    lw t1, 0(a1)
15199; RV32ZVE32F-NEXT:    sw t1, 112(sp) # 4-byte Folded Spill
15200; RV32ZVE32F-NEXT:    lw a1, 4(a1)
15201; RV32ZVE32F-NEXT:    sw a1, 108(sp) # 4-byte Folded Spill
15202; RV32ZVE32F-NEXT:    lw a1, 0(a2)
15203; RV32ZVE32F-NEXT:    sw a1, 104(sp) # 4-byte Folded Spill
15204; RV32ZVE32F-NEXT:    lw a1, 4(a2)
15205; RV32ZVE32F-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
15206; RV32ZVE32F-NEXT:    lw s4, 0(a3)
15207; RV32ZVE32F-NEXT:    lw s5, 4(a3)
15208; RV32ZVE32F-NEXT:    lw s6, 0(a4)
15209; RV32ZVE32F-NEXT:    lw s7, 4(a4)
15210; RV32ZVE32F-NEXT:    lw a1, 368(sp)
15211; RV32ZVE32F-NEXT:    lw a2, 372(sp)
15212; RV32ZVE32F-NEXT:    lw a3, 376(sp)
15213; RV32ZVE32F-NEXT:    lw a4, 380(sp)
15214; RV32ZVE32F-NEXT:    lw t1, 0(a1)
15215; RV32ZVE32F-NEXT:    lw t2, 4(a1)
15216; RV32ZVE32F-NEXT:    lw t3, 0(a2)
15217; RV32ZVE32F-NEXT:    lw t4, 4(a2)
15218; RV32ZVE32F-NEXT:    lw a1, 0(a3)
15219; RV32ZVE32F-NEXT:    lw a2, 4(a3)
15220; RV32ZVE32F-NEXT:    lw a3, 0(a4)
15221; RV32ZVE32F-NEXT:    lw a4, 4(a4)
15222; RV32ZVE32F-NEXT:    sw ra, 16(a0)
15223; RV32ZVE32F-NEXT:    lw ra, 172(sp) # 4-byte Folded Reload
15224; RV32ZVE32F-NEXT:    sw ra, 20(a0)
15225; RV32ZVE32F-NEXT:    lw ra, 168(sp) # 4-byte Folded Reload
15226; RV32ZVE32F-NEXT:    sw ra, 24(a0)
15227; RV32ZVE32F-NEXT:    lw ra, 164(sp) # 4-byte Folded Reload
15228; RV32ZVE32F-NEXT:    sw ra, 28(a0)
15229; RV32ZVE32F-NEXT:    lw ra, 236(sp) # 4-byte Folded Reload
15230; RV32ZVE32F-NEXT:    sw ra, 0(a0)
15231; RV32ZVE32F-NEXT:    lw ra, 232(sp) # 4-byte Folded Reload
15232; RV32ZVE32F-NEXT:    sw ra, 4(a0)
15233; RV32ZVE32F-NEXT:    lw ra, 196(sp) # 4-byte Folded Reload
15234; RV32ZVE32F-NEXT:    sw ra, 8(a0)
15235; RV32ZVE32F-NEXT:    lw ra, 192(sp) # 4-byte Folded Reload
15236; RV32ZVE32F-NEXT:    sw ra, 12(a0)
15237; RV32ZVE32F-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
15238; RV32ZVE32F-NEXT:    sw ra, 48(a0)
15239; RV32ZVE32F-NEXT:    lw ra, 184(sp) # 4-byte Folded Reload
15240; RV32ZVE32F-NEXT:    sw ra, 52(a0)
15241; RV32ZVE32F-NEXT:    lw ra, 180(sp) # 4-byte Folded Reload
15242; RV32ZVE32F-NEXT:    sw ra, 56(a0)
15243; RV32ZVE32F-NEXT:    lw ra, 176(sp) # 4-byte Folded Reload
15244; RV32ZVE32F-NEXT:    sw ra, 60(a0)
15245; RV32ZVE32F-NEXT:    sw a5, 176(a0)
15246; RV32ZVE32F-NEXT:    sw a6, 180(a0)
15247; RV32ZVE32F-NEXT:    sw a7, 184(a0)
15248; RV32ZVE32F-NEXT:    sw t0, 188(a0)
15249; RV32ZVE32F-NEXT:    sw t5, 160(a0)
15250; RV32ZVE32F-NEXT:    sw t6, 164(a0)
15251; RV32ZVE32F-NEXT:    sw s2, 168(a0)
15252; RV32ZVE32F-NEXT:    sw s3, 172(a0)
15253; RV32ZVE32F-NEXT:    sw s8, 144(a0)
15254; RV32ZVE32F-NEXT:    sw s9, 148(a0)
15255; RV32ZVE32F-NEXT:    sw s10, 152(a0)
15256; RV32ZVE32F-NEXT:    sw s11, 156(a0)
15257; RV32ZVE32F-NEXT:    lw a5, 128(sp) # 4-byte Folded Reload
15258; RV32ZVE32F-NEXT:    sw a5, 128(a0)
15259; RV32ZVE32F-NEXT:    lw a5, 124(sp) # 4-byte Folded Reload
15260; RV32ZVE32F-NEXT:    sw a5, 132(a0)
15261; RV32ZVE32F-NEXT:    lw a5, 120(sp) # 4-byte Folded Reload
15262; RV32ZVE32F-NEXT:    sw a5, 136(a0)
15263; RV32ZVE32F-NEXT:    lw a5, 116(sp) # 4-byte Folded Reload
15264; RV32ZVE32F-NEXT:    sw a5, 140(a0)
15265; RV32ZVE32F-NEXT:    lw a5, 144(sp) # 4-byte Folded Reload
15266; RV32ZVE32F-NEXT:    sw a5, 112(a0)
15267; RV32ZVE32F-NEXT:    lw a5, 140(sp) # 4-byte Folded Reload
15268; RV32ZVE32F-NEXT:    sw a5, 116(a0)
15269; RV32ZVE32F-NEXT:    lw a5, 136(sp) # 4-byte Folded Reload
15270; RV32ZVE32F-NEXT:    sw a5, 120(a0)
15271; RV32ZVE32F-NEXT:    lw a5, 132(sp) # 4-byte Folded Reload
15272; RV32ZVE32F-NEXT:    sw a5, 124(a0)
15273; RV32ZVE32F-NEXT:    lw a5, 160(sp) # 4-byte Folded Reload
15274; RV32ZVE32F-NEXT:    sw a5, 96(a0)
15275; RV32ZVE32F-NEXT:    lw a5, 156(sp) # 4-byte Folded Reload
15276; RV32ZVE32F-NEXT:    sw a5, 100(a0)
15277; RV32ZVE32F-NEXT:    lw a5, 152(sp) # 4-byte Folded Reload
15278; RV32ZVE32F-NEXT:    sw a5, 104(a0)
15279; RV32ZVE32F-NEXT:    lw a5, 148(sp) # 4-byte Folded Reload
15280; RV32ZVE32F-NEXT:    sw a5, 108(a0)
15281; RV32ZVE32F-NEXT:    lw a5, 212(sp) # 4-byte Folded Reload
15282; RV32ZVE32F-NEXT:    sw a5, 80(a0)
15283; RV32ZVE32F-NEXT:    lw a5, 208(sp) # 4-byte Folded Reload
15284; RV32ZVE32F-NEXT:    sw a5, 84(a0)
15285; RV32ZVE32F-NEXT:    lw a5, 204(sp) # 4-byte Folded Reload
15286; RV32ZVE32F-NEXT:    sw a5, 88(a0)
15287; RV32ZVE32F-NEXT:    lw a5, 200(sp) # 4-byte Folded Reload
15288; RV32ZVE32F-NEXT:    sw a5, 92(a0)
15289; RV32ZVE32F-NEXT:    lw a5, 228(sp) # 4-byte Folded Reload
15290; RV32ZVE32F-NEXT:    sw a5, 64(a0)
15291; RV32ZVE32F-NEXT:    lw a5, 224(sp) # 4-byte Folded Reload
15292; RV32ZVE32F-NEXT:    sw a5, 68(a0)
15293; RV32ZVE32F-NEXT:    lw a5, 220(sp) # 4-byte Folded Reload
15294; RV32ZVE32F-NEXT:    sw a5, 72(a0)
15295; RV32ZVE32F-NEXT:    lw a5, 216(sp) # 4-byte Folded Reload
15296; RV32ZVE32F-NEXT:    sw a5, 76(a0)
15297; RV32ZVE32F-NEXT:    sw a1, 240(a0)
15298; RV32ZVE32F-NEXT:    sw a2, 244(a0)
15299; RV32ZVE32F-NEXT:    sw a3, 248(a0)
15300; RV32ZVE32F-NEXT:    sw a4, 252(a0)
15301; RV32ZVE32F-NEXT:    sw t1, 224(a0)
15302; RV32ZVE32F-NEXT:    sw t2, 228(a0)
15303; RV32ZVE32F-NEXT:    sw t3, 232(a0)
15304; RV32ZVE32F-NEXT:    sw t4, 236(a0)
15305; RV32ZVE32F-NEXT:    sw s4, 208(a0)
15306; RV32ZVE32F-NEXT:    sw s5, 212(a0)
15307; RV32ZVE32F-NEXT:    sw s6, 216(a0)
15308; RV32ZVE32F-NEXT:    sw s7, 220(a0)
15309; RV32ZVE32F-NEXT:    lw a1, 112(sp) # 4-byte Folded Reload
15310; RV32ZVE32F-NEXT:    sw a1, 192(a0)
15311; RV32ZVE32F-NEXT:    lw a1, 108(sp) # 4-byte Folded Reload
15312; RV32ZVE32F-NEXT:    sw a1, 196(a0)
15313; RV32ZVE32F-NEXT:    lw a1, 104(sp) # 4-byte Folded Reload
15314; RV32ZVE32F-NEXT:    sw a1, 200(a0)
15315; RV32ZVE32F-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
15316; RV32ZVE32F-NEXT:    sw a1, 204(a0)
15317; RV32ZVE32F-NEXT:    lw a1, 252(sp) # 4-byte Folded Reload
15318; RV32ZVE32F-NEXT:    sw a1, 32(a0)
15319; RV32ZVE32F-NEXT:    lw a1, 248(sp) # 4-byte Folded Reload
15320; RV32ZVE32F-NEXT:    sw a1, 36(a0)
15321; RV32ZVE32F-NEXT:    lw a1, 244(sp) # 4-byte Folded Reload
15322; RV32ZVE32F-NEXT:    sw a1, 40(a0)
15323; RV32ZVE32F-NEXT:    lw a1, 240(sp) # 4-byte Folded Reload
15324; RV32ZVE32F-NEXT:    sw a1, 44(a0)
15325; RV32ZVE32F-NEXT:    addi sp, s0, -512
15326; RV32ZVE32F-NEXT:    .cfi_def_cfa sp, 512
15327; RV32ZVE32F-NEXT:    lw ra, 508(sp) # 4-byte Folded Reload
15328; RV32ZVE32F-NEXT:    lw s0, 504(sp) # 4-byte Folded Reload
15329; RV32ZVE32F-NEXT:    lw s2, 500(sp) # 4-byte Folded Reload
15330; RV32ZVE32F-NEXT:    lw s3, 496(sp) # 4-byte Folded Reload
15331; RV32ZVE32F-NEXT:    lw s4, 492(sp) # 4-byte Folded Reload
15332; RV32ZVE32F-NEXT:    lw s5, 488(sp) # 4-byte Folded Reload
15333; RV32ZVE32F-NEXT:    lw s6, 484(sp) # 4-byte Folded Reload
15334; RV32ZVE32F-NEXT:    lw s7, 480(sp) # 4-byte Folded Reload
15335; RV32ZVE32F-NEXT:    lw s8, 476(sp) # 4-byte Folded Reload
15336; RV32ZVE32F-NEXT:    lw s9, 472(sp) # 4-byte Folded Reload
15337; RV32ZVE32F-NEXT:    lw s10, 468(sp) # 4-byte Folded Reload
15338; RV32ZVE32F-NEXT:    lw s11, 464(sp) # 4-byte Folded Reload
15339; RV32ZVE32F-NEXT:    .cfi_restore ra
15340; RV32ZVE32F-NEXT:    .cfi_restore s0
15341; RV32ZVE32F-NEXT:    .cfi_restore s2
15342; RV32ZVE32F-NEXT:    .cfi_restore s3
15343; RV32ZVE32F-NEXT:    .cfi_restore s4
15344; RV32ZVE32F-NEXT:    .cfi_restore s5
15345; RV32ZVE32F-NEXT:    .cfi_restore s6
15346; RV32ZVE32F-NEXT:    .cfi_restore s7
15347; RV32ZVE32F-NEXT:    .cfi_restore s8
15348; RV32ZVE32F-NEXT:    .cfi_restore s9
15349; RV32ZVE32F-NEXT:    .cfi_restore s10
15350; RV32ZVE32F-NEXT:    .cfi_restore s11
15351; RV32ZVE32F-NEXT:    addi sp, sp, 512
15352; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 0
15353; RV32ZVE32F-NEXT:    ret
15354;
15355; RV64ZVE32F-LABEL: mgather_strided_split:
15356; RV64ZVE32F:       # %bb.0:
15357; RV64ZVE32F-NEXT:    addi sp, sp, -144
15358; RV64ZVE32F-NEXT:    .cfi_def_cfa_offset 144
15359; RV64ZVE32F-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
15360; RV64ZVE32F-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
15361; RV64ZVE32F-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
15362; RV64ZVE32F-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
15363; RV64ZVE32F-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
15364; RV64ZVE32F-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
15365; RV64ZVE32F-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
15366; RV64ZVE32F-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
15367; RV64ZVE32F-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
15368; RV64ZVE32F-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
15369; RV64ZVE32F-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
15370; RV64ZVE32F-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
15371; RV64ZVE32F-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
15372; RV64ZVE32F-NEXT:    .cfi_offset ra, -8
15373; RV64ZVE32F-NEXT:    .cfi_offset s0, -16
15374; RV64ZVE32F-NEXT:    .cfi_offset s1, -24
15375; RV64ZVE32F-NEXT:    .cfi_offset s2, -32
15376; RV64ZVE32F-NEXT:    .cfi_offset s3, -40
15377; RV64ZVE32F-NEXT:    .cfi_offset s4, -48
15378; RV64ZVE32F-NEXT:    .cfi_offset s5, -56
15379; RV64ZVE32F-NEXT:    .cfi_offset s6, -64
15380; RV64ZVE32F-NEXT:    .cfi_offset s7, -72
15381; RV64ZVE32F-NEXT:    .cfi_offset s8, -80
15382; RV64ZVE32F-NEXT:    .cfi_offset s9, -88
15383; RV64ZVE32F-NEXT:    .cfi_offset s10, -96
15384; RV64ZVE32F-NEXT:    .cfi_offset s11, -104
15385; RV64ZVE32F-NEXT:    ld a2, 0(a1)
15386; RV64ZVE32F-NEXT:    sd a2, 32(sp) # 8-byte Folded Spill
15387; RV64ZVE32F-NEXT:    ld a2, 16(a1)
15388; RV64ZVE32F-NEXT:    sd a2, 24(sp) # 8-byte Folded Spill
15389; RV64ZVE32F-NEXT:    ld a2, 32(a1)
15390; RV64ZVE32F-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
15391; RV64ZVE32F-NEXT:    ld a2, 48(a1)
15392; RV64ZVE32F-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
15393; RV64ZVE32F-NEXT:    ld a2, 64(a1)
15394; RV64ZVE32F-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
15395; RV64ZVE32F-NEXT:    ld a7, 80(a1)
15396; RV64ZVE32F-NEXT:    ld t0, 96(a1)
15397; RV64ZVE32F-NEXT:    ld t1, 112(a1)
15398; RV64ZVE32F-NEXT:    ld t2, 128(a1)
15399; RV64ZVE32F-NEXT:    ld t3, 144(a1)
15400; RV64ZVE32F-NEXT:    ld t4, 160(a1)
15401; RV64ZVE32F-NEXT:    ld t5, 176(a1)
15402; RV64ZVE32F-NEXT:    ld t6, 192(a1)
15403; RV64ZVE32F-NEXT:    ld s0, 208(a1)
15404; RV64ZVE32F-NEXT:    ld s1, 224(a1)
15405; RV64ZVE32F-NEXT:    ld s2, 240(a1)
15406; RV64ZVE32F-NEXT:    ld s3, 256(a1)
15407; RV64ZVE32F-NEXT:    ld s4, 272(a1)
15408; RV64ZVE32F-NEXT:    ld s5, 288(a1)
15409; RV64ZVE32F-NEXT:    ld s6, 304(a1)
15410; RV64ZVE32F-NEXT:    ld s7, 320(a1)
15411; RV64ZVE32F-NEXT:    ld s8, 336(a1)
15412; RV64ZVE32F-NEXT:    ld s9, 352(a1)
15413; RV64ZVE32F-NEXT:    ld s10, 368(a1)
15414; RV64ZVE32F-NEXT:    ld s11, 384(a1)
15415; RV64ZVE32F-NEXT:    ld ra, 400(a1)
15416; RV64ZVE32F-NEXT:    ld a6, 416(a1)
15417; RV64ZVE32F-NEXT:    ld a5, 432(a1)
15418; RV64ZVE32F-NEXT:    ld a2, 448(a1)
15419; RV64ZVE32F-NEXT:    ld a3, 464(a1)
15420; RV64ZVE32F-NEXT:    ld a4, 480(a1)
15421; RV64ZVE32F-NEXT:    ld a1, 496(a1)
15422; RV64ZVE32F-NEXT:    sd a2, 224(a0)
15423; RV64ZVE32F-NEXT:    sd a3, 232(a0)
15424; RV64ZVE32F-NEXT:    sd a4, 240(a0)
15425; RV64ZVE32F-NEXT:    sd a1, 248(a0)
15426; RV64ZVE32F-NEXT:    sd s11, 192(a0)
15427; RV64ZVE32F-NEXT:    sd ra, 200(a0)
15428; RV64ZVE32F-NEXT:    sd a6, 208(a0)
15429; RV64ZVE32F-NEXT:    sd a5, 216(a0)
15430; RV64ZVE32F-NEXT:    sd s7, 160(a0)
15431; RV64ZVE32F-NEXT:    sd s8, 168(a0)
15432; RV64ZVE32F-NEXT:    sd s9, 176(a0)
15433; RV64ZVE32F-NEXT:    sd s10, 184(a0)
15434; RV64ZVE32F-NEXT:    sd s3, 128(a0)
15435; RV64ZVE32F-NEXT:    sd s4, 136(a0)
15436; RV64ZVE32F-NEXT:    sd s5, 144(a0)
15437; RV64ZVE32F-NEXT:    sd s6, 152(a0)
15438; RV64ZVE32F-NEXT:    sd t6, 96(a0)
15439; RV64ZVE32F-NEXT:    sd s0, 104(a0)
15440; RV64ZVE32F-NEXT:    sd s1, 112(a0)
15441; RV64ZVE32F-NEXT:    sd s2, 120(a0)
15442; RV64ZVE32F-NEXT:    sd t2, 64(a0)
15443; RV64ZVE32F-NEXT:    sd t3, 72(a0)
15444; RV64ZVE32F-NEXT:    sd t4, 80(a0)
15445; RV64ZVE32F-NEXT:    sd t5, 88(a0)
15446; RV64ZVE32F-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
15447; RV64ZVE32F-NEXT:    sd a1, 32(a0)
15448; RV64ZVE32F-NEXT:    sd a7, 40(a0)
15449; RV64ZVE32F-NEXT:    sd t0, 48(a0)
15450; RV64ZVE32F-NEXT:    sd t1, 56(a0)
15451; RV64ZVE32F-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
15452; RV64ZVE32F-NEXT:    sd a1, 0(a0)
15453; RV64ZVE32F-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
15454; RV64ZVE32F-NEXT:    sd a1, 8(a0)
15455; RV64ZVE32F-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
15456; RV64ZVE32F-NEXT:    sd a1, 16(a0)
15457; RV64ZVE32F-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
15458; RV64ZVE32F-NEXT:    sd a1, 24(a0)
15459; RV64ZVE32F-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
15460; RV64ZVE32F-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
15461; RV64ZVE32F-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
15462; RV64ZVE32F-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
15463; RV64ZVE32F-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
15464; RV64ZVE32F-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
15465; RV64ZVE32F-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
15466; RV64ZVE32F-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
15467; RV64ZVE32F-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
15468; RV64ZVE32F-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
15469; RV64ZVE32F-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
15470; RV64ZVE32F-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
15471; RV64ZVE32F-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
15472; RV64ZVE32F-NEXT:    .cfi_restore ra
15473; RV64ZVE32F-NEXT:    .cfi_restore s0
15474; RV64ZVE32F-NEXT:    .cfi_restore s1
15475; RV64ZVE32F-NEXT:    .cfi_restore s2
15476; RV64ZVE32F-NEXT:    .cfi_restore s3
15477; RV64ZVE32F-NEXT:    .cfi_restore s4
15478; RV64ZVE32F-NEXT:    .cfi_restore s5
15479; RV64ZVE32F-NEXT:    .cfi_restore s6
15480; RV64ZVE32F-NEXT:    .cfi_restore s7
15481; RV64ZVE32F-NEXT:    .cfi_restore s8
15482; RV64ZVE32F-NEXT:    .cfi_restore s9
15483; RV64ZVE32F-NEXT:    .cfi_restore s10
15484; RV64ZVE32F-NEXT:    .cfi_restore s11
15485; RV64ZVE32F-NEXT:    addi sp, sp, 144
15486; RV64ZVE32F-NEXT:    .cfi_def_cfa_offset 0
15487; RV64ZVE32F-NEXT:    ret
15488  %ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38, i64 40, i64 42, i64 44, i64 46, i64 48, i64 50, i64 52, i64 54, i64 56, i64 58, i64 60, i64 62>
15489  %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> splat (i1 true), <32 x i64> poison)
15490  ret <32 x i64> %x
15491}
15492
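; The index vector <34, 35, 0, 1> below addresses two contiguous i32 pairs at
; byte offsets 136 and 0, so targets with 64-bit ELEN can widen SEW to e64 and
; use a two-element strided load with stride -136; ZVE32F targets cannot widen
; and fall back to an e32 indexed load (RV32) or scalar loads (RV64).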
15493define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
15494; RV32V-LABEL: masked_gather_widen_sew_negative_stride:
15495; RV32V:       # %bb.0:
15496; RV32V-NEXT:    addi a0, a0, 136
15497; RV32V-NEXT:    li a1, -136
15498; RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
15499; RV32V-NEXT:    vlse64.v v8, (a0), a1
15500; RV32V-NEXT:    ret
15501;
15502; RV64V-LABEL: masked_gather_widen_sew_negative_stride:
15503; RV64V:       # %bb.0:
15504; RV64V-NEXT:    addi a0, a0, 136
15505; RV64V-NEXT:    li a1, -136
15506; RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
15507; RV64V-NEXT:    vlse64.v v8, (a0), a1
15508; RV64V-NEXT:    ret
15509;
15510; RV32ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
15511; RV32ZVE32F:       # %bb.0:
15512; RV32ZVE32F-NEXT:    lui a1, 16393
15513; RV32ZVE32F-NEXT:    addi a1, a1, -888
15514; RV32ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
15515; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
15516; RV32ZVE32F-NEXT:    vluxei8.v v8, (a0), v9
15517; RV32ZVE32F-NEXT:    ret
15518;
15519; RV64ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
15520; RV64ZVE32F:       # %bb.0:
15521; RV64ZVE32F-NEXT:    lw a1, 136(a0)
15522; RV64ZVE32F-NEXT:    lw a2, 140(a0)
15523; RV64ZVE32F-NEXT:    lw a3, 0(a0)
15524; RV64ZVE32F-NEXT:    lw a0, 4(a0)
15525; RV64ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
15526; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
15527; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a2
15528; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a3
15529; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
15530; RV64ZVE32F-NEXT:    ret
15531  %ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 34, i64 35, i64 0, i64 1>
15532  %x = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 true), <4 x i32> poison)
15533  ret <4 x i32> %x
15534}
15535
15536;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
15537; RV32V-ZVFH: {{.*}}
15538; RV32V-ZVFHMIN: {{.*}}
15539; RV32ZVE32F-ZVFH: {{.*}}
15540; RV32ZVE32F-ZVFHMIN: {{.*}}
15541; RV64: {{.*}}
15542; RV64V-ZVFH: {{.*}}
15543; RV64V-ZVFHMIN: {{.*}}
15544