xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll (revision dc72ec808d97a83fe9d3c1889302067cbee24c91)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
3; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
4; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
5; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
6; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
7; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN
8; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
9; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN
10
11declare <4 x i1> @llvm.vp.merge.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
12
; Mask-vector (i1 element) form of llvm.vp.merge on 4 lanes, with mask %m and
; explicit vector length %evl. The expected output builds an "element index
; below a0" mask with vid.v + vmsltu.vx and then combines the operands purely
; with mask-register logic (vmand / vmandn / vmor). The 32-bit runs use an
; e32/m1 index vector, the 64-bit runs an e64/m2 one.
13define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 zeroext %evl) {
14; RV32-LABEL: vpmerge_vv_v4i1:
15; RV32:       # %bb.0:
16; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
17; RV32-NEXT:    vid.v v10
18; RV32-NEXT:    vmsltu.vx v10, v10, a0
19; RV32-NEXT:    vmand.mm v9, v9, v10
20; RV32-NEXT:    vmandn.mm v8, v8, v9
21; RV32-NEXT:    vmand.mm v9, v0, v9
22; RV32-NEXT:    vmor.mm v0, v9, v8
23; RV32-NEXT:    ret
24;
25; RV64-LABEL: vpmerge_vv_v4i1:
26; RV64:       # %bb.0:
27; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
28; RV64-NEXT:    vid.v v10
29; RV64-NEXT:    vmsltu.vx v12, v10, a0
30; RV64-NEXT:    vmand.mm v9, v9, v12
31; RV64-NEXT:    vmandn.mm v8, v8, v9
32; RV64-NEXT:    vmand.mm v9, v0, v9
33; RV64-NEXT:    vmor.mm v0, v9, v8
34; RV64-NEXT:    ret
35;
36; RV32ZVFHMIN-LABEL: vpmerge_vv_v4i1:
37; RV32ZVFHMIN:       # %bb.0:
38; RV32ZVFHMIN-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
39; RV32ZVFHMIN-NEXT:    vid.v v10
40; RV32ZVFHMIN-NEXT:    vmsltu.vx v10, v10, a0
41; RV32ZVFHMIN-NEXT:    vmand.mm v9, v9, v10
42; RV32ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
43; RV32ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
44; RV32ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
45; RV32ZVFHMIN-NEXT:    ret
46;
47; RV64ZVFHMIN-LABEL: vpmerge_vv_v4i1:
48; RV64ZVFHMIN:       # %bb.0:
49; RV64ZVFHMIN-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
50; RV64ZVFHMIN-NEXT:    vid.v v10
51; RV64ZVFHMIN-NEXT:    vmsltu.vx v12, v10, a0
52; RV64ZVFHMIN-NEXT:    vmand.mm v9, v9, v12
53; RV64ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
54; RV64ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
55; RV64ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
56; RV64ZVFHMIN-NEXT:    ret
57  %v = call <4 x i1> @llvm.vp.merge.v4i1(<4 x i1> %m, <4 x i1> %va, <4 x i1> %vb, i32 %evl)
58  ret <4 x i1> %v
59}
60
; Mask-vector form of llvm.vp.merge on 8 lanes. Same vid.v + vmsltu.vx +
; mask-logic sequence as the 4-lane case, but the index vector grows to
; e32/m2 on the 32-bit runs and e64/m4 on the 64-bit runs.
61define <8 x i1> @vpmerge_vv_v8i1(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %m, i32 zeroext %evl) {
62; RV32-LABEL: vpmerge_vv_v8i1:
63; RV32:       # %bb.0:
64; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
65; RV32-NEXT:    vid.v v10
66; RV32-NEXT:    vmsltu.vx v12, v10, a0
67; RV32-NEXT:    vmand.mm v9, v9, v12
68; RV32-NEXT:    vmandn.mm v8, v8, v9
69; RV32-NEXT:    vmand.mm v9, v0, v9
70; RV32-NEXT:    vmor.mm v0, v9, v8
71; RV32-NEXT:    ret
72;
73; RV64-LABEL: vpmerge_vv_v8i1:
74; RV64:       # %bb.0:
75; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
76; RV64-NEXT:    vid.v v12
77; RV64-NEXT:    vmsltu.vx v10, v12, a0
78; RV64-NEXT:    vmand.mm v9, v9, v10
79; RV64-NEXT:    vmandn.mm v8, v8, v9
80; RV64-NEXT:    vmand.mm v9, v0, v9
81; RV64-NEXT:    vmor.mm v0, v9, v8
82; RV64-NEXT:    ret
83;
84; RV32ZVFHMIN-LABEL: vpmerge_vv_v8i1:
85; RV32ZVFHMIN:       # %bb.0:
86; RV32ZVFHMIN-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
87; RV32ZVFHMIN-NEXT:    vid.v v10
88; RV32ZVFHMIN-NEXT:    vmsltu.vx v12, v10, a0
89; RV32ZVFHMIN-NEXT:    vmand.mm v9, v9, v12
90; RV32ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
91; RV32ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
92; RV32ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
93; RV32ZVFHMIN-NEXT:    ret
94;
95; RV64ZVFHMIN-LABEL: vpmerge_vv_v8i1:
96; RV64ZVFHMIN:       # %bb.0:
97; RV64ZVFHMIN-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
98; RV64ZVFHMIN-NEXT:    vid.v v12
99; RV64ZVFHMIN-NEXT:    vmsltu.vx v10, v12, a0
100; RV64ZVFHMIN-NEXT:    vmand.mm v9, v9, v10
101; RV64ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
102; RV64ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
103; RV64ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
104; RV64ZVFHMIN-NEXT:    ret
105  %v = call <8 x i1> @llvm.vp.merge.v8i1(<8 x i1> %m, <8 x i1> %va, <8 x i1> %vb, i32 %evl)
106  ret <8 x i1> %v
107}
108
; Mask-vector form of llvm.vp.merge on 16 lanes. Still lowered through the
; vid.v + vmsltu.vx + mask-logic sequence; the index vector is e32/m4 on the
; 32-bit runs and reaches e64/m8 on the 64-bit runs.
109define <16 x i1> @vpmerge_vv_v16i1(<16 x i1> %va, <16 x i1> %vb, <16 x i1> %m, i32 zeroext %evl) {
110; RV32-LABEL: vpmerge_vv_v16i1:
111; RV32:       # %bb.0:
112; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
113; RV32-NEXT:    vid.v v12
114; RV32-NEXT:    vmsltu.vx v10, v12, a0
115; RV32-NEXT:    vmand.mm v9, v9, v10
116; RV32-NEXT:    vmandn.mm v8, v8, v9
117; RV32-NEXT:    vmand.mm v9, v0, v9
118; RV32-NEXT:    vmor.mm v0, v9, v8
119; RV32-NEXT:    ret
120;
121; RV64-LABEL: vpmerge_vv_v16i1:
122; RV64:       # %bb.0:
123; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
124; RV64-NEXT:    vid.v v16
125; RV64-NEXT:    vmsltu.vx v10, v16, a0
126; RV64-NEXT:    vmand.mm v9, v9, v10
127; RV64-NEXT:    vmandn.mm v8, v8, v9
128; RV64-NEXT:    vmand.mm v9, v0, v9
129; RV64-NEXT:    vmor.mm v0, v9, v8
130; RV64-NEXT:    ret
131;
132; RV32ZVFHMIN-LABEL: vpmerge_vv_v16i1:
133; RV32ZVFHMIN:       # %bb.0:
134; RV32ZVFHMIN-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
135; RV32ZVFHMIN-NEXT:    vid.v v12
136; RV32ZVFHMIN-NEXT:    vmsltu.vx v10, v12, a0
137; RV32ZVFHMIN-NEXT:    vmand.mm v9, v9, v10
138; RV32ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
139; RV32ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
140; RV32ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
141; RV32ZVFHMIN-NEXT:    ret
142;
143; RV64ZVFHMIN-LABEL: vpmerge_vv_v16i1:
144; RV64ZVFHMIN:       # %bb.0:
145; RV64ZVFHMIN-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
146; RV64ZVFHMIN-NEXT:    vid.v v16
147; RV64ZVFHMIN-NEXT:    vmsltu.vx v10, v16, a0
148; RV64ZVFHMIN-NEXT:    vmand.mm v9, v9, v10
149; RV64ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
150; RV64ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
151; RV64ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
152; RV64ZVFHMIN-NEXT:    ret
153  %v = call <16 x i1> @llvm.vp.merge.v16i1(<16 x i1> %m, <16 x i1> %va, <16 x i1> %vb, i32 %evl)
154  ret <16 x i1> %v
155}
156
; Mask-vector form of llvm.vp.merge on 32 lanes; the two XLENs diverge here.
; The 32-bit runs keep the vid.v + vmsltu.vx + mask-logic sequence (e32/m8,
; with the AVL of 32 materialised by li). The 64-bit runs instead widen the
; masks to e8/m2 bytes with vmerge.vim, merge them with a tail-undisturbed
; vmerge.vvm, and convert back to a mask with vmsne.vi.
157define <32 x i1> @vpmerge_vv_v32i1(<32 x i1> %va, <32 x i1> %vb, <32 x i1> %m, i32 zeroext %evl) {
158; RV32-LABEL: vpmerge_vv_v32i1:
159; RV32:       # %bb.0:
160; RV32-NEXT:    li a1, 32
161; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
162; RV32-NEXT:    vid.v v16
163; RV32-NEXT:    vmsltu.vx v10, v16, a0
164; RV32-NEXT:    vmand.mm v9, v9, v10
165; RV32-NEXT:    vmandn.mm v8, v8, v9
166; RV32-NEXT:    vmand.mm v9, v0, v9
167; RV32-NEXT:    vmor.mm v0, v9, v8
168; RV32-NEXT:    ret
169;
170; RV64-LABEL: vpmerge_vv_v32i1:
171; RV64:       # %bb.0:
172; RV64-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
173; RV64-NEXT:    vmv.v.i v10, 0
174; RV64-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
175; RV64-NEXT:    vmerge.vim v12, v10, 1, v0
176; RV64-NEXT:    vmv1r.v v0, v8
177; RV64-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
178; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
179; RV64-NEXT:    vmv1r.v v0, v9
180; RV64-NEXT:    vsetvli zero, a0, e8, m2, tu, ma
181; RV64-NEXT:    vmerge.vvm v10, v10, v12, v0
182; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
183; RV64-NEXT:    vmsne.vi v0, v10, 0
184; RV64-NEXT:    ret
185;
186; RV32ZVFHMIN-LABEL: vpmerge_vv_v32i1:
187; RV32ZVFHMIN:       # %bb.0:
188; RV32ZVFHMIN-NEXT:    li a1, 32
189; RV32ZVFHMIN-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
190; RV32ZVFHMIN-NEXT:    vid.v v16
191; RV32ZVFHMIN-NEXT:    vmsltu.vx v10, v16, a0
192; RV32ZVFHMIN-NEXT:    vmand.mm v9, v9, v10
193; RV32ZVFHMIN-NEXT:    vmandn.mm v8, v8, v9
194; RV32ZVFHMIN-NEXT:    vmand.mm v9, v0, v9
195; RV32ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
196; RV32ZVFHMIN-NEXT:    ret
197;
198; RV64ZVFHMIN-LABEL: vpmerge_vv_v32i1:
199; RV64ZVFHMIN:       # %bb.0:
200; RV64ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
201; RV64ZVFHMIN-NEXT:    vmv.v.i v10, 0
202; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
203; RV64ZVFHMIN-NEXT:    vmerge.vim v12, v10, 1, v0
204; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v8
205; RV64ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
206; RV64ZVFHMIN-NEXT:    vmerge.vim v10, v10, 1, v0
207; RV64ZVFHMIN-NEXT:    vmv1r.v v0, v9
208; RV64ZVFHMIN-NEXT:    vsetvli zero, a0, e8, m2, tu, ma
209; RV64ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v12, v0
210; RV64ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
211; RV64ZVFHMIN-NEXT:    vmsne.vi v0, v10, 0
212; RV64ZVFHMIN-NEXT:    ret
213  %v = call <32 x i1> @llvm.vp.merge.v32i1(<32 x i1> %m, <32 x i1> %va, <32 x i1> %vb, i32 %evl)
214  ret <32 x i1> %v
215}
216
; Mask-vector form of llvm.vp.merge on 64 lanes. All four run configurations
; share one expected sequence under the common check prefix. The masks are
; widened to e8/m4 bytes with vmerge.vim, merged with a tail-undisturbed
; vmerge.vvm, and turned back into a mask with vmsne.vi.
217define <64 x i1> @vpmerge_vv_v64i1(<64 x i1> %va, <64 x i1> %vb, <64 x i1> %m, i32 zeroext %evl) {
218; CHECK-LABEL: vpmerge_vv_v64i1:
219; CHECK:       # %bb.0:
220; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
221; CHECK-NEXT:    vmv.v.i v12, 0
222; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
223; CHECK-NEXT:    vmerge.vim v16, v12, 1, v0
224; CHECK-NEXT:    vmv1r.v v0, v8
225; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
226; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
227; CHECK-NEXT:    vmv1r.v v0, v9
228; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, ma
229; CHECK-NEXT:    vmerge.vvm v12, v12, v16, v0
230; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
231; CHECK-NEXT:    vmsne.vi v0, v12, 0
232; CHECK-NEXT:    ret
233  %v = call <64 x i1> @llvm.vp.merge.v64i1(<64 x i1> %m, <64 x i1> %va, <64 x i1> %vb, i32 %evl)
234  ret <64 x i1> %v
235}
236
237declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
238
; Vector-vector llvm.vp.merge on <2 x i8> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e8/mf8
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
239define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
240; CHECK-LABEL: vpmerge_vv_v2i8:
241; CHECK:       # %bb.0:
242; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, ma
243; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
244; CHECK-NEXT:    vmv1r.v v8, v9
245; CHECK-NEXT:    ret
246  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl)
247  ret <2 x i8> %v
248}
249
; Scalar-splat form on <2 x i8>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e8/mf8, tu), with no
; separate splat instruction.
250define <2 x i8> @vpmerge_vx_v2i8(i8 %a, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
251; CHECK-LABEL: vpmerge_vx_v2i8:
252; CHECK:       # %bb.0:
253; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, ma
254; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
255; CHECK-NEXT:    ret
256  %elt.head = insertelement <2 x i8> poison, i8 %a, i32 0
257  %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
258  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl)
259  ret <2 x i8> %v
260}
261
; Immediate form on <2 x i8>. The constant splat (i8 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e8/mf8, tu).
262define <2 x i8> @vpmerge_vi_v2i8(<2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
263; CHECK-LABEL: vpmerge_vi_v2i8:
264; CHECK:       # %bb.0:
265; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, ma
266; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
267; CHECK-NEXT:    ret
268  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> splat (i8 2), <2 x i8> %vb, i32 %evl)
269  ret <2 x i8> %v
270}
271
272declare <4 x i8> @llvm.vp.merge.v4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32)
273
; Vector-vector llvm.vp.merge on <4 x i8> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e8/mf4
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
274define <4 x i8> @vpmerge_vv_v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
275; CHECK-LABEL: vpmerge_vv_v4i8:
276; CHECK:       # %bb.0:
277; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, ma
278; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
279; CHECK-NEXT:    vmv1r.v v8, v9
280; CHECK-NEXT:    ret
281  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl)
282  ret <4 x i8> %v
283}
284
; Scalar-splat form on <4 x i8>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e8/mf4, tu).
285define <4 x i8> @vpmerge_vx_v4i8(i8 %a, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
286; CHECK-LABEL: vpmerge_vx_v4i8:
287; CHECK:       # %bb.0:
288; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, ma
289; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
290; CHECK-NEXT:    ret
291  %elt.head = insertelement <4 x i8> poison, i8 %a, i32 0
292  %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
293  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl)
294  ret <4 x i8> %v
295}
296
; Immediate form on <4 x i8>. The constant splat (i8 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e8/mf4, tu).
297define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
298; CHECK-LABEL: vpmerge_vi_v4i8:
299; CHECK:       # %bb.0:
300; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, ma
301; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
302; CHECK-NEXT:    ret
303  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> splat (i8 2), <4 x i8> %vb, i32 %evl)
304  ret <4 x i8> %v
305}
306
307declare <6 x i8> @llvm.vp.merge.v6i8(<6 x i1>, <6 x i8>, <6 x i8>, i32)
308
; Vector-vector llvm.vp.merge on <6 x i8>, a non-power-of-two element count.
; The expected output uses the same e8/mf2 tail-undisturbed vmerge.vvm plus
; vmv1r.v copy that the <8 x i8> test in this file expects.
309define <6 x i8> @vpmerge_vv_v6i8(<6 x i8> %va, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
310; CHECK-LABEL: vpmerge_vv_v6i8:
311; CHECK:       # %bb.0:
312; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
313; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
314; CHECK-NEXT:    vmv1r.v v8, v9
315; CHECK-NEXT:    ret
316  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
317  ret <6 x i8> %v
318}
319
; Scalar-splat form on <6 x i8> (non-power-of-two element count). %a is
; broadcast with insertelement plus a zeroinitializer-mask shufflevector and
; passed as %va. Expected output is a single vmerge.vxm reading a0 (e8/mf2, tu).
320define <6 x i8> @vpmerge_vx_v6i8(i8 %a, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
321; CHECK-LABEL: vpmerge_vx_v6i8:
322; CHECK:       # %bb.0:
323; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
324; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
325; CHECK-NEXT:    ret
326  %elt.head = insertelement <6 x i8> poison, i8 %a, i32 0
327  %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
328  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
329  ret <6 x i8> %v
330}
331
; Immediate form on <6 x i8> (non-power-of-two element count). The constant
; splat (i8 2) is merged with %vb. Expected output is a single vmerge.vim
; with immediate 2 (e8/mf2, tu).
332define <6 x i8> @vpmerge_vi_v6i8(<6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
333; CHECK-LABEL: vpmerge_vi_v6i8:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
336; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
337; CHECK-NEXT:    ret
338  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> splat (i8 2), <6 x i8> %vb, i32 %evl)
339  ret <6 x i8> %v
340}
341
342declare <8 x i7> @llvm.vp.merge.v8i7(<8 x i1>, <8 x i7>, <8 x i7>, i32)
343
; Vector-vector llvm.vp.merge on <8 x i7>, a non-byte-sized element type.
; The expected output operates on e8 elements. It is one tail-undisturbed
; vmerge.vvm at e8/mf2 followed by a vmv1r.v copy into v8.
344define <8 x i7> @vpmerge_vv_v8i7(<8 x i7> %va, <8 x i7> %vb, <8 x i1> %m, i32 zeroext %evl) {
345; CHECK-LABEL: vpmerge_vv_v8i7:
346; CHECK:       # %bb.0:
347; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
348; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
349; CHECK-NEXT:    vmv1r.v v8, v9
350; CHECK-NEXT:    ret
351  %v = call <8 x i7> @llvm.vp.merge.v8i7(<8 x i1> %m, <8 x i7> %va, <8 x i7> %vb, i32 %evl)
352  ret <8 x i7> %v
353}
354
; Scalar-splat form on <8 x i7> (non-byte-sized element type). The i7 scalar
; %a is broadcast with insertelement plus a zeroinitializer-mask
; shufflevector and passed as %va. Expected output is a single vmerge.vxm
; reading a0 at e8/mf2, tu.
355define <8 x i7> @vpmerge_vx_v8i7(i7 %a, <8 x i7> %vb, <8 x i1> %m, i32 zeroext %evl) {
356; CHECK-LABEL: vpmerge_vx_v8i7:
357; CHECK:       # %bb.0:
358; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
359; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
360; CHECK-NEXT:    ret
361  %elt.head = insertelement <8 x i7> poison, i7 %a, i32 0
362  %va = shufflevector <8 x i7> %elt.head, <8 x i7> poison, <8 x i32> zeroinitializer
363  %v = call <8 x i7> @llvm.vp.merge.v8i7(<8 x i1> %m, <8 x i7> %va, <8 x i7> %vb, i32 %evl)
364  ret <8 x i7> %v
365}
366
; Immediate form on <8 x i7> (non-byte-sized element type). The constant
; splat (i7 2) is merged with %vb. Expected output is a single vmerge.vim
; with immediate 2 at e8/mf2, tu.
367define <8 x i7> @vpmerge_vi_v8i7(<8 x i7> %vb, <8 x i1> %m, i32 zeroext %evl) {
368; CHECK-LABEL: vpmerge_vi_v8i7:
369; CHECK:       # %bb.0:
370; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
371; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
372; CHECK-NEXT:    ret
373  %v = call <8 x i7> @llvm.vp.merge.v8i7(<8 x i1> %m, <8 x i7> splat (i7 2), <8 x i7> %vb, i32 %evl)
374  ret <8 x i7> %v
375}
376
377declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
378
; Vector-vector llvm.vp.merge on <8 x i8> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e8/mf2
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
379define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
380; CHECK-LABEL: vpmerge_vv_v8i8:
381; CHECK:       # %bb.0:
382; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
383; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
384; CHECK-NEXT:    vmv1r.v v8, v9
385; CHECK-NEXT:    ret
386  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl)
387  ret <8 x i8> %v
388}
389
; Scalar-splat form on <8 x i8>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e8/mf2, tu).
390define <8 x i8> @vpmerge_vx_v8i8(i8 %a, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
391; CHECK-LABEL: vpmerge_vx_v8i8:
392; CHECK:       # %bb.0:
393; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
394; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
395; CHECK-NEXT:    ret
396  %elt.head = insertelement <8 x i8> poison, i8 %a, i32 0
397  %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
398  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl)
399  ret <8 x i8> %v
400}
401
; Immediate form on <8 x i8>. The constant splat (i8 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e8/mf2, tu).
402define <8 x i8> @vpmerge_vi_v8i8(<8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
403; CHECK-LABEL: vpmerge_vi_v8i8:
404; CHECK:       # %bb.0:
405; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
406; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
407; CHECK-NEXT:    ret
408  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> splat (i8 2), <8 x i8> %vb, i32 %evl)
409  ret <8 x i8> %v
410}
411
412declare <16 x i8> @llvm.vp.merge.v16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32)
413
; Vector-vector llvm.vp.merge on <16 x i8> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e8/m1
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
414define <16 x i8> @vpmerge_vv_v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
415; CHECK-LABEL: vpmerge_vv_v16i8:
416; CHECK:       # %bb.0:
417; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
418; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
419; CHECK-NEXT:    vmv1r.v v8, v9
420; CHECK-NEXT:    ret
421  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl)
422  ret <16 x i8> %v
423}
424
; Scalar-splat form on <16 x i8>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e8/m1, tu).
425define <16 x i8> @vpmerge_vx_v16i8(i8 %a, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
426; CHECK-LABEL: vpmerge_vx_v16i8:
427; CHECK:       # %bb.0:
428; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
429; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
430; CHECK-NEXT:    ret
431  %elt.head = insertelement <16 x i8> poison, i8 %a, i32 0
432  %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
433  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl)
434  ret <16 x i8> %v
435}
436
; Immediate form on <16 x i8>. The constant splat (i8 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e8/m1, tu).
437define <16 x i8> @vpmerge_vi_v16i8(<16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
438; CHECK-LABEL: vpmerge_vi_v16i8:
439; CHECK:       # %bb.0:
440; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
441; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
442; CHECK-NEXT:    ret
443  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> splat (i8 2), <16 x i8> %vb, i32 %evl)
444  ret <16 x i8> %v
445}
446
447declare <2 x i16> @llvm.vp.merge.v2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32)
448
; Vector-vector llvm.vp.merge on <2 x i16> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e16/mf4
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
449define <2 x i16> @vpmerge_vv_v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
450; CHECK-LABEL: vpmerge_vv_v2i16:
451; CHECK:       # %bb.0:
452; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
453; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
454; CHECK-NEXT:    vmv1r.v v8, v9
455; CHECK-NEXT:    ret
456  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl)
457  ret <2 x i16> %v
458}
459
; Scalar-splat form on <2 x i16>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e16/mf4, tu).
460define <2 x i16> @vpmerge_vx_v2i16(i16 %a, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
461; CHECK-LABEL: vpmerge_vx_v2i16:
462; CHECK:       # %bb.0:
463; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
464; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
465; CHECK-NEXT:    ret
466  %elt.head = insertelement <2 x i16> poison, i16 %a, i32 0
467  %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
468  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl)
469  ret <2 x i16> %v
470}
471
; Immediate form on <2 x i16>. The constant splat (i16 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e16/mf4, tu).
472define <2 x i16> @vpmerge_vi_v2i16(<2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
473; CHECK-LABEL: vpmerge_vi_v2i16:
474; CHECK:       # %bb.0:
475; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
476; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
477; CHECK-NEXT:    ret
478  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> splat (i16 2), <2 x i16> %vb, i32 %evl)
479  ret <2 x i16> %v
480}
481
482declare <4 x i16> @llvm.vp.merge.v4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32)
483
; Vector-vector llvm.vp.merge on <4 x i16> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e16/mf2
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
484define <4 x i16> @vpmerge_vv_v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
485; CHECK-LABEL: vpmerge_vv_v4i16:
486; CHECK:       # %bb.0:
487; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
488; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
489; CHECK-NEXT:    vmv1r.v v8, v9
490; CHECK-NEXT:    ret
491  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl)
492  ret <4 x i16> %v
493}
494
; Scalar-splat form on <4 x i16>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e16/mf2, tu).
495define <4 x i16> @vpmerge_vx_v4i16(i16 %a, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
496; CHECK-LABEL: vpmerge_vx_v4i16:
497; CHECK:       # %bb.0:
498; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
499; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
500; CHECK-NEXT:    ret
501  %elt.head = insertelement <4 x i16> poison, i16 %a, i32 0
502  %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
503  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl)
504  ret <4 x i16> %v
505}
506
; Immediate form on <4 x i16>. The constant splat (i16 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e16/mf2, tu).
507define <4 x i16> @vpmerge_vi_v4i16(<4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
508; CHECK-LABEL: vpmerge_vi_v4i16:
509; CHECK:       # %bb.0:
510; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
511; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
512; CHECK-NEXT:    ret
513  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> splat (i16 2), <4 x i16> %vb, i32 %evl)
514  ret <4 x i16> %v
515}
516
517declare <8 x i16> @llvm.vp.merge.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
518
; Vector-vector llvm.vp.merge on <8 x i16> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e16/m1
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
519define <8 x i16> @vpmerge_vv_v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
520; CHECK-LABEL: vpmerge_vv_v8i16:
521; CHECK:       # %bb.0:
522; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
523; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
524; CHECK-NEXT:    vmv1r.v v8, v9
525; CHECK-NEXT:    ret
526  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl)
527  ret <8 x i16> %v
528}
529
; Scalar-splat form on <8 x i16>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e16/m1, tu).
530define <8 x i16> @vpmerge_vx_v8i16(i16 %a, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
531; CHECK-LABEL: vpmerge_vx_v8i16:
532; CHECK:       # %bb.0:
533; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
534; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
535; CHECK-NEXT:    ret
536  %elt.head = insertelement <8 x i16> poison, i16 %a, i32 0
537  %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
538  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl)
539  ret <8 x i16> %v
540}
541
; Immediate form on <8 x i16>. The constant splat (i16 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e16/m1, tu).
542define <8 x i16> @vpmerge_vi_v8i16(<8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
543; CHECK-LABEL: vpmerge_vi_v8i16:
544; CHECK:       # %bb.0:
545; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
546; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
547; CHECK-NEXT:    ret
548  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> splat (i16 2), <8 x i16> %vb, i32 %evl)
549  ret <8 x i16> %v
550}
551
552declare <16 x i16> @llvm.vp.merge.v16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32)
553
; Vector-vector llvm.vp.merge on <16 x i16> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e16/m2
; tail-undisturbed (tu) vsetvli, followed by a two-register vmv2r.v copy
; into v8.
554define <16 x i16> @vpmerge_vv_v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
555; CHECK-LABEL: vpmerge_vv_v16i16:
556; CHECK:       # %bb.0:
557; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
558; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
559; CHECK-NEXT:    vmv2r.v v8, v10
560; CHECK-NEXT:    ret
561  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl)
562  ret <16 x i16> %v
563}
564
; Scalar-splat form on <16 x i16>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e16/m2, tu).
565define <16 x i16> @vpmerge_vx_v16i16(i16 %a, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
566; CHECK-LABEL: vpmerge_vx_v16i16:
567; CHECK:       # %bb.0:
568; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
569; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
570; CHECK-NEXT:    ret
571  %elt.head = insertelement <16 x i16> poison, i16 %a, i32 0
572  %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
573  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl)
574  ret <16 x i16> %v
575}
576
; Immediate form on <16 x i16>. The constant splat (i16 2) is merged with
; %vb. Expected output is a single vmerge.vim with immediate 2 (e16/m2, tu).
577define <16 x i16> @vpmerge_vi_v16i16(<16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
578; CHECK-LABEL: vpmerge_vi_v16i16:
579; CHECK:       # %bb.0:
580; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
581; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
582; CHECK-NEXT:    ret
583  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> splat (i16 2), <16 x i16> %vb, i32 %evl)
584  ret <16 x i16> %v
585}
586
587declare <2 x i32> @llvm.vp.merge.v2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32)
588
; Vector-vector llvm.vp.merge on <2 x i32> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e32/mf2
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
589define <2 x i32> @vpmerge_vv_v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
590; CHECK-LABEL: vpmerge_vv_v2i32:
591; CHECK:       # %bb.0:
592; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
593; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
594; CHECK-NEXT:    vmv1r.v v8, v9
595; CHECK-NEXT:    ret
596  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl)
597  ret <2 x i32> %v
598}
599
; Scalar-splat form on <2 x i32>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e32/mf2, tu).
600define <2 x i32> @vpmerge_vx_v2i32(i32 %a, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
601; CHECK-LABEL: vpmerge_vx_v2i32:
602; CHECK:       # %bb.0:
603; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, ma
604; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
605; CHECK-NEXT:    ret
606  %elt.head = insertelement <2 x i32> poison, i32 %a, i32 0
607  %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
608  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl)
609  ret <2 x i32> %v
610}
611
; Immediate form on <2 x i32>. The constant splat (i32 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e32/mf2, tu).
612define <2 x i32> @vpmerge_vi_v2i32(<2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
613; CHECK-LABEL: vpmerge_vi_v2i32:
614; CHECK:       # %bb.0:
615; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
616; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
617; CHECK-NEXT:    ret
618  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> splat (i32 2), <2 x i32> %vb, i32 %evl)
619  ret <2 x i32> %v
620}
621
622declare <4 x i32> @llvm.vp.merge.v4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32)
623
; Vector-vector llvm.vp.merge on <4 x i32> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e32/m1
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
624define <4 x i32> @vpmerge_vv_v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
625; CHECK-LABEL: vpmerge_vv_v4i32:
626; CHECK:       # %bb.0:
627; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
628; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
629; CHECK-NEXT:    vmv1r.v v8, v9
630; CHECK-NEXT:    ret
631  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl)
632  ret <4 x i32> %v
633}
634
; Scalar-splat form on <4 x i32>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e32/m1, tu).
635define <4 x i32> @vpmerge_vx_v4i32(i32 %a, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
636; CHECK-LABEL: vpmerge_vx_v4i32:
637; CHECK:       # %bb.0:
638; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
639; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
640; CHECK-NEXT:    ret
641  %elt.head = insertelement <4 x i32> poison, i32 %a, i32 0
642  %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
643  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl)
644  ret <4 x i32> %v
645}
646
; Immediate form on <4 x i32>. The constant splat (i32 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e32/m1, tu).
647define <4 x i32> @vpmerge_vi_v4i32(<4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
648; CHECK-LABEL: vpmerge_vi_v4i32:
649; CHECK:       # %bb.0:
650; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
651; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
652; CHECK-NEXT:    ret
653  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> splat (i32 2), <4 x i32> %vb, i32 %evl)
654  ret <4 x i32> %v
655}
656
657declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
658
; Vector-vector llvm.vp.merge on <8 x i32> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e32/m2
; tail-undisturbed (tu) vsetvli, followed by a two-register vmv2r.v copy
; into v8.
659define <8 x i32> @vpmerge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
660; CHECK-LABEL: vpmerge_vv_v8i32:
661; CHECK:       # %bb.0:
662; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
663; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
664; CHECK-NEXT:    vmv2r.v v8, v10
665; CHECK-NEXT:    ret
666  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl)
667  ret <8 x i32> %v
668}
669
; Scalar-splat form on <8 x i32>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e32/m2, tu).
670define <8 x i32> @vpmerge_vx_v8i32(i32 %a, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
671; CHECK-LABEL: vpmerge_vx_v8i32:
672; CHECK:       # %bb.0:
673; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
674; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
675; CHECK-NEXT:    ret
676  %elt.head = insertelement <8 x i32> poison, i32 %a, i32 0
677  %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
678  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl)
679  ret <8 x i32> %v
680}
681
; Immediate form on <8 x i32>. The constant splat (i32 2) is merged with %vb.
; Expected output is a single vmerge.vim with immediate 2 (e32/m2, tu).
682define <8 x i32> @vpmerge_vi_v8i32(<8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
683; CHECK-LABEL: vpmerge_vi_v8i32:
684; CHECK:       # %bb.0:
685; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
686; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
687; CHECK-NEXT:    ret
688  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> splat (i32 2), <8 x i32> %vb, i32 %evl)
689  ret <8 x i32> %v
690}
691
692declare <16 x i32> @llvm.vp.merge.v16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32)
693
; Vector-vector llvm.vp.merge on <16 x i32> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e32/m4
; tail-undisturbed (tu) vsetvli, followed by a four-register vmv4r.v copy
; into v8.
694define <16 x i32> @vpmerge_vv_v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
695; CHECK-LABEL: vpmerge_vv_v16i32:
696; CHECK:       # %bb.0:
697; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
698; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
699; CHECK-NEXT:    vmv4r.v v8, v12
700; CHECK-NEXT:    ret
701  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl)
702  ret <16 x i32> %v
703}
704
; Scalar-splat form on <16 x i32>. %a is broadcast with insertelement plus a
; zeroinitializer-mask shufflevector and passed as %va to llvm.vp.merge.
; Expected output is a single vmerge.vxm reading a0 (e32/m4, tu).
705define <16 x i32> @vpmerge_vx_v16i32(i32 %a, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
706; CHECK-LABEL: vpmerge_vx_v16i32:
707; CHECK:       # %bb.0:
708; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, ma
709; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
710; CHECK-NEXT:    ret
711  %elt.head = insertelement <16 x i32> poison, i32 %a, i32 0
712  %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
713  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl)
714  ret <16 x i32> %v
715}
716
; Immediate form on <16 x i32>. The constant splat (i32 2) is merged with
; %vb. Expected output is a single vmerge.vim with immediate 2 (e32/m4, tu).
717define <16 x i32> @vpmerge_vi_v16i32(<16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
718; CHECK-LABEL: vpmerge_vi_v16i32:
719; CHECK:       # %bb.0:
720; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
721; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
722; CHECK-NEXT:    ret
723  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> splat (i32 2), <16 x i32> %vb, i32 %evl)
724  ret <16 x i32> %v
725}
726
727declare <2 x i64> @llvm.vp.merge.v2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32)
728
; Vector-vector llvm.vp.merge on <2 x i64> with mask %m and explicit vector
; length %evl. Expected output is one vmerge.vvm under an e64/m1
; tail-undisturbed (tu) vsetvli, followed by a vmv1r.v copy into v8.
729define <2 x i64> @vpmerge_vv_v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
730; CHECK-LABEL: vpmerge_vv_v2i64:
731; CHECK:       # %bb.0:
732; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
733; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
734; CHECK-NEXT:    vmv1r.v v8, v9
735; CHECK-NEXT:    ret
736  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl)
737  ret <2 x i64> %v
738}
739
; Scalar-splat form on <2 x i64>; the i64 scalar makes the two XLENs diverge.
; The 32-bit runs store a0 and a1 to adjacent stack words, broadcast that
; 64-bit slot with a zero-stride vlse64.v, and merge with a tail-undisturbed
; vmerge.vvm (AVL in a2). The 64-bit runs fold the splat into a single
; vmerge.vxm reading a0 (AVL in a1).
740define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
741; RV32-LABEL: vpmerge_vx_v2i64:
742; RV32:       # %bb.0:
743; RV32-NEXT:    addi sp, sp, -16
744; RV32-NEXT:    .cfi_def_cfa_offset 16
745; RV32-NEXT:    sw a0, 8(sp)
746; RV32-NEXT:    sw a1, 12(sp)
747; RV32-NEXT:    addi a0, sp, 8
748; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
749; RV32-NEXT:    vlse64.v v9, (a0), zero
750; RV32-NEXT:    vsetvli zero, a2, e64, m1, tu, ma
751; RV32-NEXT:    vmerge.vvm v8, v8, v9, v0
752; RV32-NEXT:    addi sp, sp, 16
753; RV32-NEXT:    .cfi_def_cfa_offset 0
754; RV32-NEXT:    ret
755;
756; RV64-LABEL: vpmerge_vx_v2i64:
757; RV64:       # %bb.0:
758; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, ma
759; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
760; RV64-NEXT:    ret
761;
762; RV32ZVFHMIN-LABEL: vpmerge_vx_v2i64:
763; RV32ZVFHMIN:       # %bb.0:
764; RV32ZVFHMIN-NEXT:    addi sp, sp, -16
765; RV32ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
766; RV32ZVFHMIN-NEXT:    sw a0, 8(sp)
767; RV32ZVFHMIN-NEXT:    sw a1, 12(sp)
768; RV32ZVFHMIN-NEXT:    addi a0, sp, 8
769; RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
770; RV32ZVFHMIN-NEXT:    vlse64.v v9, (a0), zero
771; RV32ZVFHMIN-NEXT:    vsetvli zero, a2, e64, m1, tu, ma
772; RV32ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
773; RV32ZVFHMIN-NEXT:    addi sp, sp, 16
774; RV32ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
775; RV32ZVFHMIN-NEXT:    ret
776;
777; RV64ZVFHMIN-LABEL: vpmerge_vx_v2i64:
778; RV64ZVFHMIN:       # %bb.0:
779; RV64ZVFHMIN-NEXT:    vsetvli zero, a1, e64, m1, tu, ma
780; RV64ZVFHMIN-NEXT:    vmerge.vxm v8, v8, a0, v0
781; RV64ZVFHMIN-NEXT:    ret
782  %elt.head = insertelement <2 x i64> poison, i64 %a, i32 0
783  %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
784  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl)
785  ret <2 x i64> %v
786}
787
; Immediate form on <2 x i64>: the true operand is the constant splat (i64 2).
; The expected output is a single tail-undisturbed (tu) vmerge.vim at e64/m1,
; shared by every run line.
788define <2 x i64> @vpmerge_vi_v2i64(<2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
789; CHECK-LABEL: vpmerge_vi_v2i64:
790; CHECK:       # %bb.0:
791; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
792; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
793; CHECK-NEXT:    ret
794  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> splat (i64 2), <2 x i64> %vb, i32 %evl)
795  ret <2 x i64> %v
796}
797
; vp.merge intrinsic for <4 x i64>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
798declare <4 x i64> @llvm.vp.merge.v4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32)
799
; Vector-vector form on <4 x i64>. The expected output is one tail-undisturbed
; (tu) vmerge.vvm at e64/m2 that merges v8 into v10 under mask v0, followed by
; a two-register group move of the result from v10 back to v8.
800define <4 x i64> @vpmerge_vv_v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
801; CHECK-LABEL: vpmerge_vv_v4i64:
802; CHECK:       # %bb.0:
803; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
804; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
805; CHECK-NEXT:    vmv2r.v v8, v10
806; CHECK-NEXT:    ret
807  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl)
808  ret <4 x i64> %v
809}
810
; Vector-scalar form on <4 x i64>: %a is splatted (insertelement followed by a
; zero-mask shufflevector) and used as the true operand.
; The 64-bit run lines expect the splat to fold into a single vmerge.vxm.
; The 32-bit run lines expect a0 and a1 to be stored to adjacent stack slots,
; broadcast with a zero-stride vlse64.v, and then merged with vmerge.vvm.
811define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
812; RV32-LABEL: vpmerge_vx_v4i64:
813; RV32:       # %bb.0:
814; RV32-NEXT:    addi sp, sp, -16
815; RV32-NEXT:    .cfi_def_cfa_offset 16
816; RV32-NEXT:    sw a0, 8(sp)
817; RV32-NEXT:    sw a1, 12(sp)
818; RV32-NEXT:    addi a0, sp, 8
819; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
820; RV32-NEXT:    vlse64.v v10, (a0), zero
821; RV32-NEXT:    vsetvli zero, a2, e64, m2, tu, ma
822; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
823; RV32-NEXT:    addi sp, sp, 16
824; RV32-NEXT:    .cfi_def_cfa_offset 0
825; RV32-NEXT:    ret
826;
827; RV64-LABEL: vpmerge_vx_v4i64:
828; RV64:       # %bb.0:
829; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, ma
830; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
831; RV64-NEXT:    ret
832;
833; RV32ZVFHMIN-LABEL: vpmerge_vx_v4i64:
834; RV32ZVFHMIN:       # %bb.0:
835; RV32ZVFHMIN-NEXT:    addi sp, sp, -16
836; RV32ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
837; RV32ZVFHMIN-NEXT:    sw a0, 8(sp)
838; RV32ZVFHMIN-NEXT:    sw a1, 12(sp)
839; RV32ZVFHMIN-NEXT:    addi a0, sp, 8
840; RV32ZVFHMIN-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
841; RV32ZVFHMIN-NEXT:    vlse64.v v10, (a0), zero
842; RV32ZVFHMIN-NEXT:    vsetvli zero, a2, e64, m2, tu, ma
843; RV32ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
844; RV32ZVFHMIN-NEXT:    addi sp, sp, 16
845; RV32ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
846; RV32ZVFHMIN-NEXT:    ret
847;
848; RV64ZVFHMIN-LABEL: vpmerge_vx_v4i64:
849; RV64ZVFHMIN:       # %bb.0:
850; RV64ZVFHMIN-NEXT:    vsetvli zero, a1, e64, m2, tu, ma
851; RV64ZVFHMIN-NEXT:    vmerge.vxm v8, v8, a0, v0
852; RV64ZVFHMIN-NEXT:    ret
853  %elt.head = insertelement <4 x i64> poison, i64 %a, i32 0
854  %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
855  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl)
856  ret <4 x i64> %v
857}
858
; Immediate form on <4 x i64>: the true operand is the constant splat (i64 2).
; The expected output is a single tail-undisturbed (tu) vmerge.vim at e64/m2,
; shared by every run line.
859define <4 x i64> @vpmerge_vi_v4i64(<4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
860; CHECK-LABEL: vpmerge_vi_v4i64:
861; CHECK:       # %bb.0:
862; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
863; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
864; CHECK-NEXT:    ret
865  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> splat (i64 2), <4 x i64> %vb, i32 %evl)
866  ret <4 x i64> %v
867}
868
; vp.merge intrinsic for <8 x i64>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
869declare <8 x i64> @llvm.vp.merge.v8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32)
870
; Vector-vector form on <8 x i64>. The expected output is one tail-undisturbed
; (tu) vmerge.vvm at e64/m4 that merges v8 into v12 under mask v0, followed by
; a four-register group move of the result from v12 back to v8.
871define <8 x i64> @vpmerge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
872; CHECK-LABEL: vpmerge_vv_v8i64:
873; CHECK:       # %bb.0:
874; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
875; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
876; CHECK-NEXT:    vmv4r.v v8, v12
877; CHECK-NEXT:    ret
878  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl)
879  ret <8 x i64> %v
880}
881
; Vector-scalar form on <8 x i64>: %a is splatted (insertelement followed by a
; zero-mask shufflevector) and used as the true operand.
; The 64-bit run lines expect the splat to fold into a single vmerge.vxm.
; The 32-bit run lines expect a0 and a1 to be stored to adjacent stack slots,
; broadcast with a zero-stride vlse64.v, and then merged with vmerge.vvm.
882define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
883; RV32-LABEL: vpmerge_vx_v8i64:
884; RV32:       # %bb.0:
885; RV32-NEXT:    addi sp, sp, -16
886; RV32-NEXT:    .cfi_def_cfa_offset 16
887; RV32-NEXT:    sw a0, 8(sp)
888; RV32-NEXT:    sw a1, 12(sp)
889; RV32-NEXT:    addi a0, sp, 8
890; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
891; RV32-NEXT:    vlse64.v v12, (a0), zero
892; RV32-NEXT:    vsetvli zero, a2, e64, m4, tu, ma
893; RV32-NEXT:    vmerge.vvm v8, v8, v12, v0
894; RV32-NEXT:    addi sp, sp, 16
895; RV32-NEXT:    .cfi_def_cfa_offset 0
896; RV32-NEXT:    ret
897;
898; RV64-LABEL: vpmerge_vx_v8i64:
899; RV64:       # %bb.0:
900; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
901; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
902; RV64-NEXT:    ret
903;
904; RV32ZVFHMIN-LABEL: vpmerge_vx_v8i64:
905; RV32ZVFHMIN:       # %bb.0:
906; RV32ZVFHMIN-NEXT:    addi sp, sp, -16
907; RV32ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
908; RV32ZVFHMIN-NEXT:    sw a0, 8(sp)
909; RV32ZVFHMIN-NEXT:    sw a1, 12(sp)
910; RV32ZVFHMIN-NEXT:    addi a0, sp, 8
911; RV32ZVFHMIN-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
912; RV32ZVFHMIN-NEXT:    vlse64.v v12, (a0), zero
913; RV32ZVFHMIN-NEXT:    vsetvli zero, a2, e64, m4, tu, ma
914; RV32ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v12, v0
915; RV32ZVFHMIN-NEXT:    addi sp, sp, 16
916; RV32ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
917; RV32ZVFHMIN-NEXT:    ret
918;
919; RV64ZVFHMIN-LABEL: vpmerge_vx_v8i64:
920; RV64ZVFHMIN:       # %bb.0:
921; RV64ZVFHMIN-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
922; RV64ZVFHMIN-NEXT:    vmerge.vxm v8, v8, a0, v0
923; RV64ZVFHMIN-NEXT:    ret
924  %elt.head = insertelement <8 x i64> poison, i64 %a, i32 0
925  %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
926  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl)
927  ret <8 x i64> %v
928}
929
; Immediate form on <8 x i64>: the true operand is the constant splat (i64 2).
; The expected output is a single tail-undisturbed (tu) vmerge.vim at e64/m4,
; shared by every run line.
930define <8 x i64> @vpmerge_vi_v8i64(<8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
931; CHECK-LABEL: vpmerge_vi_v8i64:
932; CHECK:       # %bb.0:
933; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
934; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
935; CHECK-NEXT:    ret
936  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> splat (i64 2), <8 x i64> %vb, i32 %evl)
937  ret <8 x i64> %v
938}
939
; vp.merge intrinsic for <16 x i64>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
940declare <16 x i64> @llvm.vp.merge.v16i64(<16 x i1>, <16 x i64>, <16 x i64>, i32)
941
; Vector-vector form on <16 x i64>. The expected output is one tail-undisturbed
; (tu) vmerge.vvm at e64/m8 that merges v8 into v16 under mask v0, followed by
; an eight-register group move of the result from v16 back to v8.
942define <16 x i64> @vpmerge_vv_v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
943; CHECK-LABEL: vpmerge_vv_v16i64:
944; CHECK:       # %bb.0:
945; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
946; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
947; CHECK-NEXT:    vmv8r.v v8, v16
948; CHECK-NEXT:    ret
949  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl)
950  ret <16 x i64> %v
951}
952
; Vector-scalar form on <16 x i64>: %a is splatted (insertelement followed by a
; zero-mask shufflevector) and used as the true operand.
; The 64-bit run lines expect the splat to fold into a single vmerge.vxm.
; The 32-bit run lines expect a0 and a1 to be stored to adjacent stack slots,
; broadcast with a zero-stride vlse64.v, and then merged with vmerge.vvm.
953define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
954; RV32-LABEL: vpmerge_vx_v16i64:
955; RV32:       # %bb.0:
956; RV32-NEXT:    addi sp, sp, -16
957; RV32-NEXT:    .cfi_def_cfa_offset 16
958; RV32-NEXT:    sw a0, 8(sp)
959; RV32-NEXT:    sw a1, 12(sp)
960; RV32-NEXT:    addi a0, sp, 8
961; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
962; RV32-NEXT:    vlse64.v v16, (a0), zero
963; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, ma
964; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
965; RV32-NEXT:    addi sp, sp, 16
966; RV32-NEXT:    .cfi_def_cfa_offset 0
967; RV32-NEXT:    ret
968;
969; RV64-LABEL: vpmerge_vx_v16i64:
970; RV64:       # %bb.0:
971; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
972; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
973; RV64-NEXT:    ret
974;
975; RV32ZVFHMIN-LABEL: vpmerge_vx_v16i64:
976; RV32ZVFHMIN:       # %bb.0:
977; RV32ZVFHMIN-NEXT:    addi sp, sp, -16
978; RV32ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
979; RV32ZVFHMIN-NEXT:    sw a0, 8(sp)
980; RV32ZVFHMIN-NEXT:    sw a1, 12(sp)
981; RV32ZVFHMIN-NEXT:    addi a0, sp, 8
982; RV32ZVFHMIN-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
983; RV32ZVFHMIN-NEXT:    vlse64.v v16, (a0), zero
984; RV32ZVFHMIN-NEXT:    vsetvli zero, a2, e64, m8, tu, ma
985; RV32ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v16, v0
986; RV32ZVFHMIN-NEXT:    addi sp, sp, 16
987; RV32ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
988; RV32ZVFHMIN-NEXT:    ret
989;
990; RV64ZVFHMIN-LABEL: vpmerge_vx_v16i64:
991; RV64ZVFHMIN:       # %bb.0:
992; RV64ZVFHMIN-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
993; RV64ZVFHMIN-NEXT:    vmerge.vxm v8, v8, a0, v0
994; RV64ZVFHMIN-NEXT:    ret
995  %elt.head = insertelement <16 x i64> poison, i64 %a, i32 0
996  %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
997  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl)
998  ret <16 x i64> %v
999}
1000
; Immediate form on <16 x i64>: the true operand is the constant splat (i64 2).
; The expected output is a single tail-undisturbed (tu) vmerge.vim at e64/m8,
; shared by every run line.
1001define <16 x i64> @vpmerge_vi_v16i64(<16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
1002; CHECK-LABEL: vpmerge_vi_v16i64:
1003; CHECK:       # %bb.0:
1004; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
1005; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
1006; CHECK-NEXT:    ret
1007  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> splat (i64 2), <16 x i64> %vb, i32 %evl)
1008  ret <16 x i64> %v
1009}
1010
; vp.merge intrinsic for <2 x half>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1011declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32)
1012
; Vector-vector form on <2 x half>. The same output is expected for every run
; line: one tail-undisturbed (tu) vmerge.vvm at e16/mf4 that merges v8 into v9
; under mask v0, followed by a whole-register move from v9 back to v8.
1013define <2 x half> @vpmerge_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
1014; CHECK-LABEL: vpmerge_vv_v2f16:
1015; CHECK:       # %bb.0:
1016; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
1017; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
1018; CHECK-NEXT:    vmv1r.v v8, v9
1019; CHECK-NEXT:    ret
1020  %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl)
1021  ret <2 x half> %v
1022}
1023
; Vector-scalar FP form on <2 x half>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand.
; The +zvfh run lines expect the splat to fold into a single vfmerge.vfm.
; The +zvfhmin run lines expect fa0 to be moved to a GPR with fmv.x.h, splatted
; with vmv.v.x, and then merged with an integer vmerge.vvm.
1024define <2 x half> @vpmerge_vf_v2f16(half %a, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
1025; ZVFH-LABEL: vpmerge_vf_v2f16:
1026; ZVFH:       # %bb.0:
1027; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
1028; ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1029; ZVFH-NEXT:    ret
1030;
1031; ZVFHMIN-LABEL: vpmerge_vf_v2f16:
1032; ZVFHMIN:       # %bb.0:
1033; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1034; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
1035; ZVFHMIN-NEXT:    vmv.v.x v9, a1
1036; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
1037; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
1038; ZVFHMIN-NEXT:    ret
1039  %elt.head = insertelement <2 x half> poison, half %a, i32 0
1040  %va = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
1041  %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl)
1042  ret <2 x half> %v
1043}
1044
; vp.merge intrinsic for <4 x half>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1045declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32)
1046
; Vector-vector form on <4 x half>. The same output is expected for every run
; line: one tail-undisturbed (tu) vmerge.vvm at e16/mf2 that merges v8 into v9
; under mask v0, followed by a whole-register move from v9 back to v8.
1047define <4 x half> @vpmerge_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
1048; CHECK-LABEL: vpmerge_vv_v4f16:
1049; CHECK:       # %bb.0:
1050; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
1051; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
1052; CHECK-NEXT:    vmv1r.v v8, v9
1053; CHECK-NEXT:    ret
1054  %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl)
1055  ret <4 x half> %v
1056}
1057
; Vector-scalar FP form on <4 x half>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand.
; The +zvfh run lines expect the splat to fold into a single vfmerge.vfm.
; The +zvfhmin run lines expect fa0 to be moved to a GPR with fmv.x.h, splatted
; with vmv.v.x, and then merged with an integer vmerge.vvm.
1058define <4 x half> @vpmerge_vf_v4f16(half %a, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
1059; ZVFH-LABEL: vpmerge_vf_v4f16:
1060; ZVFH:       # %bb.0:
1061; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
1062; ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1063; ZVFH-NEXT:    ret
1064;
1065; ZVFHMIN-LABEL: vpmerge_vf_v4f16:
1066; ZVFHMIN:       # %bb.0:
1067; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1068; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
1069; ZVFHMIN-NEXT:    vmv.v.x v9, a1
1070; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
1071; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
1072; ZVFHMIN-NEXT:    ret
1073  %elt.head = insertelement <4 x half> poison, half %a, i32 0
1074  %va = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
1075  %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl)
1076  ret <4 x half> %v
1077}
1078
; vp.merge intrinsic for <8 x half>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1079declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32)
1080
; Vector-vector form on <8 x half>. The same output is expected for every run
; line: one tail-undisturbed (tu) vmerge.vvm at e16/m1 that merges v8 into v9
; under mask v0, followed by a whole-register move from v9 back to v8.
1081define <8 x half> @vpmerge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
1082; CHECK-LABEL: vpmerge_vv_v8f16:
1083; CHECK:       # %bb.0:
1084; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
1085; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
1086; CHECK-NEXT:    vmv1r.v v8, v9
1087; CHECK-NEXT:    ret
1088  %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl)
1089  ret <8 x half> %v
1090}
1091
; Vector-scalar FP form on <8 x half>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand.
; The +zvfh run lines expect the splat to fold into a single vfmerge.vfm.
; The +zvfhmin run lines expect fa0 to be moved to a GPR with fmv.x.h, splatted
; with vmv.v.x, and then merged with an integer vmerge.vvm.
1092define <8 x half> @vpmerge_vf_v8f16(half %a, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
1093; ZVFH-LABEL: vpmerge_vf_v8f16:
1094; ZVFH:       # %bb.0:
1095; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
1096; ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1097; ZVFH-NEXT:    ret
1098;
1099; ZVFHMIN-LABEL: vpmerge_vf_v8f16:
1100; ZVFHMIN:       # %bb.0:
1101; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1102; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1103; ZVFHMIN-NEXT:    vmv.v.x v9, a1
1104; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
1105; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
1106; ZVFHMIN-NEXT:    ret
1107  %elt.head = insertelement <8 x half> poison, half %a, i32 0
1108  %va = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
1109  %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl)
1110  ret <8 x half> %v
1111}
1112
; vp.merge intrinsic for <16 x half>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1113declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32)
1114
; Vector-vector form on <16 x half>. The same output is expected for every run
; line: one tail-undisturbed (tu) vmerge.vvm at e16/m2 that merges v8 into v10
; under mask v0, followed by a two-register group move from v10 back to v8.
1115define <16 x half> @vpmerge_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
1116; CHECK-LABEL: vpmerge_vv_v16f16:
1117; CHECK:       # %bb.0:
1118; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
1119; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
1120; CHECK-NEXT:    vmv2r.v v8, v10
1121; CHECK-NEXT:    ret
1122  %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl)
1123  ret <16 x half> %v
1124}
1125
; Vector-scalar FP form on <16 x half>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand.
; The +zvfh run lines expect the splat to fold into a single vfmerge.vfm.
; The +zvfhmin run lines expect fa0 to be moved to a GPR with fmv.x.h, splatted
; with vmv.v.x, and then merged with an integer vmerge.vvm.
1126define <16 x half> @vpmerge_vf_v16f16(half %a, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
1127; ZVFH-LABEL: vpmerge_vf_v16f16:
1128; ZVFH:       # %bb.0:
1129; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
1130; ZVFH-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1131; ZVFH-NEXT:    ret
1132;
1133; ZVFHMIN-LABEL: vpmerge_vf_v16f16:
1134; ZVFHMIN:       # %bb.0:
1135; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
1136; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
1137; ZVFHMIN-NEXT:    vmv.v.x v10, a1
1138; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
1139; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
1140; ZVFHMIN-NEXT:    ret
1141  %elt.head = insertelement <16 x half> poison, half %a, i32 0
1142  %va = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
1143  %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl)
1144  ret <16 x half> %v
1145}
1146
; vp.merge intrinsic for <2 x float>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1147declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32)
1148
; Vector-vector form on <2 x float>. The expected output is one tail-undisturbed
; (tu) vmerge.vvm at e32/mf2 that merges v8 into v9 under mask v0, followed by
; a whole-register move of the result from v9 back to v8.
1149define <2 x float> @vpmerge_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
1150; CHECK-LABEL: vpmerge_vv_v2f32:
1151; CHECK:       # %bb.0:
1152; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
1153; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
1154; CHECK-NEXT:    vmv1r.v v8, v9
1155; CHECK-NEXT:    ret
1156  %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl)
1157  ret <2 x float> %v
1158}
1159
; Vector-scalar FP form on <2 x float>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand. Every run line
; expects the splat to fold into a single tail-undisturbed vfmerge.vfm at
; e32/mf2 that reads the scalar from fa0.
1160define <2 x float> @vpmerge_vf_v2f32(float %a, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
1161; CHECK-LABEL: vpmerge_vf_v2f32:
1162; CHECK:       # %bb.0:
1163; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
1164; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1165; CHECK-NEXT:    ret
1166  %elt.head = insertelement <2 x float> poison, float %a, i32 0
1167  %va = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
1168  %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl)
1169  ret <2 x float> %v
1170}
1171
; vp.merge intrinsic for <4 x float>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1172declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32)
1173
; Vector-vector form on <4 x float>. The expected output is one tail-undisturbed
; (tu) vmerge.vvm at e32/m1 that merges v8 into v9 under mask v0, followed by
; a whole-register move of the result from v9 back to v8.
1174define <4 x float> @vpmerge_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
1175; CHECK-LABEL: vpmerge_vv_v4f32:
1176; CHECK:       # %bb.0:
1177; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
1178; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
1179; CHECK-NEXT:    vmv1r.v v8, v9
1180; CHECK-NEXT:    ret
1181  %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl)
1182  ret <4 x float> %v
1183}
1184
; Vector-scalar FP form on <4 x float>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand. Every run line
; expects the splat to fold into a single tail-undisturbed vfmerge.vfm at
; e32/m1 that reads the scalar from fa0.
1185define <4 x float> @vpmerge_vf_v4f32(float %a, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
1186; CHECK-LABEL: vpmerge_vf_v4f32:
1187; CHECK:       # %bb.0:
1188; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
1189; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1190; CHECK-NEXT:    ret
1191  %elt.head = insertelement <4 x float> poison, float %a, i32 0
1192  %va = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
1193  %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl)
1194  ret <4 x float> %v
1195}
1196
; vp.merge intrinsic for <8 x float>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1197declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
1198
; Vector-vector form on <8 x float>. The expected output is one tail-undisturbed
; (tu) vmerge.vvm at e32/m2 that merges v8 into v10 under mask v0, followed by
; a two-register group move of the result from v10 back to v8.
1199define <8 x float> @vpmerge_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
1200; CHECK-LABEL: vpmerge_vv_v8f32:
1201; CHECK:       # %bb.0:
1202; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
1203; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
1204; CHECK-NEXT:    vmv2r.v v8, v10
1205; CHECK-NEXT:    ret
1206  %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl)
1207  ret <8 x float> %v
1208}
1209
; Vector-scalar FP form on <8 x float>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand. Every run line
; expects the splat to fold into a single tail-undisturbed vfmerge.vfm at
; e32/m2 that reads the scalar from fa0.
1210define <8 x float> @vpmerge_vf_v8f32(float %a, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
1211; CHECK-LABEL: vpmerge_vf_v8f32:
1212; CHECK:       # %bb.0:
1213; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
1214; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1215; CHECK-NEXT:    ret
1216  %elt.head = insertelement <8 x float> poison, float %a, i32 0
1217  %va = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
1218  %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl)
1219  ret <8 x float> %v
1220}
1221
; vp.merge intrinsic for <16 x float>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1222declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32)
1223
; Vector-vector form on <16 x float>. The expected output is one
; tail-undisturbed (tu) vmerge.vvm at e32/m4 that merges v8 into v12 under mask
; v0, followed by a four-register group move of the result from v12 back to v8.
1224define <16 x float> @vpmerge_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
1225; CHECK-LABEL: vpmerge_vv_v16f32:
1226; CHECK:       # %bb.0:
1227; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
1228; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
1229; CHECK-NEXT:    vmv4r.v v8, v12
1230; CHECK-NEXT:    ret
1231  %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl)
1232  ret <16 x float> %v
1233}
1234
; Vector-scalar FP form on <16 x float>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand. Every run line
; expects the splat to fold into a single tail-undisturbed vfmerge.vfm at
; e32/m4 that reads the scalar from fa0.
1235define <16 x float> @vpmerge_vf_v16f32(float %a, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
1236; CHECK-LABEL: vpmerge_vf_v16f32:
1237; CHECK:       # %bb.0:
1238; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
1239; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1240; CHECK-NEXT:    ret
1241  %elt.head = insertelement <16 x float> poison, float %a, i32 0
1242  %va = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1243  %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl)
1244  ret <16 x float> %v
1245}
1246
; vp.merge intrinsic for <2 x double>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1247declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32)
1248
; Vector-vector form on <2 x double>. The expected output is one
; tail-undisturbed (tu) vmerge.vvm at e64/m1 that merges v8 into v9 under mask
; v0, followed by a whole-register move of the result from v9 back to v8.
1249define <2 x double> @vpmerge_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
1250; CHECK-LABEL: vpmerge_vv_v2f64:
1251; CHECK:       # %bb.0:
1252; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
1253; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
1254; CHECK-NEXT:    vmv1r.v v8, v9
1255; CHECK-NEXT:    ret
1256  %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl)
1257  ret <2 x double> %v
1258}
1259
; Vector-scalar FP form on <2 x double>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand. Every run line
; expects the splat to fold into a single tail-undisturbed vfmerge.vfm at
; e64/m1 that reads the scalar from fa0.
1260define <2 x double> @vpmerge_vf_v2f64(double %a, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
1261; CHECK-LABEL: vpmerge_vf_v2f64:
1262; CHECK:       # %bb.0:
1263; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
1264; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1265; CHECK-NEXT:    ret
1266  %elt.head = insertelement <2 x double> poison, double %a, i32 0
1267  %va = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1268  %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl)
1269  ret <2 x double> %v
1270}
1271
; vp.merge intrinsic for <4 x double>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1272declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32)
1273
; Vector-vector form on <4 x double>. The expected output is one
; tail-undisturbed (tu) vmerge.vvm at e64/m2 that merges v8 into v10 under mask
; v0, followed by a two-register group move of the result from v10 back to v8.
1274define <4 x double> @vpmerge_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
1275; CHECK-LABEL: vpmerge_vv_v4f64:
1276; CHECK:       # %bb.0:
1277; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
1278; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
1279; CHECK-NEXT:    vmv2r.v v8, v10
1280; CHECK-NEXT:    ret
1281  %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl)
1282  ret <4 x double> %v
1283}
1284
; Vector-scalar FP form on <4 x double>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand. Every run line
; expects the splat to fold into a single tail-undisturbed vfmerge.vfm at
; e64/m2 that reads the scalar from fa0.
1285define <4 x double> @vpmerge_vf_v4f64(double %a, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
1286; CHECK-LABEL: vpmerge_vf_v4f64:
1287; CHECK:       # %bb.0:
1288; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
1289; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1290; CHECK-NEXT:    ret
1291  %elt.head = insertelement <4 x double> poison, double %a, i32 0
1292  %va = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1293  %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl)
1294  ret <4 x double> %v
1295}
1296
; vp.merge intrinsic for <8 x double>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1297declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)
1298
; Vector-vector form on <8 x double>. The expected output is one
; tail-undisturbed (tu) vmerge.vvm at e64/m4 that merges v8 into v12 under mask
; v0, followed by a four-register group move of the result from v12 back to v8.
1299define <8 x double> @vpmerge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
1300; CHECK-LABEL: vpmerge_vv_v8f64:
1301; CHECK:       # %bb.0:
1302; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
1303; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
1304; CHECK-NEXT:    vmv4r.v v8, v12
1305; CHECK-NEXT:    ret
1306  %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl)
1307  ret <8 x double> %v
1308}
1309
; Vector-scalar FP form on <8 x double>: %a is splatted (insertelement followed
; by a zero-mask shufflevector) and used as the true operand. Every run line
; expects the splat to fold into a single tail-undisturbed vfmerge.vfm at
; e64/m4 that reads the scalar from fa0.
1310define <8 x double> @vpmerge_vf_v8f64(double %a, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
1311; CHECK-LABEL: vpmerge_vf_v8f64:
1312; CHECK:       # %bb.0:
1313; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
1314; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1315; CHECK-NEXT:    ret
1316  %elt.head = insertelement <8 x double> poison, double %a, i32 0
1317  %va = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1318  %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl)
1319  ret <8 x double> %v
1320}
1321
; vp.merge intrinsic for <16 x double>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1322declare <16 x double> @llvm.vp.merge.v16f64(<16 x i1>, <16 x double>, <16 x double>, i32)
1323
; Vector-vector form on <16 x double>. The expected output is one
; tail-undisturbed (tu) vmerge.vvm at e64/m8 that merges v8 into v16 under mask
; v0, followed by an eight-register group move of the result from v16 back to
; v8.
1324define <16 x double> @vpmerge_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
1325; CHECK-LABEL: vpmerge_vv_v16f64:
1326; CHECK:       # %bb.0:
1327; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
1328; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
1329; CHECK-NEXT:    vmv8r.v v8, v16
1330; CHECK-NEXT:    ret
1331  %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
1332  ret <16 x double> %v
1333}
1334
; Vector-scalar FP form on <16 x double>: %a is splatted (insertelement
; followed by a zero-mask shufflevector) and used as the true operand. Every
; run line expects the splat to fold into a single tail-undisturbed vfmerge.vfm
; at e64/m8 that reads the scalar from fa0.
1335define <16 x double> @vpmerge_vf_v16f64(double %a, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
1336; CHECK-LABEL: vpmerge_vf_v16f64:
1337; CHECK:       # %bb.0:
1338; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
1339; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1340; CHECK-NEXT:    ret
1341  %elt.head = insertelement <16 x double> poison, double %a, i32 0
1342  %va = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
1343  %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
1344  ret <16 x double> %v
1345}
1346
; vp.merge intrinsic for <32 x double>. The tests in this group call it as
; (mask %m, true value, false value %vb, i32 %evl).
1347declare <32 x double> @llvm.vp.merge.v32f64(<32 x i1>, <32 x double>, <32 x double>, i32)
1348
; Vector-vector form on <32 x double>. The expected output performs the merge
; as two 16-element e64/m8 halves:
;   * v16 is spilled to a vlenb-scaled stack slot, v8 is copied into v16, and
;     two 16-element vectors are loaded from (a0) and 128(a0) - presumably %vb
;     passed indirectly; confirm against the calling convention.
;   * First half: VL = min(a2, 16), computed by the li/mv/bltu sequence.
;   * Second half: VL = a2 - 16 clamped at zero, computed by the
;     addi/sltu/addi/and sequence, with the mask register slid down by two
;     bytes (16 mask bits) first.
;   * The spilled group is reloaded for the second merge, and the second-half
;     result is moved from v24 into v16 before the frame is released.
1349define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
1350; CHECK-LABEL: vpmerge_vv_v32f64:
1351; CHECK:       # %bb.0:
1352; CHECK-NEXT:    addi sp, sp, -16
1353; CHECK-NEXT:    .cfi_def_cfa_offset 16
1354; CHECK-NEXT:    csrr a1, vlenb
1355; CHECK-NEXT:    slli a1, a1, 3
1356; CHECK-NEXT:    sub sp, sp, a1
1357; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1358; CHECK-NEXT:    addi a1, sp, 16
1359; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
1360; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
1361; CHECK-NEXT:    vmv8r.v v16, v8
1362; CHECK-NEXT:    addi a1, a0, 128
1363; CHECK-NEXT:    vle64.v v24, (a1)
1364; CHECK-NEXT:    vle64.v v8, (a0)
1365; CHECK-NEXT:    li a1, 16
1366; CHECK-NEXT:    mv a0, a2
1367; CHECK-NEXT:    bltu a2, a1, .LBB83_2
1368; CHECK-NEXT:  # %bb.1:
1369; CHECK-NEXT:    li a0, 16
1370; CHECK-NEXT:  .LBB83_2:
1371; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
1372; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
1373; CHECK-NEXT:    addi a0, a2, -16
1374; CHECK-NEXT:    sltu a1, a2, a0
1375; CHECK-NEXT:    addi a1, a1, -1
1376; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1377; CHECK-NEXT:    vslidedown.vi v0, v0, 2
1378; CHECK-NEXT:    and a0, a1, a0
1379; CHECK-NEXT:    addi a1, sp, 16
1380; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
1381; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
1382; CHECK-NEXT:    vmerge.vvm v24, v24, v16, v0
1383; CHECK-NEXT:    vmv8r.v v16, v24
1384; CHECK-NEXT:    csrr a0, vlenb
1385; CHECK-NEXT:    slli a0, a0, 3
1386; CHECK-NEXT:    add sp, sp, a0
1387; CHECK-NEXT:    .cfi_def_cfa sp, 16
1388; CHECK-NEXT:    addi sp, sp, 16
1389; CHECK-NEXT:    .cfi_def_cfa_offset 0
1390; CHECK-NEXT:    ret
1391  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
1392  ret <32 x double> %v
1393}
1394
; Vector-scalar FP form on <32 x double>: %a is splatted (insertelement
; followed by a zero-mask shufflevector) and used as the true operand. The
; expected output performs the merge as two 16-element e64/m8 vfmerge.vfm
; halves on v8 and v16, both reading the scalar from fa0:
;   * First half: VL = min(a0, 16), computed by the li/mv/bltu sequence.
;   * Second half: VL = a0 - 16 clamped at zero, computed by the
;     addi/sltu/addi/and sequence, with the mask register slid down by two
;     bytes (16 mask bits) first.
; No stack frame is expected here, unlike the vector-vector form.
1395define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
1396; CHECK-LABEL: vpmerge_vf_v32f64:
1397; CHECK:       # %bb.0:
1398; CHECK-NEXT:    li a2, 16
1399; CHECK-NEXT:    mv a1, a0
1400; CHECK-NEXT:    bltu a0, a2, .LBB84_2
1401; CHECK-NEXT:  # %bb.1:
1402; CHECK-NEXT:    li a1, 16
1403; CHECK-NEXT:  .LBB84_2:
1404; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
1405; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
1406; CHECK-NEXT:    addi a1, a0, -16
1407; CHECK-NEXT:    sltu a0, a0, a1
1408; CHECK-NEXT:    addi a0, a0, -1
1409; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
1410; CHECK-NEXT:    vslidedown.vi v0, v0, 2
1411; CHECK-NEXT:    and a0, a0, a1
1412; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
1413; CHECK-NEXT:    vfmerge.vfm v16, v16, fa0, v0
1414; CHECK-NEXT:    ret
1415  %elt.head = insertelement <32 x double> poison, double %a, i32 0
1416  %va = shufflevector <32 x double> %elt.head, <32 x double> poison, <32 x i32> zeroinitializer
1417  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
1418  ret <32 x double> %v
1419}
1420