1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
3
4declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
5declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
6declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
7declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
8
9; Test binary operator with vp.merge and vp.add.
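; When the fold succeeds, the add is emitted as a masked, tail-undisturbed instruction whose
; passthru is the merge's false operand (v8 in the CHECK lines below).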
10declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
11define <vscale x 2 x i32> @vpmerge_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
12; CHECK-LABEL: vpmerge_vpadd:
13; CHECK:       # %bb.0:
14; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
15; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
16; CHECK-NEXT:    ret
17  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
18  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
19  ret <vscale x 2 x i32> %b
20}
21
22; Test that the glued node of the merge is not deleted.
23declare <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, metadata, <vscale x 2 x i1>, i32)
24define <vscale x 2 x i32> @vpmerge_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
25; CHECK-LABEL: vpmerge_vpadd2:
26; CHECK:       # %bb.0:
27; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
28; CHECK-NEXT:    vmseq.vv v0, v9, v10
29; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
30; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
31; CHECK-NEXT:    ret
32  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
33  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl)
34  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
35  ret <vscale x 2 x i32> %b
36}
37
38; Test vp.merge with an all-ones mask.
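; With an all-ones mask only the tail has to be preserved from the passthru, so the add is emitted
; unmasked with a tail-undisturbed policy.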
39define <vscale x 2 x i32> @vpmerge_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
40; CHECK-LABEL: vpmerge_vpadd3:
41; CHECK:       # %bb.0:
42; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
43; CHECK-NEXT:    vadd.vv v8, v9, v10
44; CHECK-NEXT:    ret
45  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
46  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
47  ret <vscale x 2 x i32> %b
48}
49
50; Test float binary operator with vp.merge and vp.fadd.
51declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
52define <vscale x 2 x float> @vpmerge_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
53; CHECK-LABEL: vpmerge_vpfadd:
54; CHECK:       # %bb.0:
55; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
56; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
57; CHECK-NEXT:    ret
58  %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
59  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
60  ret <vscale x 2 x float> %b
61}
62
63; Test a binary operator whose index operand uses a different EEW: riscv.vrgatherei16.
64declare <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)
65define <vscale x 2 x i32> @vpmerge_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
66; CHECK-LABEL: vpmerge_vrgatherei16:
67; CHECK:       # %bb.0:
68; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
69; CHECK-NEXT:    vrgatherei16.vv v8, v9, v10
70; CHECK-NEXT:    ret
71  %1 = zext i32 %vl to i64
72  %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1)
73  %3 = tail call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl)
74  ret <vscale x 2 x i32> %2
75}
76
77; Test conversion by fptosi.
78declare <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
79define <vscale x 2 x i16> @vpmerge_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
80; CHECK-LABEL: vpmerge_vpfptosi:
81; CHECK:       # %bb.0:
82; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
83; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
84; CHECK-NEXT:    ret
85  %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
86  %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
87  ret <vscale x 2 x i16> %b
88}
89
90; Test conversion by sitofp.
91declare <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
92define <vscale x 2 x float> @vpmerge_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
93; CHECK-LABEL: vpmerge_vpsitofp:
94; CHECK:       # %bb.0:
95; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
96; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
97; CHECK-NEXT:    ret
98  %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
99  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
100  ret <vscale x 2 x float> %b
101}
102
103; Test integer extension by vp.zext.
104declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
105define <vscale x 2 x i32> @vpmerge_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
106; CHECK-LABEL: vpmerge_vpzext:
107; CHECK:       # %bb.0:
108; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
109; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
110; CHECK-NEXT:    ret
111  %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
112  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
113  ret <vscale x 2 x i32> %b
114}
115
116; Test integer truncation by vp.trunc.
117declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
118define <vscale x 2 x i32> @vpmerge_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
119; CHECK-LABEL: vpmerge_vptrunc:
120; CHECK:       # %bb.0:
121; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
122; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
123; CHECK-NEXT:    ret
124  %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
125  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
126  ret <vscale x 2 x i32> %b
127}
128
129; Test float extension by vp.fpext.
130declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
131define <vscale x 2 x double> @vpmerge_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
132; CHECK-LABEL: vpmerge_vpfpext:
133; CHECK:       # %bb.0:
134; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
135; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
136; CHECK-NEXT:    ret
137  %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
138  %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
139  ret <vscale x 2 x double> %b
140}
141
142; Test float truncation by vp.fptrunc.
143declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
144define <vscale x 2 x float> @vpmerge_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
145; CHECK-LABEL: vpmerge_vpfptrunc:
146; CHECK:       # %bb.0:
147; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
148; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
149; CHECK-NEXT:    ret
150  %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
151  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
152  ret <vscale x 2 x float> %b
153}
154
155; Test load operation by vp.load.
156declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)
157define <vscale x 2 x i32> @vpmerge_vpload(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
158; CHECK-LABEL: vpmerge_vpload:
159; CHECK:       # %bb.0:
160; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
161; CHECK-NEXT:    vle32.v v8, (a0), v0.t
162; CHECK-NEXT:    ret
163  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
164  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
165  ret <vscale x 2 x i32> %b
166}
167
168; Test a result that has both a chain and a glued node.
169define <vscale x 2 x i32> @vpmerge_vpload2(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
170; CHECK-LABEL: vpmerge_vpload2:
171; CHECK:       # %bb.0:
172; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
173; CHECK-NEXT:    vmseq.vv v0, v9, v10
174; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
175; CHECK-NEXT:    vle32.v v8, (a0), v0.t
176; CHECK-NEXT:    ret
177  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
178  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl)
179  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
180  ret <vscale x 2 x i32> %b
181}
182
183; Test that the chain output of the true operand of merge.vvm is preserved.
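; The load's chain must be kept alive so that the store below stays ordered after it.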
184define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
185; CHECK-LABEL: vpmerge_vpload_store:
186; CHECK:       # %bb.0:
187; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
188; CHECK-NEXT:    vle32.v v8, (a0), v0.t
189; CHECK-NEXT:    vs1r.v v8, (a0)
190; CHECK-NEXT:    ret
191  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
192  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
193  store <vscale x 2 x i32> %b, ptr %p
194  ret void
195}
196
197declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, ptr, i64)
198define <vscale x 2 x i32> @vpmerge_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
199; CHECK-LABEL: vpmerge_vleff:
200; CHECK:       # %bb.0:
201; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
202; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
203; CHECK-NEXT:    ret
204  %1 = zext i32 %vl to i64
205  %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
206  %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
207  %c = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl)
208  ret <vscale x 2 x i32> %c
209}
210
211; Test strided load by riscv.vlse
212declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32>, ptr, i64, i64)
213define <vscale x 2 x i32> @vpmerge_vlse(<vscale x 2 x i32> %passthru,  ptr %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
214; CHECK-LABEL: vpmerge_vlse:
215; CHECK:       # %bb.0:
216; CHECK-NEXT:    vsetvli zero, a2, e32, m1, tu, mu
217; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
218; CHECK-NEXT:    ret
219  %1 = zext i32 %vl to i64
220  %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %s, i64 %1)
221  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
222  ret <vscale x 2 x i32> %b
223}
224
225; Test indexed load by riscv.vluxei
226declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32>, ptr, <vscale x 2 x i64>, i64)
227define <vscale x 2 x i32> @vpmerge_vluxei(<vscale x 2 x i32> %passthru,  ptr %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
228; CHECK-LABEL: vpmerge_vluxei:
229; CHECK:       # %bb.0:
230; CHECK-NEXT:    vsetvli zero, a2, e32, m1, tu, mu
231; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t
232; CHECK-NEXT:    ret
233  %1 = zext i32 %vl to i64
234  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, ptr %p, <vscale x 2 x i64> %idx, i64 %1)
235  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
236  ret <vscale x 2 x i32> %b
237}
238
239; Test vector index by riscv.vid
240declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32>, i64)
241define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) {
242; CHECK-LABEL: vpmerge_vid:
243; CHECK:       # %bb.0:
244; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
245; CHECK-NEXT:    vid.v v8, v0.t
246; CHECK-NEXT:    ret
247  %1 = zext i32 %vl to i64
248  %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1)
249  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
250  ret <vscale x 2 x i32> %b
251}
252
253; Test that VIOTA_M and VMERGE_VVM are not combined without an all-ones mask.
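; viota.m computes a prefix sum over the active elements of its mask operand, so applying the
; merge's mask to it would change the values it produces; the fold is only done when the merge mask
; is all ones (see the next test).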
254declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64)
255define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
256; CHECK-LABEL: vpmerge_viota:
257; CHECK:       # %bb.0:
258; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
259; CHECK-NEXT:    viota.m v10, v9
260; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
261; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
262; CHECK-NEXT:    ret
263  %1 = zext i32 %vl to i64
264  %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
265  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
266  ret <vscale x 2 x i32> %b
267}
268
269; Test that VIOTA_M and VMERGE_VVM are combined with an all-ones mask.
270define <vscale x 2 x i32> @vpmerge_viota2(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
271; CHECK-LABEL: vpmerge_viota2:
272; CHECK:       # %bb.0:
273; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
274; CHECK-NEXT:    viota.m v8, v0
275; CHECK-NEXT:    ret
276  %1 = zext i32 %vl to i64
277  %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
278  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
279  ret <vscale x 2 x i32> %b
280}
281
282; Test riscv.vfclass
283declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64)
284define <vscale x 2 x i32> @vpmerge_vfclass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
285; CHECK-LABEL: vpmerge_vfclass:
286; CHECK:       # %bb.0:
287; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
288; CHECK-NEXT:    vfclass.v v8, v9, v0.t
289; CHECK-NEXT:    ret
290  %1 = zext i32 %vl to i64
291  %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1)
292  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
293  ret <vscale x 2 x i32> %b
294}
295
296; Test riscv.vfsqrt
297declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64)
298define <vscale x 2 x float> @vpmerge_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
299; CHECK-LABEL: vpmerge_vfsqrt:
300; CHECK:       # %bb.0:
301; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
302; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
303; CHECK-NEXT:    ret
304  %1 = zext i32 %vl to i64
305  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1)
306  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
307  ret <vscale x 2 x float> %b
308}
309
310; Test reciprocal operation by riscv.vfrec7
311declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64)
312define <vscale x 2 x float> @vpmerge_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
313; CHECK-LABEL: vpmerge_vfrec7:
314; CHECK:       # %bb.0:
315; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
316; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
317; CHECK-NEXT:    ret
318  %1 = zext i32 %vl to i64
319  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1)
320  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
321  ret <vscale x 2 x float> %b
322}
323
324; Test vector operations with VLMAX vector length.
325
326; Test binary operator with vp.merge and add.
327define <vscale x 2 x i32> @vpmerge_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
328; CHECK-LABEL: vpmerge_add:
329; CHECK:       # %bb.0:
330; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
331; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
332; CHECK-NEXT:    ret
333  %a = add <vscale x 2 x i32> %x, %y
334  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
335  ret <vscale x 2 x i32> %b
336}
337
338; Test binary operator with vp.merge and fadd.
339define <vscale x 2 x float> @vpmerge_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
340; CHECK-LABEL: vpmerge_fadd:
341; CHECK:       # %bb.0:
342; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
343; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
344; CHECK-NEXT:    ret
345  %a = fadd <vscale x 2 x float> %x, %y
346  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
347  ret <vscale x 2 x float> %b
348}
349
350; This shouldn't be folded because we need to preserve exceptions with
351; "fpexcept.strict" exception behaviour, and masking may hide them.
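; The vfadd therefore stays unmasked and the merge is kept as a separate vmerge.vvm.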
352define <vscale x 2 x float> @vpmerge_constrained_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) strictfp {
353; CHECK-LABEL: vpmerge_constrained_fadd:
354; CHECK:       # %bb.0:
355; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
356; CHECK-NEXT:    vfadd.vv v9, v9, v10
357; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
358; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
359; CHECK-NEXT:    ret
360  %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
361  %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl) strictfp
362  ret <vscale x 2 x float> %b
363}
364declare <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata)
365declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i64)
366
367; This shouldn't be folded because we need to preserve exceptions with
368; "fpexcept.strict" exception behaviour, and masking may hide them.
369define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m) strictfp {
370; CHECK-LABEL: vpmerge_constrained_fadd_vlmax:
371; CHECK:       # %bb.0:
372; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
373; CHECK-NEXT:    vfadd.vv v9, v9, v10
374; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
375; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
376; CHECK-NEXT:    ret
377  %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
378  %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1) strictfp
379  ret <vscale x 2 x float> %b
380}
381
382; Test conversion by fptosi.
383define <vscale x 2 x i16> @vpmerge_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
384; CHECK-LABEL: vpmerge_fptosi:
385; CHECK:       # %bb.0:
386; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
387; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
388; CHECK-NEXT:    ret
389  %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16>
390  %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
391  ret <vscale x 2 x i16> %b
392}
393
394; Test conversion by sitofp.
395define <vscale x 2 x float> @vpmerge_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
396; CHECK-LABEL: vpmerge_sitofp:
397; CHECK:       # %bb.0:
398; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
399; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
400; CHECK-NEXT:    ret
401  %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float>
402  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
403  ret <vscale x 2 x float> %b
404}
405
406; Test float extension by fpext.
407define <vscale x 2 x double> @vpmerge_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
408; CHECK-LABEL: vpmerge_fpext:
409; CHECK:       # %bb.0:
410; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
411; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
412; CHECK-NEXT:    ret
413  %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double>
414  %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
415  ret <vscale x 2 x double> %b
416}
417
418; Test float truncation by fptrunc.
419define <vscale x 2 x float> @vpmerge_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
420; CHECK-LABEL: vpmerge_fptrunc:
421; CHECK:       # %bb.0:
422; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
423; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
424; CHECK-NEXT:    ret
425  %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
426  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
427  ret <vscale x 2 x float> %b
428}
429
430; Test integer extension by zext.
431define <vscale x 2 x i32> @vpmerge_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
432; CHECK-LABEL: vpmerge_zext:
433; CHECK:       # %bb.0:
434; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
435; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
436; CHECK-NEXT:    ret
437  %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32>
438  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
439  ret <vscale x 2 x i32> %b
440}
441
442; Test integer truncation by trunc.
443define <vscale x 2 x i32> @vpmerge_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
444; CHECK-LABEL: vpmerge_trunc:
445; CHECK:       # %bb.0:
446; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
447; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
448; CHECK-NEXT:    ret
449  %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
450  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
451  ret <vscale x 2 x i32> %b
452}
453
454declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
455declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
456declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
457declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
458
459; Test binary operator with vp.select and vp.add.
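; Unlike vp.merge, vp.select leaves the elements past VL unspecified, so the folded instructions in
; the tests below use a tail-agnostic policy rather than tail-undisturbed.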
460define <vscale x 2 x i32> @vpselect_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
461; CHECK-LABEL: vpselect_vpadd:
462; CHECK:       # %bb.0:
463; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
464; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
465; CHECK-NEXT:    ret
466  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
467  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
468  ret <vscale x 2 x i32> %b
469}
470
471; Test that the glued node of the select is not deleted.
472define <vscale x 2 x i32> @vpselect_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
473; CHECK-LABEL: vpselect_vpadd2:
474; CHECK:       # %bb.0:
475; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
476; CHECK-NEXT:    vmseq.vv v0, v9, v10
477; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
478; CHECK-NEXT:    ret
479  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
480  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl)
481  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
482  ret <vscale x 2 x i32> %b
483}
484
485; Test vp.select with an all-ones mask.
486define <vscale x 2 x i32> @vpselect_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
487; CHECK-LABEL: vpselect_vpadd3:
488; CHECK:       # %bb.0:
489; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
490; CHECK-NEXT:    vadd.vv v8, v9, v10
491; CHECK-NEXT:    ret
492  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
493  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
494  ret <vscale x 2 x i32> %b
495}
496
497; Test float binary operator with vp.select and vp.fadd.
498define <vscale x 2 x float> @vpselect_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
499; CHECK-LABEL: vpselect_vpfadd:
500; CHECK:       # %bb.0:
501; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
502; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
503; CHECK-NEXT:    ret
504  %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
505  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
506  ret <vscale x 2 x float> %b
507}
508
509; Test a binary operator whose index operand uses a different EEW: riscv.vrgatherei16.
510define <vscale x 2 x i32> @vpselect_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
511; CHECK-LABEL: vpselect_vrgatherei16:
512; CHECK:       # %bb.0:
513; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
514; CHECK-NEXT:    vrgatherei16.vv v8, v9, v10
515; CHECK-NEXT:    ret
516  %1 = zext i32 %vl to i64
517  %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1)
518  %3 = tail call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl)
519  ret <vscale x 2 x i32> %2
520}
521
522; Test conversion by fptosi.
523define <vscale x 2 x i16> @vpselect_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
524; CHECK-LABEL: vpselect_vpfptosi:
525; CHECK:       # %bb.0:
526; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
527; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
528; CHECK-NEXT:    ret
529  %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
530  %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
531  ret <vscale x 2 x i16> %b
532}
533
534; Test conversion by sitofp.
535define <vscale x 2 x float> @vpselect_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
536; CHECK-LABEL: vpselect_vpsitofp:
537; CHECK:       # %bb.0:
538; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
539; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
540; CHECK-NEXT:    ret
541  %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
542  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
543  ret <vscale x 2 x float> %b
544}
545
546; Test integer extension by vp.zext.
547define <vscale x 2 x i32> @vpselect_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
548; CHECK-LABEL: vpselect_vpzext:
549; CHECK:       # %bb.0:
550; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
551; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
552; CHECK-NEXT:    ret
553  %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
554  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
555  ret <vscale x 2 x i32> %b
556}
557
558; Test integer truncation by vp.trunc.
559define <vscale x 2 x i32> @vpselect_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
560; CHECK-LABEL: vpselect_vptrunc:
561; CHECK:       # %bb.0:
562; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
563; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
564; CHECK-NEXT:    ret
565  %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
566  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
567  ret <vscale x 2 x i32> %b
568}
569
570; Test float extension by vp.fpext.
571define <vscale x 2 x double> @vpselect_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
572; CHECK-LABEL: vpselect_vpfpext:
573; CHECK:       # %bb.0:
574; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
575; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
576; CHECK-NEXT:    ret
577  %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
578  %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
579  ret <vscale x 2 x double> %b
580}
581
582; Test float truncation by vp.fptrunc.
583define <vscale x 2 x float> @vpselect_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
584; CHECK-LABEL: vpselect_vpfptrunc:
585; CHECK:       # %bb.0:
586; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
587; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
588; CHECK-NEXT:    ret
589  %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
590  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
591  ret <vscale x 2 x float> %b
592}
593
594; Test load operation by vp.load.
595define <vscale x 2 x i32> @vpselect_vpload(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
596; CHECK-LABEL: vpselect_vpload:
597; CHECK:       # %bb.0:
598; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
599; CHECK-NEXT:    vle32.v v8, (a0), v0.t
600; CHECK-NEXT:    ret
601  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
602  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
603  ret <vscale x 2 x i32> %b
604}
605
606; Test a result that has both a chain and a glued node.
607define <vscale x 2 x i32> @vpselect_vpload2(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
608; CHECK-LABEL: vpselect_vpload2:
609; CHECK:       # %bb.0:
610; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
611; CHECK-NEXT:    vmseq.vv v0, v9, v10
612; CHECK-NEXT:    vle32.v v8, (a0), v0.t
613; CHECK-NEXT:    ret
614  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
615  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl)
616  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
617  ret <vscale x 2 x i32> %b
618}
619
620; Test that the chain output of the true operand of select.vvm is preserved.
621define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
622; CHECK-LABEL: vpselect_vpload_store:
623; CHECK:       # %bb.0:
624; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
625; CHECK-NEXT:    vle32.v v8, (a0), v0.t
626; CHECK-NEXT:    vs1r.v v8, (a0)
627; CHECK-NEXT:    ret
628  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
629  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
630  store <vscale x 2 x i32> %b, ptr %p
631  ret void
632}
633
634define <vscale x 2 x i32> @vpselect_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
635; CHECK-LABEL: vpselect_vleff:
636; CHECK:       # %bb.0:
637; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
638; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
639; CHECK-NEXT:    ret
640  %1 = zext i32 %vl to i64
641  %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
642  %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
643  %c = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl)
644  ret <vscale x 2 x i32> %c
645}
646
647; Test strided load by riscv.vlse
648define <vscale x 2 x i32> @vpselect_vlse(<vscale x 2 x i32> %passthru,  ptr %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
649; CHECK-LABEL: vpselect_vlse:
650; CHECK:       # %bb.0:
651; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
652; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
653; CHECK-NEXT:    ret
654  %1 = zext i32 %vl to i64
655  %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %s, i64 %1)
656  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
657  ret <vscale x 2 x i32> %b
658}
659
660; Test indexed load by riscv.vluxei
661define <vscale x 2 x i32> @vpselect_vluxei(<vscale x 2 x i32> %passthru,  ptr %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
662; CHECK-LABEL: vpselect_vluxei:
663; CHECK:       # %bb.0:
664; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
665; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t
666; CHECK-NEXT:    ret
667  %1 = zext i32 %vl to i64
668  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, ptr %p, <vscale x 2 x i64> %idx, i64 %1)
669  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
670  ret <vscale x 2 x i32> %b
671}
672
673; Test vector index by riscv.vid
674define <vscale x 2 x i32> @vpselect_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) {
675; CHECK-LABEL: vpselect_vid:
676; CHECK:       # %bb.0:
677; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
678; CHECK-NEXT:    vid.v v8, v0.t
679; CHECK-NEXT:    ret
680  %1 = zext i32 %vl to i64
681  %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1)
682  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
683  ret <vscale x 2 x i32> %b
684}
685
686; Test riscv.viota
687define <vscale x 2 x i32> @vpselect_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
688; CHECK-LABEL: vpselect_viota:
689; CHECK:       # %bb.0:
690; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
691; CHECK-NEXT:    viota.m v10, v9
692; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
693; CHECK-NEXT:    ret
694  %1 = zext i32 %vl to i64
695  %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
696  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
697  ret <vscale x 2 x i32> %b
698}
699
700; Test riscv.vfclass
701define <vscale x 2 x i32> @vpselect_vfclass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
702; CHECK-LABEL: vpselect_vfclass:
703; CHECK:       # %bb.0:
704; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
705; CHECK-NEXT:    vfclass.v v8, v9, v0.t
706; CHECK-NEXT:    ret
707  %1 = zext i32 %vl to i64
708  %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1)
709  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
710  ret <vscale x 2 x i32> %b
711}
712
713; Test riscv.vfsqrt
714define <vscale x 2 x float> @vpselect_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
715; CHECK-LABEL: vpselect_vfsqrt:
716; CHECK:       # %bb.0:
717; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
718; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
719; CHECK-NEXT:    ret
720  %1 = zext i32 %vl to i64
721  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1)
722  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
723  ret <vscale x 2 x float> %b
724}
725
726; Test reciprocal operation by riscv.vfrec7
727define <vscale x 2 x float> @vpselect_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
728; CHECK-LABEL: vpselect_vfrec7:
729; CHECK:       # %bb.0:
730; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
731; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
732; CHECK-NEXT:    ret
733  %1 = zext i32 %vl to i64
734  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1)
735  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
736  ret <vscale x 2 x float> %b
737}
738
739; Test slides
740declare <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64, i64, i64)
741define <vscale x 2 x i32> @vpselect_vslideup(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i64 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
742; CHECK-LABEL: vpselect_vslideup:
743; CHECK:       # %bb.0:
744; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
745; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
746; CHECK-NEXT:    ret
747  %1 = zext i32 %vl to i64
748  %a = call <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i64 %x, i64 %1, i64 0)
749  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
750  ret <vscale x 2 x i32> %b
751}
752
753declare <vscale x 2 x i32> @llvm.riscv.vslidedown.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64, i64, i64)
754define <vscale x 2 x i32> @vpselect_vslidedown(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i64 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
755; CHECK-LABEL: vpselect_vslidedown:
756; CHECK:       # %bb.0:
757; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
758; CHECK-NEXT:    vslidedown.vx v8, v9, a0, v0.t
759; CHECK-NEXT:    ret
760  %1 = zext i32 %vl to i64
761  %a = call <vscale x 2 x i32> @llvm.riscv.vslidedown.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i64 %x, i64 %1, i64 0)
762  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
763  ret <vscale x 2 x i32> %b
764}
765
766declare <vscale x 2 x i32> @llvm.riscv.vslide1up.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64)
767define <vscale x 2 x i32> @vpselect_vslide1up(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
768; CHECK-LABEL: vpselect_vslide1up:
769; CHECK:       # %bb.0:
770; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
771; CHECK-NEXT:    vslide1up.vx v8, v9, a0, v0.t
772; CHECK-NEXT:    ret
773  %1 = zext i32 %vl to i64
774  %a = call <vscale x 2 x i32> @llvm.riscv.vslide1up.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1)
775  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
776  ret <vscale x 2 x i32> %b
777}
778
779declare <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64)
780define <vscale x 2 x i32> @vpselect_vslide1down(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
781; CHECK-LABEL: vpselect_vslide1down:
782; CHECK:       # %bb.0:
783; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
784; CHECK-NEXT:    vslide1down.vx v8, v9, a0, v0.t
785; CHECK-NEXT:    ret
786  %1 = zext i32 %vl to i64
787  %a = call <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1)
788  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
789  ret <vscale x 2 x i32> %b
790}
791
792; Test vector operations with VLMAX vector length.
793
794; Test binary operator with vp.select and add.
795define <vscale x 2 x i32> @vpselect_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
796; CHECK-LABEL: vpselect_add:
797; CHECK:       # %bb.0:
798; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
799; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
800; CHECK-NEXT:    ret
801  %a = add <vscale x 2 x i32> %x, %y
802  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
803  ret <vscale x 2 x i32> %b
804}
805
806; Test binary operator with vp.select and fadd.
807define <vscale x 2 x float> @vpselect_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
808; CHECK-LABEL: vpselect_fadd:
809; CHECK:       # %bb.0:
810; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
811; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
812; CHECK-NEXT:    ret
813  %a = fadd <vscale x 2 x float> %x, %y
814  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
815  ret <vscale x 2 x float> %b
816}
817
818; Test conversion by fptosi.
819define <vscale x 2 x i16> @vpselect_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
820; CHECK-LABEL: vpselect_fptosi:
821; CHECK:       # %bb.0:
822; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
823; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
824; CHECK-NEXT:    ret
825  %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16>
826  %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
827  ret <vscale x 2 x i16> %b
828}
829
830; Test conversion by sitofp.
831define <vscale x 2 x float> @vpselect_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
832; CHECK-LABEL: vpselect_sitofp:
833; CHECK:       # %bb.0:
834; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
835; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
836; CHECK-NEXT:    ret
837  %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float>
838  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
839  ret <vscale x 2 x float> %b
840}
841
842; Test float extension by fpext.
843define <vscale x 2 x double> @vpselect_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
844; CHECK-LABEL: vpselect_fpext:
845; CHECK:       # %bb.0:
846; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
847; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
848; CHECK-NEXT:    ret
849  %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double>
850  %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
851  ret <vscale x 2 x double> %b
852}
853
854; Test float truncation by fptrunc.
855define <vscale x 2 x float> @vpselect_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
856; CHECK-LABEL: vpselect_fptrunc:
857; CHECK:       # %bb.0:
858; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
859; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
860; CHECK-NEXT:    ret
861  %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
862  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
863  ret <vscale x 2 x float> %b
864}
865
866; Test integer extension by zext.
867define <vscale x 2 x i32> @vpselect_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
868; CHECK-LABEL: vpselect_zext:
869; CHECK:       # %bb.0:
870; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
871; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
872; CHECK-NEXT:    ret
873  %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32>
874  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
875  ret <vscale x 2 x i32> %b
876}
877
878; Test integer truncation by trunc.
879define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
880; CHECK-LABEL: vpselect_trunc:
881; CHECK:       # %bb.0:
882; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
883; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
884; CHECK-NEXT:    ret
885  %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
886  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
887  ret <vscale x 2 x i32> %b
888}
889
890; Folding this would create a loop in the DAG because the chain from the VLE is
891; used by the vssubu.
892define void @test_dag_loop() {
893; CHECK-LABEL: test_dag_loop:
894; CHECK:       # %bb.0: # %entry
895; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
896; CHECK-NEXT:    vmclr.m v0
897; CHECK-NEXT:    vmv.v.i v8, 0
898; CHECK-NEXT:    vmv.v.i v12, 0
899; CHECK-NEXT:    vsetivli zero, 0, e8, m4, tu, mu
900; CHECK-NEXT:    vssubu.vx v12, v8, zero, v0.t
901; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
902; CHECK-NEXT:    vmseq.vv v0, v12, v8
903; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
904; CHECK-NEXT:    vmv.v.i v8, 0
905; CHECK-NEXT:    vsetivli zero, 0, e16, m8, tu, mu
906; CHECK-NEXT:    vle16.v v8, (zero), v0.t
907; CHECK-NEXT:    vse16.v v8, (zero)
908; CHECK-NEXT:    ret
909entry:
910  %0 = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, ptr null, i64 1)
911  %1 = tail call <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i8 0, <vscale x 32 x i1> zeroinitializer, i64 0, i64 0)
912  %2 = tail call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> %1, <vscale x 32 x i8> zeroinitializer, i64 0)
913  %3 = tail call <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> %0, <vscale x 32 x i1> %2, i64 1)
914  call void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16> %3, ptr null, i64 0)
915  ret void
916}
917
918define <vscale x 1 x i16> @test_vaaddu(<vscale x 1 x i16> %var_11, i16 zeroext %var_9, <vscale x 1 x i1> %var_5, <vscale x 1 x i16> %var_0) {
919; CHECK-LABEL: test_vaaddu:
920; CHECK:       # %bb.0: # %entry
921; CHECK-NEXT:    csrwi vxrm, 0
922; CHECK-NEXT:    vsetivli zero, 3, e16, mf4, ta, mu
923; CHECK-NEXT:    vaaddu.vx v9, v8, a0, v0.t
924; CHECK-NEXT:    vmv1r.v v8, v9
925; CHECK-NEXT:    ret
926entry:
927  %0 = tail call <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> %var_11, i16 %var_9, i64 0, i64 3)
928  %1 = tail call <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> %var_0, <vscale x 1 x i16> %0, <vscale x 1 x i1> %var_5, i64 3)
929  ret <vscale x 1 x i16> %1
930}
931
932; Test reductions don't have a vmerge folded into them, since the mask affects
933; the result.
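; A masked reduction accumulates only the active elements, so giving it the merge's mask would
; change the scalar result; the vmerge is kept as a separate instruction instead.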
934
935declare <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
936    <vscale x 2 x i32>,
937    <vscale x 2 x i32>,
938    <vscale x 2 x i32>,
939    i64)
940
941define <vscale x 2 x i32> @vredsum(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl) {
942; CHECK-LABEL: vredsum:
943; CHECK:       # %bb.0:
944; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
945; CHECK-NEXT:    vmv1r.v v11, v8
946; CHECK-NEXT:    vredsum.vs v11, v9, v10
947; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
948; CHECK-NEXT:    ret
949  %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
950    <vscale x 2 x i32> %passthru,
951    <vscale x 2 x i32> %x,
952    <vscale x 2 x i32> %y,
953    i64 %vl)
954  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 %vl)
955  ret <vscale x 2 x i32> %b
956}
957
958declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
959    <vscale x 2 x float>,
960    <vscale x 2 x float>,
961    <vscale x 2 x float>,
962    i64, i64)
963
964define <vscale x 2 x float> @vfredusum(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) {
965; CHECK-LABEL: vfredusum:
966; CHECK:       # %bb.0:
967; CHECK-NEXT:    fsrmi a1, 0
968; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
969; CHECK-NEXT:    vmv1r.v v11, v8
970; CHECK-NEXT:    vfredusum.vs v11, v9, v10
971; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
972; CHECK-NEXT:    fsrm a1
973; CHECK-NEXT:    ret
974  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
975    <vscale x 2 x float> %passthru,
976    <vscale x 2 x float> %x,
977    <vscale x 2 x float> %y,
978    i64 0, i64 %vl)
979  %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl)
980  ret <vscale x 2 x float> %b
981}
982
983; However we can fold it in if the mask is all ones.
984define <vscale x 2 x i32> @vredsum_allones_mask(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl) {
985; CHECK-LABEL: vredsum_allones_mask:
986; CHECK:       # %bb.0:
987; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
988; CHECK-NEXT:    vredsum.vs v8, v9, v10
989; CHECK-NEXT:    ret
990  %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
991    <vscale x 2 x i32> %passthru,
992    <vscale x 2 x i32> %x,
993    <vscale x 2 x i32> %y,
994    i64 %vl)
995  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl)
996  ret <vscale x 2 x i32> %b
997}
998
define <vscale x 2 x float> @vfredusum_allones_mask(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, i64 %vl) {
; CHECK-LABEL: vfredusum_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fsrmi a1, 0
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT:    vfredusum.vs v8, v9, v10
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
    <vscale x 2 x float> %passthru,
    <vscale x 2 x float> %x,
    <vscale x 2 x float> %y,
    i64 0, i64 %vl)
  %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl)
  ret <vscale x 2 x float> %b
}

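; We can't fold this, since taking the vmerge's smaller VL would change the
; result of the reduction.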
define <vscale x 2 x i32> @unfoldable_vredsum_allones_mask_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
; CHECK-LABEL: unfoldable_vredsum_allones_mask_diff_vl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, tu, ma
; CHECK-NEXT:    vmv1r.v v11, v8
; CHECK-NEXT:    vredsum.vs v11, v9, v10
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v11
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
    <vscale x 2 x i32> %passthru,
    <vscale x 2 x i32> %x,
    <vscale x 2 x i32> %y,
    i64 -1)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 1)
  ret <vscale x 2 x i32> %b
}

declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64)
declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg)
declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64)
declare <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i64)
declare void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64)
declare <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, i16, i64 immarg, i64)
declare <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i64)

; Tests for folding vmerge into its ops when their VLs differ
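; The fold uses the smaller of the two VLs, and is only legal if the lanes
; past that VL still end up with the values the original vmerge would have
; produced.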

declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)
declare <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i64)

; Can fold with VL=2
define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_smaller_vl_same_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 4)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
  ret <vscale x 2 x i32> %b
}

; Can fold with VL=2
define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_larger_vl_same_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
  ret <vscale x 2 x i32> %b
}

; Can fold with VL=2
define <vscale x 2 x i32> @vmerge_smaller_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_smaller_vl_different_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vadd.vv v8, v10, v11
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
  ret <vscale x 2 x i32> %b
}

; Can't fold this because we need to take elements from both %pt1 and %pt2
define <vscale x 2 x i32> @vmerge_larger_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_larger_vl_different_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vadd.vv v8, v10, v11
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
  ret <vscale x 2 x i32> %b
}

; Can fold with VL=2
define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
  ret <vscale x 2 x i32> %b
}

; Can fold with VL=2
define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
  ret <vscale x 2 x i32> %b
}

; The vadd's new policy should be tail undisturbed since the false op of the
; vmerge moves from the body to the tail, and we need to preserve it.
define <vscale x 2 x i32> @vmerge_larger_vl_false_becomes_tail(<vscale x 2 x i32> %false, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_larger_vl_false_becomes_tail:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %false, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
  ret <vscale x 2 x i32> %b
}

; Test widening pseudos with their TIED variant (passthru same as first op).
define <vscale x 2 x i64> @vpmerge_vwsub.w_tied(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vwsub.w_tied:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vwsub.wv v8, v8, v12, v0.t
; CHECK-NEXT:    ret
  %vl.zext = zext i32 %vl to i64
  %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %passthru, <vscale x 2 x i32> %y, i64 %vl.zext)
  %b = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %passthru, i32 %vl)
  ret <vscale x 2 x i64> %b
}

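; Likewise for a floating-point widening op, which also carries a
; rounding-mode operand.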
define <vscale x 2 x double> @vpmerge_vfwsub.w_tied(<vscale x 2 x double> %passthru, <vscale x 2 x double> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vfwsub.w_tied:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fsrmi a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfwsub.wv v8, v8, v12, v0.t
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    ret
  %vl.zext = zext i32 %vl to i64
  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(<vscale x 2 x double> %passthru, <vscale x 2 x double> %passthru, <vscale x 2 x float> %y, i64 1, i64 %vl.zext)
  %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
  ret <vscale x 2 x double> %b
}

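; Test folding a vmerge with an implicit (poison) passthru when its true
; operand is a pseudo whose dest is tied to a source (vmacc).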
define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
; CHECK-LABEL: true_tied_dest_vmerge_implicit_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vmacc.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %avl, i64 0)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
    <vscale x 2 x i32> poison,
    <vscale x 2 x i32> %passthru,
    <vscale x 2 x i32> %a,
    <vscale x 2 x i1> %m,
    i64 %avl
  )
  ret <vscale x 2 x i32> %b
}

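; Test folding a vmerge with an all-ones mask and an implicit (poison)
; passthru into an op that is already masked.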
define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) {
; CHECK-LABEL: true_mask_vmerge_implicit_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl, i64 0)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
    <vscale x 2 x i32> poison,
    <vscale x 2 x i32> %passthru,
    <vscale x 2 x i32> %a,
    <vscale x 2 x i1> shufflevector(<vscale x 2 x i1> insertelement(<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer),
    i64 %avl
  )
  ret <vscale x 2 x i32> %b
}

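; Test that we don't fold when the SEW of the true operand (an e64 vadd,
; bitcast to e32) doesn't match the SEW of the vmerge.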
define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
; CHECK-LABEL: unfoldable_mismatched_sew:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vadd.vv v9, v9, v10
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
  %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
    <vscale x 2 x i32> %passthru,
    <vscale x 2 x i32> %passthru,
    <vscale x 2 x i32> %a.bitcast,
    <vscale x 2 x i1> splat (i1 true),
    i64 %avl
  )
  ret <vscale x 2 x i32> %b
}