1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s 3 4declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32) 5declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32) 6declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32) 7declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32) 8 9; Test binary operator with vp.merge and vp.smax. 10declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) 11define <vscale x 2 x i32> @vpmerge_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 12; CHECK-LABEL: vpmerge_vpadd: 13; CHECK: # %bb.0: 14; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 15; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 16; CHECK-NEXT: ret 17 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 18 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 19 ret <vscale x 2 x i32> %b 20} 21 22; Test glued node of merge should not be deleted. 23declare <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, metadata, <vscale x 2 x i1>, i32) 24define <vscale x 2 x i32> @vpmerge_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { 25; CHECK-LABEL: vpmerge_vpadd2: 26; CHECK: # %bb.0: 27; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 28; CHECK-NEXT: vmseq.vv v0, v9, v10 29; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu 30; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 31; CHECK-NEXT: ret 32 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 33 %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl) 34 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 35 ret <vscale x 2 x i32> %b 36} 37 38; Test vp.merge has all-ones mask. 39define <vscale x 2 x i32> @vpmerge_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { 40; CHECK-LABEL: vpmerge_vpadd3: 41; CHECK: # %bb.0: 42; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 43; CHECK-NEXT: vadd.vv v8, v9, v10 44; CHECK-NEXT: ret 45 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 46 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 47 ret <vscale x 2 x i32> %b 48} 49 50; Test float binary operator with vp.merge and vp.fadd. 51declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32) 52define <vscale x 2 x float> @vpmerge_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 53; CHECK-LABEL: vpmerge_vpfadd: 54; CHECK: # %bb.0: 55; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 56; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t 57; CHECK-NEXT: ret 58 %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 59 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 60 ret <vscale x 2 x float> %b 61} 62 63; Test for binary operator with specific EEW by riscv.vrgatherei16. 64declare <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64) 65define <vscale x 2 x i32> @vpmerge_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 66; CHECK-LABEL: vpmerge_vrgatherei16: 67; CHECK: # %bb.0: 68; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 69; CHECK-NEXT: vrgatherei16.vv v8, v9, v10 70; CHECK-NEXT: ret 71 %1 = zext i32 %vl to i64 72 %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1) 73 %3 = tail call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl) 74 ret <vscale x 2 x i32> %2 75} 76 77; Test conversion by fptosi. 78declare <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) 79define <vscale x 2 x i16> @vpmerge_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 80; CHECK-LABEL: vpmerge_vpfptosi: 81; CHECK: # %bb.0: 82; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu 83; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t 84; CHECK-NEXT: ret 85 %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 86 %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl) 87 ret <vscale x 2 x i16> %b 88} 89 90; Test conversion by sitofp. 91declare <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32) 92define <vscale x 2 x float> @vpmerge_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 93; CHECK-LABEL: vpmerge_vpsitofp: 94; CHECK: # %bb.0: 95; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 96; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t 97; CHECK-NEXT: ret 98 %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 99 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 100 ret <vscale x 2 x float> %b 101} 102 103; Test integer extension by vp.zext. 104declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32) 105define <vscale x 2 x i32> @vpmerge_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 106; CHECK-LABEL: vpmerge_vpzext: 107; CHECK: # %bb.0: 108; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 109; CHECK-NEXT: vzext.vf4 v8, v9, v0.t 110; CHECK-NEXT: ret 111 %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 112 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 113 ret <vscale x 2 x i32> %b 114} 115 116; Test integer truncation by vp.trunc. 117declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32) 118define <vscale x 2 x i32> @vpmerge_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 119; CHECK-LABEL: vpmerge_vptrunc: 120; CHECK: # %bb.0: 121; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 122; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t 123; CHECK-NEXT: ret 124 %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 125 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 126 ret <vscale x 2 x i32> %b 127} 128 129; Test integer extension by vp.fpext. 130declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) 131define <vscale x 2 x double> @vpmerge_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 132; CHECK-LABEL: vpmerge_vpfpext: 133; CHECK: # %bb.0: 134; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 135; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t 136; CHECK-NEXT: ret 137 %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 138 %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) 139 ret <vscale x 2 x double> %b 140} 141 142; Test integer truncation by vp.trunc. 143declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) 144define <vscale x 2 x float> @vpmerge_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 145; CHECK-LABEL: vpmerge_vpfptrunc: 146; CHECK: # %bb.0: 147; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 148; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t 149; CHECK-NEXT: ret 150 %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 151 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 152 ret <vscale x 2 x float> %b 153} 154 155; Test load operation by vp.load. 156declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32) 157define <vscale x 2 x i32> @vpmerge_vpload(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { 158; CHECK-LABEL: vpmerge_vpload: 159; CHECK: # %bb.0: 160; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu 161; CHECK-NEXT: vle32.v v8, (a0), v0.t 162; CHECK-NEXT: ret 163 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 164 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 165 ret <vscale x 2 x i32> %b 166} 167 168; Test result has chain and glued node. 169define <vscale x 2 x i32> @vpmerge_vpload2(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { 170; CHECK-LABEL: vpmerge_vpload2: 171; CHECK: # %bb.0: 172; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma 173; CHECK-NEXT: vmseq.vv v0, v9, v10 174; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu 175; CHECK-NEXT: vle32.v v8, (a0), v0.t 176; CHECK-NEXT: ret 177 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 178 %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl) 179 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 180 ret <vscale x 2 x i32> %b 181} 182 183; Test result has chain output of true operand of merge.vvm. 184define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { 185; CHECK-LABEL: vpmerge_vpload_store: 186; CHECK: # %bb.0: 187; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu 188; CHECK-NEXT: vle32.v v8, (a0), v0.t 189; CHECK-NEXT: vs1r.v v8, (a0) 190; CHECK-NEXT: ret 191 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 192 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 193 store <vscale x 2 x i32> %b, ptr %p 194 ret void 195} 196 197declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, ptr, i64) 198define <vscale x 2 x i32> @vpmerge_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { 199; CHECK-LABEL: vpmerge_vleff: 200; CHECK: # %bb.0: 201; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu 202; CHECK-NEXT: vle32ff.v v8, (a0), v0.t 203; CHECK-NEXT: ret 204 %1 = zext i32 %vl to i64 205 %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1) 206 %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0 207 %c = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl) 208 ret <vscale x 2 x i32> %c 209} 210 211; Test strided load by riscv.vlse 212declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32>, ptr, i64, i64) 213define <vscale x 2 x i32> @vpmerge_vlse(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) { 214; CHECK-LABEL: vpmerge_vlse: 215; CHECK: # %bb.0: 216; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu 217; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t 218; CHECK-NEXT: ret 219 %1 = zext i32 %vl to i64 220 %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %s, i64 %1) 221 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 222 ret <vscale x 2 x i32> %b 223} 224 225; Test indexed load by riscv.vluxei 226declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32>, ptr, <vscale x 2 x i64>, i64) 227define <vscale x 2 x i32> @vpmerge_vluxei(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) { 228; CHECK-LABEL: vpmerge_vluxei: 229; CHECK: # %bb.0: 230; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu 231; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t 232; CHECK-NEXT: ret 233 %1 = zext i32 %vl to i64 234 %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, ptr %p, <vscale x 2 x i64> %idx, i64 %1) 235 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 236 ret <vscale x 2 x i32> %b 237} 238 239; Test vector index by riscv.vid 240declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32>, i64) 241define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) { 242; CHECK-LABEL: vpmerge_vid: 243; CHECK: # %bb.0: 244; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 245; CHECK-NEXT: vid.v v8, v0.t 246; CHECK-NEXT: ret 247 %1 = zext i32 %vl to i64 248 %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1) 249 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 250 ret <vscale x 2 x i32> %b 251} 252 253; Test not combine VIOTA_M and VMERGE_VVM without true mask. 254declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64) 255define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) { 256; CHECK-LABEL: vpmerge_viota: 257; CHECK: # %bb.0: 258; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 259; CHECK-NEXT: viota.m v10, v9 260; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma 261; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 262; CHECK-NEXT: ret 263 %1 = zext i32 %vl to i64 264 %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1) 265 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 266 ret <vscale x 2 x i32> %b 267} 268 269; Test combine VIOTA_M and VMERGE_VVM with true mask. 270define <vscale x 2 x i32> @vpmerge_viota2(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %vm, i32 zeroext %vl) { 271; CHECK-LABEL: vpmerge_viota2: 272; CHECK: # %bb.0: 273; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 274; CHECK-NEXT: viota.m v8, v0 275; CHECK-NEXT: ret 276 %1 = zext i32 %vl to i64 277 %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1) 278 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 279 ret <vscale x 2 x i32> %b 280} 281 282; Test riscv.vfclass 283declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64) 284define <vscale x 2 x i32> @vpmerge_vflcass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { 285; CHECK-LABEL: vpmerge_vflcass: 286; CHECK: # %bb.0: 287; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 288; CHECK-NEXT: vfclass.v v8, v9, v0.t 289; CHECK-NEXT: ret 290 %1 = zext i32 %vl to i64 291 %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1) 292 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 293 ret <vscale x 2 x i32> %b 294} 295 296; Test riscv.vfsqrt 297declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64) 298define <vscale x 2 x float> @vpmerge_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { 299; CHECK-LABEL: vpmerge_vfsqrt: 300; CHECK: # %bb.0: 301; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 302; CHECK-NEXT: vfsqrt.v v8, v9, v0.t 303; CHECK-NEXT: ret 304 %1 = zext i32 %vl to i64 305 %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1) 306 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 307 ret <vscale x 2 x float> %b 308} 309 310; Test reciprocal operation by riscv.vfrec7 311declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64) 312define <vscale x 2 x float> @vpmerge_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { 313; CHECK-LABEL: vpmerge_vfrec7: 314; CHECK: # %bb.0: 315; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 316; CHECK-NEXT: vfrec7.v v8, v9, v0.t 317; CHECK-NEXT: ret 318 %1 = zext i32 %vl to i64 319 %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1) 320 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 321 ret <vscale x 2 x float> %b 322} 323 324; Test vector operations with VLMAX vector length. 325 326; Test binary operator with vp.merge and add. 327define <vscale x 2 x i32> @vpmerge_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 328; CHECK-LABEL: vpmerge_add: 329; CHECK: # %bb.0: 330; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 331; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 332; CHECK-NEXT: ret 333 %a = add <vscale x 2 x i32> %x, %y 334 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 335 ret <vscale x 2 x i32> %b 336} 337 338; Test binary operator with vp.merge and fadd. 339define <vscale x 2 x float> @vpmerge_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 340; CHECK-LABEL: vpmerge_fadd: 341; CHECK: # %bb.0: 342; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 343; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t 344; CHECK-NEXT: ret 345 %a = fadd <vscale x 2 x float> %x, %y 346 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 347 ret <vscale x 2 x float> %b 348} 349 350; This shouldn't be folded because we need to preserve exceptions with 351; "fpexcept.strict" exception behaviour, and masking may hide them. 352define <vscale x 2 x float> @vpmerge_constrained_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) strictfp { 353; CHECK-LABEL: vpmerge_constrained_fadd: 354; CHECK: # %bb.0: 355; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma 356; CHECK-NEXT: vfadd.vv v9, v9, v10 357; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 358; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 359; CHECK-NEXT: ret 360 %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp 361 %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl) strictfp 362 ret <vscale x 2 x float> %b 363} 364declare <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata) 365declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i64) 366 367; This shouldn't be folded because we need to preserve exceptions with 368; "fpexcept.strict" exception behaviour, and masking may hide them. 369define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m) strictfp { 370; CHECK-LABEL: vpmerge_constrained_fadd_vlmax: 371; CHECK: # %bb.0: 372; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma 373; CHECK-NEXT: vfadd.vv v9, v9, v10 374; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma 375; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 376; CHECK-NEXT: ret 377 %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp 378 %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1) strictfp 379 ret <vscale x 2 x float> %b 380} 381 382; Test conversion by fptosi. 383define <vscale x 2 x i16> @vpmerge_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 384; CHECK-LABEL: vpmerge_fptosi: 385; CHECK: # %bb.0: 386; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu 387; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t 388; CHECK-NEXT: ret 389 %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16> 390 %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl) 391 ret <vscale x 2 x i16> %b 392} 393 394; Test conversion by sitofp. 395define <vscale x 2 x float> @vpmerge_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 396; CHECK-LABEL: vpmerge_sitofp: 397; CHECK: # %bb.0: 398; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 399; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t 400; CHECK-NEXT: ret 401 %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float> 402 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 403 ret <vscale x 2 x float> %b 404} 405 406; Test float extension by fpext. 407define <vscale x 2 x double> @vpmerge_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 408; CHECK-LABEL: vpmerge_fpext: 409; CHECK: # %bb.0: 410; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 411; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t 412; CHECK-NEXT: ret 413 %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double> 414 %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) 415 ret <vscale x 2 x double> %b 416} 417 418; Test float truncation by fptrunc. 419define <vscale x 2 x float> @vpmerge_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 420; CHECK-LABEL: vpmerge_fptrunc: 421; CHECK: # %bb.0: 422; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 423; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t 424; CHECK-NEXT: ret 425 %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float> 426 %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 427 ret <vscale x 2 x float> %b 428} 429 430; Test integer extension by zext. 431define <vscale x 2 x i32> @vpmerge_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 432; CHECK-LABEL: vpmerge_zext: 433; CHECK: # %bb.0: 434; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 435; CHECK-NEXT: vzext.vf4 v8, v9, v0.t 436; CHECK-NEXT: ret 437 %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32> 438 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 439 ret <vscale x 2 x i32> %b 440} 441 442; Test integer truncation by trunc. 443define <vscale x 2 x i32> @vpmerge_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 444; CHECK-LABEL: vpmerge_trunc: 445; CHECK: # %bb.0: 446; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 447; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t 448; CHECK-NEXT: ret 449 %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32> 450 %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 451 ret <vscale x 2 x i32> %b 452} 453 454declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32) 455declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32) 456declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32) 457declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32) 458 459; Test binary operator with vp.select and vp.smax. 460define <vscale x 2 x i32> @vpselect_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 461; CHECK-LABEL: vpselect_vpadd: 462; CHECK: # %bb.0: 463; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 464; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 465; CHECK-NEXT: ret 466 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 467 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 468 ret <vscale x 2 x i32> %b 469} 470 471; Test glued node of select should not be deleted. 472define <vscale x 2 x i32> @vpselect_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { 473; CHECK-LABEL: vpselect_vpadd2: 474; CHECK: # %bb.0: 475; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 476; CHECK-NEXT: vmseq.vv v0, v9, v10 477; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 478; CHECK-NEXT: ret 479 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 480 %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl) 481 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 482 ret <vscale x 2 x i32> %b 483} 484 485; Test vp.select has all-ones mask. 486define <vscale x 2 x i32> @vpselect_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { 487; CHECK-LABEL: vpselect_vpadd3: 488; CHECK: # %bb.0: 489; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 490; CHECK-NEXT: vadd.vv v8, v9, v10 491; CHECK-NEXT: ret 492 %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 493 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 494 ret <vscale x 2 x i32> %b 495} 496 497; Test float binary operator with vp.select and vp.fadd. 498define <vscale x 2 x float> @vpselect_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 499; CHECK-LABEL: vpselect_vpfadd: 500; CHECK: # %bb.0: 501; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 502; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t 503; CHECK-NEXT: ret 504 %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 505 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 506 ret <vscale x 2 x float> %b 507} 508 509; Test for binary operator with specific EEW by riscv.vrgatherei16. 510define <vscale x 2 x i32> @vpselect_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 511; CHECK-LABEL: vpselect_vrgatherei16: 512; CHECK: # %bb.0: 513; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 514; CHECK-NEXT: vrgatherei16.vv v8, v9, v10 515; CHECK-NEXT: ret 516 %1 = zext i32 %vl to i64 517 %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1) 518 %3 = tail call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl) 519 ret <vscale x 2 x i32> %2 520} 521 522; Test conversion by fptosi. 523define <vscale x 2 x i16> @vpselect_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 524; CHECK-LABEL: vpselect_vpfptosi: 525; CHECK: # %bb.0: 526; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu 527; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t 528; CHECK-NEXT: ret 529 %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 530 %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl) 531 ret <vscale x 2 x i16> %b 532} 533 534; Test conversion by sitofp. 535define <vscale x 2 x float> @vpselect_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 536; CHECK-LABEL: vpselect_vpsitofp: 537; CHECK: # %bb.0: 538; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 539; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t 540; CHECK-NEXT: ret 541 %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 542 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 543 ret <vscale x 2 x float> %b 544} 545 546; Test integer extension by vp.zext. 547define <vscale x 2 x i32> @vpselect_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 548; CHECK-LABEL: vpselect_vpzext: 549; CHECK: # %bb.0: 550; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 551; CHECK-NEXT: vzext.vf4 v8, v9, v0.t 552; CHECK-NEXT: ret 553 %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 554 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 555 ret <vscale x 2 x i32> %b 556} 557 558; Test integer truncation by vp.trunc. 559define <vscale x 2 x i32> @vpselect_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 560; CHECK-LABEL: vpselect_vptrunc: 561; CHECK: # %bb.0: 562; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 563; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t 564; CHECK-NEXT: ret 565 %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 566 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 567 ret <vscale x 2 x i32> %b 568} 569 570; Test integer extension by vp.fpext. 571define <vscale x 2 x double> @vpselect_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 572; CHECK-LABEL: vpselect_vpfpext: 573; CHECK: # %bb.0: 574; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 575; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t 576; CHECK-NEXT: ret 577 %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 578 %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) 579 ret <vscale x 2 x double> %b 580} 581 582; Test integer truncation by vp.trunc. 583define <vscale x 2 x float> @vpselect_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 584; CHECK-LABEL: vpselect_vpfptrunc: 585; CHECK: # %bb.0: 586; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 587; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t 588; CHECK-NEXT: ret 589 %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 590 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 591 ret <vscale x 2 x float> %b 592} 593 594; Test load operation by vp.load. 595define <vscale x 2 x i32> @vpselect_vpload(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { 596; CHECK-LABEL: vpselect_vpload: 597; CHECK: # %bb.0: 598; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 599; CHECK-NEXT: vle32.v v8, (a0), v0.t 600; CHECK-NEXT: ret 601 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 602 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 603 ret <vscale x 2 x i32> %b 604} 605 606; Test result has chain and glued node. 607define <vscale x 2 x i32> @vpselect_vpload2(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { 608; CHECK-LABEL: vpselect_vpload2: 609; CHECK: # %bb.0: 610; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 611; CHECK-NEXT: vmseq.vv v0, v9, v10 612; CHECK-NEXT: vle32.v v8, (a0), v0.t 613; CHECK-NEXT: ret 614 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 615 %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl) 616 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 617 ret <vscale x 2 x i32> %b 618} 619 620; Test result has chain output of true operand of select.vvm. 621define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { 622; CHECK-LABEL: vpselect_vpload_store: 623; CHECK: # %bb.0: 624; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 625; CHECK-NEXT: vle32.v v8, (a0), v0.t 626; CHECK-NEXT: vs1r.v v8, (a0) 627; CHECK-NEXT: ret 628 %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) 629 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 630 store <vscale x 2 x i32> %b, ptr %p 631 ret void 632} 633 634define <vscale x 2 x i32> @vpselect_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { 635; CHECK-LABEL: vpselect_vleff: 636; CHECK: # %bb.0: 637; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 638; CHECK-NEXT: vle32ff.v v8, (a0), v0.t 639; CHECK-NEXT: ret 640 %1 = zext i32 %vl to i64 641 %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1) 642 %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0 643 %c = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl) 644 ret <vscale x 2 x i32> %c 645} 646 647; Test strided load by riscv.vlse 648define <vscale x 2 x i32> @vpselect_vlse(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) { 649; CHECK-LABEL: vpselect_vlse: 650; CHECK: # %bb.0: 651; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu 652; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t 653; CHECK-NEXT: ret 654 %1 = zext i32 %vl to i64 655 %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %s, i64 %1) 656 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 657 ret <vscale x 2 x i32> %b 658} 659 660; Test indexed load by riscv.vluxei 661define <vscale x 2 x i32> @vpselect_vluxei(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) { 662; CHECK-LABEL: vpselect_vluxei: 663; CHECK: # %bb.0: 664; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu 665; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t 666; CHECK-NEXT: ret 667 %1 = zext i32 %vl to i64 668 %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, ptr %p, <vscale x 2 x i64> %idx, i64 %1) 669 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 670 ret <vscale x 2 x i32> %b 671} 672 673; Test vector index by riscv.vid 674define <vscale x 2 x i32> @vpselect_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) { 675; CHECK-LABEL: vpselect_vid: 676; CHECK: # %bb.0: 677; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 678; CHECK-NEXT: vid.v v8, v0.t 679; CHECK-NEXT: ret 680 %1 = zext i32 %vl to i64 681 %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1) 682 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 683 ret <vscale x 2 x i32> %b 684} 685 686; Test riscv.viota 687define <vscale x 2 x i32> @vpselect_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) { 688; CHECK-LABEL: vpselect_viota: 689; CHECK: # %bb.0: 690; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 691; CHECK-NEXT: viota.m v10, v9 692; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 693; CHECK-NEXT: ret 694 %1 = zext i32 %vl to i64 695 %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1) 696 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 697 ret <vscale x 2 x i32> %b 698} 699 700; Test riscv.vfclass 701define <vscale x 2 x i32> @vpselect_vflcass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { 702; CHECK-LABEL: vpselect_vflcass: 703; CHECK: # %bb.0: 704; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 705; CHECK-NEXT: vfclass.v v8, v9, v0.t 706; CHECK-NEXT: ret 707 %1 = zext i32 %vl to i64 708 %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1) 709 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 710 ret <vscale x 2 x i32> %b 711} 712 713; Test riscv.vfsqrt 714define <vscale x 2 x float> @vpselect_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { 715; CHECK-LABEL: vpselect_vfsqrt: 716; CHECK: # %bb.0: 717; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 718; CHECK-NEXT: vfsqrt.v v8, v9, v0.t 719; CHECK-NEXT: ret 720 %1 = zext i32 %vl to i64 721 %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1) 722 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 723 ret <vscale x 2 x float> %b 724} 725 726; Test reciprocal operation by riscv.vfrec7 727define <vscale x 2 x float> @vpselect_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { 728; CHECK-LABEL: vpselect_vfrec7: 729; CHECK: # %bb.0: 730; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 731; CHECK-NEXT: vfrec7.v v8, v9, v0.t 732; CHECK-NEXT: ret 733 %1 = zext i32 %vl to i64 734 %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1) 735 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 736 ret <vscale x 2 x float> %b 737} 738 739; Test slides 740declare <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64, i64, i64) 741define <vscale x 2 x i32> @vpselect_vslideup(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i64 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 742; CHECK-LABEL: vpselect_vslideup: 743; CHECK: # %bb.0: 744; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 745; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t 746; CHECK-NEXT: ret 747 %1 = zext i32 %vl to i64 748 %a = call <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i64 %x, i64 %1, i64 0) 749 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 750 ret <vscale x 2 x i32> %b 751} 752 753declare <vscale x 2 x i32> @llvm.riscv.vslidedown.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64, i64, i64) 754define <vscale x 2 x i32> @vpselect_vslidedown(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i64 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 755; CHECK-LABEL: vpselect_vslidedown: 756; CHECK: # %bb.0: 757; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 758; CHECK-NEXT: vslidedown.vx v8, v9, a0, v0.t 759; CHECK-NEXT: ret 760 %1 = zext i32 %vl to i64 761 %a = call <vscale x 2 x i32> @llvm.riscv.vslidedown.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i64 %x, i64 %1, i64 0) 762 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 763 ret <vscale x 2 x i32> %b 764} 765 766declare <vscale x 2 x i32> @llvm.riscv.vslide1up.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64) 767define <vscale x 2 x i32> @vpselect_vslide1up(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 768; CHECK-LABEL: vpselect_vslide1up: 769; CHECK: # %bb.0: 770; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 771; CHECK-NEXT: vslide1up.vx v8, v9, a0, v0.t 772; CHECK-NEXT: ret 773 %1 = zext i32 %vl to i64 774 %a = call <vscale x 2 x i32> @llvm.riscv.vslide1up.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1) 775 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 776 ret <vscale x 2 x i32> %b 777} 778 779declare <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64) 780define <vscale x 2 x i32> @vpselect_vslide1down(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 781; CHECK-LABEL: vpselect_vslide1down: 782; CHECK: # %bb.0: 783; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 784; CHECK-NEXT: vslide1down.vx v8, v9, a0, v0.t 785; CHECK-NEXT: ret 786 %1 = zext i32 %vl to i64 787 %a = call <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1) 788 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 789 ret <vscale x 2 x i32> %b 790} 791 792; Test vector operations with VLMAX vector length. 793 794; Test binary operator with vp.select and add. 795define <vscale x 2 x i32> @vpselect_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 796; CHECK-LABEL: vpselect_add: 797; CHECK: # %bb.0: 798; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 799; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 800; CHECK-NEXT: ret 801 %a = add <vscale x 2 x i32> %x, %y 802 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 803 ret <vscale x 2 x i32> %b 804} 805 806; Test binary operator with vp.select and fadd. 807define <vscale x 2 x float> @vpselect_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { 808; CHECK-LABEL: vpselect_fadd: 809; CHECK: # %bb.0: 810; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 811; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t 812; CHECK-NEXT: ret 813 %a = fadd <vscale x 2 x float> %x, %y 814 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 815 ret <vscale x 2 x float> %b 816} 817 818; Test conversion by fptosi. 819define <vscale x 2 x i16> @vpselect_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 820; CHECK-LABEL: vpselect_fptosi: 821; CHECK: # %bb.0: 822; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu 823; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t 824; CHECK-NEXT: ret 825 %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16> 826 %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl) 827 ret <vscale x 2 x i16> %b 828} 829 830; Test conversion by sitofp. 831define <vscale x 2 x float> @vpselect_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 832; CHECK-LABEL: vpselect_sitofp: 833; CHECK: # %bb.0: 834; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 835; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t 836; CHECK-NEXT: ret 837 %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float> 838 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 839 ret <vscale x 2 x float> %b 840} 841 842; Test float extension by fpext. 843define <vscale x 2 x double> @vpselect_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 844; CHECK-LABEL: vpselect_fpext: 845; CHECK: # %bb.0: 846; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 847; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t 848; CHECK-NEXT: ret 849 %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double> 850 %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) 851 ret <vscale x 2 x double> %b 852} 853 854; Test float truncation by fptrunc. 855define <vscale x 2 x float> @vpselect_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 856; CHECK-LABEL: vpselect_fptrunc: 857; CHECK: # %bb.0: 858; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 859; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t 860; CHECK-NEXT: ret 861 %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float> 862 %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) 863 ret <vscale x 2 x float> %b 864} 865 866; Test integer extension by zext. 867define <vscale x 2 x i32> @vpselect_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 868; CHECK-LABEL: vpselect_zext: 869; CHECK: # %bb.0: 870; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 871; CHECK-NEXT: vzext.vf4 v8, v9, v0.t 872; CHECK-NEXT: ret 873 %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32> 874 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 875 ret <vscale x 2 x i32> %b 876} 877 878; Test integer truncation by trunc. 879define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { 880; CHECK-LABEL: vpselect_trunc: 881; CHECK: # %bb.0: 882; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 883; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t 884; CHECK-NEXT: ret 885 %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32> 886 %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) 887 ret <vscale x 2 x i32> %b 888} 889 890; Folding this would create a loop in the DAG becuase the chain from the VLE is 891; used by the vssubu. 892define void @test_dag_loop() { 893; CHECK-LABEL: test_dag_loop: 894; CHECK: # %bb.0: # %entry 895; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma 896; CHECK-NEXT: vmclr.m v0 897; CHECK-NEXT: vmv.v.i v8, 0 898; CHECK-NEXT: vmv.v.i v12, 0 899; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu 900; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t 901; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma 902; CHECK-NEXT: vmseq.vv v0, v12, v8 903; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma 904; CHECK-NEXT: vmv.v.i v8, 0 905; CHECK-NEXT: vsetivli zero, 0, e16, m8, tu, mu 906; CHECK-NEXT: vle16.v v8, (zero), v0.t 907; CHECK-NEXT: vse16.v v8, (zero) 908; CHECK-NEXT: ret 909entry: 910 %0 = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, ptr null, i64 1) 911 %1 = tail call <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i8 0, <vscale x 32 x i1> zeroinitializer, i64 0, i64 0) 912 %2 = tail call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> %1, <vscale x 32 x i8> zeroinitializer, i64 0) 913 %3 = tail call <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> %0, <vscale x 32 x i1> %2, i64 1) 914 call void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16> %3, ptr null, i64 0) 915 ret void 916} 917 918define <vscale x 1 x i16> @test_vaaddu(<vscale x 1 x i16> %var_11, i16 zeroext %var_9, <vscale x 1 x i1> %var_5, <vscale x 1 x i16> %var_0) { 919; CHECK-LABEL: test_vaaddu: 920; CHECK: # %bb.0: # %entry 921; CHECK-NEXT: csrwi vxrm, 0 922; CHECK-NEXT: vsetivli zero, 3, e16, mf4, ta, mu 923; CHECK-NEXT: vaaddu.vx v9, v8, a0, v0.t 924; CHECK-NEXT: vmv1r.v v8, v9 925; CHECK-NEXT: ret 926entry: 927 %0 = tail call <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> %var_11, i16 %var_9, i64 0, i64 3) 928 %1 = tail call <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> %var_0, <vscale x 1 x i16> %0, <vscale x 1 x i1> %var_5, i64 3) 929 ret <vscale x 1 x i16> %1 930} 931 932; Test reductions don't have a vmerge folded into them, since the mask affects 933; the result. 934 935declare <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32( 936 <vscale x 2 x i32>, 937 <vscale x 2 x i32>, 938 <vscale x 2 x i32>, 939 i64) 940 941define <vscale x 2 x i32> @vredsum(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl) { 942; CHECK-LABEL: vredsum: 943; CHECK: # %bb.0: 944; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 945; CHECK-NEXT: vmv1r.v v11, v8 946; CHECK-NEXT: vredsum.vs v11, v9, v10 947; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 948; CHECK-NEXT: ret 949 %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32( 950 <vscale x 2 x i32> %passthru, 951 <vscale x 2 x i32> %x, 952 <vscale x 2 x i32> %y, 953 i64 %vl) 954 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 %vl) 955 ret <vscale x 2 x i32> %b 956} 957 958declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32( 959 <vscale x 2 x float>, 960 <vscale x 2 x float>, 961 <vscale x 2 x float>, 962 i64, i64) 963 964define <vscale x 2 x float> @vfredusum(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) { 965; CHECK-LABEL: vfredusum: 966; CHECK: # %bb.0: 967; CHECK-NEXT: fsrmi a1, 0 968; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 969; CHECK-NEXT: vmv1r.v v11, v8 970; CHECK-NEXT: vfredusum.vs v11, v9, v10 971; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 972; CHECK-NEXT: fsrm a1 973; CHECK-NEXT: ret 974 %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32( 975 <vscale x 2 x float> %passthru, 976 <vscale x 2 x float> %x, 977 <vscale x 2 x float> %y, 978 i64 0, i64 %vl) 979 %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl) 980 ret <vscale x 2 x float> %b 981} 982 983; However we can fold it in if the mask is all ones. 984define <vscale x 2 x i32> @vredsum_allones_mask(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl) { 985; CHECK-LABEL: vredsum_allones_mask: 986; CHECK: # %bb.0: 987; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 988; CHECK-NEXT: vredsum.vs v8, v9, v10 989; CHECK-NEXT: ret 990 %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32( 991 <vscale x 2 x i32> %passthru, 992 <vscale x 2 x i32> %x, 993 <vscale x 2 x i32> %y, 994 i64 %vl) 995 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl) 996 ret <vscale x 2 x i32> %b 997} 998 999define <vscale x 2 x float> @vfredusum_allones_mask(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, i64 %vl) { 1000; CHECK-LABEL: vfredusum_allones_mask: 1001; CHECK: # %bb.0: 1002; CHECK-NEXT: fsrmi a1, 0 1003; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 1004; CHECK-NEXT: vfredusum.vs v8, v9, v10 1005; CHECK-NEXT: fsrm a1 1006; CHECK-NEXT: ret 1007 %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32( 1008 <vscale x 2 x float> %passthru, 1009 <vscale x 2 x float> %x, 1010 <vscale x 2 x float> %y, 1011 i64 0, i64 %vl) 1012 %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl) 1013 ret <vscale x 2 x float> %b 1014} 1015 1016define <vscale x 2 x i32> @unfoldable_vredsum_allones_mask_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y) { 1017; CHECK-LABEL: unfoldable_vredsum_allones_mask_diff_vl: 1018; CHECK: # %bb.0: 1019; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma 1020; CHECK-NEXT: vmv1r.v v11, v8 1021; CHECK-NEXT: vredsum.vs v11, v9, v10 1022; CHECK-NEXT: vsetivli zero, 1, e32, m1, tu, ma 1023; CHECK-NEXT: vmv.v.v v8, v11 1024; CHECK-NEXT: ret 1025 %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32( 1026 <vscale x 2 x i32> %passthru, 1027 <vscale x 2 x i32> %x, 1028 <vscale x 2 x i32> %y, 1029 i64 -1) 1030 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 1) 1031 ret <vscale x 2 x i32> %b 1032} 1033 1034declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64) 1035declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg) 1036declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64) 1037declare <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i64) 1038declare void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64) 1039declare <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, i16, i64 immarg, i64) 1040declare <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i64) 1041 1042; Tests for folding vmerge into its ops when their VLs differ 1043 1044declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64) 1045declare <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i64) 1046 1047; Can fold with VL=2 1048define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { 1049; CHECK-LABEL: vmerge_smaller_vl_same_passthru: 1050; CHECK: # %bb.0: 1051; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu 1052; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 1053; CHECK-NEXT: ret 1054 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 4) 1055 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2) 1056 ret <vscale x 2 x i32> %b 1057} 1058 1059; Can fold with VL=2 1060define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { 1061; CHECK-LABEL: vmerge_larger_vl_same_passthru: 1062; CHECK: # %bb.0: 1063; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu 1064; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 1065; CHECK-NEXT: ret 1066 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2) 1067 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) 1068 ret <vscale x 2 x i32> %b 1069} 1070 1071; Can fold with VL=2 1072define <vscale x 2 x i32> @vmerge_smaller_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { 1073; CHECK-LABEL: vmerge_smaller_vl_different_passthru: 1074; CHECK: # %bb.0: 1075; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma 1076; CHECK-NEXT: vadd.vv v8, v10, v11 1077; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 1078; CHECK-NEXT: vmv1r.v v8, v9 1079; CHECK-NEXT: ret 1080 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3) 1081 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2) 1082 ret <vscale x 2 x i32> %b 1083} 1084 1085; Can't fold this because we need to take elements from both %pt1 and %pt2 1086define <vscale x 2 x i32> @vmerge_larger_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { 1087; CHECK-LABEL: vmerge_larger_vl_different_passthru: 1088; CHECK: # %bb.0: 1089; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma 1090; CHECK-NEXT: vadd.vv v8, v10, v11 1091; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma 1092; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 1093; CHECK-NEXT: vmv1r.v v8, v9 1094; CHECK-NEXT: ret 1095 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2) 1096 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) 1097 ret <vscale x 2 x i32> %b 1098} 1099 1100; Can fold with VL=2 1101define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { 1102; CHECK-LABEL: vmerge_smaller_vl_poison_passthru: 1103; CHECK: # %bb.0: 1104; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu 1105; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 1106; CHECK-NEXT: ret 1107 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3) 1108 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2) 1109 ret <vscale x 2 x i32> %b 1110} 1111 1112; Can fold with VL=2 1113define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { 1114; CHECK-LABEL: vmerge_larger_vl_poison_passthru: 1115; CHECK: # %bb.0: 1116; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu 1117; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 1118; CHECK-NEXT: ret 1119 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2) 1120 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) 1121 ret <vscale x 2 x i32> %b 1122} 1123 1124; The vadd's new policy should be tail undisturbed since the false op of the 1125; vmerge moves from the the body to the tail, and we need to preserve it. 1126define <vscale x 2 x i32> @vmerge_larger_vl_false_becomes_tail(<vscale x 2 x i32> %false, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { 1127; CHECK-LABEL: vmerge_larger_vl_false_becomes_tail: 1128; CHECK: # %bb.0: 1129; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu 1130; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 1131; CHECK-NEXT: ret 1132 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2) 1133 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %false, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) 1134 ret <vscale x 2 x i32> %b 1135} 1136 1137; Test widening pseudos with their TIED variant (passthru same as first op). 1138define <vscale x 2 x i64> @vpmerge_vwsub.w_tied(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) { 1139; CHECK-LABEL: vpmerge_vwsub.w_tied: 1140; CHECK: # %bb.0: 1141; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 1142; CHECK-NEXT: vwsub.wv v8, v8, v12, v0.t 1143; CHECK-NEXT: ret 1144 %vl.zext = zext i32 %vl to i64 1145 %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %passthru, <vscale x 2 x i32> %y, i64 %vl.zext) 1146 %b = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %passthru, i32 %vl) 1147 ret <vscale x 2 x i64> %b 1148} 1149 1150define <vscale x 2 x double> @vpmerge_vfwsub.w_tied(<vscale x 2 x double> %passthru, <vscale x 2 x double> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) { 1151; CHECK-LABEL: vpmerge_vfwsub.w_tied: 1152; CHECK: # %bb.0: 1153; CHECK-NEXT: fsrmi a1, 1 1154; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 1155; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t 1156; CHECK-NEXT: fsrm a1 1157; CHECK-NEXT: ret 1158 %vl.zext = zext i32 %vl to i64 1159 %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(<vscale x 2 x double> %passthru, <vscale x 2 x double> %passthru, <vscale x 2 x float> %y, i64 1, i64 %vl.zext) 1160 %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) 1161 ret <vscale x 2 x double> %b 1162} 1163 1164define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) { 1165; CHECK-LABEL: true_tied_dest_vmerge_implicit_passthru: 1166; CHECK: # %bb.0: 1167; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 1168; CHECK-NEXT: vmacc.vv v8, v9, v10, v0.t 1169; CHECK-NEXT: ret 1170 %a = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %avl, i64 0) 1171 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32( 1172 <vscale x 2 x i32> poison, 1173 <vscale x 2 x i32> %passthru, 1174 <vscale x 2 x i32> %a, 1175 <vscale x 2 x i1> %m, 1176 i64 %avl 1177 ) 1178 ret <vscale x 2 x i32> %b 1179} 1180 1181define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) { 1182; CHECK-LABEL: true_mask_vmerge_implicit_passthru: 1183; CHECK: # %bb.0: 1184; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 1185; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t 1186; CHECK-NEXT: ret 1187 %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl, i64 0) 1188 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32( 1189 <vscale x 2 x i32> poison, 1190 <vscale x 2 x i32> %passthru, 1191 <vscale x 2 x i32> %a, 1192 <vscale x 2 x i1> shufflevector(<vscale x 2 x i1> insertelement(<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), 1193 i64 %avl 1194 ) 1195 ret <vscale x 2 x i32> %b 1196} 1197 1198 1199define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) { 1200; CHECK-LABEL: unfoldable_mismatched_sew: 1201; CHECK: # %bb.0: 1202; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1203; CHECK-NEXT: vadd.vv v9, v9, v10 1204; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 1205; CHECK-NEXT: vmv.v.v v8, v9 1206; CHECK-NEXT: ret 1207 %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl) 1208 %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32> 1209 %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32( 1210 <vscale x 2 x i32> %passthru, 1211 <vscale x 2 x i32> %passthru, 1212 <vscale x 2 x i32> %a.bitcast, 1213 <vscale x 2 x i1> splat (i1 true), 1214 i64 %avl 1215 ) 1216 ret <vscale x 2 x i32> %b 1217} 1218