; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvl256b | FileCheck %s

declare <8 x i16> @llvm.vp.merge.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)

; Test binary operator with vp.merge and vp.add.
; The vp.add uses an all-true mask and its only user is the vp.merge; the
; CHECK lines expect a single masked vadd.vv under a tu, mu vsetvli.
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
define <8 x i32> @vpmerge_vpadd(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test glued node of merge should not be deleted.
; The merge mask is produced by vp.icmp here, so the CHECK lines expect the
; vmseq.vv that defines v0 to remain ahead of the masked vadd.vv.
declare <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32)
define <8 x i32> @vpmerge_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpadd2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmseq.vv v0, v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
  %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test vp.merge with an all-ones mask.
; With an all-true merge mask the CHECK lines expect an unmasked vadd.vv
; (no v0.t operand) under a tu, ma vsetvli.
define <8 x i32> @vpmerge_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpadd3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT:    vadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> splat (i1 true), <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test float binary operator with vp.merge and vp.fadd.
; Expected output per the CHECK lines: a single masked vfadd.vv under tu, mu.
declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
define <8 x float> @vpmerge_vpfadd(<8 x float> %passthru, <8 x float> %x, <8 x float> %y, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpfadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %x, <8 x float> %y, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
  ret <8 x float> %b
}

; Test conversion by fptosi.
; vp.fptosi narrows <8 x float> to <8 x i16>; the CHECK lines expect a single
; masked vfncvt.rtz.x.f.w at e16, mf2.
declare <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x i16> @vpmerge_vpfptosi(<8 x i16> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpfptosi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %a, <8 x i16> %passthru, i32 %vl)
  ret <8 x i16> %b
}

; Test conversion by sitofp.
; vp.sitofp narrows <8 x i64> to <8 x float>; the CHECK lines expect a single
; masked vfncvt.f.x.w under tu, mu.
declare <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64>, <8 x i1>, i32)
define <8 x float> @vpmerge_vpsitofp(<8 x float> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpsitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
  ret <8 x float> %b
}

; Test integer extension by vp.zext.
; <8 x i8> is widened to <8 x i32>; the CHECK lines expect a masked vzext.vf4.
declare <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8>, <8 x i1>, i32)
define <8 x i32> @vpmerge_vpzext(<8 x i32> %passthru, <8 x i8> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpzext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test integer truncation by vp.trunc.
; <8 x i64> is narrowed to <8 x i32>; the CHECK lines expect a masked vnsrl.wi
; with a shift amount of 0.
declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32)
define <8 x i32> @vpmerge_vptrunc(<8 x i32> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test floating-point extension by vp.fpext.
; vp.fpext widens <8 x float> to <8 x double>; the CHECK lines expect a single
; masked vfwcvt.f.f.v under tu, mu.
declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x double> @vpmerge_vpfpext(<8 x double> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpfpext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %a, <8 x double> %passthru, i32 %vl)
  ret <8 x double> %b
}

; Test floating-point truncation by vp.fptrunc.
; <8 x double> is narrowed to <8 x float>; the CHECK lines expect a single
; masked vfncvt.f.f.w under tu, mu.
declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x float> @vpmerge_vpfptrunc(<8 x float> %passthru, <8 x double> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpfptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
  ret <8 x float> %b
}

; Test load operation by vp.load.
; The CHECK lines expect a single masked vle32.v under tu, mu; the pointer is
; in a0 and the vector length in a1.
declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)

define <8 x i32> @vpmerge_vpload(<8 x i32> %passthru, ptr %p, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpload:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test a result that has a chain and a glued node.
; The merge mask is produced by vp.icmp; the CHECK lines expect the vmseq.vv
; that defines v0 to remain ahead of the masked vle32.v.
define <8 x i32> @vpmerge_vpload2(<8 x i32> %passthru, ptr %p, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpload2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vmseq.vv v0, v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> splat (i1 true), i32 %vl)
  %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

declare <8 x i16> @llvm.vp.select.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
declare <8 x float> @llvm.vp.select.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
declare <8 x double> @llvm.vp.select.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)

; Test binary operator with vp.select and vp.add.
; Same shape as the vp.merge tests, but with vp.select the CHECK lines expect
; a tail-agnostic (ta, mu) vsetvli around the masked vadd.vv.
define <8 x i32> @vpselect_vpadd(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test glued node of select should not be deleted.
; The select mask is produced by vp.icmp; the CHECK lines expect vmseq.vv
; into v0 followed by the masked vadd.vv under a single ta, mu vsetvli.
define <8 x i32> @vpselect_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpadd2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vmseq.vv v0, v9, v10
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
  %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test vp.select with an all-ones mask.
; The CHECK lines expect an unmasked vadd.vv (no v0.t operand) under ta, ma.
define <8 x i32> @vpselect_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpadd3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test float binary operator with vp.select and vp.fadd.
; Expected output per the CHECK lines: a single masked vfadd.vv under ta, mu.
define <8 x float> @vpselect_vpfadd(<8 x float> %passthru, <8 x float> %x, <8 x float> %y, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpfadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %x, <8 x float> %y, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
  ret <8 x float> %b
}

; Test conversion by fptosi.
; vp.fptosi narrows <8 x float> to <8 x i16>; the CHECK lines expect a single
; masked vfncvt.rtz.x.f.w at e16, mf2 under ta, mu.
define <8 x i16> @vpselect_vpfptosi(<8 x i16> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpfptosi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i16> @llvm.vp.select.v8i16(<8 x i1> %m, <8 x i16> %a, <8 x i16> %passthru, i32 %vl)
  ret <8 x i16> %b
}

; Test conversion by sitofp.
; <8 x i64> is narrowed to <8 x float>; the CHECK lines expect a single masked
; vfncvt.f.x.w under ta, mu.
define <8 x float> @vpselect_vpsitofp(<8 x float> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpsitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
  ret <8 x float> %b
}

; Test integer extension by vp.zext.
; <8 x i8> is widened to <8 x i32>; the CHECK lines expect a masked vzext.vf4.
define <8 x i32> @vpselect_vpzext(<8 x i32> %passthru, <8 x i8> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpzext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test integer truncation by vp.trunc.
; vp.trunc narrows <8 x i64> to <8 x i32>; the CHECK lines expect a masked
; vnsrl.wi with a shift amount of 0 under ta, mu.
define <8 x i32> @vpselect_vptrunc(<8 x i32> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test floating-point extension by vp.fpext.
; <8 x float> is widened to <8 x double>; the CHECK lines expect a single
; masked vfwcvt.f.f.v under ta, mu.
define <8 x double> @vpselect_vpfpext(<8 x double> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpfpext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %a, <8 x double> %passthru, i32 %vl)
  ret <8 x double> %b
}

; Test floating-point truncation by vp.fptrunc.
; <8 x double> is narrowed to <8 x float>; the CHECK lines expect a single
; masked vfncvt.f.f.w under ta, mu.
define <8 x float> @vpselect_vpfptrunc(<8 x float> %passthru, <8 x double> %x, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpfptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %x, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
  ret <8 x float> %b
}

; Test load operation by vp.load.
; vp.load feeding vp.select; the CHECK lines expect a single masked vle32.v
; under ta, mu, with the pointer in a0 and the vector length in a1.
define <8 x i32> @vpselect_vpload(<8 x i32> %passthru, ptr %p, <8 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpload:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}

; Test a result that has a chain and a glued node.
; The select mask is produced by vp.icmp; the CHECK lines expect vmseq.vv
; into v0 followed by the masked vle32.v under a single ta, mu vsetvli.
define <8 x i32> @vpselect_vpload2(<8 x i32> %passthru, ptr %p, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpload2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vmseq.vv v0, v9, v10
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> splat (i1 true), i32 %vl)
  %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> splat (i1 true), i32 %vl)
  %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
  ret <8 x i32> %b
}