; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN

declare <4 x i1> @llvm.vp.merge.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)

define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpmerge_vv_v4i1:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vid.v v10
; RV32-NEXT: vmsltu.vx v10, v10, a0
; RV32-NEXT: vmand.mm v9, v9, v10
; RV32-NEXT: vmandn.mm v8, v8, v9
; RV32-NEXT: vmand.mm v9, v0, v9
; RV32-NEXT: vmor.mm v0, v9, v8
; RV32-NEXT: ret
;
; RV64-LABEL: vpmerge_vv_v4i1:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vid.v v10
; RV64-NEXT: vmsltu.vx v12, v10, a0
; RV64-NEXT: vmand.mm v9, v9, v12
; RV64-NEXT: vmandn.mm v8, v8, v9
; RV64-NEXT: vmand.mm v9, v0, v9
; RV64-NEXT: vmor.mm v0, v9, v8
; RV64-NEXT: ret
;
; RV32ZVFHMIN-LABEL: vpmerge_vv_v4i1:
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32ZVFHMIN-NEXT: vid.v v10
; RV32ZVFHMIN-NEXT: vmsltu.vx v10, v10, a0
; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v10
; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
; RV32ZVFHMIN-NEXT: ret
;
; RV64ZVFHMIN-LABEL: vpmerge_vv_v4i1:
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64ZVFHMIN-NEXT: vid.v v10
; RV64ZVFHMIN-NEXT: vmsltu.vx v12, v10, a0
; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v12
; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9
; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8
; RV64ZVFHMIN-NEXT: ret
  %v = call <4 x i1> @llvm.vp.merge.v4i1(<4 x i1> %m, <4 x i1> %va, <4 x i1> %vb, i32 %evl)
  ret <4 x i1> %v
}

define <8 x i1> @vpmerge_vv_v8i1(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpmerge_vv_v8i1:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vid.v v10
; RV32-NEXT: vmsltu.vx v12, v10, a0
; RV32-NEXT: vmand.mm v9, v9, v12
; RV32-NEXT: vmandn.mm v8, v8, v9
; RV32-NEXT: vmand.mm v9, v0, v9
; RV32-NEXT: vmor.mm v0, v9, v8
; RV32-NEXT: ret
;
; RV64-LABEL: vpmerge_vv_v8i1:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vid.v v12
; RV64-NEXT: vmsltu.vx v10, v12, a0
; RV64-NEXT: vmand.mm v9, v9, v10
; RV64-NEXT: vmandn.mm v8, v8, v9
; RV64-NEXT: vmand.mm v9, v0, v9
; RV64-NEXT: vmor.mm v0, v9, v8
; RV64-NEXT: ret
;
; RV32ZVFHMIN-LABEL: vpmerge_vv_v8i1:
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVFHMIN-NEXT: vid.v v10
; RV32ZVFHMIN-NEXT: vmsltu.vx v12, v10, a0
; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v12
; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
; RV32ZVFHMIN-NEXT: ret
;
; RV64ZVFHMIN-LABEL: vpmerge_vv_v8i1:
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64ZVFHMIN-NEXT: vid.v v12
; RV64ZVFHMIN-NEXT: vmsltu.vx v10, v12, a0
; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v10
; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9
; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8
; RV64ZVFHMIN-NEXT: ret
  %v = call <8 x i1> @llvm.vp.merge.v8i1(<8 x i1> %m, <8 x i1> %va, <8 x i1> %vb, i32 %evl)
  ret <8 x i1> %v
}

define <16 x i1> @vpmerge_vv_v16i1(<16 x i1> %va, <16 x i1> %vb, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpmerge_vv_v16i1:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vid.v v12
; RV32-NEXT: vmsltu.vx v10, v12, a0
; RV32-NEXT: vmand.mm v9, v9, v10
; RV32-NEXT: vmandn.mm v8, v8, v9
; RV32-NEXT: vmand.mm v9, v0, v9
; RV32-NEXT: vmor.mm v0, v9, v8
; RV32-NEXT: ret
;
; RV64-LABEL: vpmerge_vv_v16i1:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v10, v16, a0
; RV64-NEXT: vmand.mm v9, v9, v10
; RV64-NEXT: vmandn.mm v8, v8, v9
; RV64-NEXT: vmand.mm v9, v0, v9
; RV64-NEXT: vmor.mm v0, v9, v8
; RV64-NEXT: ret
;
; RV32ZVFHMIN-LABEL: vpmerge_vv_v16i1:
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32ZVFHMIN-NEXT: vid.v v12
; RV32ZVFHMIN-NEXT: vmsltu.vx v10, v12, a0
; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v10
; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
; RV32ZVFHMIN-NEXT: ret
;
; RV64ZVFHMIN-LABEL: vpmerge_vv_v16i1:
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64ZVFHMIN-NEXT: vid.v v16
; RV64ZVFHMIN-NEXT: vmsltu.vx v10, v16, a0
; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v10
; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9
; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8
; RV64ZVFHMIN-NEXT: ret
  %v = call <16 x i1> @llvm.vp.merge.v16i1(<16 x i1> %m, <16 x i1> %va, <16 x i1> %vb, i32 %evl)
  ret <16 x i1> %v
}

define <32 x i1> @vpmerge_vv_v32i1(<32 x i1> %va, <32 x i1> %vb, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpmerge_vv_v32i1:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmsltu.vx v10, v16, a0
; RV32-NEXT: vmand.mm v9, v9, v10
; RV32-NEXT: vmandn.mm v8, v8, v9
; RV32-NEXT: vmand.mm v9, v0, v9
; RV32-NEXT: vmor.mm v0, v9, v8
; RV32-NEXT: ret
;
; RV64-LABEL: vpmerge_vv_v32i1:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-NEXT: vmv.v.i v10, 0
; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64-NEXT: vmerge.vim v12, v10, 1, v0
; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
; RV64-NEXT: vmv1r.v v0, v9
; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, ma
; RV64-NEXT: vmerge.vvm v10, v10, v12, v0
; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: ret
;
; RV32ZVFHMIN-LABEL: vpmerge_vv_v32i1:
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: li a1, 32
; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32ZVFHMIN-NEXT: vid.v v16
; RV32ZVFHMIN-NEXT: vmsltu.vx v10, v16, a0
; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v10
; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
; RV32ZVFHMIN-NEXT: ret
;
; RV64ZVFHMIN-LABEL: vpmerge_vv_v32i1:
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64ZVFHMIN-NEXT: vmv.v.i v10, 0
; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64ZVFHMIN-NEXT: vmerge.vim v12, v10, 1, v0
; RV64ZVFHMIN-NEXT: vmv1r.v v0, v8
; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64ZVFHMIN-NEXT: vmerge.vim v10, v10, 1, v0
; RV64ZVFHMIN-NEXT: vmv1r.v v0, v9
; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e8, m2, tu, ma
; RV64ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0
; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; RV64ZVFHMIN-NEXT: vmsne.vi v0, v10, 0
; RV64ZVFHMIN-NEXT: ret
  %v = call <32 x i1> @llvm.vp.merge.v32i1(<32 x i1> %m, <32 x i1> %va, <32 x i1> %vb, i32 %evl)
  ret <32 x i1> %v
}

define <64 x i1> @vpmerge_vv_v64i1(<64 x i1> %va, <64 x i1> %vb, <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v64i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT: vmsne.vi v0, v12, 0
; CHECK-NEXT: ret
  %v = call <64 x i1> @llvm.vp.merge.v64i1(<64 x i1> %m, <64 x i1> %va, <64 x i1> %vb, i32 %evl)
  ret <64 x i1> %v
}

declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)

define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vpmerge_vx_v2i8(i8 %a, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x i8> poison, i8 %a, i32 0
  %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vpmerge_vi_v2i8(<2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> splat (i8 2), <2 x i8> %vb, i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.merge.v4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32)

define <4 x i8> @vpmerge_vv_v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vpmerge_vx_v4i8(i8 %a, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x i8> poison, i8 %a, i32 0
  %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> splat (i8 2), <4 x i8> %vb, i32 %evl)
  ret <4 x i8> %v
}

declare <6 x i8> @llvm.vp.merge.v6i8(<6 x i1>, <6 x i8>, <6 x i8>, i32)

define <6 x i8> @vpmerge_vv_v6i8(<6 x i8> %va, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v6i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
  ret <6 x i8> %v
}

define <6 x i8> @vpmerge_vx_v6i8(i8 %a, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v6i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <6 x i8> poison, i8 %a, i32 0
  %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
  ret <6 x i8> %v
}

define <6 x i8> @vpmerge_vi_v6i8(<6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v6i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> splat (i8 2), <6 x i8> %vb, i32 %evl)
  ret <6 x i8> %v
}

declare <8 x i7> @llvm.vp.merge.v8i7(<8 x i1>, <8 x i7>, <8 x i7>, i32)

define <8 x i7> @vpmerge_vv_v8i7(<8 x i7> %va, <8 x i7> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8i7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <8 x i7> @llvm.vp.merge.v8i7(<8 x i1> %m, <8 x i7> %va, <8 x i7> %vb, i32 %evl)
  ret <8 x i7> %v
}

define <8 x i7> @vpmerge_vx_v8i7(i7 %a, <8 x i7> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v8i7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x i7> poison, i7 %a, i32 0
  %va = shufflevector <8 x i7> %elt.head, <8 x i7> poison, <8 x i32> zeroinitializer
  %v = call <8 x i7> @llvm.vp.merge.v8i7(<8 x i1> %m, <8 x i7> %va, <8 x i7> %vb, i32 %evl)
  ret <8 x i7> %v
}

define <8 x i7> @vpmerge_vi_v8i7(<8 x i7> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v8i7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <8 x i7> @llvm.vp.merge.v8i7(<8 x i1> %m, <8 x i7> splat (i7 2), <8 x i7> %vb, i32 %evl)
  ret <8 x i7> %v
}

declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)

define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vpmerge_vx_v8i8(i8 %a, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x i8> poison, i8 %a, i32 0
  %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vpmerge_vi_v8i8(<8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> splat (i8 2), <8 x i8> %vb, i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.merge.v16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32)

define <16 x i8> @vpmerge_vv_v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vpmerge_vx_v16i8(i8 %a, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x i8> poison, i8 %a, i32 0
  %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vpmerge_vi_v16i8(<16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> splat (i8 2), <16 x i8> %vb, i32 %evl)
  ret <16 x i8> %v
}

declare <2 x i16> @llvm.vp.merge.v2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32)

define <2 x i16> @vpmerge_vv_v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vpmerge_vx_v2i16(i16 %a, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x i16> poison, i16 %a, i32 0
  %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vpmerge_vi_v2i16(<2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> splat (i16 2), <2 x i16> %vb, i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.merge.v4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32)

define <4 x i16> @vpmerge_vv_v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vpmerge_vx_v4i16(i16 %a, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x i16> poison, i16 %a, i32 0
  %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vpmerge_vi_v4i16(<4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> splat (i16 2), <4 x i16> %vb, i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.merge.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)

define <8 x i16> @vpmerge_vv_v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vpmerge_vx_v8i16(i16 %a, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x i16> poison, i16 %a, i32 0
  %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vpmerge_vi_v8i16(<8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> splat (i16 2), <8 x i16> %vb, i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.merge.v16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32)

define <16 x i16> @vpmerge_vv_v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vpmerge_vx_v16i16(i16 %a, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x i16> poison, i16 %a, i32 0
  %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vpmerge_vi_v16i16(<16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> splat (i16 2), <16 x i16> %vb, i32 %evl)
  ret <16 x i16> %v
}

declare <2 x i32> @llvm.vp.merge.v2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32)

define <2 x i32> @vpmerge_vv_v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vpmerge_vx_v2i32(i32 %a, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x i32> poison, i32 %a, i32 0
  %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vpmerge_vi_v2i32(<2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> splat (i32 2), <2 x i32> %vb, i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.merge.v4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32)

define <4 x i32> @vpmerge_vv_v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vpmerge_vx_v4i32(i32 %a, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x i32> poison, i32 %a, i32 0
  %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vpmerge_vi_v4i32(<4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> splat (i32 2), <4 x i32> %vb, i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)

define <8 x i32> @vpmerge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpmerge_vx_v8i32(i32 %a, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x i32> poison, i32 %a, i32 0
  %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpmerge_vi_v8i32(<8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> splat (i32 2), <8 x i32> %vb, i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.merge.v16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32)

define <16 x i32> @vpmerge_vv_v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vpmerge_vx_v16i32(i32 %a, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vx_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x i32> poison, i32 %a, i32 0
  %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vpmerge_vi_v16i32(<16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> splat (i32 2), <16 x i32> %vb, i32 %evl)
  ret <16 x i32> %v
}

declare <2 x i64> @llvm.vp.merge.v2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32)

define <2 x i64> @vpmerge_vv_v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpmerge_vx_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
; RV32-NEXT: vmerge.vvm v8, v8, v9, v0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpmerge_vx_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64-NEXT: ret
;
; RV32ZVFHMIN-LABEL: vpmerge_vx_v2i64:
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: addi sp, sp, -16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; RV32ZVFHMIN-NEXT: sw a0, 8(sp)
; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: addi a0, sp, 8
; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32ZVFHMIN-NEXT: vlse64.v v9, (a0), zero
; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m1, tu, ma
; RV32ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; RV32ZVFHMIN-NEXT: addi sp, sp, 16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; RV32ZVFHMIN-NEXT: ret
;
; RV64ZVFHMIN-LABEL: vpmerge_vx_v2i64:
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m1, tu, ma
; RV64ZVFHMIN-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64ZVFHMIN-NEXT: ret
  %elt.head = insertelement <2 x i64> poison, i64 %a, i32 0
  %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vpmerge_vi_v2i64(<2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> splat (i64 2), <2 x i64> %vb, i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.merge.v4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32)

define <4 x i64> @vpmerge_vv_v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpmerge_vx_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpmerge_vx_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64-NEXT: ret
;
; RV32ZVFHMIN-LABEL: vpmerge_vx_v4i64:
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: addi sp, sp, -16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; RV32ZVFHMIN-NEXT: sw a0, 8(sp)
; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: addi a0, sp, 8
; RV32ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32ZVFHMIN-NEXT: vlse64.v v10, (a0), zero
; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m2, tu, ma
; RV32ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
; RV32ZVFHMIN-NEXT: addi sp, sp, 16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; RV32ZVFHMIN-NEXT: ret
;
; RV64ZVFHMIN-LABEL: vpmerge_vx_v4i64:
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; RV64ZVFHMIN-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64ZVFHMIN-NEXT: ret
  %elt.head = insertelement <4 x i64> poison, i64 %a, i32 0
  %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vpmerge_vi_v4i64(<4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> splat (i64 2), <4 x i64> %vb, i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.merge.v8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32)

define <8 x i64> @vpmerge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpmerge_vx_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma
; RV32-NEXT: vmerge.vvm v8, v8, v12, v0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpmerge_vx_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64-NEXT: ret
;
; RV32ZVFHMIN-LABEL: vpmerge_vx_v8i64:
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: addi sp, sp, -16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; RV32ZVFHMIN-NEXT: sw a0, 8(sp)
; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: addi a0, sp, 8
; RV32ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32ZVFHMIN-NEXT: vlse64.v v12, (a0), zero
; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m4, tu, ma
; RV32ZVFHMIN-NEXT: vmerge.vvm v8, v8, v12, v0
; RV32ZVFHMIN-NEXT: addi sp, sp, 16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; RV32ZVFHMIN-NEXT: ret
;
; RV64ZVFHMIN-LABEL: vpmerge_vx_v8i64:
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV64ZVFHMIN-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64ZVFHMIN-NEXT: ret
  %elt.head = insertelement <8 x i64> poison, i64 %a, i32 0
  %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpmerge_vi_v8i64(<8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> splat (i64 2), <8 x i64> %vb, i32 %evl)
  ret <8 x i64> %v
}

declare <16 x i64> @llvm.vp.merge.v16i64(<16 x i1>, <16 x i64>, <16 x i64>, i32)

define <16 x i64> @vpmerge_vv_v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpmerge_vx_v16i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpmerge_vx_v16i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma
; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64-NEXT: ret
;
; RV32ZVFHMIN-LABEL: vpmerge_vx_v16i64:
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: addi sp, sp, -16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; RV32ZVFHMIN-NEXT: sw a0, 8(sp)
; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: addi a0, sp, 8
; RV32ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32ZVFHMIN-NEXT: vlse64.v v16, (a0), zero
; RV32ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, tu, ma
; RV32ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
; RV32ZVFHMIN-NEXT: addi sp, sp, 16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; RV32ZVFHMIN-NEXT: ret
;
; RV64ZVFHMIN-LABEL: vpmerge_vx_v16i64:
; RV64ZVFHMIN: # %bb.0:
; RV64ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, tu, ma
; RV64ZVFHMIN-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64ZVFHMIN-NEXT: ret
  %elt.head = insertelement <16 x i64> poison, i64 %a, i32 0
  %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vpmerge_vi_v16i64(<16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vi_v16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: ret
  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> splat (i64 2), <16 x i64> %vb, i32 %evl)
  ret <16 x i64> %v
}

declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32)

define <2 x half> @vpmerge_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vpmerge_vf_v2f16(half %a, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpmerge_vf_v2f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpmerge_vf_v2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <2 x half> poison, half %a, i32 0
  %va = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32)

define <4 x half> @vpmerge_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vpmerge_vf_v4f16(half %a, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpmerge_vf_v4f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpmerge_vf_v4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <4 x half> poison, half %a, i32 0
  %va = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32)

define <8 x half> @vpmerge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpmerge_vf_v8f16(half %a, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpmerge_vf_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpmerge_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <8 x half> poison, half %a, i32 0
  %va = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32)

define <16 x half> @vpmerge_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vpmerge_vf_v16f16(half %a, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpmerge_vf_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma
; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpmerge_vf_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, ma
; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <16 x half> poison, half %a, i32 0
  %va = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl)
  ret <16 x half> %v
}

declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32)

define <2 x float> @vpmerge_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vpmerge_vf_v2f32(float %a, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x float> poison, float %a, i32 0
  %va = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32)

define <4 x float> @vpmerge_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vpmerge_vf_v4f32(float %a, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x float> poison, float %a, i32 0
  %va = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)

define <8 x float> @vpmerge_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpmerge_vf_v8f32(float %a, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x float> poison, float %a, i32 0
  %va = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32)

define <16 x float> @vpmerge_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
  %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vpmerge_vf_v16f32(float %a, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x float> poison, float %a, i32 0
  %va = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32)

define <2 x double> @vpmerge_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vpmerge_vf_v2f64(double %a, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x double> poison, double %a, i32 0
  %va = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32)

define <4 x double> @vpmerge_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vpmerge_vf_v4f64(double %a, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x double> poison, double %a, i32 0
  %va = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)

define <8 x double> @vpmerge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
  %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpmerge_vf_v8f64(double %a, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x double> poison, double %a, i32 0
  %va = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl)
  ret <8 x double> %v
}

declare <16 x double> @llvm.vp.merge.v16f64(<16 x i1>, <16 x double>, <16 x double>, i32)

define <16 x double> @vpmerge_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
  %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vpmerge_vf_v16f64(double %a, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x double> poison, double %a, i32 0
  %va = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
  ret <16 x double> %v
}

declare <32 x double> @llvm.vp.merge.v32f64(<32 x i1>, <32 x double>, <32 x double>, i32)

define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB83_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB83_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB84_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB84_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0
; CHECK-NEXT: ret
  %elt.head = insertelement <32 x double> poison, double %a, i32 0
  %va = shufflevector <32 x double> %elt.head, <32 x double> poison, <32 x i32> zeroinitializer
  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
  ret <32 x double> %v
}