; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+m,+zfbfmin,+zvfbfmin -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+m,+zfbfmin,+zvfbfmin -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zvfh,+m,+zfbfmin,+zvfbfmin -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+zvfh,+m,+zfbfmin,+zvfbfmin -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s

; Codegen test for @llvm.vp.merge on scalable bfloat vectors
; (<vscale x N x bfloat> for N = 1, 2, 4, 8, 16, 32).
;
; The four llc invocations above cover riscv32 and riscv64, each with and
; without +zvfh. They all use the default FileCheck prefix, so one set of
; expected assembly has to match every configuration.
;
; Each element count has two functions:
;   vpmerge_vv_*  - both merge operands are vector arguments. The expected
;                   code is a single tail-undisturbed vmerge.vvm followed by
;                   a whole-register move into v8.
;   vpmerge_vf_*  - the first merge operand is a splat of the scalar bfloat %a
;                   (insertelement + shufflevector with a zeroinitializer
;                   mask). The expected code moves the scalar to a GPR with
;                   fmv.x.h, splats it with vmv.v.x under a tail-agnostic
;                   vsetvli, then switches to tail-undisturbed for vmerge.vvm.

; <vscale x 1 x bfloat>: expected vtype is e16, mf4.
declare <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)

define <vscale x 1 x bfloat> @vpmerge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vpmerge_vf_nxv1bf16(bfloat %a, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.merge.nxv1bf16(<vscale x 1 x i1> %m, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

; <vscale x 2 x bfloat>: expected vtype is e16, mf2.
declare <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)

define <vscale x 2 x bfloat> @vpmerge_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1> %m, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vpmerge_vf_nxv2bf16(bfloat %a, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x bfloat> @llvm.vp.merge.nxv2bf16(<vscale x 2 x i1> %m, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

; <vscale x 4 x bfloat>: expected vtype is e16, m1.
declare <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)

define <vscale x 4 x bfloat> @vpmerge_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1> %m, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vpmerge_vf_nxv4bf16(bfloat %a, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x bfloat> @llvm.vp.merge.nxv4bf16(<vscale x 4 x i1> %m, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

; <vscale x 8 x bfloat>: expected vtype is e16, m2.
declare <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)

define <vscale x 8 x bfloat> @vpmerge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vpmerge_vf_nxv8bf16(bfloat %a, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> %m, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

; <vscale x 16 x bfloat>: expected vtype is e16, m4.
declare <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)

define <vscale x 16 x bfloat> @vpmerge_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1> %m, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

define <vscale x 16 x bfloat> @vpmerge_vf_nxv16bf16(bfloat %a, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a1
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %v = call <vscale x 16 x bfloat> @llvm.vp.merge.nxv16bf16(<vscale x 16 x i1> %m, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

; <vscale x 32 x bfloat>: expected vtype is e16, m8.
declare <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i32)

define <vscale x 32 x bfloat> @vpmerge_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1> %m, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vpmerge_vf_nxv32bf16(bfloat %a, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vmv.v.x v16, a1
; CHECK-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
  %v = call <vscale x 32 x bfloat> @llvm.vp.merge.nxv32bf16(<vscale x 32 x i1> %m, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}