; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+m,+zfbfmin,+zvfbfmin -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+m,+zfbfmin,+zvfbfmin -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zvfh,+m,+zfbfmin,+zvfbfmin -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+m,+zvfh,+zfbfmin,+zvfbfmin -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

; Codegen tests for llvm.vp.merge on fixed-length bfloat vectors
; (<2 x bfloat> through <16 x bfloat>).
;
; The four llc invocations above cover riscv32 and riscv64, each with and
; without +zvfh. They all use the same default check prefix, so the expected
; assembly below has to be identical for every configuration.
;
; Each vector width has two tests:
;   * vpmerge_vv_*: both merge operands are vectors.
;   * vpmerge_vf_*: the first merge operand is a splat of a scalar bfloat,
;     built with insertelement + shufflevector.

declare <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32)

; Vector/vector merge, <2 x bfloat>: expects one vmerge.vvm at e16/mf4 under
; a "tu" vsetvli whose AVL is the EVL in a0, then a copy of the result to v8.
define <2 x bfloat> @vpmerge_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
  ret <2 x bfloat> %v
}

; Scalar-splat/vector merge, <2 x bfloat>: expects the scalar to be moved
; from fa0 to a GPR with fmv.x.h, splatted with vmv.v.x at VL=2, and then
; merged with vmerge.vvm under a "tu" vsetvli that uses the EVL in a0.
define <2 x bfloat> @vpmerge_vf_v2bf16(bfloat %a, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <2 x bfloat> %elt.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
  %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
  ret <2 x bfloat> %v
}

declare <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32)

; Vector/vector merge, <4 x bfloat>: same sequence as the v2 case, at e16/mf2.
define <4 x bfloat> @vpmerge_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
  ret <4 x bfloat> %v
}

; Scalar-splat/vector merge, <4 x bfloat>: splat at VL=4, e16/mf2, then a
; vmerge.vvm under the EVL in a0.
define <4 x bfloat> @vpmerge_vf_v4bf16(bfloat %a, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <4 x bfloat> %elt.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
  %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
  ret <4 x bfloat> %v
}

declare <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32)

; Vector/vector merge, <8 x bfloat>: same sequence at e16/m1.
define <8 x bfloat> @vpmerge_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
  ret <8 x bfloat> %v
}

; Scalar-splat/vector merge, <8 x bfloat>: splat at VL=8, e16/m1, then a
; vmerge.vvm under the EVL in a0.
define <8 x bfloat> @vpmerge_vf_v8bf16(bfloat %a, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <8 x bfloat> %elt.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
  ret <8 x bfloat> %v
}

declare <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32)

; Vector/vector merge, <16 x bfloat>: same sequence at e16/m2. The operands
; are in v8 and v10, and the result is copied back with vmv2r.v.
define <16 x bfloat> @vpmerge_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vv_v16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)
  ret <16 x bfloat> %v
}

; Scalar-splat/vector merge, <16 x bfloat>: splat into v10 at VL=16, e16/m2,
; then a vmerge.vvm under the EVL in a0.
define <16 x bfloat> @vpmerge_vf_v16bf16(bfloat %a, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpmerge_vf_v16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x bfloat> poison, bfloat %a, i32 0
  %va = shufflevector <16 x bfloat> %elt.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
  %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)
  ret <16 x bfloat> %v
}