; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

; Tests selection of vector-predicated fused multiply-accumulate where both
; multiplicands come from llvm.vp.fpext of f16 to f32.  With +zvfh the
; extend+fma pair folds into a single widening vfwmacc.vv / vfwmacc.vf;
; with +zvfhmin the f16 inputs are first widened with vfwcvt.f.f.v and the
; accumulate is done at e32 with vfmadd.vv / vfmacc.vv.
; "_unmasked" variants use an all-ones mask; "_tu" variants feed the result
; through llvm.vp.merge with the addend as passthru, which the autogenerated
; checks show being selected with a tail-undisturbed (tu) vsetvli policy.

declare <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1>, <vscale x 1 x float>, <vscale x 1 x float>, i32)

; Masked vector-vector widening multiply-accumulate.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

; All-ones mask: selects the unmasked forms.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}

; Unmasked fma merged under %m with %c as passthru: folds to a tu, mu vfwmacc.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT: vfmacc.vv v10, v11, v8, v0.t
; ZVFHMIN-NEXT: vmv1r.v v8, v10
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

; FIXME: Support this case?
; Masked fma followed by a merge on the same mask; ZVFHMIN keeps the vmerge.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v8, v0
; ZVFHMIN-NEXT: vmv1r.v v8, v10
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

; Merge under an all-ones mask: tail-undisturbed, unmasked body.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT: vfmacc.vv v10, v11, v8
; ZVFHMIN-NEXT: vmv1r.v v8, v10
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

; Vector-scalar form: one multiplicand is a splatted f16 scalar.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v9, v0.t
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

; As above with the two multiplicands of the fma commuted.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v11, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v9, v0.t
; ZVFHMIN-NEXT: vmv1r.v v8, v10
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v9
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT: vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT: vfmacc.vv v9, v11, v8, v0.t
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT: vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT: vfmacc.vv v9, v8, v11, v0.t
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT: vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT: vfmacc.vv v9, v11, v8
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

; nxv2f32 cases (mf2 -> m1).
declare <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfmacc_vv_nxv2f32(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vv_nxv2f32_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v9, v0.t
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v9
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}

; nxv4f32 cases (m1 -> m2).
declare <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vfmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT: vmv2r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT: vmv.v.v v8, v12
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9
; ZVFH-NEXT: vmv2r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10
; ZVFHMIN-NEXT: vmv.v.v v8, v12
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfwmacc.vf v10, fa0, v8, v0.t
; ZVFH-NEXT: vmv2r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v10, v0.t
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfwmacc.vf v10, fa0, v8
; ZVFH-NEXT: vmv2r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v10
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}

; nxv8f32 cases (m2 -> m4).
declare <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.select.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfwmacc.vv v12, v8, v10, v0.t
; ZVFH-NEXT: vmv4r.v v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT: vmv.v.v v8, v16
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfwmacc.vv v12, v8, v10
; ZVFH-NEXT: vmv4r.v v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12
; ZVFHMIN-NEXT: vmv.v.v v8, v16
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfwmacc.vf v12, fa0, v8, v0.t
; ZVFH-NEXT: vmv4r.v v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v12, v0.t
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfwmacc.vf v12, fa0, v8
; ZVFH-NEXT: vmv4r.v v8, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v12
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}

; nxv16f32 cases (m4 -> m8); the masked ZVFHMIN version needs a stack spill
; of the accumulator because the widened operands occupy all m8 groups.
declare <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 16 x float> @llvm.vp.select.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v16, v8, v12, v0.t
; ZVFH-NEXT: vmv8r.v v8, v16
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT: vmv.v.v v8, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vv_nxv16f32_unmasked(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v16, v8, v12
; ZVFH-NEXT: vmv8r.v v8, v16
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT: vmv.v.v v8, v24
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 16 x float>
@llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 671 %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 672 %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 673 ret <vscale x 16 x float> %v 674} 675 676define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) { 677; ZVFH-LABEL: vfmacc_vf_nxv16f32: 678; ZVFH: # %bb.0: 679; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 680; ZVFH-NEXT: vfwmacc.vf v16, fa0, v8, v0.t 681; ZVFH-NEXT: vmv8r.v v8, v16 682; ZVFH-NEXT: ret 683; 684; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32: 685; ZVFHMIN: # %bb.0: 686; ZVFHMIN-NEXT: fmv.x.h a1, fa0 687; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 688; ZVFHMIN-NEXT: vmv.v.x v4, a1 689; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t 690; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t 691; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 692; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t 693; ZVFHMIN-NEXT: ret 694 %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0 695 %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer 696 %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) 697 %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl) 698 %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 %evl) 699 ret <vscale x 16 x float> %v 700} 701 702define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x 
half> %va, half %b, <vscale x 16 x float> %vc, i32 zeroext %evl) { 703; ZVFH-LABEL: vfmacc_vf_nxv16f32_unmasked: 704; ZVFH: # %bb.0: 705; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 706; ZVFH-NEXT: vfwmacc.vf v16, fa0, v8 707; ZVFH-NEXT: vmv8r.v v8, v16 708; ZVFH-NEXT: ret 709; 710; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked: 711; ZVFHMIN: # %bb.0: 712; ZVFHMIN-NEXT: fmv.x.h a1, fa0 713; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 714; ZVFHMIN-NEXT: vmv.v.x v24, a1 715; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 716; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 717; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 718; ZVFHMIN-NEXT: vfmadd.vv v8, v0, v16 719; ZVFHMIN-NEXT: ret 720 %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0 721 %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer 722 %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 723 %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 724 %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 725 ret <vscale x 16 x float> %v 726} 727 728declare <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32) 729declare <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) 730declare <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32) 731declare <vscale x 1 x double> @llvm.vp.select.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32) 732declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) 
733 734define <vscale x 1 x double> @vfmacc_vv_nxv1f64(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 735; CHECK-LABEL: vfmacc_vv_nxv1f64: 736; CHECK: # %bb.0: 737; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 738; CHECK-NEXT: vfwmacc.vv v10, v8, v9, v0.t 739; CHECK-NEXT: vmv1r.v v8, v10 740; CHECK-NEXT: ret 741 %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> %m, i32 %evl) 742 %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl) 743 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl) 744 ret <vscale x 1 x double> %v 745} 746 747define <vscale x 1 x double> @vfmacc_vv_nxv1f64_unmasked(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, i32 zeroext %evl) { 748; CHECK-LABEL: vfmacc_vv_nxv1f64_unmasked: 749; CHECK: # %bb.0: 750; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 751; CHECK-NEXT: vfwmacc.vv v10, v8, v9 752; CHECK-NEXT: vmv1r.v v8, v10 753; CHECK-NEXT: ret 754 %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 755 %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 756 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 757 ret <vscale x 1 x double> %v 758} 759 760define <vscale x 1 x double> @vfmacc_vf_nxv1f64(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) { 761; CHECK-LABEL: vfmacc_vf_nxv1f64: 762; CHECK: # %bb.0: 763; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 764; 
CHECK-NEXT: vfwmacc.vf v9, fa0, v8, v0.t 765; CHECK-NEXT: vmv1r.v v8, v9 766; CHECK-NEXT: ret 767 %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0 768 %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer 769 %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl) 770 %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl) 771 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 %evl) 772 ret <vscale x 1 x double> %v 773} 774 775define <vscale x 1 x double> @vfmacc_vf_nxv1f64_unmasked(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, i32 zeroext %evl) { 776; CHECK-LABEL: vfmacc_vf_nxv1f64_unmasked: 777; CHECK: # %bb.0: 778; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 779; CHECK-NEXT: vfwmacc.vf v9, fa0, v8 780; CHECK-NEXT: vmv1r.v v8, v9 781; CHECK-NEXT: ret 782 %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0 783 %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer 784 %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 785 %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 786 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 787 ret <vscale x 1 x double> %v 788} 789 790declare <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32) 791declare <vscale x 2 x 
double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) 792declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32) 793declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32) 794declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) 795 796define <vscale x 2 x double> @vfmacc_vv_nxv2f64(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 797; CHECK-LABEL: vfmacc_vv_nxv2f64: 798; CHECK: # %bb.0: 799; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 800; CHECK-NEXT: vfwmacc.vv v10, v8, v9, v0.t 801; CHECK-NEXT: vmv2r.v v8, v10 802; CHECK-NEXT: ret 803 %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %evl) 804 %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl) 805 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl) 806 ret <vscale x 2 x double> %v 807} 808 809define <vscale x 2 x double> @vfmacc_vv_nxv2f64_unmasked(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, i32 zeroext %evl) { 810; CHECK-LABEL: vfmacc_vv_nxv2f64_unmasked: 811; CHECK: # %bb.0: 812; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 813; CHECK-NEXT: vfwmacc.vv v10, v8, v9 814; CHECK-NEXT: vmv2r.v v8, v10 815; CHECK-NEXT: ret 816 %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 817 %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 818 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale 
x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 819 ret <vscale x 2 x double> %v 820} 821 822define <vscale x 2 x double> @vfmacc_vf_nxv2f64(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) { 823; CHECK-LABEL: vfmacc_vf_nxv2f64: 824; CHECK: # %bb.0: 825; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 826; CHECK-NEXT: vfwmacc.vf v10, fa0, v8, v0.t 827; CHECK-NEXT: vmv2r.v v8, v10 828; CHECK-NEXT: ret 829 %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0 830 %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer 831 %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl) 832 %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl) 833 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 %evl) 834 ret <vscale x 2 x double> %v 835} 836 837define <vscale x 2 x double> @vfmacc_vf_nxv2f64_unmasked(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, i32 zeroext %evl) { 838; CHECK-LABEL: vfmacc_vf_nxv2f64_unmasked: 839; CHECK: # %bb.0: 840; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 841; CHECK-NEXT: vfwmacc.vf v10, fa0, v8 842; CHECK-NEXT: vmv2r.v v8, v10 843; CHECK-NEXT: ret 844 %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0 845 %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer 846 %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 847 %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x 
i1> splat (i1 -1), i32 %evl) 848 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 849 ret <vscale x 2 x double> %v 850} 851 852declare <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32) 853declare <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32) 854declare <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32) 855declare <vscale x 4 x double> @llvm.vp.select.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32) 856declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) 857 858define <vscale x 4 x double> @vfmacc_vv_nxv4f64(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 859; CHECK-LABEL: vfmacc_vv_nxv4f64: 860; CHECK: # %bb.0: 861; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 862; CHECK-NEXT: vfwmacc.vv v12, v8, v10, v0.t 863; CHECK-NEXT: vmv4r.v v8, v12 864; CHECK-NEXT: ret 865 %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %m, i32 %evl) 866 %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl) 867 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl) 868 ret <vscale x 4 x double> %v 869} 870 871define <vscale x 4 x double> @vfmacc_vv_nxv4f64_unmasked(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, i32 zeroext %evl) { 872; CHECK-LABEL: vfmacc_vv_nxv4f64_unmasked: 873; CHECK: # %bb.0: 874; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 
875; CHECK-NEXT: vfwmacc.vv v12, v8, v10 876; CHECK-NEXT: vmv4r.v v8, v12 877; CHECK-NEXT: ret 878 %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 879 %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 880 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 881 ret <vscale x 4 x double> %v 882} 883 884define <vscale x 4 x double> @vfmacc_vf_nxv4f64(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) { 885; CHECK-LABEL: vfmacc_vf_nxv4f64: 886; CHECK: # %bb.0: 887; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 888; CHECK-NEXT: vfwmacc.vf v12, fa0, v8, v0.t 889; CHECK-NEXT: vmv4r.v v8, v12 890; CHECK-NEXT: ret 891 %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0 892 %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer 893 %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl) 894 %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl) 895 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 %evl) 896 ret <vscale x 4 x double> %v 897} 898 899define <vscale x 4 x double> @vfmacc_vf_nxv4f64_unmasked(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, i32 zeroext %evl) { 900; CHECK-LABEL: vfmacc_vf_nxv4f64_unmasked: 901; CHECK: # %bb.0: 902; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 903; CHECK-NEXT: vfwmacc.vf v12, fa0, v8 904; CHECK-NEXT: vmv4r.v v8, v12 905; CHECK-NEXT: ret 906 
%elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0 907 %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer 908 %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 909 %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 910 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 911 ret <vscale x 4 x double> %v 912} 913 914declare <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32) 915declare <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32) 916declare <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32) 917declare <vscale x 8 x double> @llvm.vp.select.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32) 918declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) 919 920define <vscale x 8 x double> @vfmacc_vv_nxv8f64(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 921; CHECK-LABEL: vfmacc_vv_nxv8f64: 922; CHECK: # %bb.0: 923; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 924; CHECK-NEXT: vfwmacc.vv v16, v8, v12, v0.t 925; CHECK-NEXT: vmv8r.v v8, v16 926; CHECK-NEXT: ret 927 %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> %m, i32 %evl) 928 %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl) 929 %v = call <vscale x 8 x double> 
@llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl) 930 ret <vscale x 8 x double> %v 931} 932 933define <vscale x 8 x double> @vfmacc_vv_nxv8f64_unmasked(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, i32 zeroext %evl) { 934; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked: 935; CHECK: # %bb.0: 936; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 937; CHECK-NEXT: vfwmacc.vv v16, v8, v12 938; CHECK-NEXT: vmv8r.v v8, v16 939; CHECK-NEXT: ret 940 %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 941 %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 942 %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 943 ret <vscale x 8 x double> %v 944} 945 946define <vscale x 8 x double> @vfmacc_vf_nxv8f64(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) { 947; CHECK-LABEL: vfmacc_vf_nxv8f64: 948; CHECK: # %bb.0: 949; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 950; CHECK-NEXT: vfwmacc.vf v16, fa0, v8, v0.t 951; CHECK-NEXT: vmv8r.v v8, v16 952; CHECK-NEXT: ret 953 %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0 954 %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer 955 %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl) 956 %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl) 957 %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x 
double> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

; Scalar-splat operand, unmasked: folds to a single vfwmacc.vf.
define <vscale x 8 x double> @vfmacc_vf_nxv8f64_unmasked(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vf v16, fa0, v8
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}

declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

; Double-width widening (f16 sources, f64 accumulator): one explicit
; vfwcvt per source (f16 -> f32), then a vfwmacc does the second
; widening step (f32 -> f64) fused with the accumulate.
define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v11, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v11, v8
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v11, v8
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v12, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v12, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v16, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v10, v16, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
; CHECK-NEXT:    vfwcvt.f.f.v v16, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v10, v16
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v24, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v12, v24, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
; CHECK-NEXT:    vfwcvt.f.f.v v24, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v12, v24
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}

; Both multiplicands are the same extended value: ZVFH still forms
; vfwmacc.vv with a repeated source register; ZVFHMIN converts once
; and squares via vfmadd.vv.
define <vscale x 1 x float> @vfmacc_squared_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v9, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_squared_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v8
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v9, v10
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}