; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

; Tests lowering of the @llvm.vp.fmuladd.* vector-predication intrinsic for
; fixed-length f16/f32/f64 vectors on RV32 and RV64 (V + Zvfh). Each element
; type/width is exercised in four forms: vector-vector and vector-scalar
; (scalar splatted via insertelement + shufflevector), each with an explicit
; mask %m and with an all-true mask ("unmasked"). The expected lowering is a
; single vsetvli on %evl followed by vfmadd.vv / vfmadd.vf. The v15/v16/v32
; x double cases additionally cover operands passed on the stack and, for
; v32f64, splitting into two m8 halves with spills. Do not hand-edit the
; autogenerated assertion lines; regenerate them with the script named above.

declare <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)

define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32)

define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32)

define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32)

define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

declare <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)

define <2 x float> @vfma_vv_v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32(<2 x float> %va, float %b, <2 x float> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)

define <4 x float> @vfma_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32(<4 x float> %va, float %b, <4 x float> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)

define <8 x float> @vfma_vv_v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32(<8 x float> %va, float %b, <8 x float> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)

define <16 x float> @vfma_vv_v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, <16 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32(<16 x float> %va, float %b, <16 x float> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)

define <2 x double> @vfma_vv_v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, <2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64(<2 x double> %va, double %b, <2 x double> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)

define <4 x double> @vfma_vv_v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vf_v4f64(<4 x double> %va, double %b, <4 x double> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)

define <8 x double> @vfma_vv_v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, <8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vf_v8f64(<8 x double> %va, double %b, <8 x double> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

; v15f64: a non-power-of-two element count; the third operand arrives via a
; stack pointer (a0) and is loaded with vle64.v before the fused multiply-add.
declare <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double>, <15 x double>, <15 x double>, <15 x i1>, i32)

define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

declare <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double>, <16 x double>, <16 x double>, <16 x i1>, i32)

define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vf_v16f64(<16 x double> %va, double %b, <16 x double> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

; v32f64 exceeds what fits in the vector register file at m8, so the operation
; is split into two 16-element halves with vector spills/reloads on the stack
; and an EVL-driven branch selecting how many elements the first half handles.
declare <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double>, <32 x double>, <32 x double>, <32 x i1>, i32)

define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v7, v0, 2
; CHECK-NEXT:    bltu a4, a1, .LBB50_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB50_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a2, 24
; CHECK-NEXT:    mul a1, a1, a2
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    vmv.v.v v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %b, <32 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    vle64.v v0, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    bltu a4, a1, .LBB51_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB51_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v8, v16
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v16, v8
; CHECK-NEXT:    vmv8r.v v8, v0
; CHECK-NEXT:    vmv.v.v v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}