; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 \
; RUN:   -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 \
; RUN:   -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN

; This tests a mix of vfmacc and vfmadd by using different operand orders to
; trigger commuting in TwoAddressInstructionPass.
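;
; There is no bfloat vector FMA in any of these configurations, so in the
; bfloat tests below the operands are widened to f32 with vfwcvtbf16.f.f.v,
; the FMA is performed at e32, and the result is narrowed back with
; vfncvtbf16.f.f.w. The _vf tests splat the scalar through a GPR
; (fmv.x.h + vmv.v.x) because no bf16 vfmadd.vf form is available.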

define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v10, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x bfloat> %vd
}

define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v11, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x bfloat> %vd
}

define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v9, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x bfloat> %vd
}

define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v11, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fma.nxv2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x bfloat> %vd
}

define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v14, v10, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x bfloat> %vd
}

define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v14, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x bfloat> %vd
}

define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v12
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v20, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x bfloat> %vd
}

define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v20, v16, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x bfloat> %vd
}
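
; From nxv16bf16 on, widening a whole register group to f32 doubles its
; LMUL, so register pressure forces some operands into vlenb-sized stack
; slots (the "Unknown-size Folded Spill/Reload" accesses below).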

define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v0, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x bfloat> %vd
}

define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x bfloat> %vd
}

define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v0, v16
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv8r.v v16, v8
; CHECK-NEXT:    vl8re16.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v8, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v28
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %vd = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fma.nxv32bf16(<vscale x 32 x bfloat> %vc, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x bfloat> %vd
}

define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, bfloat %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT:    vmv.v.x v24, a0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v16, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v4
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 32 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
  %vd = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fma.nxv32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x bfloat> %vd
}
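
; For f16 the two configurations diverge: ZVFH selects a native e16
; vfmadd/vfmacc (the mnemonic reflecting the commuting under test), while
; ZVFHMIN widens to f32 just like the bfloat tests above.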

declare <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)

define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v10, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x half> %vd
}

define <vscale x 1 x half> @vfmadd_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vmv.v.x v9, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat, <vscale x 1 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x half> %vd
}

declare <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>, metadata, metadata)

define <vscale x 2 x half> @vfmadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v10, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v10, v9, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x half> %vd
}

define <vscale x 2 x half> @vfmadd_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vmv.v.x v8, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x half> %splat, <vscale x 2 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x half> %vd
}

declare <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>, metadata, metadata)

define <vscale x 4 x half> @vfmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x half> %vd
}

define <vscale x 4 x half> @vfmadd_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vmv.v.x v9, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat, <vscale x 4 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x half> %vd
}

declare <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, metadata, metadata)

define <vscale x 8 x half> @vfmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v12, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v20, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x half> %vd
}

define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vmv.v.x v8, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %splat, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x half> %vd
}

declare <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x half>, metadata, metadata)
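
; For nxv32f16 the ZVFHMIN lowering processes the m8 operands as two m4
; halves, widening each half to f32 and spilling intermediates to the
; stack, mirroring the nxv32bf16 tests above.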

define <vscale x 16 x half> @vfmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v16, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 2
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl4r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 2
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x half> %vd
}

define <vscale x 16 x half> @vfmadd_vf_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vmv.v.x v12, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x half> %vd
}

declare <vscale x 32 x half> @llvm.experimental.constrained.fma.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x half>, metadata, metadata)

define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) strictfp {
; ZVFH-LABEL: vfmadd_vv_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vl8re16.v v24, (a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v16, v24
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 5
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v0, v16
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v16, v8
; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 32 x half> @llvm.experimental.constrained.fma.nxv32f16(<vscale x 32 x half> %vc, <vscale x 32 x half> %vb, <vscale x 32 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x half> %vd
}
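
; f32 and f64 are legal vector FMA types in all tested configurations, so
; the constrained fma lowers directly to vfmadd/vfmacc; which mnemonic
; appears depends on the operand order after commuting.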

define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, half %c) strictfp {
; ZVFH-LABEL: vfmadd_vf_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v24, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v24, a0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 32 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
  %vd = call <vscale x 32 x half> @llvm.experimental.constrained.fma.nxv32f16(<vscale x 32 x half> %vb, <vscale x 32 x half> %splat, <vscale x 32 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 32 x half> %vd
}

declare <vscale x 1 x float> @llvm.experimental.constrained.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, metadata, metadata)

define <vscale x 1 x float> @vfmadd_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x float> @llvm.experimental.constrained.fma.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x float> %vd
}

define <vscale x 1 x float> @vfmadd_vf_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x float> @llvm.experimental.constrained.fma.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %splat, <vscale x 1 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x float> %vd
}

declare <vscale x 2 x float> @llvm.experimental.constrained.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata)

define <vscale x 2 x float> @vfmadd_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v9
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x float> @llvm.experimental.constrained.fma.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vc, <vscale x 2 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x float> %vd
}

define <vscale x 2 x float> @vfmadd_vf_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x float> @llvm.experimental.constrained.fma.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x float> %splat, <vscale x 2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x float> %vd
}

declare <vscale x 4 x float> @llvm.experimental.constrained.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, metadata, metadata)

define <vscale x 4 x float> @vfmadd_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x float> @llvm.experimental.constrained.fma.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x float> %va, <vscale x 4 x float> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x float> %vd
}

define <vscale x 4 x float> @vfmadd_vf_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x float> @llvm.experimental.constrained.fma.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %splat, <vscale x 4 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x float> %vd
}

declare <vscale x 8 x float> @llvm.experimental.constrained.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, metadata, metadata)

define <vscale x 8 x float> @vfmadd_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmacc.vv v8, v16, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x float> @llvm.experimental.constrained.fma.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %vc, <vscale x 8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x float> %vd
}

define <vscale x 8 x float> @vfmadd_vf_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x float> @llvm.experimental.constrained.fma.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %splat, <vscale x 8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x float> %vd
}

declare <vscale x 16 x float> @llvm.experimental.constrained.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, metadata, metadata)

define <vscale x 16 x float> @vfmadd_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x float> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v24, v16
; CHECK-NEXT:    ret
  %vd = call <vscale x 16 x float> @llvm.experimental.constrained.fma.nxv16f32(<vscale x 16 x float> %vc, <vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x float> %vd
}

define <vscale x 16 x float> @vfmadd_vf_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, float %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x float> @llvm.experimental.constrained.fma.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %splat, <vscale x 16 x float> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 16 x float> %vd
}

declare <vscale x 1 x double> @llvm.experimental.constrained.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, metadata, metadata)

define <vscale x 1 x double> @vfmadd_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x double> @llvm.experimental.constrained.fma.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x double> %vd
}

define <vscale x 1 x double> @vfmadd_vf_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, double %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x double> @llvm.experimental.constrained.fma.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %splat, <vscale x 1 x double> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 1 x double> %vd
}

declare <vscale x 2 x double> @llvm.experimental.constrained.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, metadata, metadata)

define <vscale x 2 x double> @vfmadd_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x double> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x double> @llvm.experimental.constrained.fma.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vc, <vscale x 2 x double> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x double> %vd
}

define <vscale x 2 x double> @vfmadd_vf_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, double %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x double> @llvm.experimental.constrained.fma.nxv2f64(<vscale x 2 x double> %vb, <vscale x 2 x double> %splat, <vscale x 2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 2 x double> %vd
}

declare <vscale x 4 x double> @llvm.experimental.constrained.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>, metadata, metadata)

define <vscale x 4 x double> @vfmadd_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x double> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x double> @llvm.experimental.constrained.fma.nxv4f64(<vscale x 4 x double> %vb, <vscale x 4 x double> %va, <vscale x 4 x double> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x double> %vd
}

define <vscale x 4 x double> @vfmadd_vf_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, double %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x double> @llvm.experimental.constrained.fma.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %splat, <vscale x 4 x double> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 4 x double> %vd
}

declare <vscale x 8 x double> @llvm.experimental.constrained.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>, metadata, metadata)

define <vscale x 8 x double> @vfmadd_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x double> %vc) strictfp {
; CHECK-LABEL: vfmadd_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmacc.vv v8, v16, v24
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x double> @llvm.experimental.constrained.fma.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %vc, <vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x double> %vd
}

define <vscale x 8 x double> @vfmadd_vf_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, double %c) strictfp {
; CHECK-LABEL: vfmadd_vf_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x double> @llvm.experimental.constrained.fma.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %splat, <vscale x 8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <vscale x 8 x double> %vd
}