; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

; This tests a mix of vfmacc and vfmadd by using different operand orders to
; trigger commuting in TwoAddressInstructionPass.

define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v10, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc)
  ret <vscale x 1 x bfloat> %vd
}

define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16_commuted(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv1bf16_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, <vscale x 1 x bfloat> %va)
  ret <vscale x 1 x bfloat> %vd
}

define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v11, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb)
  ret <vscale x 1 x bfloat> %vd
}

declare <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>)

define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v9, v11
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb)
  ret <vscale x 2 x bfloat> %vd
}

define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v11, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va)
  ret <vscale x 2 x bfloat> %vd
}

declare <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>)

define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v14, v10, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc)
  ret <vscale x 4 x bfloat> %vd
}

define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v14, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %vb)
  ret <vscale x 4 x bfloat> %vd
}

declare <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v20, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va)
  ret <vscale x 8 x bfloat> %vd
}

define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v20, v16, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va)
  ret <vscale x 8 x bfloat> %vd
}

declare <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>)

define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v16
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v0, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %vd = call <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
  ret <vscale x 16 x bfloat> %vd
}

define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) {
; CHECK-LABEL: vfmadd_vf_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a0, fa0
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %vb)
  ret <vscale x 16 x bfloat> %vd
}

declare <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>)

define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv32bf16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    addi sp, sp, -16
; ZVFH-NEXT:    .cfi_def_cfa_offset 16
; ZVFH-NEXT:    csrr a1, vlenb
; ZVFH-NEXT:    slli a1, a1, 5
; ZVFH-NEXT:    sub sp, sp, a1
; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFH-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vmv8r.v v0, v16
; ZVFH-NEXT:    addi a1, sp, 16
; ZVFH-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFH-NEXT:    vmv8r.v v16, v8
; ZVFH-NEXT:    vl8re16.v v8, (a0)
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v0
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT:    vfmadd.vv v0, v8, v24
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v20
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    addi a0, sp, 16
; ZVFH-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v28
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT:    vfmadd.vv v16, v8, v24
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 5
; ZVFH-NEXT:    add sp, sp, a0
; ZVFH-NEXT:    .cfi_def_cfa sp, 16
; ZVFH-NEXT:    addi sp, sp, 16
; ZVFH-NEXT:    .cfi_def_cfa_offset 0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv32bf16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 5
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v0, v16
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v16, v8
; ZVFHMIN-NEXT:    vl8re16.v v8, (a0)
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v28
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat> %vc, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va)
  ret <vscale x 32 x bfloat> %vd
}

define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, bfloat %c) {
; ZVFH-LABEL: vfmadd_vf_nxv32bf16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    addi sp, sp, -16
; ZVFH-NEXT:    .cfi_def_cfa_offset 16
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 5
; ZVFH-NEXT:    sub sp, sp, a0
; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vmv8r.v v0, v16
; ZVFH-NEXT:    addi a0, sp, 16
; ZVFH-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    vmv8r.v v16, v8
; ZVFH-NEXT:    fmv.x.h a0, fa0
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFH-NEXT:    csrr a1, vlenb
; ZVFH-NEXT:    slli a1, a1, 4
; ZVFH-NEXT:    add a1, sp, a1
; ZVFH-NEXT:    addi a1, a1, 16
; ZVFH-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v0
; ZVFH-NEXT:    csrr a1, vlenb
; ZVFH-NEXT:    slli a1, a1, 3
; ZVFH-NEXT:    add a1, sp, a1
; ZVFH-NEXT:    addi a1, a1, 16
; ZVFH-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFH-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vmv.v.x v24, a0
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v0
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v24, v0
; ZVFH-NEXT:    vmv.v.v v24, v8
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT:    addi a0, sp, 16
; ZVFH-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 3
; ZVFH-NEXT:    mv a1, a0
; ZVFH-NEXT:    slli a0, a0, 1
; ZVFH-NEXT:    add a0, a0, a1
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v4
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 4
; ZVFH-NEXT:    add a0, sp, a0
; ZVFH-NEXT:    addi a0, a0, 16
; ZVFH-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT:    vfmadd.vv v16, v8, v0
; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v24
; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
; ZVFH-NEXT:    csrr a0, vlenb
; ZVFH-NEXT:    slli a0, a0, 5
; ZVFH-NEXT:    add sp, sp, a0
; ZVFH-NEXT:    .cfi_def_cfa sp, 16
; ZVFH-NEXT:    addi sp, sp, 16
; ZVFH-NEXT:    .cfi_def_cfa_offset 0
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv32bf16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv8r.v v0, v16
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v16, v8
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 4
; ZVFHMIN-NEXT:    add a1, sp, a1
; ZVFHMIN-NEXT:    addi a1, a1, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v0
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    add a1, sp, a1
; ZVFHMIN-NEXT:    addi a1, a1, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v24, a0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v0
; ZVFHMIN-NEXT:    vmv.v.v v24, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v4
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v24
; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 32 x bfloat> poison, bfloat %c, i32 0
  %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
  %vd = call <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va)
  ret <vscale x 32 x bfloat> %vd
}

declare <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>)

define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc)
  ret <vscale x 1 x half> %vd
}

define <vscale x 1 x half> @vfmadd_vv_nxv1f16_commuted(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv1f16_commuted:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v10, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv1f16_commuted:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %vc, <vscale x 1 x half> %va)
  ret <vscale x 1 x half> %vd
}

define <vscale x 1 x half> @vfmadd_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vmv.v.x v9, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat, <vscale x 1 x half> %vb)
  ret <vscale x 1 x half> %vd
}

declare <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)

define <vscale x 2 x half> @vfmadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v10, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb)
  ret <vscale x 2 x half> %vd
}

define <vscale x 2 x half> @vfmadd_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vmv.v.x v8, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %vb, <vscale x 2 x half> %splat, <vscale x 2 x half> %va)
  ret <vscale x 2 x half> %vd
}

declare <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)

define <vscale x 4 x half> @vfmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %vc)
  ret <vscale x 4 x half> %vd
}

define <vscale x 4 x half> @vfmadd_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vmv.v.x v9, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat, <vscale x 4 x half> %vb)
  ret <vscale x 4 x half> %vd
}

declare <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)

define <vscale x 8 x half> @vfmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v12, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v20, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %va)
  ret <vscale x 8 x half> %vd
}

define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vmv.v.x v8, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %splat, <vscale x 8 x half> %va)
  ret <vscale x 8 x half> %vd
}

declare <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x half>)

define <vscale x 16 x half> @vfmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v16, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %vb)
  ret <vscale x 16 x half> %vd
}

define <vscale x 16 x half> @vfmadd_vf_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vmv.v.x v12, a0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat, <vscale x 16 x half> %vb)
  ret <vscale x 16 x half> %vd
}
860declare <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x half>) 861 862define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) { 863; ZVFH-LABEL: vfmadd_vv_nxv32f16: 864; ZVFH: # %bb.0: 865; ZVFH-NEXT: vl8re16.v v24, (a0) 866; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma 867; ZVFH-NEXT: vfmacc.vv v8, v16, v24 868; ZVFH-NEXT: ret 869; 870; ZVFHMIN-LABEL: vfmadd_vv_nxv32f16: 871; ZVFHMIN: # %bb.0: 872; ZVFHMIN-NEXT: addi sp, sp, -16 873; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 874; ZVFHMIN-NEXT: csrr a1, vlenb 875; ZVFHMIN-NEXT: slli a1, a1, 5 876; ZVFHMIN-NEXT: sub sp, sp, a1 877; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 878; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma 879; ZVFHMIN-NEXT: vmv8r.v v0, v16 880; ZVFHMIN-NEXT: addi a1, sp, 16 881; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 882; ZVFHMIN-NEXT: vmv8r.v v16, v8 883; ZVFHMIN-NEXT: vl8re16.v v8, (a0) 884; ZVFHMIN-NEXT: csrr a0, vlenb 885; ZVFHMIN-NEXT: slli a0, a0, 4 886; ZVFHMIN-NEXT: add a0, sp, a0 887; ZVFHMIN-NEXT: addi a0, a0, 16 888; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 889; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 890; ZVFHMIN-NEXT: csrr a0, vlenb 891; ZVFHMIN-NEXT: li a1, 24 892; ZVFHMIN-NEXT: mul a0, a0, a1 893; ZVFHMIN-NEXT: add a0, sp, a0 894; ZVFHMIN-NEXT: addi a0, a0, 16 895; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 896; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 897; ZVFHMIN-NEXT: csrr a0, vlenb 898; ZVFHMIN-NEXT: slli a0, a0, 3 899; ZVFHMIN-NEXT: add a0, sp, a0 900; ZVFHMIN-NEXT: addi a0, a0, 16 901; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 902; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 903; ZVFHMIN-NEXT: csrr a0, vlenb 904; ZVFHMIN-NEXT: li a1, 24 905; ZVFHMIN-NEXT: mul a0, a0, a1 906; ZVFHMIN-NEXT: add a0, sp, a0 907; 
ZVFHMIN-NEXT: addi a0, a0, 16 908; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 909; ZVFHMIN-NEXT: csrr a0, vlenb 910; ZVFHMIN-NEXT: slli a0, a0, 3 911; ZVFHMIN-NEXT: add a0, sp, a0 912; ZVFHMIN-NEXT: addi a0, a0, 16 913; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 914; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 915; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v24 916; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 917; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 918; ZVFHMIN-NEXT: csrr a0, vlenb 919; ZVFHMIN-NEXT: li a1, 24 920; ZVFHMIN-NEXT: mul a0, a0, a1 921; ZVFHMIN-NEXT: add a0, sp, a0 922; ZVFHMIN-NEXT: addi a0, a0, 16 923; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 924; ZVFHMIN-NEXT: addi a0, sp, 16 925; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 926; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 927; ZVFHMIN-NEXT: csrr a0, vlenb 928; ZVFHMIN-NEXT: slli a0, a0, 4 929; ZVFHMIN-NEXT: add a0, sp, a0 930; ZVFHMIN-NEXT: addi a0, a0, 16 931; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 932; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 933; ZVFHMIN-NEXT: csrr a0, vlenb 934; ZVFHMIN-NEXT: li a1, 24 935; ZVFHMIN-NEXT: mul a0, a0, a1 936; ZVFHMIN-NEXT: add a0, sp, a0 937; ZVFHMIN-NEXT: addi a0, a0, 16 938; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 939; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 940; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24 941; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 942; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 943; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 944; ZVFHMIN-NEXT: csrr a0, vlenb 945; ZVFHMIN-NEXT: slli a0, a0, 5 946; ZVFHMIN-NEXT: add sp, sp, a0 947; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 948; ZVFHMIN-NEXT: addi sp, sp, 16 949; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 950; ZVFHMIN-NEXT: ret 951 %vd = call <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half> %vc, <vscale x 32 x half> %vb, <vscale x 32 x half> %va) 952 ret <vscale x 32 x half> %vd 953} 954 955define <vscale x 32 
x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, half %c) { 956; ZVFH-LABEL: vfmadd_vf_nxv32f16: 957; ZVFH: # %bb.0: 958; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma 959; ZVFH-NEXT: vfmacc.vf v8, fa0, v16 960; ZVFH-NEXT: ret 961; 962; ZVFHMIN-LABEL: vfmadd_vf_nxv32f16: 963; ZVFHMIN: # %bb.0: 964; ZVFHMIN-NEXT: addi sp, sp, -16 965; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 966; ZVFHMIN-NEXT: csrr a0, vlenb 967; ZVFHMIN-NEXT: slli a0, a0, 5 968; ZVFHMIN-NEXT: sub sp, sp, a0 969; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 970; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma 971; ZVFHMIN-NEXT: vmv8r.v v0, v16 972; ZVFHMIN-NEXT: addi a0, sp, 16 973; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 974; ZVFHMIN-NEXT: vmv8r.v v16, v8 975; ZVFHMIN-NEXT: fmv.x.h a0, fa0 976; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 977; ZVFHMIN-NEXT: csrr a1, vlenb 978; ZVFHMIN-NEXT: slli a1, a1, 4 979; ZVFHMIN-NEXT: add a1, sp, a1 980; ZVFHMIN-NEXT: addi a1, a1, 16 981; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 982; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 983; ZVFHMIN-NEXT: csrr a1, vlenb 984; ZVFHMIN-NEXT: slli a1, a1, 3 985; ZVFHMIN-NEXT: add a1, sp, a1 986; ZVFHMIN-NEXT: addi a1, a1, 16 987; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 988; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma 989; ZVFHMIN-NEXT: vmv.v.x v24, a0 990; ZVFHMIN-NEXT: csrr a0, vlenb 991; ZVFHMIN-NEXT: li a1, 24 992; ZVFHMIN-NEXT: mul a0, a0, a1 993; ZVFHMIN-NEXT: add a0, sp, a0 994; ZVFHMIN-NEXT: addi a0, a0, 16 995; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 996; ZVFHMIN-NEXT: csrr a0, vlenb 997; ZVFHMIN-NEXT: li a1, 24 998; ZVFHMIN-NEXT: mul a0, a0, a1 999; ZVFHMIN-NEXT: add a0, sp, a0 1000; ZVFHMIN-NEXT: addi a0, a0, 16 1001; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 1002; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma 1003; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 1004; ZVFHMIN-NEXT: csrr a0, vlenb 1005; ZVFHMIN-NEXT: slli a0, a0, 4 1006; ZVFHMIN-NEXT: add a0, sp, a0 1007; ZVFHMIN-NEXT: addi a0, a0, 16 1008; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 1009; ZVFHMIN-NEXT: csrr a0, vlenb 1010; ZVFHMIN-NEXT: slli a0, a0, 3 1011; ZVFHMIN-NEXT: add a0, sp, a0 1012; ZVFHMIN-NEXT: addi a0, a0, 16 1013; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 1014; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1015; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v0 1016; ZVFHMIN-NEXT: vmv.v.v v24, v8 1017; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1018; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 1019; ZVFHMIN-NEXT: csrr a0, vlenb 1020; ZVFHMIN-NEXT: slli a0, a0, 4 1021; ZVFHMIN-NEXT: add a0, sp, a0 1022; ZVFHMIN-NEXT: addi a0, a0, 16 1023; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 1024; ZVFHMIN-NEXT: addi a0, sp, 16 1025; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1026; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 1027; ZVFHMIN-NEXT: csrr a0, vlenb 1028; ZVFHMIN-NEXT: li a1, 24 1029; ZVFHMIN-NEXT: mul a0, a0, a1 1030; ZVFHMIN-NEXT: add a0, sp, a0 1031; ZVFHMIN-NEXT: addi a0, a0, 16 1032; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 1033; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 1034; ZVFHMIN-NEXT: csrr a0, vlenb 1035; ZVFHMIN-NEXT: slli a0, a0, 4 1036; ZVFHMIN-NEXT: add a0, sp, a0 1037; ZVFHMIN-NEXT: addi a0, a0, 16 1038; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 1039; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1040; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v0 1041; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1042; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 1043; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 1044; ZVFHMIN-NEXT: csrr a0, vlenb 1045; ZVFHMIN-NEXT: slli a0, a0, 5 1046; ZVFHMIN-NEXT: add sp, sp, a0 1047; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1048; ZVFHMIN-NEXT: addi sp, sp, 16 1049; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1050; ZVFHMIN-NEXT: ret 1051 
%head = insertelement <vscale x 32 x half> poison, half %c, i32 0 1052 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer 1053 %vd = call <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half> %vb, <vscale x 32 x half> %splat, <vscale x 32 x half> %va) 1054 ret <vscale x 32 x half> %vd 1055} 1056 1057declare <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>) 1058 1059define <vscale x 1 x float> @vfmadd_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc) { 1060; CHECK-LABEL: vfmadd_vv_nxv1f32: 1061; CHECK: # %bb.0: 1062; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma 1063; CHECK-NEXT: vfmadd.vv v8, v9, v10 1064; CHECK-NEXT: ret 1065 %vd = call <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc) 1066 ret <vscale x 1 x float> %vd 1067} 1068 1069define <vscale x 1 x float> @vfmadd_vf_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, float %c) { 1070; CHECK-LABEL: vfmadd_vf_nxv1f32: 1071; CHECK: # %bb.0: 1072; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma 1073; CHECK-NEXT: vfmadd.vf v8, fa0, v9 1074; CHECK-NEXT: ret 1075 %head = insertelement <vscale x 1 x float> poison, float %c, i32 0 1076 %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer 1077 %vd = call <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %splat, <vscale x 1 x float> %vb) 1078 ret <vscale x 1 x float> %vd 1079} 1080 1081declare <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>) 1082 1083define <vscale x 2 x float> @vfmadd_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x float> %vc) { 1084; CHECK-LABEL: vfmadd_vv_nxv2f32: 1085; CHECK: # %bb.0: 1086; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma 1087; 
CHECK-NEXT: vfmadd.vv v8, v10, v9 1088; CHECK-NEXT: ret 1089 %vd = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vc, <vscale x 2 x float> %vb) 1090 ret <vscale x 2 x float> %vd 1091} 1092 1093define <vscale x 2 x float> @vfmadd_vf_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, float %c) { 1094; CHECK-LABEL: vfmadd_vf_nxv2f32: 1095; CHECK: # %bb.0: 1096; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma 1097; CHECK-NEXT: vfmacc.vf v8, fa0, v9 1098; CHECK-NEXT: ret 1099 %head = insertelement <vscale x 2 x float> poison, float %c, i32 0 1100 %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer 1101 %vd = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> %vb, <vscale x 2 x float> %splat, <vscale x 2 x float> %va) 1102 ret <vscale x 2 x float> %vd 1103} 1104 1105declare <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) 1106 1107define <vscale x 4 x float> @vfmadd_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x float> %vc) { 1108; CHECK-LABEL: vfmadd_vv_nxv4f32: 1109; CHECK: # %bb.0: 1110; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma 1111; CHECK-NEXT: vfmadd.vv v8, v10, v12 1112; CHECK-NEXT: ret 1113 %vd = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> %vb, <vscale x 4 x float> %va, <vscale x 4 x float> %vc) 1114 ret <vscale x 4 x float> %vd 1115} 1116 1117define <vscale x 4 x float> @vfmadd_vf_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, float %c) { 1118; CHECK-LABEL: vfmadd_vf_nxv4f32: 1119; CHECK: # %bb.0: 1120; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma 1121; CHECK-NEXT: vfmadd.vf v8, fa0, v10 1122; CHECK-NEXT: ret 1123 %head = insertelement <vscale x 4 x float> poison, float %c, i32 0 1124 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer 1125 %vd = call <vscale 
x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %splat, <vscale x 4 x float> %vb) 1126 ret <vscale x 4 x float> %vd 1127} 1128 1129declare <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>) 1130 1131define <vscale x 8 x float> @vfmadd_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x float> %vc) { 1132; CHECK-LABEL: vfmadd_vv_nxv8f32: 1133; CHECK: # %bb.0: 1134; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma 1135; CHECK-NEXT: vfmacc.vv v8, v16, v12 1136; CHECK-NEXT: ret 1137 %vd = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %vc, <vscale x 8 x float> %va) 1138 ret <vscale x 8 x float> %vd 1139} 1140 1141define <vscale x 8 x float> @vfmadd_vf_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, float %c) { 1142; CHECK-LABEL: vfmadd_vf_nxv8f32: 1143; CHECK: # %bb.0: 1144; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma 1145; CHECK-NEXT: vfmacc.vf v8, fa0, v12 1146; CHECK-NEXT: ret 1147 %head = insertelement <vscale x 8 x float> poison, float %c, i32 0 1148 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer 1149 %vd = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %splat, <vscale x 8 x float> %va) 1150 ret <vscale x 8 x float> %vd 1151} 1152 1153declare <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>) 1154 1155define <vscale x 16 x float> @vfmadd_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x float> %vc) { 1156; CHECK-LABEL: vfmadd_vv_nxv16f32: 1157; CHECK: # %bb.0: 1158; CHECK-NEXT: vl8re32.v v24, (a0) 1159; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 1160; CHECK-NEXT: vfmadd.vv v8, v24, v16 1161; CHECK-NEXT: ret 1162 %vd = call <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float> %vc, <vscale x 16 x float> 
%va, <vscale x 16 x float> %vb) 1163 ret <vscale x 16 x float> %vd 1164} 1165 1166define <vscale x 16 x float> @vfmadd_vf_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, float %c) { 1167; CHECK-LABEL: vfmadd_vf_nxv16f32: 1168; CHECK: # %bb.0: 1169; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 1170; CHECK-NEXT: vfmadd.vf v8, fa0, v16 1171; CHECK-NEXT: ret 1172 %head = insertelement <vscale x 16 x float> poison, float %c, i32 0 1173 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer 1174 %vd = call <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %splat, <vscale x 16 x float> %vb) 1175 ret <vscale x 16 x float> %vd 1176} 1177 1178declare <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>) 1179 1180define <vscale x 1 x double> @vfmadd_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc) { 1181; CHECK-LABEL: vfmadd_vv_nxv1f64: 1182; CHECK: # %bb.0: 1183; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma 1184; CHECK-NEXT: vfmadd.vv v8, v9, v10 1185; CHECK-NEXT: ret 1186 %vd = call <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc) 1187 ret <vscale x 1 x double> %vd 1188} 1189 1190define <vscale x 1 x double> @vfmadd_vf_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, double %c) { 1191; CHECK-LABEL: vfmadd_vf_nxv1f64: 1192; CHECK: # %bb.0: 1193; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma 1194; CHECK-NEXT: vfmadd.vf v8, fa0, v9 1195; CHECK-NEXT: ret 1196 %head = insertelement <vscale x 1 x double> poison, double %c, i32 0 1197 %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer 1198 %vd = call <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %splat, <vscale x 1 x double> %vb) 1199 
ret <vscale x 1 x double> %vd 1200} 1201 1202declare <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) 1203 1204define <vscale x 2 x double> @vfmadd_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x double> %vc) { 1205; CHECK-LABEL: vfmadd_vv_nxv2f64: 1206; CHECK: # %bb.0: 1207; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma 1208; CHECK-NEXT: vfmadd.vv v8, v12, v10 1209; CHECK-NEXT: ret 1210 %vd = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vc, <vscale x 2 x double> %vb) 1211 ret <vscale x 2 x double> %vd 1212} 1213 1214define <vscale x 2 x double> @vfmadd_vf_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, double %c) { 1215; CHECK-LABEL: vfmadd_vf_nxv2f64: 1216; CHECK: # %bb.0: 1217; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma 1218; CHECK-NEXT: vfmacc.vf v8, fa0, v10 1219; CHECK-NEXT: ret 1220 %head = insertelement <vscale x 2 x double> poison, double %c, i32 0 1221 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer 1222 %vd = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> %vb, <vscale x 2 x double> %splat, <vscale x 2 x double> %va) 1223 ret <vscale x 2 x double> %vd 1224} 1225 1226declare <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>) 1227 1228define <vscale x 4 x double> @vfmadd_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x double> %vc) { 1229; CHECK-LABEL: vfmadd_vv_nxv4f64: 1230; CHECK: # %bb.0: 1231; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1232; CHECK-NEXT: vfmadd.vv v8, v12, v16 1233; CHECK-NEXT: ret 1234 %vd = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> %vb, <vscale x 4 x double> %va, <vscale x 4 x double> %vc) 1235 ret <vscale x 4 x double> %vd 1236} 1237 1238define <vscale x 4 x double> 
@vfmadd_vf_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, double %c) { 1239; CHECK-LABEL: vfmadd_vf_nxv4f64: 1240; CHECK: # %bb.0: 1241; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1242; CHECK-NEXT: vfmadd.vf v8, fa0, v12 1243; CHECK-NEXT: ret 1244 %head = insertelement <vscale x 4 x double> poison, double %c, i32 0 1245 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer 1246 %vd = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %splat, <vscale x 4 x double> %vb) 1247 ret <vscale x 4 x double> %vd 1248} 1249 1250declare <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>) 1251 1252define <vscale x 8 x double> @vfmadd_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x double> %vc) { 1253; CHECK-LABEL: vfmadd_vv_nxv8f64: 1254; CHECK: # %bb.0: 1255; CHECK-NEXT: vl8re64.v v24, (a0) 1256; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma 1257; CHECK-NEXT: vfmacc.vv v8, v16, v24 1258; CHECK-NEXT: ret 1259 %vd = call <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %vc, <vscale x 8 x double> %va) 1260 ret <vscale x 8 x double> %vd 1261} 1262 1263define <vscale x 8 x double> @vfmadd_vf_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, double %c) { 1264; CHECK-LABEL: vfmadd_vf_nxv8f64: 1265; CHECK: # %bb.0: 1266; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma 1267; CHECK-NEXT: vfmacc.vf v8, fa0, v16 1268; CHECK-NEXT: ret 1269 %head = insertelement <vscale x 8 x double> poison, double %c, i32 0 1270 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer 1271 %vd = call <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %splat, <vscale x 8 x double> %va) 1272 ret <vscale x 8 x double> %vd 1273} 1274