1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH 4; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH 6; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ 7; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN 8; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ 9; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN 10 11declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32) 12 13define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { 14; ZVFH-LABEL: vfma_vv_v2f16: 15; ZVFH: # %bb.0: 16; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 17; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t 18; ZVFH-NEXT: vmv1r.v v8, v9 19; ZVFH-NEXT: ret 20; 21; ZVFHMIN-LABEL: vfma_vv_v2f16: 22; ZVFHMIN: # %bb.0: 23; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 24; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t 25; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t 26; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t 27; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 28; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t 29; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 30; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t 31; ZVFHMIN-NEXT: ret 32 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl) 33 ret <2 x half> %v 34} 35 36define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) { 37; ZVFH-LABEL: vfma_vv_v2f16_unmasked: 38; ZVFH: # %bb.0: 39; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 40; ZVFH-NEXT: vfmadd.vv v8, v9, v10 41; ZVFH-NEXT: 
ret 42; 43; ZVFHMIN-LABEL: vfma_vv_v2f16_unmasked: 44; ZVFHMIN: # %bb.0: 45; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 46; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 47; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 48; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 49; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 50; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11 51; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 52; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 53; ZVFHMIN-NEXT: ret 54 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl) 55 ret <2 x half> %v 56} 57 58define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i1> %m, i32 zeroext %evl) { 59; ZVFH-LABEL: vfma_vf_v2f16: 60; ZVFH: # %bb.0: 61; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 62; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t 63; ZVFH-NEXT: ret 64; 65; ZVFHMIN-LABEL: vfma_vf_v2f16: 66; ZVFHMIN: # %bb.0: 67; ZVFHMIN-NEXT: fmv.x.h a1, fa0 68; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 69; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t 70; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma 71; ZVFHMIN-NEXT: vmv.v.x v9, a1 72; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 73; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t 74; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t 75; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 76; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10, v0.t 77; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 78; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t 79; ZVFHMIN-NEXT: ret 80 %elt.head = insertelement <2 x half> poison, half %b, i32 0 81 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer 82 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl) 83 ret <2 x half> %v 84} 85 86define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %vc, i32 zeroext %evl) { 87; ZVFH-LABEL: vfma_vf_v2f16_unmasked: 88; ZVFH: # %bb.0: 89; 
ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 90; ZVFH-NEXT: vfmadd.vf v8, fa0, v9 91; ZVFH-NEXT: ret 92; 93; ZVFHMIN-LABEL: vfma_vf_v2f16_unmasked: 94; ZVFHMIN: # %bb.0: 95; ZVFHMIN-NEXT: fmv.x.h a1, fa0 96; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 97; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 98; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma 99; ZVFHMIN-NEXT: vmv.v.x v9, a1 100; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 101; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 102; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 103; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 104; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 105; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 106; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 107; ZVFHMIN-NEXT: ret 108 %elt.head = insertelement <2 x half> poison, half %b, i32 0 109 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer 110 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> splat (i1 true), i32 %evl) 111 ret <2 x half> %v 112} 113 114declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32) 115 116define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { 117; ZVFH-LABEL: vfma_vv_v4f16: 118; ZVFH: # %bb.0: 119; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 120; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t 121; ZVFH-NEXT: vmv1r.v v8, v9 122; ZVFH-NEXT: ret 123; 124; ZVFHMIN-LABEL: vfma_vv_v4f16: 125; ZVFHMIN: # %bb.0: 126; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 127; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t 128; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t 129; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t 130; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 131; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t 132; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 133; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t 134; ZVFHMIN-NEXT: ret 135 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> 
%va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl) 136 ret <4 x half> %v 137} 138 139define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x half> %c, i32 zeroext %evl) { 140; ZVFH-LABEL: vfma_vv_v4f16_unmasked: 141; ZVFH: # %bb.0: 142; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 143; ZVFH-NEXT: vfmadd.vv v8, v9, v10 144; ZVFH-NEXT: ret 145; 146; ZVFHMIN-LABEL: vfma_vv_v4f16_unmasked: 147; ZVFHMIN: # %bb.0: 148; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 149; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 150; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 151; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 152; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 153; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11 154; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 155; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 156; ZVFHMIN-NEXT: ret 157 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl) 158 ret <4 x half> %v 159} 160 161define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i1> %m, i32 zeroext %evl) { 162; ZVFH-LABEL: vfma_vf_v4f16: 163; ZVFH: # %bb.0: 164; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 165; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t 166; ZVFH-NEXT: ret 167; 168; ZVFHMIN-LABEL: vfma_vf_v4f16: 169; ZVFHMIN: # %bb.0: 170; ZVFHMIN-NEXT: fmv.x.h a1, fa0 171; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 172; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t 173; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 174; ZVFHMIN-NEXT: vmv.v.x v9, a1 175; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 176; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t 177; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t 178; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 179; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10, v0.t 180; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 181; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t 182; ZVFHMIN-NEXT: ret 183 %elt.head = insertelement <4 x half> poison, half %b, i32 0 184 
%vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer 185 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl) 186 ret <4 x half> %v 187} 188 189define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %vc, i32 zeroext %evl) { 190; ZVFH-LABEL: vfma_vf_v4f16_unmasked: 191; ZVFH: # %bb.0: 192; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 193; ZVFH-NEXT: vfmadd.vf v8, fa0, v9 194; ZVFH-NEXT: ret 195; 196; ZVFHMIN-LABEL: vfma_vf_v4f16_unmasked: 197; ZVFHMIN: # %bb.0: 198; ZVFHMIN-NEXT: fmv.x.h a1, fa0 199; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 200; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 201; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 202; ZVFHMIN-NEXT: vmv.v.x v9, a1 203; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 204; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 205; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 206; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 207; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 208; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 209; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 210; ZVFHMIN-NEXT: ret 211 %elt.head = insertelement <4 x half> poison, half %b, i32 0 212 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer 213 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> splat (i1 true), i32 %evl) 214 ret <4 x half> %v 215} 216 217declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32) 218 219define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { 220; ZVFH-LABEL: vfma_vv_v8f16: 221; ZVFH: # %bb.0: 222; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 223; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t 224; ZVFH-NEXT: vmv.v.v v8, v9 225; ZVFH-NEXT: ret 226; 227; ZVFHMIN-LABEL: vfma_vv_v8f16: 228; ZVFHMIN: # %bb.0: 229; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 230; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t 231; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t 232; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t 233; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 234; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t 235; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 236; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t 237; ZVFHMIN-NEXT: ret 238 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl) 239 ret <8 x half> %v 240} 241 242define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x half> %c, i32 zeroext %evl) { 243; ZVFH-LABEL: vfma_vv_v8f16_unmasked: 244; ZVFH: # %bb.0: 245; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 246; ZVFH-NEXT: vfmadd.vv v8, v9, v10 247; ZVFH-NEXT: ret 248; 249; ZVFHMIN-LABEL: vfma_vv_v8f16_unmasked: 250; ZVFHMIN: # %bb.0: 251; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 252; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 253; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 254; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 255; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 256; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12 257; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 258; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 259; ZVFHMIN-NEXT: ret 260 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl) 261 ret <8 x half> %v 262} 263 264define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i1> %m, i32 zeroext %evl) { 265; ZVFH-LABEL: vfma_vf_v8f16: 266; ZVFH: # %bb.0: 267; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 268; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t 269; ZVFH-NEXT: ret 270; 271; ZVFHMIN-LABEL: vfma_vf_v8f16: 272; ZVFHMIN: # %bb.0: 273; ZVFHMIN-NEXT: fmv.x.h a1, fa0 274; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 275; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t 276; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 277; ZVFHMIN-NEXT: vmv.v.x v9, a1 278; ZVFHMIN-NEXT: vsetvli zero, 
a0, e16, m1, ta, ma 279; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t 280; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t 281; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 282; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t 283; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 284; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t 285; ZVFHMIN-NEXT: ret 286 %elt.head = insertelement <8 x half> poison, half %b, i32 0 287 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer 288 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl) 289 ret <8 x half> %v 290} 291 292define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %vc, i32 zeroext %evl) { 293; ZVFH-LABEL: vfma_vf_v8f16_unmasked: 294; ZVFH: # %bb.0: 295; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 296; ZVFH-NEXT: vfmadd.vf v8, fa0, v9 297; ZVFH-NEXT: ret 298; 299; ZVFHMIN-LABEL: vfma_vf_v8f16_unmasked: 300; ZVFHMIN: # %bb.0: 301; ZVFHMIN-NEXT: fmv.x.h a1, fa0 302; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 303; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 304; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 305; ZVFHMIN-NEXT: vmv.v.x v9, a1 306; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 307; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 308; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 309; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 310; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10 311; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 312; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 313; ZVFHMIN-NEXT: ret 314 %elt.head = insertelement <8 x half> poison, half %b, i32 0 315 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer 316 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> splat (i1 true), i32 %evl) 317 ret <8 x half> %v 318} 319 320declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32) 321 322define <16 x half> 
@vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { 323; ZVFH-LABEL: vfma_vv_v16f16: 324; ZVFH: # %bb.0: 325; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 326; ZVFH-NEXT: vfmadd.vv v10, v8, v12, v0.t 327; ZVFH-NEXT: vmv.v.v v8, v10 328; ZVFH-NEXT: ret 329; 330; ZVFHMIN-LABEL: vfma_vv_v16f16: 331; ZVFHMIN: # %bb.0: 332; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 333; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t 334; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t 335; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t 336; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 337; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t 338; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 339; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t 340; ZVFHMIN-NEXT: ret 341 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl) 342 ret <16 x half> %v 343} 344 345define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 x half> %c, i32 zeroext %evl) { 346; ZVFH-LABEL: vfma_vv_v16f16_unmasked: 347; ZVFH: # %bb.0: 348; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 349; ZVFH-NEXT: vfmadd.vv v8, v10, v12 350; ZVFH-NEXT: ret 351; 352; ZVFHMIN-LABEL: vfma_vv_v16f16_unmasked: 353; ZVFHMIN: # %bb.0: 354; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 355; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 356; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 357; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 358; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 359; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16 360; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 361; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 362; ZVFHMIN-NEXT: ret 363 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl) 364 ret <16 x half> %v 365} 366 367define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <16 x i1> %m, i32 zeroext %evl) { 368; ZVFH-LABEL: vfma_vf_v16f16: 
369; ZVFH: # %bb.0: 370; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 371; ZVFH-NEXT: vfmadd.vf v8, fa0, v10, v0.t 372; ZVFH-NEXT: ret 373; 374; ZVFHMIN-LABEL: vfma_vf_v16f16: 375; ZVFHMIN: # %bb.0: 376; ZVFHMIN-NEXT: fmv.x.h a1, fa0 377; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 378; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t 379; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma 380; ZVFHMIN-NEXT: vmv.v.x v10, a1 381; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 382; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t 383; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t 384; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 385; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t 386; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 387; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t 388; ZVFHMIN-NEXT: ret 389 %elt.head = insertelement <16 x half> poison, half %b, i32 0 390 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer 391 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl) 392 ret <16 x half> %v 393} 394 395define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %vc, i32 zeroext %evl) { 396; ZVFH-LABEL: vfma_vf_v16f16_unmasked: 397; ZVFH: # %bb.0: 398; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 399; ZVFH-NEXT: vfmadd.vf v8, fa0, v10 400; ZVFH-NEXT: ret 401; 402; ZVFHMIN-LABEL: vfma_vf_v16f16_unmasked: 403; ZVFHMIN: # %bb.0: 404; ZVFHMIN-NEXT: fmv.x.h a1, fa0 405; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 406; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 407; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma 408; ZVFHMIN-NEXT: vmv.v.x v10, a1 409; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 410; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 411; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 412; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 413; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12 414; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 415; ZVFHMIN-NEXT: vfncvt.f.f.w 
v8, v20 416; ZVFHMIN-NEXT: ret 417 %elt.head = insertelement <16 x half> poison, half %b, i32 0 418 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer 419 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> splat (i1 true), i32 %evl) 420 ret <16 x half> %v 421} 422 423declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32) 424 425define <2 x float> @vfma_vv_v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { 426; CHECK-LABEL: vfma_vv_v2f32: 427; CHECK: # %bb.0: 428; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 429; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t 430; CHECK-NEXT: vmv1r.v v8, v9 431; CHECK-NEXT: ret 432 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl) 433 ret <2 x float> %v 434} 435 436define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x float> %c, i32 zeroext %evl) { 437; CHECK-LABEL: vfma_vv_v2f32_unmasked: 438; CHECK: # %bb.0: 439; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 440; CHECK-NEXT: vfmadd.vv v8, v9, v10 441; CHECK-NEXT: ret 442 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 true), i32 %evl) 443 ret <2 x float> %v 444} 445 446define <2 x float> @vfma_vf_v2f32(<2 x float> %va, float %b, <2 x float> %vc, <2 x i1> %m, i32 zeroext %evl) { 447; CHECK-LABEL: vfma_vf_v2f32: 448; CHECK: # %bb.0: 449; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 450; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t 451; CHECK-NEXT: ret 452 %elt.head = insertelement <2 x float> poison, float %b, i32 0 453 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer 454 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl) 455 ret <2 x float> %v 456} 457 458define <2 x float> 
@vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %vc, i32 zeroext %evl) { 459; CHECK-LABEL: vfma_vf_v2f32_unmasked: 460; CHECK: # %bb.0: 461; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 462; CHECK-NEXT: vfmadd.vf v8, fa0, v9 463; CHECK-NEXT: ret 464 %elt.head = insertelement <2 x float> poison, float %b, i32 0 465 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer 466 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> splat (i1 true), i32 %evl) 467 ret <2 x float> %v 468} 469 470declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32) 471 472define <4 x float> @vfma_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { 473; CHECK-LABEL: vfma_vv_v4f32: 474; CHECK: # %bb.0: 475; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 476; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t 477; CHECK-NEXT: vmv.v.v v8, v9 478; CHECK-NEXT: ret 479 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl) 480 ret <4 x float> %v 481} 482 483define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x float> %c, i32 zeroext %evl) { 484; CHECK-LABEL: vfma_vv_v4f32_unmasked: 485; CHECK: # %bb.0: 486; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 487; CHECK-NEXT: vfmadd.vv v8, v9, v10 488; CHECK-NEXT: ret 489 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 true), i32 %evl) 490 ret <4 x float> %v 491} 492 493define <4 x float> @vfma_vf_v4f32(<4 x float> %va, float %b, <4 x float> %vc, <4 x i1> %m, i32 zeroext %evl) { 494; CHECK-LABEL: vfma_vf_v4f32: 495; CHECK: # %bb.0: 496; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 497; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t 498; CHECK-NEXT: ret 499 %elt.head = insertelement <4 x float> poison, float %b, i32 0 500 %vb = shufflevector <4 x float> 
%elt.head, <4 x float> poison, <4 x i32> zeroinitializer 501 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl) 502 ret <4 x float> %v 503} 504 505define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %vc, i32 zeroext %evl) { 506; CHECK-LABEL: vfma_vf_v4f32_unmasked: 507; CHECK: # %bb.0: 508; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 509; CHECK-NEXT: vfmadd.vf v8, fa0, v9 510; CHECK-NEXT: ret 511 %elt.head = insertelement <4 x float> poison, float %b, i32 0 512 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer 513 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> splat (i1 true), i32 %evl) 514 ret <4 x float> %v 515} 516 517declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) 518 519define <8 x float> @vfma_vv_v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { 520; CHECK-LABEL: vfma_vv_v8f32: 521; CHECK: # %bb.0: 522; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 523; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t 524; CHECK-NEXT: vmv.v.v v8, v10 525; CHECK-NEXT: ret 526 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl) 527 ret <8 x float> %v 528} 529 530define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x float> %c, i32 zeroext %evl) { 531; CHECK-LABEL: vfma_vv_v8f32_unmasked: 532; CHECK: # %bb.0: 533; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 534; CHECK-NEXT: vfmadd.vv v8, v10, v12 535; CHECK-NEXT: ret 536 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 true), i32 %evl) 537 ret <8 x float> %v 538} 539 540define <8 x float> @vfma_vf_v8f32(<8 x float> %va, float %b, <8 x float> %vc, <8 x i1> %m, i32 zeroext %evl) { 541; CHECK-LABEL: vfma_vf_v8f32: 542; CHECK: 
# %bb.0: 543; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 544; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t 545; CHECK-NEXT: ret 546 %elt.head = insertelement <8 x float> poison, float %b, i32 0 547 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer 548 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl) 549 ret <8 x float> %v 550} 551 552define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %vc, i32 zeroext %evl) { 553; CHECK-LABEL: vfma_vf_v8f32_unmasked: 554; CHECK: # %bb.0: 555; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 556; CHECK-NEXT: vfmadd.vf v8, fa0, v10 557; CHECK-NEXT: ret 558 %elt.head = insertelement <8 x float> poison, float %b, i32 0 559 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer 560 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> splat (i1 true), i32 %evl) 561 ret <8 x float> %v 562} 563 564declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32) 565 566define <16 x float> @vfma_vv_v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { 567; CHECK-LABEL: vfma_vv_v16f32: 568; CHECK: # %bb.0: 569; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 570; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t 571; CHECK-NEXT: vmv.v.v v8, v12 572; CHECK-NEXT: ret 573 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl) 574 ret <16 x float> %v 575} 576 577define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, <16 x float> %c, i32 zeroext %evl) { 578; CHECK-LABEL: vfma_vv_v16f32_unmasked: 579; CHECK: # %bb.0: 580; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 581; CHECK-NEXT: vfmadd.vv v8, v12, v16 582; CHECK-NEXT: ret 583 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, 
<16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 true), i32 %evl) 584 ret <16 x float> %v 585} 586 587define <16 x float> @vfma_vf_v16f32(<16 x float> %va, float %b, <16 x float> %vc, <16 x i1> %m, i32 zeroext %evl) { 588; CHECK-LABEL: vfma_vf_v16f32: 589; CHECK: # %bb.0: 590; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 591; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t 592; CHECK-NEXT: ret 593 %elt.head = insertelement <16 x float> poison, float %b, i32 0 594 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer 595 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl) 596 ret <16 x float> %v 597} 598 599define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %vc, i32 zeroext %evl) { 600; CHECK-LABEL: vfma_vf_v16f32_unmasked: 601; CHECK: # %bb.0: 602; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 603; CHECK-NEXT: vfmadd.vf v8, fa0, v12 604; CHECK-NEXT: ret 605 %elt.head = insertelement <16 x float> poison, float %b, i32 0 606 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer 607 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> splat (i1 true), i32 %evl) 608 ret <16 x float> %v 609} 610 611declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32) 612 613define <2 x double> @vfma_vv_v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { 614; CHECK-LABEL: vfma_vv_v2f64: 615; CHECK: # %bb.0: 616; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 617; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t 618; CHECK-NEXT: vmv.v.v v8, v9 619; CHECK-NEXT: ret 620 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl) 621 ret <2 x double> %v 622} 623 624define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x 
double> %b, <2 x double> %c, i32 zeroext %evl) { 625; CHECK-LABEL: vfma_vv_v2f64_unmasked: 626; CHECK: # %bb.0: 627; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 628; CHECK-NEXT: vfmadd.vv v8, v9, v10 629; CHECK-NEXT: ret 630 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 true), i32 %evl) 631 ret <2 x double> %v 632} 633 634define <2 x double> @vfma_vf_v2f64(<2 x double> %va, double %b, <2 x double> %vc, <2 x i1> %m, i32 zeroext %evl) { 635; CHECK-LABEL: vfma_vf_v2f64: 636; CHECK: # %bb.0: 637; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 638; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t 639; CHECK-NEXT: ret 640 %elt.head = insertelement <2 x double> poison, double %b, i32 0 641 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer 642 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl) 643 ret <2 x double> %v 644} 645 646define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %vc, i32 zeroext %evl) { 647; CHECK-LABEL: vfma_vf_v2f64_unmasked: 648; CHECK: # %bb.0: 649; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 650; CHECK-NEXT: vfmadd.vf v8, fa0, v9 651; CHECK-NEXT: ret 652 %elt.head = insertelement <2 x double> poison, double %b, i32 0 653 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer 654 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> splat (i1 true), i32 %evl) 655 ret <2 x double> %v 656} 657 658declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32) 659 660define <4 x double> @vfma_vv_v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { 661; CHECK-LABEL: vfma_vv_v4f64: 662; CHECK: # %bb.0: 663; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 664; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t 
665; CHECK-NEXT: vmv.v.v v8, v10 666; CHECK-NEXT: ret 667 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl) 668 ret <4 x double> %v 669} 670 671define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <4 x double> %c, i32 zeroext %evl) { 672; CHECK-LABEL: vfma_vv_v4f64_unmasked: 673; CHECK: # %bb.0: 674; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 675; CHECK-NEXT: vfmadd.vv v8, v10, v12 676; CHECK-NEXT: ret 677 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 true), i32 %evl) 678 ret <4 x double> %v 679} 680 681define <4 x double> @vfma_vf_v4f64(<4 x double> %va, double %b, <4 x double> %vc, <4 x i1> %m, i32 zeroext %evl) { 682; CHECK-LABEL: vfma_vf_v4f64: 683; CHECK: # %bb.0: 684; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 685; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t 686; CHECK-NEXT: ret 687 %elt.head = insertelement <4 x double> poison, double %b, i32 0 688 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer 689 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl) 690 ret <4 x double> %v 691} 692 693define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %vc, i32 zeroext %evl) { 694; CHECK-LABEL: vfma_vf_v4f64_unmasked: 695; CHECK: # %bb.0: 696; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 697; CHECK-NEXT: vfmadd.vf v8, fa0, v10 698; CHECK-NEXT: ret 699 %elt.head = insertelement <4 x double> poison, double %b, i32 0 700 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer 701 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> splat (i1 true), i32 %evl) 702 ret <4 x double> %v 703} 704 705declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32) 
706 707define <8 x double> @vfma_vv_v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { 708; CHECK-LABEL: vfma_vv_v8f64: 709; CHECK: # %bb.0: 710; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 711; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t 712; CHECK-NEXT: vmv.v.v v8, v12 713; CHECK-NEXT: ret 714 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl) 715 ret <8 x double> %v 716} 717 718define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, <8 x double> %c, i32 zeroext %evl) { 719; CHECK-LABEL: vfma_vv_v8f64_unmasked: 720; CHECK: # %bb.0: 721; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 722; CHECK-NEXT: vfmadd.vv v8, v12, v16 723; CHECK-NEXT: ret 724 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 true), i32 %evl) 725 ret <8 x double> %v 726} 727 728define <8 x double> @vfma_vf_v8f64(<8 x double> %va, double %b, <8 x double> %vc, <8 x i1> %m, i32 zeroext %evl) { 729; CHECK-LABEL: vfma_vf_v8f64: 730; CHECK: # %bb.0: 731; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 732; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t 733; CHECK-NEXT: ret 734 %elt.head = insertelement <8 x double> poison, double %b, i32 0 735 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer 736 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl) 737 ret <8 x double> %v 738} 739 740define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %vc, i32 zeroext %evl) { 741; CHECK-LABEL: vfma_vf_v8f64_unmasked: 742; CHECK: # %bb.0: 743; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 744; CHECK-NEXT: vfmadd.vf v8, fa0, v12 745; CHECK-NEXT: ret 746 %elt.head = insertelement <8 x double> poison, double %b, i32 0 747 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> 
zeroinitializer
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

declare <15 x double> @llvm.vp.fma.v15f64(<15 x double>, <15 x double>, <15 x double>, <15 x i1>, i32)

; Masked vp.fma on an odd-sized (<15 x double>) vector; expected to codegen the
; same as the next power-of-two size (VL set via vsetivli 16, e64, m8).
; NOTE: CHECK lines below are autogenerated by update_llc_test_checks.py --
; regenerate rather than hand-editing them.
define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

; Same as above with an all-ones mask (splat (i1 true)): the mask operand
; should be dropped and an unmasked vfmadd.vv emitted.
define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

declare <16 x double> @llvm.vp.fma.v16f64(<16 x double>, <16 x double>, <16 x double>, <16 x i1>, i32)

; Masked vector-vector vp.fma, <16 x double> (exactly one m8 register group).
define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

; Unmasked (all-ones mask) vector-vector vp.fma, <16 x double>.
define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

; Masked vector-scalar vp.fma: the splatted scalar operand should be matched
; to the vfmadd.vf form (scalar stays in fa0, no explicit splat emitted).
define <16 x double> @vfma_vf_v16f64(<16 x double> %va, double %b, <16 x double> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

; Unmasked vector-scalar vp.fma, <16 x double>.
define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

declare <32 x double> @llvm.vp.fma.v32f64(<32 x double>, <32 x double>, <32 x double>, <32 x i1>, i32)

; Masked vp.fma on <32 x double>: wider than one m8 group, so the operation is
; split into two 16-element halves with EVL clamping (bltu/sltu sequence) and
; register-group spills/reloads to the stack (vs8r.v/vl8r.v).
define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v7, v0, 2
; CHECK-NEXT:    bltu a4, a1, .LBB50_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB50_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a2, 24
; CHECK-NEXT:    mul a1, a1, a2
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    vmv.v.v v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

; Unmasked <32 x double> vp.fma: still split into two halves, but no mask
; manipulation (no vslidedown of v0), and a smaller stack frame (24 * vlenb).
define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %b, <32 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    vle64.v v0, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    bltu a4, a1, .LBB51_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB51_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v8, v16
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v16, v8
; CHECK-NEXT:    vmv8r.v v8, v0
; CHECK-NEXT:    vmv.v.v v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}