; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN

; Codegen test for fixed-length vectors: an llvm.fma whose two multiplicands
; are bf16 values fpext'ed to f32 and whose addend is the f32 vector %a.
;
; The two llc invocations with +zvfbfwma are checked under the ZVFBFWMA prefix
; and expect a single vfwmaccbf16.vv. The two invocations with only +zvfbfmin
; are checked under the ZVFBFMIN prefix and expect two vfwcvtbf16.f.f.v
; widenings followed by a vfmacc.vv at e32.
;
; The check lines are autogenerated. Regenerate them with the script named on
; the first line instead of editing them by hand.

; <1 x bfloat> * <1 x bfloat> + <1 x float>; both multiplicands are vectors.
define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v1f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v1f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
; ZVFBFMIN-NEXT:    ret
  %b.ext = fpext <1 x bfloat> %b to <1 x float>
  %c.ext = fpext <1 x bfloat> %c to <1 x float>
  %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a)
  ret <1 x float> %res
}

; Scalar-operand variant: %b is a scalar bfloat splatted with
; insertelement + shufflevector before the fpext. The expected code moves the
; scalar to a GPR (fmv.x.h with zvfbfwma, fmv.x.w with only zvfbfmin), inserts
; it with vmv.s.x, and still uses the .vv form of the multiply-accumulate.
define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v1f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
; ZVFBFWMA-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVFBFWMA-NEXT:    vmv.s.x v10, a0
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v1f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
; ZVFBFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; ZVFBFMIN-NEXT:    vmv.s.x v10, a0
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
; ZVFBFMIN-NEXT:    ret
  %b.head = insertelement <1 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <1 x bfloat> %b.head, <1 x bfloat> poison, <1 x i32> zeroinitializer
  %b.ext = fpext <1 x bfloat> %b.splat to <1 x float>
  %c.ext = fpext <1 x bfloat> %c to <1 x float>
  %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a)
  ret <1 x float> %res
}

; Two-element vector-vector case; expected at e16/mf4 (e32/mf2 for the
; zvfbfmin-only vfmacc.vv).
define <2 x float> @vfwmaccbf16_vv_v2f32(<2 x float> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v2f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v2f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
; ZVFBFMIN-NEXT:    ret
  %b.ext = fpext <2 x bfloat> %b to <2 x float>
  %c.ext = fpext <2 x bfloat> %c to <2 x float>
  %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a)
  ret <2 x float> %res
}

; Two-element scalar-splat case; from here on the splat is expected to be
; materialized with vmv.v.x.
define <2 x float> @vfwmaccbf16_vf_v2f32(<2 x float> %a, bfloat %b, <2 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v2f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
; ZVFBFWMA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFBFWMA-NEXT:    vmv.v.x v10, a0
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v2f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
; ZVFBFMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFBFMIN-NEXT:    vmv.v.x v10, a0
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
; ZVFBFMIN-NEXT:    ret
  %b.head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <2 x bfloat> %b.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
  %b.ext = fpext <2 x bfloat> %b.splat to <2 x float>
  %c.ext = fpext <2 x bfloat> %c to <2 x float>
  %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a)
  ret <2 x float> %res
}

; Four-element vector-vector case; expected at e16/mf2 (e32/m1 for the
; zvfbfmin-only vfmacc.vv).
define <4 x float> @vfwmaccbf16_vv_v4f32(<4 x float> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v4f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v9, v10
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v4f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v9
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v9, v10
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v9
; ZVFBFMIN-NEXT:    ret
  %b.ext = fpext <4 x bfloat> %b to <4 x float>
  %c.ext = fpext <4 x bfloat> %c to <4 x float>
  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a)
  ret <4 x float> %res
}

; Four-element scalar-splat case.
define <4 x float> @vfwmaccbf16_vf_v4f32(<4 x float> %a, bfloat %b, <4 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v4f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
; ZVFBFWMA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFBFWMA-NEXT:    vmv.v.x v10, a0
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v9
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v4f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
; ZVFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFBFMIN-NEXT:    vmv.v.x v10, a0
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v11, v10
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v11, v10
; ZVFBFMIN-NEXT:    ret
  %b.head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <4 x bfloat> %b.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
  %b.ext = fpext <4 x bfloat> %b.splat to <4 x float>
  %c.ext = fpext <4 x bfloat> %c to <4 x float>
  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a)
  ret <4 x float> %res
}

; Eight-element vector-vector case; expected at e16/m1 (e32/m2 for the
; zvfbfmin-only vfmacc.vv), so the bf16 inputs arrive in v10 and v11.
define <8 x float> @vfwmaccbf16_vv_v8f32(<8 x float> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v8f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v10, v11
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v8f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v11
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
; ZVFBFMIN-NEXT:    ret
  %b.ext = fpext <8 x bfloat> %b to <8 x float>
  %c.ext = fpext <8 x bfloat> %c to <8 x float>
  %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a)
  ret <8 x float> %res
}

; Eight-element scalar-splat case.
define <8 x float> @vfwmaccbf16_vf_v8f32(<8 x float> %a, bfloat %b, <8 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v8f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
; ZVFBFWMA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFBFWMA-NEXT:    vmv.v.x v11, a0
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v11, v10
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v8f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
; ZVFBFMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFBFMIN-NEXT:    vmv.v.x v11, a0
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v12, v11
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v14, v10
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v12, v14
; ZVFBFMIN-NEXT:    ret
  %b.head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <8 x bfloat> %b.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %b.ext = fpext <8 x bfloat> %b.splat to <8 x float>
  %c.ext = fpext <8 x bfloat> %c to <8 x float>
  %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a)
  ret <8 x float> %res
}

; Sixteen-element vector-vector case; expected at e16/m2 (e32/m4 for the
; zvfbfmin-only vfmacc.vv).
define <16 x float> @vfwmaccbf16_vv_v16f32(<16 x float> %a, <16 x bfloat> %b, <16 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v16f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v12, v14
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v16f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v14
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
; ZVFBFMIN-NEXT:    ret
  %b.ext = fpext <16 x bfloat> %b to <16 x float>
  %c.ext = fpext <16 x bfloat> %c to <16 x float>
  %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a)
  ret <16 x float> %res
}

; Sixteen-element scalar-splat case.
define <16 x float> @vfwmaccbf16_vf_v16f32(<16 x float> %a, bfloat %b, <16 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v16f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
; ZVFBFWMA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFBFWMA-NEXT:    vmv.v.x v14, a0
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v14, v12
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v16f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
; ZVFBFMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFBFMIN-NEXT:    vmv.v.x v14, a0
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v16, v14
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v20, v12
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v16, v20
; ZVFBFMIN-NEXT:    ret
  %b.head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <16 x bfloat> %b.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
  %b.ext = fpext <16 x bfloat> %b.splat to <16 x float>
  %c.ext = fpext <16 x bfloat> %c to <16 x float>
  %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a)
  ret <16 x float> %res
}

; Thirty-two-element vector-vector case; expected at e16/m4 (e32/m8 for the
; zvfbfmin-only vfmacc.vv). Unlike the smaller cases, the vector length is
; loaded into a register with li and set with vsetvli instead of vsetivli
; (presumably because 32 does not fit the vsetivli immediate; confirm against
; the vector spec).
; NOTE(review): the name says "bf32" where every other test says "bf16". This
; looks like a typo; renaming would also require updating the LABEL checks.
define <32 x float> @vfwmaccbf32_vv_v32f32(<32 x float> %a, <32 x bfloat> %b, <32 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf32_vv_v32f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    li a0, 32
; ZVFBFWMA-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v16, v20
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf32_vv_v32f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    li a0, 32
; ZVFBFMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v20
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
; ZVFBFMIN-NEXT:    ret
  %b.ext = fpext <32 x bfloat> %b to <32 x float>
  %c.ext = fpext <32 x bfloat> %c to <32 x float>
  %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a)
  ret <32 x float> %res
}

; Thirty-two-element scalar-splat case; the vector length is again loaded
; with li (into a1, since a0 holds the scalar bits) and set with vsetvli.
; NOTE(review): the name says "bf32" where every other test says "bf16". This
; looks like a typo; renaming would also require updating the LABEL checks.
define <32 x float> @vfwmaccbf32_vf_v32f32(<32 x float> %a, bfloat %b, <32 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf32_vf_v32f32:
; ZVFBFWMA:       # %bb.0:
; ZVFBFWMA-NEXT:    fmv.x.h a0, fa0
; ZVFBFWMA-NEXT:    li a1, 32
; ZVFBFWMA-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; ZVFBFWMA-NEXT:    vmv.v.x v20, a0
; ZVFBFWMA-NEXT:    vfwmaccbf16.vv v8, v20, v16
; ZVFBFWMA-NEXT:    ret
;
; ZVFBFMIN-LABEL: vfwmaccbf32_vf_v32f32:
; ZVFBFMIN:       # %bb.0:
; ZVFBFMIN-NEXT:    fmv.x.w a0, fa0
; ZVFBFMIN-NEXT:    li a1, 32
; ZVFBFMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; ZVFBFMIN-NEXT:    vmv.v.x v20, a0
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
; ZVFBFMIN-NEXT:    vfwcvtbf16.f.f.v v0, v16
; ZVFBFMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFBFMIN-NEXT:    vfmacc.vv v8, v24, v0
; ZVFBFMIN-NEXT:    ret
  %b.head = insertelement <32 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <32 x bfloat> %b.head, <32 x bfloat> poison, <32 x i32> zeroinitializer
  %b.ext = fpext <32 x bfloat> %b.splat to <32 x float>
  %c.ext = fpext <32 x bfloat> %c to <32 x float>
  %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a)
  ret <32 x float> %res
}