; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

; There is no vfwadd for bf16, so the extend is emitted as a separate
; vfwcvtbf16.f.f.v and the add is performed at f32.
define <vscale x 2 x float> @vfwadd_same_operand_nxv2bf16(<vscale x 2 x bfloat> %arg, i32 signext %vl) {
; CHECK-LABEL: vfwadd_same_operand_nxv2bf16:
; CHECK:       # %bb.0: # %bb
; CHECK-NEXT:    slli a0, a0, 32
; CHECK-NEXT:    srli a0, a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v9, v9
; CHECK-NEXT:    ret
bb:
  %tmp = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat> %arg, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  %tmp2 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %tmp, <vscale x 2 x float> %tmp, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x float> %tmp2
}

; Make sure we don't widen vfmadd.vv -> vfwmaccbf16.vv if there are other
; unwidenable uses.
define <vscale x 2 x float> @vfwadd_same_operand_nxv2bf16_multiuse(<vscale x 2 x bfloat> %arg, <vscale x 2 x float> %acc, i32 signext %vl, ptr %p) {
; CHECK-LABEL: vfwadd_same_operand_nxv2bf16_multiuse:
; CHECK:       # %bb.0: # %bb
; CHECK-NEXT:    slli a0, a0, 32
; CHECK-NEXT:    srli a0, a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v10, v10
; CHECK-NEXT:    vfmadd.vv v10, v10, v9
; CHECK-NEXT:    vs1r.v v10, (a1)
; CHECK-NEXT:    ret
bb:
  %tmp = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat> %arg, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  %tmp2 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %tmp, <vscale x 2 x float> %tmp, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  %tmp3 = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %tmp, <vscale x 2 x float> %tmp, <vscale x 2 x float> %acc, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  store <vscale x 2 x float> %tmp3, ptr %p
  ret <vscale x 2 x float> %tmp2
}

; With Zvfh the fpext+fadd pair folds into vfwadd.vv; with Zvfhmin there is
; no widening add, so the extend stays as a separate vfwcvt.f.f.v.
define <vscale x 2 x float> @vfwadd_same_operand(<vscale x 2 x half> %arg, i32 signext %vl) {
; ZVFH-LABEL: vfwadd_same_operand:
; ZVFH:       # %bb.0: # %bb
; ZVFH-NEXT:    slli a0, a0, 32
; ZVFH-NEXT:    srli a0, a0, 32
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwadd.vv v9, v8, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfwadd_same_operand:
; ZVFHMIN:       # %bb.0: # %bb
; ZVFHMIN-NEXT:    slli a0, a0, 32
; ZVFHMIN-NEXT:    srli a0, a0, 32
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v9, v9
; ZVFHMIN-NEXT:    ret
bb:
  %tmp = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %arg, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  %tmp2 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %tmp, <vscale x 2 x float> %tmp, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x float> %tmp2
}

; The vp.merge makes the fadd effectively tail undisturbed, which must be
; preserved with a tu policy when the add is widened.
define <vscale x 2 x float> @vfwadd_tu(<vscale x 2 x half> %arg, <vscale x 2 x float> %arg1, i32 signext %arg2) {
; ZVFH-LABEL: vfwadd_tu:
; ZVFH:       # %bb.0: # %bb
; ZVFH-NEXT:    slli a0, a0, 32
; ZVFH-NEXT:    srli a0, a0, 32
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
; ZVFH-NEXT:    vfwadd.wv v9, v9, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfwadd_tu:
; ZVFHMIN:       # %bb.0: # %bb
; ZVFHMIN-NEXT:    slli a0, a0, 32
; ZVFHMIN-NEXT:    srli a0, a0, 32
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
bb:
  %tmp = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %arg, <vscale x 2 x i1> splat (i1 true), i32 %arg2)
  %tmp3 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %arg1, <vscale x 2 x float> %tmp, <vscale x 2 x i1> splat (i1 true), i32 %arg2)
  %tmp4 = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> %tmp3, <vscale x 2 x float> %arg1, i32 %arg2)
  ret <vscale x 2 x float> %tmp4
}

declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)