; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -verify-machineinstrs \
; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s
; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin -verify-machineinstrs \
; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s

; Codegen checks for bfloat loads (flh) and stores (fsh) with +zfbfmin on
; riscv32 (ilp32f) and riscv64 (lp64f). The expected assembly below is
; autogenerated; regenerate it with the script named in the NOTE line rather
; than editing it by hand.

; Load element 0 and element 3 (byte offset 6) of %a and add them.
define bfloat @flh(ptr %a) nounwind {
; CHECK-LABEL: flh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    flh fa5, 6(a0)
; CHECK-NEXT:    flh fa4, 0(a0)
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = load bfloat, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 3
  %3 = load bfloat, ptr %2
; Use both loaded values in an FP op to ensure an flh is used, even for the
; soft bfloat ABI
  %4 = fadd bfloat %1, %3
  ret bfloat %4
}

; Store the sum %b + %c to element 0 and element 8 (byte offset 16) of %a.
define dso_local void @fsh(ptr %a, bfloat %b, bfloat %c) nounwind {
; CHECK-LABEL: fsh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
; CHECK-NEXT:    fsh fa5, 0(a0)
; CHECK-NEXT:    fsh fa5, 16(a0)
; CHECK-NEXT:    ret
  %1 = fadd bfloat %b, %c
  store bfloat %1, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 8
  store bfloat %1, ptr %2
  ret void
}

; Check load and store to a global
@G = dso_local global bfloat 0.0

; Volatile-load and store both @G itself and the element 9 slots past it
; (byte offset 18); the volatile loads keep the otherwise unused flh
; instructions alive.
define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
; Use %a and %b in an FP op to ensure bfloat precision floating point registers
; are used, even for the soft bfloat ABI
; CHECK-LABEL: flh_fsh_global:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    lui a0, %hi(G)
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    flh fa4, %lo(G)(a0)
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    addi a1, a0, %lo(G)
; CHECK-NEXT:    fsh fa0, %lo(G)(a0)
; CHECK-NEXT:    flh fa5, 18(a1)
; CHECK-NEXT:    fsh fa0, 18(a1)
; CHECK-NEXT:    ret
  %1 = fadd bfloat %a, %b
  %2 = load volatile bfloat, ptr @G
  store bfloat %1, ptr @G
  %3 = getelementptr bfloat, ptr @G, i32 9
  %4 = load volatile bfloat, ptr %3
  store bfloat %1, ptr %3
  ret bfloat %1
}

; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1
; The address is 3735928559 (0xDEADBEEF): its low 12 bits (0xEEF) have bit 11
; set, so the expected lui immediate is 0xDEADC (912092) paired with the
; sign-extended offset -273.
define bfloat @flh_fsh_constant(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: flh_fsh_constant:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    lui a0, 912092
; RV32IZFBFMIN-NEXT:    flh fa5, -273(a0)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    fsh fa0, -273(a0)
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: flh_fsh_constant:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    lui a0, 228023
; RV64IZFBFMIN-NEXT:    slli a0, a0, 2
; RV64IZFBFMIN-NEXT:    flh fa5, -273(a0)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    fsh fa0, -273(a0)
; RV64IZFBFMIN-NEXT:    ret
  %1 = inttoptr i32 3735928559 to ptr
  %2 = load volatile bfloat, ptr %1
  %3 = fadd bfloat %a, %2
  store bfloat %3, ptr %1
  ret bfloat %3
}

; External callee that receives the address of a stack slot, so the alloca in
; the two stack tests below cannot be optimized away.
declare void @notdead(ptr)

; Load a bfloat from a stack slot after the slot's address has escaped to
; @notdead.
define bfloat @flh_stack(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: flh_stack:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fmv.s fs0, fa0
; RV32IZFBFMIN-NEXT:    addi a0, sp, 4
; RV32IZFBFMIN-NEXT:    call notdead
; RV32IZFBFMIN-NEXT:    flh fa5, 4(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fs0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: flh_stack:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fmv.s fs0, fa0
; RV64IZFBFMIN-NEXT:    mv a0, sp
; RV64IZFBFMIN-NEXT:    call notdead
; RV64IZFBFMIN-NEXT:    flh fa5, 0(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fs0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFBFMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = alloca bfloat, align 4
  call void @notdead(ptr %1)
  %2 = load bfloat, ptr %1
  %3 = fadd bfloat %2, %a ; force load into FPR16
  ret bfloat %3
}

; Store a computed bfloat to a stack slot, then pass the slot's address to
; @notdead.
define dso_local void @fsh_stack(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fsh_stack:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV32IZFBFMIN-NEXT:    addi a0, sp, 8
; RV32IZFBFMIN-NEXT:    call notdead
; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fsh_stack:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 4(sp)
; RV64IZFBFMIN-NEXT:    addi a0, sp, 4
; RV64IZFBFMIN-NEXT:    call notdead
; RV64IZFBFMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = fadd bfloat %a, %b ; force store from FPR16
  %2 = alloca bfloat, align 4
  store bfloat %1, ptr %2
  call void @notdead(ptr %2)
  ret void
}