xref: /llvm-project/llvm/test/CodeGen/RISCV/bfloat-mem.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -verify-machineinstrs \
; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s
; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin -verify-machineinstrs \
; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s

; Load test: read bfloat elements 0 and 3 of %a and return their sum.
; Expected output: two `flh` loads (byte offsets 0 and 6), widening with
; fcvt.s.bf16, a single-precision fadd.s, and narrowing with fcvt.bf16.s
; into fa0.
define bfloat @flh(ptr %a) nounwind {
; CHECK-LABEL: flh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    flh fa5, 6(a0)
; CHECK-NEXT:    flh fa4, 0(a0)
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = load bfloat, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 3
  %3 = load bfloat, ptr %2
; Use both loaded values in an FP op to ensure an flh is used, even for the
; soft bfloat ABI
  %4 = fadd bfloat %1, %3
  ret bfloat %4
}

; Store test: compute %b + %c as bfloat and store the sum to elements 0 and 8
; of %a. Expected output: the sum is formed in single precision, narrowed with
; fcvt.bf16.s, and written with two `fsh` stores (byte offsets 0 and 16).
define dso_local void @fsh(ptr %a, bfloat %b, bfloat %c) nounwind {
; CHECK-LABEL: fsh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
; CHECK-NEXT:    fsh fa5, 0(a0)
; CHECK-NEXT:    fsh fa5, 16(a0)
; CHECK-NEXT:    ret
  %1 = fadd bfloat %b, %c
  store bfloat %1, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 8
  store bfloat %1, ptr %2
  ret void
}

; Check load and store to a global
@G = dso_local global bfloat 0.0

; The sum %a + %b is stored to @G and to element 9 of @G (byte offset 18).
; Each store is preceded by a volatile load from the same address whose result
; is unused; the expected output still contains an `flh` for each of them.
; The global is addressed through a %hi(G)/%lo(G) pair.
define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
; Use %a and %b in an FP op to ensure bfloat precision floating point registers
; are used, even for the soft bfloat ABI
; CHECK-LABEL: flh_fsh_global:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    lui a0, %hi(G)
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    flh fa4, %lo(G)(a0)
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    addi a1, a0, %lo(G)
; CHECK-NEXT:    fsh fa0, %lo(G)(a0)
; CHECK-NEXT:    flh fa5, 18(a1)
; CHECK-NEXT:    fsh fa0, 18(a1)
; CHECK-NEXT:    ret
  %1 = fadd bfloat %a, %b
  %2 = load volatile bfloat, ptr @G
  store bfloat %1, ptr @G
  %3 = getelementptr bfloat, ptr @G, i32 9
  %4 = load volatile bfloat, ptr %3
  store bfloat %1, ptr %3
  ret bfloat %1
}

; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1
;
; The address used is 3735928559 (0xdeadbeef). Its low 12 bits (0xeef) have
; bit 11 set, so they become the offset -273, and the upper part is rounded up
; from 0xdeadb to 0xdeadc (912092). The RV64 expectation builds the same upper
; part as `lui 228023` followed by `slli 2` (228023 << 2 == 912092).
define bfloat @flh_fsh_constant(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: flh_fsh_constant:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    lui a0, 912092
; RV32IZFBFMIN-NEXT:    flh fa5, -273(a0)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    fsh fa0, -273(a0)
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: flh_fsh_constant:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    lui a0, 228023
; RV64IZFBFMIN-NEXT:    slli a0, a0, 2
; RV64IZFBFMIN-NEXT:    flh fa5, -273(a0)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    fsh fa0, -273(a0)
; RV64IZFBFMIN-NEXT:    ret
  %1 = inttoptr i32 3735928559 to ptr
  %2 = load volatile bfloat, ptr %1
  %3 = fadd bfloat %a, %2
  store bfloat %3, ptr %1
  ret bfloat %3
}

; External function taking a pointer. @flh_stack and @fsh_stack pass the
; address of an alloca to it.
declare void @notdead(ptr)

; Stack load test: the address of a bfloat alloca is passed to @notdead, then
; the slot is loaded and added to %a. Expected output: an sp-relative `flh`
; after the call, with %a moved into fs0 before the call (fs0 and ra are
; spilled and reloaded around the body).
define bfloat @flh_stack(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: flh_stack:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fmv.s fs0, fa0
; RV32IZFBFMIN-NEXT:    addi a0, sp, 4
; RV32IZFBFMIN-NEXT:    call notdead
; RV32IZFBFMIN-NEXT:    flh fa5, 4(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fs0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: flh_stack:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fmv.s fs0, fa0
; RV64IZFBFMIN-NEXT:    mv a0, sp
; RV64IZFBFMIN-NEXT:    call notdead
; RV64IZFBFMIN-NEXT:    flh fa5, 0(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fs0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFBFMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = alloca bfloat, align 4
  call void @notdead(ptr %1)
  %2 = load bfloat, ptr %1
  %3 = fadd bfloat %2, %a ; force load in to FPR16
  ret bfloat %3
}

; Stack store test: the bfloat sum %a + %b is stored to an alloca whose
; address is then passed to @notdead. Expected output: an sp-relative `fsh`
; of the narrowed sum before the call.
define dso_local void @fsh_stack(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fsh_stack:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV32IZFBFMIN-NEXT:    addi a0, sp, 8
; RV32IZFBFMIN-NEXT:    call notdead
; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fsh_stack:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 4(sp)
; RV64IZFBFMIN-NEXT:    addi a0, sp, 4
; RV64IZFBFMIN-NEXT:    call notdead
; RV64IZFBFMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = fadd bfloat %a, %b ; force store from FPR16
  %2 = alloca bfloat, align 4
  store bfloat %1, ptr %2
  call void @notdead(ptr %2)
  ret void
}