; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -verify-machineinstrs \
; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s
; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin -verify-machineinstrs \
; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s

; Exercises bfloat loads and stores with +zfbfmin on riscv32 (ilp32f) and
; riscv64 (lp64f). Each function below expects the memory access to be
; emitted as flh/fsh, with the arithmetic done in single precision via
; fcvt.s.bf16 / fcvt.bf16.s.

; Load elements 0 and 3 of %a (byte offsets 0 and 6) and add them.
define bfloat @flh(ptr %a) nounwind {
; CHECK-LABEL: flh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    flh fa5, 6(a0)
; CHECK-NEXT:    flh fa4, 0(a0)
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  %1 = load bfloat, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 3
  %3 = load bfloat, ptr %2
; Use both loaded values in an FP op to ensure an flh is used, even for the
; soft bfloat ABI
  %4 = fadd bfloat %1, %3
  ret bfloat %4
}

; Store the sum %b + %c to elements 0 and 8 of %a (byte offsets 0 and 16).
define dso_local void @fsh(ptr %a, bfloat %b, bfloat %c) nounwind {
; CHECK-LABEL: fsh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
; CHECK-NEXT:    fsh fa5, 0(a0)
; CHECK-NEXT:    fsh fa5, 16(a0)
; CHECK-NEXT:    ret
  %1 = fadd bfloat %b, %c
  store bfloat %1, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 8
  store bfloat %1, ptr %2
  ret void
}

; Check load and store to a global
@G = dso_local global bfloat 0.0

define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
; Use %a and %b in an FP op to ensure bfloat precision floating point registers
; are used, even for the soft bfloat ABI
; CHECK-LABEL: flh_fsh_global:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
; CHECK-NEXT:    lui a0, %hi(G)
; CHECK-NEXT:    fadd.s fa5, fa4, fa5
; CHECK-NEXT:    flh fa4, %lo(G)(a0)
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    addi a1, a0, %lo(G)
; CHECK-NEXT:    fsh fa0, %lo(G)(a0)
; CHECK-NEXT:    flh fa5, 18(a1)
; CHECK-NEXT:    fsh fa0, 18(a1)
; CHECK-NEXT:    ret
  %1 = fadd bfloat %a, %b
  %2 = load volatile bfloat, ptr @G
  store bfloat %1, ptr @G
  %3 = getelementptr bfloat, ptr @G, i32 9
  %4 = load volatile bfloat, ptr %3
  store bfloat %1, ptr %3
  ret bfloat %1
}

; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1
; (the address 3735928559 is 0xDEADBEEF, materialized as 0xDEADC000 - 273).
define bfloat @flh_fsh_constant(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: flh_fsh_constant:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    lui a0, 912092
; RV32IZFBFMIN-NEXT:    flh fa5, -273(a0)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    fsh fa0, -273(a0)
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: flh_fsh_constant:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    lui a0, 228023
; RV64IZFBFMIN-NEXT:    slli a0, a0, 2
; RV64IZFBFMIN-NEXT:    flh fa5, -273(a0)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    fsh fa0, -273(a0)
; RV64IZFBFMIN-NEXT:    ret
  %1 = inttoptr i32 3735928559 to ptr
  %2 = load volatile bfloat, ptr %1
  %3 = fadd bfloat %a, %2
  store bfloat %3, ptr %1
  ret bfloat %3
}

declare void @notdead(ptr)

; Load a bfloat from a stack slot whose address is passed to @notdead.
define bfloat @flh_stack(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: flh_stack:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fmv.s fs0, fa0
; RV32IZFBFMIN-NEXT:    addi a0, sp, 4
; RV32IZFBFMIN-NEXT:    call notdead
; RV32IZFBFMIN-NEXT:    flh fa5, 4(sp)
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fs0
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: flh_stack:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fmv.s fs0, fa0
; RV64IZFBFMIN-NEXT:    mv a0, sp
; RV64IZFBFMIN-NEXT:    call notdead
; RV64IZFBFMIN-NEXT:    flh fa5, 0(sp)
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fs0
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
; RV64IZFBFMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFBFMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = alloca bfloat, align 4
  call void @notdead(ptr %1)
  %2 = load bfloat, ptr %1
  %3 = fadd bfloat %2, %a ; force load into FPR16
  ret bfloat %3
}

; Store a bfloat sum to a stack slot whose address is then passed to @notdead.
define dso_local void @fsh_stack(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fsh_stack:
; RV32IZFBFMIN:       # %bb.0:
; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV32IZFBFMIN-NEXT:    fsh fa5, 8(sp)
; RV32IZFBFMIN-NEXT:    addi a0, sp, 8
; RV32IZFBFMIN-NEXT:    call notdead
; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
; RV32IZFBFMIN-NEXT:    ret
;
; RV64IZFBFMIN-LABEL: fsh_stack:
; RV64IZFBFMIN:       # %bb.0:
; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
; RV64IZFBFMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
; RV64IZFBFMIN-NEXT:    fsh fa5, 4(sp)
; RV64IZFBFMIN-NEXT:    addi a0, sp, 4
; RV64IZFBFMIN-NEXT:    call notdead
; RV64IZFBFMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
; RV64IZFBFMIN-NEXT:    ret
  %1 = fadd bfloat %a, %b ; force store from FPR16
  %2 = alloca bfloat, align 4
  store bfloat %1, ptr %2
  call void @notdead(ptr %2)
  ret void
}