xref: /llvm-project/llvm/test/CodeGen/RISCV/bfloat-mem.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -verify-machineinstrs \
3; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s
4; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin -verify-machineinstrs \
5; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s
6
; Loads two bfloat values through %a (element 0 and element 3), adds them and
; returns the sum. The expected output selects flh for both loads, the second
; one at byte offset 6 (3 elements * 2 bytes).
7define bfloat @flh(ptr %a) nounwind {
8; CHECK-LABEL: flh:
9; CHECK:       # %bb.0:
10; CHECK-NEXT:    flh fa5, 6(a0)
11; CHECK-NEXT:    flh fa4, 0(a0)
12; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
13; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
14; CHECK-NEXT:    fadd.s fa5, fa4, fa5
15; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
16; CHECK-NEXT:    ret
17  %1 = load bfloat, ptr %a
18  %2 = getelementptr bfloat, ptr %a, i32 3
19  %3 = load bfloat, ptr %2
20; Use both loaded values in an FP op to ensure an flh is used, even for the
21; soft bfloat ABI
22  %4 = fadd bfloat %1, %3
23  ret bfloat %4
24}
25
; Adds %b and %c as bfloat and stores the sum twice: at %a and at element 8 of
; %a. The expected output converts the sum to bf16 once and emits two fsh
; instructions from the same register, the second at byte offset 16.
26define dso_local void @fsh(ptr %a, bfloat %b, bfloat %c) nounwind {
27; CHECK-LABEL: fsh:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
30; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
31; CHECK-NEXT:    fadd.s fa5, fa4, fa5
32; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
33; CHECK-NEXT:    fsh fa5, 0(a0)
34; CHECK-NEXT:    fsh fa5, 16(a0)
35; CHECK-NEXT:    ret
36  %1 = fadd bfloat %b, %c
37  store bfloat %1, ptr %a
38  %2 = getelementptr bfloat, ptr %a, i32 8
39  store bfloat %1, ptr %2
40  ret void
41}
42
43; Check load and store to a global
; @G is a zero-initialised bfloat global; @flh_fsh_global accesses it both
; directly and at element index 9.
44@G = dso_local global bfloat 0.0
45
; Adds %a and %b, then for @G itself and for element 9 of @G performs a
; volatile bfloat load followed by a store of the sum, and finally returns the
; sum. The results of the volatile loads (%2 and %4) are not used by any later
; instruction, yet the expected output still contains an flh for each. The
; element-9 accesses are expected at byte offset 18 from the address of G.
46define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
47; Use %a and %b in an FP op to ensure bfloat precision floating point registers
48; are used, even for the soft bfloat ABI
49; CHECK-LABEL: flh_fsh_global:
50; CHECK:       # %bb.0:
51; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
52; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
53; CHECK-NEXT:    lui a0, %hi(G)
54; CHECK-NEXT:    fadd.s fa5, fa4, fa5
55; CHECK-NEXT:    flh fa4, %lo(G)(a0)
56; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
57; CHECK-NEXT:    addi a1, a0, %lo(G)
58; CHECK-NEXT:    fsh fa0, %lo(G)(a0)
59; CHECK-NEXT:    flh fa5, 18(a1)
60; CHECK-NEXT:    fsh fa0, 18(a1)
61; CHECK-NEXT:    ret
62  %1 = fadd bfloat %a, %b
63  %2 = load volatile bfloat, ptr @G
64  store bfloat %1, ptr @G
65  %3 = getelementptr bfloat, ptr @G, i32 9
66  %4 = load volatile bfloat, ptr %3
67  store bfloat %1, ptr %3
68  ret bfloat %1
69}
70
71; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1
; The constant address is 3735928559 (0xDEADBEEF). Its low 12 bits are 0xEEF,
; which has bit 11 set, so the expected load/store offset is -273
; (0xEEF - 0x1000) and the expected upper part is 912092 (0xDEADC) instead of
; 0xDEADB: (912092 << 12) - 273 = 3735928559. The RV64 expectation builds the
; same base with lui 228023 followed by a left shift by 2 (228023 << 2 =
; 912092).
72define bfloat @flh_fsh_constant(bfloat %a) nounwind {
73; RV32IZFBFMIN-LABEL: flh_fsh_constant:
74; RV32IZFBFMIN:       # %bb.0:
75; RV32IZFBFMIN-NEXT:    lui a0, 912092
76; RV32IZFBFMIN-NEXT:    flh fa5, -273(a0)
77; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
78; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
79; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
80; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
81; RV32IZFBFMIN-NEXT:    fsh fa0, -273(a0)
82; RV32IZFBFMIN-NEXT:    ret
83;
84; RV64IZFBFMIN-LABEL: flh_fsh_constant:
85; RV64IZFBFMIN:       # %bb.0:
86; RV64IZFBFMIN-NEXT:    lui a0, 228023
87; RV64IZFBFMIN-NEXT:    slli a0, a0, 2
88; RV64IZFBFMIN-NEXT:    flh fa5, -273(a0)
89; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
90; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
91; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
92; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
93; RV64IZFBFMIN-NEXT:    fsh fa0, -273(a0)
94; RV64IZFBFMIN-NEXT:    ret
95  %1 = inttoptr i32 3735928559 to ptr
96  %2 = load volatile bfloat, ptr %1
97  %3 = fadd bfloat %a, %2
98  store bfloat %3, ptr %1
99  ret bfloat %3
100}
101
; External function with no body in this file. @flh_stack and @fsh_stack pass
; it the address of their alloca (presumably so the stack slot has to be
; materialised in memory - TODO confirm intent).
102declare void @notdead(ptr)
103
; Allocates a bfloat stack slot, passes its address to @notdead, then loads the
; slot, adds %a to it and returns the sum. The expected output keeps %a live
; across the call in fs0 (saved and restored around the body) and reads the
; slot with an sp-relative flh: 4(sp) on RV32, 0(sp) on RV64.
104define bfloat @flh_stack(bfloat %a) nounwind {
105; RV32IZFBFMIN-LABEL: flh_stack:
106; RV32IZFBFMIN:       # %bb.0:
107; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
108; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
109; RV32IZFBFMIN-NEXT:    fsw fs0, 8(sp) # 4-byte Folded Spill
110; RV32IZFBFMIN-NEXT:    fmv.s fs0, fa0
111; RV32IZFBFMIN-NEXT:    addi a0, sp, 4
112; RV32IZFBFMIN-NEXT:    call notdead
113; RV32IZFBFMIN-NEXT:    flh fa5, 4(sp)
114; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fs0
115; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
116; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
117; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
118; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
119; RV32IZFBFMIN-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
120; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
121; RV32IZFBFMIN-NEXT:    ret
122;
123; RV64IZFBFMIN-LABEL: flh_stack:
124; RV64IZFBFMIN:       # %bb.0:
125; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
126; RV64IZFBFMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
127; RV64IZFBFMIN-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
128; RV64IZFBFMIN-NEXT:    fmv.s fs0, fa0
129; RV64IZFBFMIN-NEXT:    mv a0, sp
130; RV64IZFBFMIN-NEXT:    call notdead
131; RV64IZFBFMIN-NEXT:    flh fa5, 0(sp)
132; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fs0
133; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
134; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa5, fa4
135; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
136; RV64IZFBFMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
137; RV64IZFBFMIN-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
138; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
139; RV64IZFBFMIN-NEXT:    ret
140  %1 = alloca bfloat, align 4
141  call void @notdead(ptr %1)
142  %2 = load bfloat, ptr %1
143  %3 = fadd bfloat %2, %a ; force load into FPR16
144  ret bfloat %3
145}
146
; Adds %a and %b, stores the bfloat sum into a fresh stack slot and passes the
; slot's address to @notdead. The expected output writes the slot with an
; sp-relative fsh before the call: 8(sp) on RV32, 4(sp) on RV64.
147define dso_local void @fsh_stack(bfloat %a, bfloat %b) nounwind {
148; RV32IZFBFMIN-LABEL: fsh_stack:
149; RV32IZFBFMIN:       # %bb.0:
150; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
151; RV32IZFBFMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
152; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
153; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
154; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
155; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
156; RV32IZFBFMIN-NEXT:    fsh fa5, 8(sp)
157; RV32IZFBFMIN-NEXT:    addi a0, sp, 8
158; RV32IZFBFMIN-NEXT:    call notdead
159; RV32IZFBFMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
160; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
161; RV32IZFBFMIN-NEXT:    ret
162;
163; RV64IZFBFMIN-LABEL: fsh_stack:
164; RV64IZFBFMIN:       # %bb.0:
165; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
166; RV64IZFBFMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
167; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
168; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
169; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
170; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
171; RV64IZFBFMIN-NEXT:    fsh fa5, 4(sp)
172; RV64IZFBFMIN-NEXT:    addi a0, sp, 4
173; RV64IZFBFMIN-NEXT:    call notdead
174; RV64IZFBFMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
175; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
176; RV64IZFBFMIN-NEXT:    ret
177  %1 = fadd bfloat %a, %b ; force store from FPR16
178  %2 = alloca bfloat, align 4
179  store bfloat %1, ptr %2
180  call void @notdead(ptr %2)
181  ret void
182}
183