xref: /llvm-project/llvm/test/CodeGen/RISCV/bfloat.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32
3; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64
4; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32
5; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs < %s  | FileCheck %s -check-prefix=RV64ID-LP64
6; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32D
7; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64d -verify-machineinstrs < %s  | FileCheck %s -check-prefix=RV64ID-LP64D
8
; fptrunc float -> bfloat.
; Every configuration below is expected to lower this to a call to the
; __truncsfbf2 libcall.
; With +d, the returned bits are additionally OR'd with the constant built by
; `lui a1, 1048560` (0xFFFF0 << 12), which sets all bits above the low 16.
; Presumably this NaN-boxes the 16-bit result; confirm against the backend.
; Under the ilp32d/lp64d ABIs the value lives in fa0, so it is moved to a0 for
; the OR and moved back to fa0 afterwards.
9define bfloat @float_to_bfloat(float %a) nounwind {
10; RV32I-ILP32-LABEL: float_to_bfloat:
11; RV32I-ILP32:       # %bb.0:
12; RV32I-ILP32-NEXT:    addi sp, sp, -16
13; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
14; RV32I-ILP32-NEXT:    call __truncsfbf2
15; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
16; RV32I-ILP32-NEXT:    addi sp, sp, 16
17; RV32I-ILP32-NEXT:    ret
18;
19; RV64I-LP64-LABEL: float_to_bfloat:
20; RV64I-LP64:       # %bb.0:
21; RV64I-LP64-NEXT:    addi sp, sp, -16
22; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
23; RV64I-LP64-NEXT:    call __truncsfbf2
24; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
25; RV64I-LP64-NEXT:    addi sp, sp, 16
26; RV64I-LP64-NEXT:    ret
27;
28; RV32ID-ILP32-LABEL: float_to_bfloat:
29; RV32ID-ILP32:       # %bb.0:
30; RV32ID-ILP32-NEXT:    addi sp, sp, -16
31; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
32; RV32ID-ILP32-NEXT:    call __truncsfbf2
33; RV32ID-ILP32-NEXT:    lui a1, 1048560
34; RV32ID-ILP32-NEXT:    or a0, a0, a1
35; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
36; RV32ID-ILP32-NEXT:    addi sp, sp, 16
37; RV32ID-ILP32-NEXT:    ret
38;
39; RV64ID-LP64-LABEL: float_to_bfloat:
40; RV64ID-LP64:       # %bb.0:
41; RV64ID-LP64-NEXT:    addi sp, sp, -16
42; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
43; RV64ID-LP64-NEXT:    call __truncsfbf2
44; RV64ID-LP64-NEXT:    lui a1, 1048560
45; RV64ID-LP64-NEXT:    or a0, a0, a1
46; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
47; RV64ID-LP64-NEXT:    addi sp, sp, 16
48; RV64ID-LP64-NEXT:    ret
49;
50; RV32ID-ILP32D-LABEL: float_to_bfloat:
51; RV32ID-ILP32D:       # %bb.0:
52; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
53; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
54; RV32ID-ILP32D-NEXT:    call __truncsfbf2
55; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
56; RV32ID-ILP32D-NEXT:    lui a1, 1048560
57; RV32ID-ILP32D-NEXT:    or a0, a0, a1
58; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
59; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
60; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
61; RV32ID-ILP32D-NEXT:    ret
62;
63; RV64ID-LP64D-LABEL: float_to_bfloat:
64; RV64ID-LP64D:       # %bb.0:
65; RV64ID-LP64D-NEXT:    addi sp, sp, -16
66; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
67; RV64ID-LP64D-NEXT:    call __truncsfbf2
68; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
69; RV64ID-LP64D-NEXT:    lui a1, 1048560
70; RV64ID-LP64D-NEXT:    or a0, a0, a1
71; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
72; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
73; RV64ID-LP64D-NEXT:    addi sp, sp, 16
74; RV64ID-LP64D-NEXT:    ret
75  %1 = fptrunc float %a to bfloat
76  ret bfloat %1
77}
78
; fptrunc double -> bfloat.
; Every configuration below is expected to lower this to a single call to the
; __truncdfbf2 libcall; the checks contain no intermediate conversion to float.
; With +d, the returned bits are additionally OR'd with the constant built by
; `lui a1, 1048560` (0xFFFF0 << 12), which sets all bits above the low 16.
; Presumably this NaN-boxes the 16-bit result; confirm against the backend.
; Under the ilp32d/lp64d ABIs the result is moved from fa0 to a0 for the OR and
; back to fa0 afterwards.
79define bfloat @double_to_bfloat(double %a) nounwind {
80; RV32I-ILP32-LABEL: double_to_bfloat:
81; RV32I-ILP32:       # %bb.0:
82; RV32I-ILP32-NEXT:    addi sp, sp, -16
83; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
84; RV32I-ILP32-NEXT:    call __truncdfbf2
85; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
86; RV32I-ILP32-NEXT:    addi sp, sp, 16
87; RV32I-ILP32-NEXT:    ret
88;
89; RV64I-LP64-LABEL: double_to_bfloat:
90; RV64I-LP64:       # %bb.0:
91; RV64I-LP64-NEXT:    addi sp, sp, -16
92; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
93; RV64I-LP64-NEXT:    call __truncdfbf2
94; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
95; RV64I-LP64-NEXT:    addi sp, sp, 16
96; RV64I-LP64-NEXT:    ret
97;
98; RV32ID-ILP32-LABEL: double_to_bfloat:
99; RV32ID-ILP32:       # %bb.0:
100; RV32ID-ILP32-NEXT:    addi sp, sp, -16
101; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
102; RV32ID-ILP32-NEXT:    call __truncdfbf2
103; RV32ID-ILP32-NEXT:    lui a1, 1048560
104; RV32ID-ILP32-NEXT:    or a0, a0, a1
105; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
106; RV32ID-ILP32-NEXT:    addi sp, sp, 16
107; RV32ID-ILP32-NEXT:    ret
108;
109; RV64ID-LP64-LABEL: double_to_bfloat:
110; RV64ID-LP64:       # %bb.0:
111; RV64ID-LP64-NEXT:    addi sp, sp, -16
112; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
113; RV64ID-LP64-NEXT:    call __truncdfbf2
114; RV64ID-LP64-NEXT:    lui a1, 1048560
115; RV64ID-LP64-NEXT:    or a0, a0, a1
116; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
117; RV64ID-LP64-NEXT:    addi sp, sp, 16
118; RV64ID-LP64-NEXT:    ret
119;
120; RV32ID-ILP32D-LABEL: double_to_bfloat:
121; RV32ID-ILP32D:       # %bb.0:
122; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
123; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
124; RV32ID-ILP32D-NEXT:    call __truncdfbf2
125; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
126; RV32ID-ILP32D-NEXT:    lui a1, 1048560
127; RV32ID-ILP32D-NEXT:    or a0, a0, a1
128; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
129; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
130; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
131; RV32ID-ILP32D-NEXT:    ret
132;
133; RV64ID-LP64D-LABEL: double_to_bfloat:
134; RV64ID-LP64D:       # %bb.0:
135; RV64ID-LP64D-NEXT:    addi sp, sp, -16
136; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
137; RV64ID-LP64D-NEXT:    call __truncdfbf2
138; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
139; RV64ID-LP64D-NEXT:    lui a1, 1048560
140; RV64ID-LP64D-NEXT:    or a0, a0, a1
141; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
142; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
143; RV64ID-LP64D-NEXT:    addi sp, sp, 16
144; RV64ID-LP64D-NEXT:    ret
145  %1 = fptrunc double %a to bfloat
146  ret bfloat %1
147}
148
; fpext bfloat -> float.
; No libcall is expected in any configuration: the extension is a left shift
; of the raw bits by 16 (slli; slliw on RV64 without F/D).
; Under the ilp32d/lp64d ABIs the argument arrives in fa0 and the result is
; returned in fa0, so the shift is bracketed by fmv.x.w / fmv.w.x.
149define float @bfloat_to_float(bfloat %a) nounwind {
150; RV32I-ILP32-LABEL: bfloat_to_float:
151; RV32I-ILP32:       # %bb.0:
152; RV32I-ILP32-NEXT:    slli a0, a0, 16
153; RV32I-ILP32-NEXT:    ret
154;
155; RV64I-LP64-LABEL: bfloat_to_float:
156; RV64I-LP64:       # %bb.0:
157; RV64I-LP64-NEXT:    slliw a0, a0, 16
158; RV64I-LP64-NEXT:    ret
159;
160; RV32ID-ILP32-LABEL: bfloat_to_float:
161; RV32ID-ILP32:       # %bb.0:
162; RV32ID-ILP32-NEXT:    slli a0, a0, 16
163; RV32ID-ILP32-NEXT:    ret
164;
165; RV64ID-LP64-LABEL: bfloat_to_float:
166; RV64ID-LP64:       # %bb.0:
167; RV64ID-LP64-NEXT:    slli a0, a0, 16
168; RV64ID-LP64-NEXT:    ret
169;
170; RV32ID-ILP32D-LABEL: bfloat_to_float:
171; RV32ID-ILP32D:       # %bb.0:
172; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
173; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
174; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
175; RV32ID-ILP32D-NEXT:    ret
176;
177; RV64ID-LP64D-LABEL: bfloat_to_float:
178; RV64ID-LP64D:       # %bb.0:
179; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
180; RV64ID-LP64D-NEXT:    slli a0, a0, 16
181; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
182; RV64ID-LP64D-NEXT:    ret
183  %1 = fpext bfloat %a to float
184  ret float %1
185}
186
; fpext bfloat -> double.
; In every configuration the raw bits are first shifted left by 16 (the same
; step used for bfloat -> float), then widened from float to double:
;   - without F/D: call to the __extendsfdf2 libcall;
;   - with +d: fmv.w.x into an FP register followed by fcvt.d.s.
; How the double is returned depends on the ABI:
;   - ilp32 with +d: spilled with fsd and reloaded as two words into a0/a1;
;   - lp64 with +d: moved to a0 with fmv.x.d;
;   - ilp32d/lp64d: fcvt.d.s writes fa0 directly.
187define double @bfloat_to_double(bfloat %a) nounwind {
188; RV32I-ILP32-LABEL: bfloat_to_double:
189; RV32I-ILP32:       # %bb.0:
190; RV32I-ILP32-NEXT:    addi sp, sp, -16
191; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
192; RV32I-ILP32-NEXT:    slli a0, a0, 16
193; RV32I-ILP32-NEXT:    call __extendsfdf2
194; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
195; RV32I-ILP32-NEXT:    addi sp, sp, 16
196; RV32I-ILP32-NEXT:    ret
197;
198; RV64I-LP64-LABEL: bfloat_to_double:
199; RV64I-LP64:       # %bb.0:
200; RV64I-LP64-NEXT:    addi sp, sp, -16
201; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
202; RV64I-LP64-NEXT:    slliw a0, a0, 16
203; RV64I-LP64-NEXT:    call __extendsfdf2
204; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
205; RV64I-LP64-NEXT:    addi sp, sp, 16
206; RV64I-LP64-NEXT:    ret
207;
208; RV32ID-ILP32-LABEL: bfloat_to_double:
209; RV32ID-ILP32:       # %bb.0:
210; RV32ID-ILP32-NEXT:    addi sp, sp, -16
211; RV32ID-ILP32-NEXT:    slli a0, a0, 16
212; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a0
213; RV32ID-ILP32-NEXT:    fcvt.d.s fa5, fa5
214; RV32ID-ILP32-NEXT:    fsd fa5, 8(sp)
215; RV32ID-ILP32-NEXT:    lw a0, 8(sp)
216; RV32ID-ILP32-NEXT:    lw a1, 12(sp)
217; RV32ID-ILP32-NEXT:    addi sp, sp, 16
218; RV32ID-ILP32-NEXT:    ret
219;
220; RV64ID-LP64-LABEL: bfloat_to_double:
221; RV64ID-LP64:       # %bb.0:
222; RV64ID-LP64-NEXT:    slli a0, a0, 16
223; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
224; RV64ID-LP64-NEXT:    fcvt.d.s fa5, fa5
225; RV64ID-LP64-NEXT:    fmv.x.d a0, fa5
226; RV64ID-LP64-NEXT:    ret
227;
228; RV32ID-ILP32D-LABEL: bfloat_to_double:
229; RV32ID-ILP32D:       # %bb.0:
230; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
231; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
232; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a0
233; RV32ID-ILP32D-NEXT:    fcvt.d.s fa0, fa5
234; RV32ID-ILP32D-NEXT:    ret
235;
236; RV64ID-LP64D-LABEL: bfloat_to_double:
237; RV64ID-LP64D:       # %bb.0:
238; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
239; RV64ID-LP64D-NEXT:    slli a0, a0, 16
240; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a0
241; RV64ID-LP64D-NEXT:    fcvt.d.s fa0, fa5
242; RV64ID-LP64D-NEXT:    ret
243  %1 = fpext bfloat %a to double
244  ret double %1
245}
246
; bitcast i16 -> bfloat.
; Without F/D the expected function body is just `ret`.
; With +d, the bits are OR'd with the constant built by `lui a1, 1048560`
; (0xFFFF0 << 12), which sets all bits above the low 16. Presumably this
; NaN-boxes the returned bfloat; confirm against the backend.
; Under the ilp32d/lp64d ABIs the OR'd value is then moved into fa0.
247define bfloat @i16_to_bfloat(i16 %a) nounwind {
248; RV32I-ILP32-LABEL: i16_to_bfloat:
249; RV32I-ILP32:       # %bb.0:
250; RV32I-ILP32-NEXT:    ret
251;
252; RV64I-LP64-LABEL: i16_to_bfloat:
253; RV64I-LP64:       # %bb.0:
254; RV64I-LP64-NEXT:    ret
255;
256; RV32ID-ILP32-LABEL: i16_to_bfloat:
257; RV32ID-ILP32:       # %bb.0:
258; RV32ID-ILP32-NEXT:    lui a1, 1048560
259; RV32ID-ILP32-NEXT:    or a0, a0, a1
260; RV32ID-ILP32-NEXT:    ret
261;
262; RV64ID-LP64-LABEL: i16_to_bfloat:
263; RV64ID-LP64:       # %bb.0:
264; RV64ID-LP64-NEXT:    lui a1, 1048560
265; RV64ID-LP64-NEXT:    or a0, a0, a1
266; RV64ID-LP64-NEXT:    ret
267;
268; RV32ID-ILP32D-LABEL: i16_to_bfloat:
269; RV32ID-ILP32D:       # %bb.0:
270; RV32ID-ILP32D-NEXT:    lui a1, 1048560
271; RV32ID-ILP32D-NEXT:    or a0, a0, a1
272; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
273; RV32ID-ILP32D-NEXT:    ret
274;
275; RV64ID-LP64D-LABEL: i16_to_bfloat:
276; RV64ID-LP64D:       # %bb.0:
277; RV64ID-LP64D-NEXT:    lui a1, 1048560
278; RV64ID-LP64D-NEXT:    or a0, a0, a1
279; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
280; RV64ID-LP64D-NEXT:    ret
281  %1 = bitcast i16 %a to bfloat
282  ret bfloat %1
283}
284
; bitcast bfloat -> i16.
; When the bfloat argument arrives in a0 (all configurations except the
; ilp32d/lp64d ABIs) the expected function body is just `ret`.
; Under ilp32d/lp64d the argument arrives in fa0 and a single fmv.x.w moves
; it to a0; the checks expect no masking of the upper bits.
285define i16 @bfloat_to_i16(bfloat %a) nounwind {
286; RV32I-ILP32-LABEL: bfloat_to_i16:
287; RV32I-ILP32:       # %bb.0:
288; RV32I-ILP32-NEXT:    ret
289;
290; RV64I-LP64-LABEL: bfloat_to_i16:
291; RV64I-LP64:       # %bb.0:
292; RV64I-LP64-NEXT:    ret
293;
294; RV32ID-ILP32-LABEL: bfloat_to_i16:
295; RV32ID-ILP32:       # %bb.0:
296; RV32ID-ILP32-NEXT:    ret
297;
298; RV64ID-LP64-LABEL: bfloat_to_i16:
299; RV64ID-LP64:       # %bb.0:
300; RV64ID-LP64-NEXT:    ret
301;
302; RV32ID-ILP32D-LABEL: bfloat_to_i16:
303; RV32ID-ILP32D:       # %bb.0:
304; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
305; RV32ID-ILP32D-NEXT:    ret
306;
307; RV64ID-LP64D-LABEL: bfloat_to_i16:
308; RV64ID-LP64D:       # %bb.0:
309; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
310; RV64ID-LP64D-NEXT:    ret
311  %1 = bitcast bfloat %a to i16
312  ret i16 %1
313}
314
; fadd on two bfloat values.
; The checks show the addition being carried out in single precision:
;   1. both operands are shifted left by 16 (bfloat -> float);
;   2. the sum is computed by the __addsf3 libcall without F/D, or by fadd.s
;      with +d;
;   3. the float sum is narrowed with the __truncsfbf2 libcall.
; With +d, the returned bits are additionally OR'd with the constant built by
; `lui a1, 1048560` (0xFFFF0 << 12), which sets all bits above the low 16.
; Presumably this NaN-boxes the 16-bit result; confirm against the backend.
; Under the ilp32d/lp64d ABIs the operands arrive in fa0/fa1 and are moved to
; integer registers for the shifts; the result goes back to fa0.
315define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
316; RV32I-ILP32-LABEL: bfloat_add:
317; RV32I-ILP32:       # %bb.0:
318; RV32I-ILP32-NEXT:    addi sp, sp, -16
319; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
320; RV32I-ILP32-NEXT:    slli a0, a0, 16
321; RV32I-ILP32-NEXT:    slli a1, a1, 16
322; RV32I-ILP32-NEXT:    call __addsf3
323; RV32I-ILP32-NEXT:    call __truncsfbf2
324; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
325; RV32I-ILP32-NEXT:    addi sp, sp, 16
326; RV32I-ILP32-NEXT:    ret
327;
328; RV64I-LP64-LABEL: bfloat_add:
329; RV64I-LP64:       # %bb.0:
330; RV64I-LP64-NEXT:    addi sp, sp, -16
331; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
332; RV64I-LP64-NEXT:    slliw a0, a0, 16
333; RV64I-LP64-NEXT:    slliw a1, a1, 16
334; RV64I-LP64-NEXT:    call __addsf3
335; RV64I-LP64-NEXT:    call __truncsfbf2
336; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
337; RV64I-LP64-NEXT:    addi sp, sp, 16
338; RV64I-LP64-NEXT:    ret
339;
340; RV32ID-ILP32-LABEL: bfloat_add:
341; RV32ID-ILP32:       # %bb.0:
342; RV32ID-ILP32-NEXT:    addi sp, sp, -16
343; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
344; RV32ID-ILP32-NEXT:    slli a1, a1, 16
345; RV32ID-ILP32-NEXT:    slli a0, a0, 16
346; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a1
347; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
348; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
349; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
350; RV32ID-ILP32-NEXT:    call __truncsfbf2
351; RV32ID-ILP32-NEXT:    lui a1, 1048560
352; RV32ID-ILP32-NEXT:    or a0, a0, a1
353; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
354; RV32ID-ILP32-NEXT:    addi sp, sp, 16
355; RV32ID-ILP32-NEXT:    ret
356;
357; RV64ID-LP64-LABEL: bfloat_add:
358; RV64ID-LP64:       # %bb.0:
359; RV64ID-LP64-NEXT:    addi sp, sp, -16
360; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
361; RV64ID-LP64-NEXT:    slli a1, a1, 16
362; RV64ID-LP64-NEXT:    slli a0, a0, 16
363; RV64ID-LP64-NEXT:    fmv.w.x fa5, a1
364; RV64ID-LP64-NEXT:    fmv.w.x fa4, a0
365; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
366; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
367; RV64ID-LP64-NEXT:    call __truncsfbf2
368; RV64ID-LP64-NEXT:    lui a1, 1048560
369; RV64ID-LP64-NEXT:    or a0, a0, a1
370; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
371; RV64ID-LP64-NEXT:    addi sp, sp, 16
372; RV64ID-LP64-NEXT:    ret
373;
374; RV32ID-ILP32D-LABEL: bfloat_add:
375; RV32ID-ILP32D:       # %bb.0:
376; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
377; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
378; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
379; RV32ID-ILP32D-NEXT:    fmv.x.w a1, fa1
380; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
381; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
382; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
383; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
384; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
385; RV32ID-ILP32D-NEXT:    call __truncsfbf2
386; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
387; RV32ID-ILP32D-NEXT:    lui a1, 1048560
388; RV32ID-ILP32D-NEXT:    or a0, a0, a1
389; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
390; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
391; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
392; RV32ID-ILP32D-NEXT:    ret
393;
394; RV64ID-LP64D-LABEL: bfloat_add:
395; RV64ID-LP64D:       # %bb.0:
396; RV64ID-LP64D-NEXT:    addi sp, sp, -16
397; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
398; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
399; RV64ID-LP64D-NEXT:    fmv.x.w a1, fa1
400; RV64ID-LP64D-NEXT:    slli a1, a1, 16
401; RV64ID-LP64D-NEXT:    slli a0, a0, 16
402; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
403; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
404; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
405; RV64ID-LP64D-NEXT:    call __truncsfbf2
406; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
407; RV64ID-LP64D-NEXT:    lui a1, 1048560
408; RV64ID-LP64D-NEXT:    or a0, a0, a1
409; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
410; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
411; RV64ID-LP64D-NEXT:    addi sp, sp, 16
412; RV64ID-LP64D-NEXT:    ret
413  %1 = fadd bfloat %a, %b
414  ret bfloat %1
415}
416
; Loads two bfloat values from memory and returns their sum.
; The IR reads element 0 and element 3 of the bfloat array at %a; the checks
; expect halfword loads at byte offsets 0 and 6 from a0.
; The loads are `lh` without F/D and `lhu` with +d. Either way the loaded bits
; are shifted left by 16 before use.
; The addition then follows the same pattern as a plain bfloat fadd: sum in
; single precision (__addsf3 libcall, or fadd.s with +d), then narrow with the
; __truncsfbf2 libcall. With +d the returned bits are OR'd with the constant
; built by `lui a1, 1048560` (0xFFFF0 << 12).
417define bfloat @bfloat_load(ptr %a) nounwind {
418; RV32I-ILP32-LABEL: bfloat_load:
419; RV32I-ILP32:       # %bb.0:
420; RV32I-ILP32-NEXT:    addi sp, sp, -16
421; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
422; RV32I-ILP32-NEXT:    lh a1, 0(a0)
423; RV32I-ILP32-NEXT:    lh a2, 6(a0)
424; RV32I-ILP32-NEXT:    slli a0, a1, 16
425; RV32I-ILP32-NEXT:    slli a1, a2, 16
426; RV32I-ILP32-NEXT:    call __addsf3
427; RV32I-ILP32-NEXT:    call __truncsfbf2
428; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
429; RV32I-ILP32-NEXT:    addi sp, sp, 16
430; RV32I-ILP32-NEXT:    ret
431;
432; RV64I-LP64-LABEL: bfloat_load:
433; RV64I-LP64:       # %bb.0:
434; RV64I-LP64-NEXT:    addi sp, sp, -16
435; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
436; RV64I-LP64-NEXT:    lh a1, 0(a0)
437; RV64I-LP64-NEXT:    lh a2, 6(a0)
438; RV64I-LP64-NEXT:    slliw a0, a1, 16
439; RV64I-LP64-NEXT:    slliw a1, a2, 16
440; RV64I-LP64-NEXT:    call __addsf3
441; RV64I-LP64-NEXT:    call __truncsfbf2
442; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
443; RV64I-LP64-NEXT:    addi sp, sp, 16
444; RV64I-LP64-NEXT:    ret
445;
446; RV32ID-ILP32-LABEL: bfloat_load:
447; RV32ID-ILP32:       # %bb.0:
448; RV32ID-ILP32-NEXT:    addi sp, sp, -16
449; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
450; RV32ID-ILP32-NEXT:    lhu a1, 6(a0)
451; RV32ID-ILP32-NEXT:    lhu a0, 0(a0)
452; RV32ID-ILP32-NEXT:    slli a1, a1, 16
453; RV32ID-ILP32-NEXT:    slli a0, a0, 16
454; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a1
455; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
456; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
457; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
458; RV32ID-ILP32-NEXT:    call __truncsfbf2
459; RV32ID-ILP32-NEXT:    lui a1, 1048560
460; RV32ID-ILP32-NEXT:    or a0, a0, a1
461; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
462; RV32ID-ILP32-NEXT:    addi sp, sp, 16
463; RV32ID-ILP32-NEXT:    ret
464;
465; RV64ID-LP64-LABEL: bfloat_load:
466; RV64ID-LP64:       # %bb.0:
467; RV64ID-LP64-NEXT:    addi sp, sp, -16
468; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
469; RV64ID-LP64-NEXT:    lhu a1, 6(a0)
470; RV64ID-LP64-NEXT:    lhu a0, 0(a0)
471; RV64ID-LP64-NEXT:    slli a1, a1, 16
472; RV64ID-LP64-NEXT:    slli a0, a0, 16
473; RV64ID-LP64-NEXT:    fmv.w.x fa5, a1
474; RV64ID-LP64-NEXT:    fmv.w.x fa4, a0
475; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
476; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
477; RV64ID-LP64-NEXT:    call __truncsfbf2
478; RV64ID-LP64-NEXT:    lui a1, 1048560
479; RV64ID-LP64-NEXT:    or a0, a0, a1
480; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
481; RV64ID-LP64-NEXT:    addi sp, sp, 16
482; RV64ID-LP64-NEXT:    ret
483;
484; RV32ID-ILP32D-LABEL: bfloat_load:
485; RV32ID-ILP32D:       # %bb.0:
486; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
487; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
488; RV32ID-ILP32D-NEXT:    lhu a1, 6(a0)
489; RV32ID-ILP32D-NEXT:    lhu a0, 0(a0)
490; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
491; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
492; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
493; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
494; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
495; RV32ID-ILP32D-NEXT:    call __truncsfbf2
496; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
497; RV32ID-ILP32D-NEXT:    lui a1, 1048560
498; RV32ID-ILP32D-NEXT:    or a0, a0, a1
499; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
500; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
501; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
502; RV32ID-ILP32D-NEXT:    ret
503;
504; RV64ID-LP64D-LABEL: bfloat_load:
505; RV64ID-LP64D:       # %bb.0:
506; RV64ID-LP64D-NEXT:    addi sp, sp, -16
507; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
508; RV64ID-LP64D-NEXT:    lhu a1, 6(a0)
509; RV64ID-LP64D-NEXT:    lhu a0, 0(a0)
510; RV64ID-LP64D-NEXT:    slli a1, a1, 16
511; RV64ID-LP64D-NEXT:    slli a0, a0, 16
512; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
513; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
514; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
515; RV64ID-LP64D-NEXT:    call __truncsfbf2
516; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
517; RV64ID-LP64D-NEXT:    lui a1, 1048560
518; RV64ID-LP64D-NEXT:    or a0, a0, a1
519; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
520; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
521; RV64ID-LP64D-NEXT:    addi sp, sp, 16
522; RV64ID-LP64D-NEXT:    ret
523  %1 = load bfloat, ptr %a
524  %2 = getelementptr bfloat, ptr %a, i32 3
525  %3 = load bfloat, ptr %2
526  %4 = fadd bfloat %1, %3
527  ret bfloat %4
528}
529
; Adds two bfloat arguments and stores the sum to memory twice.
; The IR stores the result to element 0 and element 8 of the bfloat array at
; %a; the checks expect `sh` stores at byte offsets 0 and 16.
; The pointer argument is copied from a0 into the callee-saved register s0
; (spilled and reloaded around the body) because a0 is reused for the
; arithmetic and libcalls before the stores happen.
; The addition follows the same pattern as a plain bfloat fadd: operands
; shifted left by 16, sum in single precision (__addsf3 libcall, or fadd.s
; with +d), then narrowed with the __truncsfbf2 libcall.
; Unlike the functions that return a bfloat, no OR with the `lui 1048560`
; constant is expected here: only the low halfword is written by `sh`.
530define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
531; RV32I-ILP32-LABEL: bfloat_store:
532; RV32I-ILP32:       # %bb.0:
533; RV32I-ILP32-NEXT:    addi sp, sp, -16
534; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
535; RV32I-ILP32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
536; RV32I-ILP32-NEXT:    mv s0, a0
537; RV32I-ILP32-NEXT:    slli a0, a1, 16
538; RV32I-ILP32-NEXT:    slli a1, a2, 16
539; RV32I-ILP32-NEXT:    call __addsf3
540; RV32I-ILP32-NEXT:    call __truncsfbf2
541; RV32I-ILP32-NEXT:    sh a0, 0(s0)
542; RV32I-ILP32-NEXT:    sh a0, 16(s0)
543; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
544; RV32I-ILP32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
545; RV32I-ILP32-NEXT:    addi sp, sp, 16
546; RV32I-ILP32-NEXT:    ret
547;
548; RV64I-LP64-LABEL: bfloat_store:
549; RV64I-LP64:       # %bb.0:
550; RV64I-LP64-NEXT:    addi sp, sp, -16
551; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
552; RV64I-LP64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
553; RV64I-LP64-NEXT:    mv s0, a0
554; RV64I-LP64-NEXT:    slliw a0, a1, 16
555; RV64I-LP64-NEXT:    slliw a1, a2, 16
556; RV64I-LP64-NEXT:    call __addsf3
557; RV64I-LP64-NEXT:    call __truncsfbf2
558; RV64I-LP64-NEXT:    sh a0, 0(s0)
559; RV64I-LP64-NEXT:    sh a0, 16(s0)
560; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
561; RV64I-LP64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
562; RV64I-LP64-NEXT:    addi sp, sp, 16
563; RV64I-LP64-NEXT:    ret
564;
565; RV32ID-ILP32-LABEL: bfloat_store:
566; RV32ID-ILP32:       # %bb.0:
567; RV32ID-ILP32-NEXT:    addi sp, sp, -16
568; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
569; RV32ID-ILP32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
570; RV32ID-ILP32-NEXT:    mv s0, a0
571; RV32ID-ILP32-NEXT:    slli a2, a2, 16
572; RV32ID-ILP32-NEXT:    slli a1, a1, 16
573; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a2
574; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a1
575; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
576; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
577; RV32ID-ILP32-NEXT:    call __truncsfbf2
578; RV32ID-ILP32-NEXT:    sh a0, 0(s0)
579; RV32ID-ILP32-NEXT:    sh a0, 16(s0)
580; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
581; RV32ID-ILP32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
582; RV32ID-ILP32-NEXT:    addi sp, sp, 16
583; RV32ID-ILP32-NEXT:    ret
584;
585; RV64ID-LP64-LABEL: bfloat_store:
586; RV64ID-LP64:       # %bb.0:
587; RV64ID-LP64-NEXT:    addi sp, sp, -16
588; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
589; RV64ID-LP64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
590; RV64ID-LP64-NEXT:    mv s0, a0
591; RV64ID-LP64-NEXT:    slli a2, a2, 16
592; RV64ID-LP64-NEXT:    slli a1, a1, 16
593; RV64ID-LP64-NEXT:    fmv.w.x fa5, a2
594; RV64ID-LP64-NEXT:    fmv.w.x fa4, a1
595; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
596; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
597; RV64ID-LP64-NEXT:    call __truncsfbf2
598; RV64ID-LP64-NEXT:    sh a0, 0(s0)
599; RV64ID-LP64-NEXT:    sh a0, 16(s0)
600; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
601; RV64ID-LP64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
602; RV64ID-LP64-NEXT:    addi sp, sp, 16
603; RV64ID-LP64-NEXT:    ret
604;
605; RV32ID-ILP32D-LABEL: bfloat_store:
606; RV32ID-ILP32D:       # %bb.0:
607; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
608; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
609; RV32ID-ILP32D-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
610; RV32ID-ILP32D-NEXT:    mv s0, a0
611; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
612; RV32ID-ILP32D-NEXT:    fmv.x.w a1, fa1
613; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
614; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
615; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
616; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
617; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
618; RV32ID-ILP32D-NEXT:    call __truncsfbf2
619; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
620; RV32ID-ILP32D-NEXT:    sh a0, 0(s0)
621; RV32ID-ILP32D-NEXT:    sh a0, 16(s0)
622; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
623; RV32ID-ILP32D-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
624; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
625; RV32ID-ILP32D-NEXT:    ret
626;
627; RV64ID-LP64D-LABEL: bfloat_store:
628; RV64ID-LP64D:       # %bb.0:
629; RV64ID-LP64D-NEXT:    addi sp, sp, -16
630; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
631; RV64ID-LP64D-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
632; RV64ID-LP64D-NEXT:    mv s0, a0
633; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
634; RV64ID-LP64D-NEXT:    fmv.x.w a1, fa1
635; RV64ID-LP64D-NEXT:    slli a1, a1, 16
636; RV64ID-LP64D-NEXT:    slli a0, a0, 16
637; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
638; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
639; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
640; RV64ID-LP64D-NEXT:    call __truncsfbf2
641; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
642; RV64ID-LP64D-NEXT:    sh a0, 0(s0)
643; RV64ID-LP64D-NEXT:    sh a0, 16(s0)
644; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
645; RV64ID-LP64D-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
646; RV64ID-LP64D-NEXT:    addi sp, sp, 16
647; RV64ID-LP64D-NEXT:    ret
648  %1 = fadd bfloat %b, %c
649  store bfloat %1, ptr %a
650  %2 = getelementptr bfloat, ptr %a, i32 8
651  store bfloat %1, ptr %2
652  ret void
653}
654