xref: /llvm-project/llvm/test/CodeGen/RISCV/bfloat-arith.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -verify-machineinstrs \
3; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK,RV32IZFBFMIN %s
4; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin -verify-machineinstrs \
5; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK,RV64IZFBFMIN %s
6
7; These tests descend from float-arith.ll, where each function was targeted at
8; a particular RISC-V FPU instruction.
9
; fadd_bf16: bfloat addition of the two arguments.
; Expected code on both targets: widen each operand with fcvt.s.bf16, add in
; single precision (fadd.s), then narrow the result with fcvt.bf16.s.
10define bfloat @fadd_bf16(bfloat %a, bfloat %b) nounwind {
11; CHECK-LABEL: fadd_bf16:
12; CHECK:       # %bb.0:
13; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
14; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
15; CHECK-NEXT:    fadd.s fa5, fa4, fa5
16; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
17; CHECK-NEXT:    ret
18  %1 = fadd bfloat %a, %b
19  ret bfloat %1
20}
21
; fsub_bf16: bfloat subtraction %a - %b.
; Expected code on both targets: widen each operand with fcvt.s.bf16, subtract
; in single precision (fsub.s), then narrow the result with fcvt.bf16.s.
22define bfloat @fsub_bf16(bfloat %a, bfloat %b) nounwind {
23; CHECK-LABEL: fsub_bf16:
24; CHECK:       # %bb.0:
25; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
26; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
27; CHECK-NEXT:    fsub.s fa5, fa4, fa5
28; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
29; CHECK-NEXT:    ret
30  %1 = fsub bfloat %a, %b
31  ret bfloat %1
32}
33
; fmul_bf16: bfloat multiplication of the two arguments.
; Expected code on both targets: widen each operand with fcvt.s.bf16, multiply
; in single precision (fmul.s), then narrow the result with fcvt.bf16.s.
34define bfloat @fmul_bf16(bfloat %a, bfloat %b) nounwind {
35; CHECK-LABEL: fmul_bf16:
36; CHECK:       # %bb.0:
37; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
38; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
39; CHECK-NEXT:    fmul.s fa5, fa4, fa5
40; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
41; CHECK-NEXT:    ret
42  %1 = fmul bfloat %a, %b
43  ret bfloat %1
44}
45
; fdiv_bf16: bfloat division %a / %b.
; Expected code on both targets: widen each operand with fcvt.s.bf16, divide in
; single precision (fdiv.s), then narrow the result with fcvt.bf16.s.
46define bfloat @fdiv_bf16(bfloat %a, bfloat %b) nounwind {
47; CHECK-LABEL: fdiv_bf16:
48; CHECK:       # %bb.0:
49; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
50; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
51; CHECK-NEXT:    fdiv.s fa5, fa4, fa5
52; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
53; CHECK-NEXT:    ret
54  %1 = fdiv bfloat %a, %b
55  ret bfloat %1
56}
57
58declare bfloat @llvm.sqrt.bf16(bfloat)
59
; fsqrt_bf16: calls the llvm.sqrt.bf16 intrinsic on the single argument.
; Expected code on both targets: widen with fcvt.s.bf16, take the square root
; in single precision (fsqrt.s), then narrow the result with fcvt.bf16.s.
60define bfloat @fsqrt_bf16(bfloat %a) nounwind {
61; CHECK-LABEL: fsqrt_bf16:
62; CHECK:       # %bb.0:
63; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
64; CHECK-NEXT:    fsqrt.s fa5, fa5
65; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
66; CHECK-NEXT:    ret
67  %1 = call bfloat @llvm.sqrt.bf16(bfloat %a)
68  ret bfloat %1
69}
70
71declare bfloat @llvm.copysign.bf16(bfloat, bfloat)
72
; fsgnj_bf16: llvm.copysign.bf16(%a, %b).
; The expected code does the whole operation in integer registers:
;   - %b (fa1) is moved out with fmv.x.h and and-ed with the constant built by
;     `lui a1, 1048568`;
;   - %a (fa0) is moved out and shifted left, then logically right, by 17 on
;     riscv32 / 49 on riscv64, which leaves only its low 15 bits;
;   - the two parts are or-ed together and moved back with fmv.h.x.
; The shift amount is the only difference between the two targets' output,
; hence the per-target prefixes instead of the shared one.
73define bfloat @fsgnj_bf16(bfloat %a, bfloat %b) nounwind {
74; RV32IZFBFMIN-LABEL: fsgnj_bf16:
75; RV32IZFBFMIN:       # %bb.0:
76; RV32IZFBFMIN-NEXT:    fmv.x.h a0, fa1
77; RV32IZFBFMIN-NEXT:    lui a1, 1048568
78; RV32IZFBFMIN-NEXT:    and a0, a0, a1
79; RV32IZFBFMIN-NEXT:    fmv.x.h a1, fa0
80; RV32IZFBFMIN-NEXT:    slli a1, a1, 17
81; RV32IZFBFMIN-NEXT:    srli a1, a1, 17
82; RV32IZFBFMIN-NEXT:    or a0, a1, a0
83; RV32IZFBFMIN-NEXT:    fmv.h.x fa0, a0
84; RV32IZFBFMIN-NEXT:    ret
85;
86; RV64IZFBFMIN-LABEL: fsgnj_bf16:
87; RV64IZFBFMIN:       # %bb.0:
88; RV64IZFBFMIN-NEXT:    fmv.x.h a0, fa1
89; RV64IZFBFMIN-NEXT:    lui a1, 1048568
90; RV64IZFBFMIN-NEXT:    and a0, a0, a1
91; RV64IZFBFMIN-NEXT:    fmv.x.h a1, fa0
92; RV64IZFBFMIN-NEXT:    slli a1, a1, 49
93; RV64IZFBFMIN-NEXT:    srli a1, a1, 49
94; RV64IZFBFMIN-NEXT:    or a0, a1, a0
95; RV64IZFBFMIN-NEXT:    fmv.h.x fa0, a0
96; RV64IZFBFMIN-NEXT:    ret
97  %1 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b)
98  ret bfloat %1
99}
100
; fneg_bf16: computes %1 = %a + %a, negates it with fneg, compares the sum
; against its negation with `fcmp oeq`, and returns the i1 result zero-extended
; to i32. The second parameter %b is not used by the body.
; Expected code on both targets: the sum is formed in single precision and
; narrowed; the negation is done in integer registers (fmv.x.h, xor with the
; constant from `lui a0, 1048568`, fmv.h.x); both values are then widened with
; fcvt.s.bf16 and compared with feq.s.
101define i32 @fneg_bf16(bfloat %a, bfloat %b) nounwind {
102; CHECK-LABEL: fneg_bf16:
103; CHECK:       # %bb.0:
104; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
105; CHECK-NEXT:    lui a0, 1048568
106; CHECK-NEXT:    fadd.s fa5, fa5, fa5
107; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
108; CHECK-NEXT:    fmv.x.h a1, fa5
109; CHECK-NEXT:    xor a0, a1, a0
110; CHECK-NEXT:    fmv.h.x fa4, a0
111; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
112; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
113; CHECK-NEXT:    feq.s a0, fa5, fa4
114; CHECK-NEXT:    ret
115  %1 = fadd bfloat %a, %a
116  %2 = fneg bfloat %1
117  %3 = fcmp oeq bfloat %1, %2
118  %4 = zext i1 %3 to i32
119  ret i32 %4
120}
121
; fsgnjn_bf16: llvm.copysign.bf16(%a, -(%a + %b)).
; Expected code: the sum is formed in single precision (fadd.s) and narrowed
; with fcvt.bf16.s; its bits are moved out with fmv.x.h, inverted (`not`) and
; and-ed with the constant from `lui a0, 1048568`; the result is or-ed with the
; low 15 bits of %a (slli/srli by 17 on riscv32, 49 on riscv64) and moved back
; with fmv.h.x. The shift amount is the only difference between the targets.
122define bfloat @fsgnjn_bf16(bfloat %a, bfloat %b) nounwind {
123; RV32IZFBFMIN-LABEL: fsgnjn_bf16:
124; RV32IZFBFMIN:       # %bb.0:
125; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
126; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
127; RV32IZFBFMIN-NEXT:    lui a0, 1048568
128; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
129; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
130; RV32IZFBFMIN-NEXT:    fmv.x.h a1, fa5
131; RV32IZFBFMIN-NEXT:    not a1, a1
132; RV32IZFBFMIN-NEXT:    and a0, a1, a0
133; RV32IZFBFMIN-NEXT:    fmv.x.h a1, fa0
134; RV32IZFBFMIN-NEXT:    slli a1, a1, 17
135; RV32IZFBFMIN-NEXT:    srli a1, a1, 17
136; RV32IZFBFMIN-NEXT:    or a0, a1, a0
137; RV32IZFBFMIN-NEXT:    fmv.h.x fa0, a0
138; RV32IZFBFMIN-NEXT:    ret
139;
140; RV64IZFBFMIN-LABEL: fsgnjn_bf16:
141; RV64IZFBFMIN:       # %bb.0:
142; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
143; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
144; RV64IZFBFMIN-NEXT:    lui a0, 1048568
145; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
146; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
147; RV64IZFBFMIN-NEXT:    fmv.x.h a1, fa5
148; RV64IZFBFMIN-NEXT:    not a1, a1
149; RV64IZFBFMIN-NEXT:    and a0, a1, a0
150; RV64IZFBFMIN-NEXT:    fmv.x.h a1, fa0
151; RV64IZFBFMIN-NEXT:    slli a1, a1, 49
152; RV64IZFBFMIN-NEXT:    srli a1, a1, 49
153; RV64IZFBFMIN-NEXT:    or a0, a1, a0
154; RV64IZFBFMIN-NEXT:    fmv.h.x fa0, a0
155; RV64IZFBFMIN-NEXT:    ret
156  %1 = fadd bfloat %a, %b
157  %2 = fneg bfloat %1
158  %3 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %2)
159  ret bfloat %3
160}
161
162declare bfloat @llvm.fabs.bf16(bfloat)
163
; fabs_bf16: computes %1 = %a + %b, takes llvm.fabs.bf16(%1), and returns
; fabs(%1) + %1.
; Expected code: the first sum is formed in single precision and narrowed; the
; absolute value is produced in an integer register by moving the bits out with
; fmv.x.h and shifting left then logically right by 17 (riscv32) / 49
; (riscv64), which keeps only the low 15 bits; both values are widened again
; and added with fadd.s. The shift amount is the only per-target difference.
164define bfloat @fabs_bf16(bfloat %a, bfloat %b) nounwind {
165; RV32IZFBFMIN-LABEL: fabs_bf16:
166; RV32IZFBFMIN:       # %bb.0:
167; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
168; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
169; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
170; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
171; RV32IZFBFMIN-NEXT:    fmv.x.h a0, fa5
172; RV32IZFBFMIN-NEXT:    slli a0, a0, 17
173; RV32IZFBFMIN-NEXT:    srli a0, a0, 17
174; RV32IZFBFMIN-NEXT:    fmv.h.x fa4, a0
175; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
176; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
177; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
178; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
179; RV32IZFBFMIN-NEXT:    ret
180;
181; RV64IZFBFMIN-LABEL: fabs_bf16:
182; RV64IZFBFMIN:       # %bb.0:
183; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
184; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
185; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
186; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
187; RV64IZFBFMIN-NEXT:    fmv.x.h a0, fa5
188; RV64IZFBFMIN-NEXT:    slli a0, a0, 49
189; RV64IZFBFMIN-NEXT:    srli a0, a0, 49
190; RV64IZFBFMIN-NEXT:    fmv.h.x fa4, a0
191; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
192; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa4
193; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
194; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
195; RV64IZFBFMIN-NEXT:    ret
196  %1 = fadd bfloat %a, %b
197  %2 = call bfloat @llvm.fabs.bf16(bfloat %1)
198  %3 = fadd bfloat %2, %1
199  ret bfloat %3
200}
201
202declare bfloat @llvm.minnum.bf16(bfloat, bfloat)
203
; fmin_bf16: calls the llvm.minnum.bf16 intrinsic on the two arguments.
; Expected code on both targets: widen each operand with fcvt.s.bf16, apply
; fmin.s, then narrow the result with fcvt.bf16.s.
204define bfloat @fmin_bf16(bfloat %a, bfloat %b) nounwind {
205; CHECK-LABEL: fmin_bf16:
206; CHECK:       # %bb.0:
207; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
208; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
209; CHECK-NEXT:    fmin.s fa5, fa4, fa5
210; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
211; CHECK-NEXT:    ret
212  %1 = call bfloat @llvm.minnum.bf16(bfloat %a, bfloat %b)
213  ret bfloat %1
214}
215
216declare bfloat @llvm.maxnum.bf16(bfloat, bfloat)
217
; fmax_bf16: calls the llvm.maxnum.bf16 intrinsic on the two arguments.
; Expected code on both targets: widen each operand with fcvt.s.bf16, apply
; fmax.s, then narrow the result with fcvt.bf16.s.
218define bfloat @fmax_bf16(bfloat %a, bfloat %b) nounwind {
219; CHECK-LABEL: fmax_bf16:
220; CHECK:       # %bb.0:
221; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
222; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
223; CHECK-NEXT:    fmax.s fa5, fa4, fa5
224; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
225; CHECK-NEXT:    ret
226  %1 = call bfloat @llvm.maxnum.bf16(bfloat %a, bfloat %b)
227  ret bfloat %1
228}
229
230declare bfloat @llvm.fma.bf16(bfloat, bfloat, bfloat)
231
; fmadd_bf16: calls llvm.fma.bf16(%a, %b, %c) with no negated operands.
; Expected code on both targets: widen all three operands with fcvt.s.bf16,
; issue a single fmadd.s, then narrow the result with fcvt.bf16.s.
232define bfloat @fmadd_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind {
233; CHECK-LABEL: fmadd_bf16:
234; CHECK:       # %bb.0:
235; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
236; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
237; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
238; CHECK-NEXT:    fmadd.s fa5, fa3, fa4, fa5
239; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
240; CHECK-NEXT:    ret
241  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
242  ret bfloat %1
243}
244
; fmsub_bf16: llvm.fma.bf16(%a, %b, -(0.0 + %c)); the negation is written as
; `fsub -0.0, %c_`.
; Expected code on both targets: 0.0 + %c is formed in single precision
; (fmv.w.x fa4, zero; fadd.s) and narrowed; the negation is done in integer
; registers (fmv.x.h, xor with the constant from `lui a0, 1048568`, fmv.h.x);
; the negated addend is widened again and fed to fmadd.s. The expected output
; uses fmadd.s, not fmsub.s.
; NOTE(review): the expected output does negate with an integer xor, so the
; inline "avoid negation using xor" remark presumably carries over from
; float-arith.ll -- confirm whether it still describes the intent here.
245define bfloat @fmsub_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind {
246; CHECK-LABEL: fmsub_bf16:
247; CHECK:       # %bb.0:
248; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
249; CHECK-NEXT:    fmv.w.x fa4, zero
250; CHECK-NEXT:    lui a0, 1048568
251; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
252; CHECK-NEXT:    fadd.s fa5, fa5, fa4
253; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
254; CHECK-NEXT:    fmv.x.h a1, fa5
255; CHECK-NEXT:    xor a0, a1, a0
256; CHECK-NEXT:    fmv.h.x fa5, a0
257; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
258; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
259; CHECK-NEXT:    fmadd.s fa5, fa4, fa3, fa5
260; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
261; CHECK-NEXT:    ret
262  %c_ = fadd bfloat 0.0, %c ; avoid negation using xor
263  %negc = fsub bfloat -0.0, %c_
264  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %negc)
265  ret bfloat %1
266}
267
; fnmadd_bf16: llvm.fma.bf16(-(0.0 + %a), %b, -(0.0 + %c)); both negations are
; written as `fsub -0.0, x`.
; Expected code on both targets: two single-precision additions with zero
; (fadd.s), each narrowed to bf16; each result is negated in integer registers
; by xor with the constant from `lui a0, 1048568`; the negated values and %b
; are widened with fcvt.s.bf16 and combined by one fmadd.s. The expected
; output uses fmadd.s, not fnmadd.s.
268define bfloat @fnmadd_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind {
269; CHECK-LABEL: fnmadd_bf16:
270; CHECK:       # %bb.0:
271; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
272; CHECK-NEXT:    fmv.w.x fa4, zero
273; CHECK-NEXT:    fcvt.s.bf16 fa3, fa2
274; CHECK-NEXT:    lui a0, 1048568
275; CHECK-NEXT:    fadd.s fa5, fa5, fa4
276; CHECK-NEXT:    fadd.s fa4, fa3, fa4
277; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
278; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
279; CHECK-NEXT:    fmv.x.h a1, fa5
280; CHECK-NEXT:    fmv.x.h a2, fa4
281; CHECK-NEXT:    xor a1, a1, a0
282; CHECK-NEXT:    xor a0, a2, a0
283; CHECK-NEXT:    fmv.h.x fa5, a1
284; CHECK-NEXT:    fmv.h.x fa4, a0
285; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
286; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
287; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
288; CHECK-NEXT:    fmadd.s fa5, fa5, fa3, fa4
289; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
290; CHECK-NEXT:    ret
291  %a_ = fadd bfloat 0.0, %a
292  %c_ = fadd bfloat 0.0, %c
293  %nega = fsub bfloat -0.0, %a_
294  %negc = fsub bfloat -0.0, %c_
295  %1 = call bfloat @llvm.fma.bf16(bfloat %nega, bfloat %b, bfloat %negc)
296  ret bfloat %1
297}
298
; fnmadd_s_2: llvm.fma.bf16(%a, -(0.0 + %b), -(0.0 + %c)); here the negated
; multiplicand is the second fma operand (%b) rather than the first.
; Expected code on both targets: two single-precision additions with zero
; (fadd.s), each narrowed to bf16; each result is negated in integer registers
; by xor with the constant from `lui a0, 1048568`; the negated values and %a
; are widened with fcvt.s.bf16 and combined by one fmadd.s.
299define bfloat @fnmadd_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
300; CHECK-LABEL: fnmadd_s_2:
301; CHECK:       # %bb.0:
302; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
303; CHECK-NEXT:    fmv.w.x fa4, zero
304; CHECK-NEXT:    fcvt.s.bf16 fa3, fa2
305; CHECK-NEXT:    lui a0, 1048568
306; CHECK-NEXT:    fadd.s fa5, fa5, fa4
307; CHECK-NEXT:    fadd.s fa4, fa3, fa4
308; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
309; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
310; CHECK-NEXT:    fmv.x.h a1, fa5
311; CHECK-NEXT:    fmv.x.h a2, fa4
312; CHECK-NEXT:    xor a1, a1, a0
313; CHECK-NEXT:    xor a0, a2, a0
314; CHECK-NEXT:    fmv.h.x fa5, a1
315; CHECK-NEXT:    fmv.h.x fa4, a0
316; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
317; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
318; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
319; CHECK-NEXT:    fmadd.s fa5, fa3, fa5, fa4
320; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
321; CHECK-NEXT:    ret
322  %b_ = fadd bfloat 0.0, %b
323  %c_ = fadd bfloat 0.0, %c
324  %negb = fsub bfloat -0.0, %b_
325  %negc = fsub bfloat -0.0, %c_
326  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %negb, bfloat %negc)
327  ret bfloat %1
328}
329
; fnmadd_s_3: returns the fneg of llvm.fma.bf16(%a, %b, %c); neither the call
; nor the fneg carries fast-math flags.
; Expected code on both targets: widen the three operands, fmadd.s, narrow with
; fcvt.bf16.s, then negate the bf16 result in integer registers (fmv.x.h, xor
; with the constant from `lui a1, 1048568`, fmv.h.x).
330define bfloat @fnmadd_s_3(bfloat %a, bfloat %b, bfloat %c) nounwind {
331; CHECK-LABEL: fnmadd_s_3:
332; CHECK:       # %bb.0:
333; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
334; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
335; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
336; CHECK-NEXT:    fmadd.s fa5, fa3, fa4, fa5
337; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
338; CHECK-NEXT:    fmv.x.h a0, fa5
339; CHECK-NEXT:    lui a1, 1048568
340; CHECK-NEXT:    xor a0, a0, a1
341; CHECK-NEXT:    fmv.h.x fa0, a0
342; CHECK-NEXT:    ret
343  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
344  %neg = fneg bfloat %1
345  ret bfloat %neg
346}
347
348
; fnmadd_nsz: returns the fneg of llvm.fma.bf16(%a, %b, %c), with the `nsz`
; fast-math flag on both the fma call and the fneg.
; Expected code on both targets: widen the three operands, fmadd.s, narrow with
; fcvt.bf16.s, then negate the bf16 result in integer registers (fmv.x.h, xor
; with the constant from `lui a1, 1048568`, fmv.h.x). This is the same
; instruction sequence as expected for fnmadd_s_3, which has no nsz flags.
349define bfloat @fnmadd_nsz(bfloat %a, bfloat %b, bfloat %c) nounwind {
350; CHECK-LABEL: fnmadd_nsz:
351; CHECK:       # %bb.0:
352; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
353; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
354; CHECK-NEXT:    fcvt.s.bf16 fa3, fa0
355; CHECK-NEXT:    fmadd.s fa5, fa3, fa4, fa5
356; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
357; CHECK-NEXT:    fmv.x.h a0, fa5
358; CHECK-NEXT:    lui a1, 1048568
359; CHECK-NEXT:    xor a0, a0, a1
360; CHECK-NEXT:    fmv.h.x fa0, a0
361; CHECK-NEXT:    ret
362  %1 = call nsz bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
363  %neg = fneg nsz bfloat %1
364  ret bfloat %neg
365}
366
; fnmsub_bf16: llvm.fma.bf16(-(0.0 + %a), %b, %c); the negation is written as
; `fsub -0.0, %a_`.
; Expected code on both targets: 0.0 + %a is formed in single precision
; (fadd.s) and narrowed; it is negated in integer registers by xor with the
; constant from `lui a0, 1048568`; the negated value, %b and %c are widened
; with fcvt.s.bf16 and combined by one fmadd.s. The expected output uses
; fmadd.s, not fnmsub.s.
367define bfloat @fnmsub_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind {
368; CHECK-LABEL: fnmsub_bf16:
369; CHECK:       # %bb.0:
370; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
371; CHECK-NEXT:    fmv.w.x fa4, zero
372; CHECK-NEXT:    lui a0, 1048568
373; CHECK-NEXT:    fcvt.s.bf16 fa3, fa2
374; CHECK-NEXT:    fadd.s fa5, fa5, fa4
375; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
376; CHECK-NEXT:    fmv.x.h a1, fa5
377; CHECK-NEXT:    xor a0, a1, a0
378; CHECK-NEXT:    fmv.h.x fa5, a0
379; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
380; CHECK-NEXT:    fcvt.s.bf16 fa4, fa1
381; CHECK-NEXT:    fmadd.s fa5, fa5, fa4, fa3
382; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
383; CHECK-NEXT:    ret
384  %a_ = fadd bfloat 0.0, %a
385  %nega = fsub bfloat -0.0, %a_
386  %1 = call bfloat @llvm.fma.bf16(bfloat %nega, bfloat %b, bfloat %c)
387  ret bfloat %1
388}
389
; fnmsub_bf16_2: llvm.fma.bf16(%a, -(0.0 + %b), %c); here the negated
; multiplicand is the second fma operand (%b) rather than the first.
; Expected code on both targets: 0.0 + %b is formed in single precision
; (fadd.s) and narrowed; it is negated in integer registers by xor with the
; constant from `lui a0, 1048568`; the negated value, %a and %c are widened
; with fcvt.s.bf16 and combined by one fmadd.s.
390define bfloat @fnmsub_bf16_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
391; CHECK-LABEL: fnmsub_bf16_2:
392; CHECK:       # %bb.0:
393; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
394; CHECK-NEXT:    fmv.w.x fa4, zero
395; CHECK-NEXT:    lui a0, 1048568
396; CHECK-NEXT:    fcvt.s.bf16 fa3, fa2
397; CHECK-NEXT:    fadd.s fa5, fa5, fa4
398; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
399; CHECK-NEXT:    fmv.x.h a1, fa5
400; CHECK-NEXT:    xor a0, a1, a0
401; CHECK-NEXT:    fmv.h.x fa5, a0
402; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
403; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
404; CHECK-NEXT:    fmadd.s fa5, fa4, fa5, fa3
405; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
406; CHECK-NEXT:    ret
407  %b_ = fadd bfloat 0.0, %b
408  %negb = fsub bfloat -0.0, %b_
409  %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %negb, bfloat %c)
410  ret bfloat %1
411}
412
; fmadd_bf16_contract: (%a * %b) + %c, with the `contract` fast-math flag on
; both the fmul and the fadd.
; Expected code on both targets keeps the two operations separate: fmul.s,
; narrow the product with fcvt.bf16.s, widen it again with fcvt.s.bf16, then
; fadd.s with the widened %c. No fused multiply-add instruction is expected.
413define bfloat @fmadd_bf16_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
414; CHECK-LABEL: fmadd_bf16_contract:
415; CHECK:       # %bb.0:
416; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
417; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
418; CHECK-NEXT:    fmul.s fa5, fa4, fa5
419; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
420; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
421; CHECK-NEXT:    fcvt.s.bf16 fa4, fa2
422; CHECK-NEXT:    fadd.s fa5, fa5, fa4
423; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
424; CHECK-NEXT:    ret
425  %1 = fmul contract bfloat %a, %b
426  %2 = fadd contract bfloat %1, %c
427  ret bfloat %2
428}
429
; fmsub_bf16_contract: (%a * %b) - (0.0 + %c), with the `contract` fast-math
; flag on the fmul and the fsub (the initial fadd with 0.0 has no flags).
; Expected code on both targets keeps the operations separate: fadd.s with
; zero and fmul.s are each narrowed with fcvt.bf16.s and widened again, then
; combined by fsub.s. No fused multiply-subtract instruction is expected.
430define bfloat @fmsub_bf16_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
431; CHECK-LABEL: fmsub_bf16_contract:
432; CHECK:       # %bb.0:
433; CHECK-NEXT:    fcvt.s.bf16 fa5, fa2
434; CHECK-NEXT:    fmv.w.x fa4, zero
435; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
436; CHECK-NEXT:    fcvt.s.bf16 fa2, fa0
437; CHECK-NEXT:    fadd.s fa5, fa5, fa4
438; CHECK-NEXT:    fmul.s fa4, fa2, fa3
439; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
440; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
441; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
442; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
443; CHECK-NEXT:    fsub.s fa5, fa4, fa5
444; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
445; CHECK-NEXT:    ret
446  %c_ = fadd bfloat 0.0, %c ; avoid negation using xor
447  %1 = fmul contract bfloat %a, %b
448  %2 = fsub contract bfloat %1, %c_
449  ret bfloat %2
450}
451
; fnmadd_bf16_contract: -((0.0 + %a) * (0.0 + %b)) - (0.0 + %c). The fmul and
; the final fsub carry the `contract` flag; the fneg between them does not.
; Expected code on both targets keeps every operation separate: three fadd.s
; with zero, each narrowed to bf16; fmul.s on the re-widened %a_/%b_ values,
; narrowed; negation of the product in integer registers (fmv.x.h, xor with the
; constant from `lui a0, 1048568`, fmv.h.x); then fsub.s against the re-widened
; %c_ value. No fused instruction is expected.
452define bfloat @fnmadd_bf16_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
453; CHECK-LABEL: fnmadd_bf16_contract:
454; CHECK:       # %bb.0:
455; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
456; CHECK-NEXT:    fmv.w.x fa4, zero
457; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
458; CHECK-NEXT:    fcvt.s.bf16 fa2, fa2
459; CHECK-NEXT:    lui a0, 1048568
460; CHECK-NEXT:    fadd.s fa5, fa5, fa4
461; CHECK-NEXT:    fadd.s fa3, fa3, fa4
462; CHECK-NEXT:    fadd.s fa4, fa2, fa4
463; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
464; CHECK-NEXT:    fcvt.bf16.s fa3, fa3
465; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
466; CHECK-NEXT:    fcvt.s.bf16 fa3, fa3
467; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
468; CHECK-NEXT:    fmul.s fa5, fa5, fa3
469; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
470; CHECK-NEXT:    fmv.x.h a1, fa5
471; CHECK-NEXT:    xor a0, a1, a0
472; CHECK-NEXT:    fmv.h.x fa5, a0
473; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
474; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
475; CHECK-NEXT:    fsub.s fa5, fa5, fa4
476; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
477; CHECK-NEXT:    ret
478  %a_ = fadd bfloat 0.0, %a ; avoid negation using xor
479  %b_ = fadd bfloat 0.0, %b ; avoid negation using xor
480  %c_ = fadd bfloat 0.0, %c ; avoid negation using xor
481  %1 = fmul contract bfloat %a_, %b_
482  %2 = fneg bfloat %1
483  %3 = fsub contract bfloat %2, %c_
484  ret bfloat %3
485}
486
; fnmsub_bf16_contract: %c - ((0.0 + %a) * (0.0 + %b)), with the `contract`
; fast-math flag on the fmul and the fsub.
; Expected code on both targets keeps the operations separate: two fadd.s with
; zero, each narrowed to bf16 and widened again; fmul.s, narrowed and widened
; again; then fsub.s with the widened %c as the minuend. No fused instruction
; is expected.
487define bfloat @fnmsub_bf16_contract(bfloat %a, bfloat %b, bfloat %c) nounwind {
488; CHECK-LABEL: fnmsub_bf16_contract:
489; CHECK:       # %bb.0:
490; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
491; CHECK-NEXT:    fmv.w.x fa4, zero
492; CHECK-NEXT:    fcvt.s.bf16 fa3, fa1
493; CHECK-NEXT:    fadd.s fa5, fa5, fa4
494; CHECK-NEXT:    fadd.s fa4, fa3, fa4
495; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
496; CHECK-NEXT:    fcvt.bf16.s fa4, fa4
497; CHECK-NEXT:    fcvt.s.bf16 fa4, fa4
498; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
499; CHECK-NEXT:    fmul.s fa5, fa5, fa4
500; CHECK-NEXT:    fcvt.bf16.s fa5, fa5
501; CHECK-NEXT:    fcvt.s.bf16 fa5, fa5
502; CHECK-NEXT:    fcvt.s.bf16 fa4, fa2
503; CHECK-NEXT:    fsub.s fa5, fa4, fa5
504; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
505; CHECK-NEXT:    ret
506  %a_ = fadd bfloat 0.0, %a ; avoid negation using xor
507  %b_ = fadd bfloat 0.0, %b ; avoid negation using xor
508  %1 = fmul contract bfloat %a_, %b_
509  %2 = fsub contract bfloat %c, %1
510  ret bfloat %2
511}
512