xref: /llvm-project/llvm/test/CodeGen/ARM/arm-bf16-pcs.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple armv8.6a-arm-none-eabi -o - %s | FileCheck %s --check-prefix=BASE --check-prefix=BASE-ARM
3; RUN: llc -mtriple thumbv8.6a-arm-none-eabi -o - %s | FileCheck %s --check-prefix=BASE --check-prefix=BASE-THUMB
4; RUN: llc -mtriple armv8.6a-arm-none-eabi -mattr=+fullfp16 -o - %s | FileCheck %s --check-prefix=FULLFP16 --check-prefix=FULLFP16-ARM
5; RUN: llc -mtriple thumbv8.6a-arm-none-eabi -mattr=+fullfp16 -o - %s | FileCheck %s --check-prefix=FULLFP16 --check-prefix=FULLFP16-THUMB
6
; Loads a bfloat from %p and returns it. The define names no calling
; convention, so the default one applies. In the expected output the result is
; left in r0: the BASE checks expect a plain ldrh, while the FULLFP16 checks
; expect a vldr.16 into s0 followed by a vmov back to r0.
7define bfloat @bf_load_soft(ptr %p) {
8; BASE-LABEL: bf_load_soft:
9; BASE:       @ %bb.0:
10; BASE-NEXT:    ldrh r0, [r0]
11; BASE-NEXT:    bx lr
12;
13; FULLFP16-LABEL: bf_load_soft:
14; FULLFP16:       @ %bb.0:
15; FULLFP16-NEXT:    vldr.16 s0, [r0]
16; FULLFP16-NEXT:    vmov r0, s0
17; FULLFP16-NEXT:    bx lr
18  %f = load bfloat, ptr %p, align 2
19  ret bfloat %f
20}
21
; Loads a bfloat from %p and returns it, declared with the arm_aapcs_vfpcc
; calling convention. In the expected output the result is left in s0: the
; BASE checks expect ldrh into r0 followed by a vmov to s0, while the FULLFP16
; checks expect a single vldr.16 directly into s0.
22define arm_aapcs_vfpcc bfloat @bf_load_hard(ptr %p) {
23; BASE-LABEL: bf_load_hard:
24; BASE:       @ %bb.0:
25; BASE-NEXT:    ldrh r0, [r0]
26; BASE-NEXT:    vmov s0, r0
27; BASE-NEXT:    bx lr
28;
29; FULLFP16-LABEL: bf_load_hard:
30; FULLFP16:       @ %bb.0:
31; FULLFP16-NEXT:    vldr.16 s0, [r0]
32; FULLFP16-NEXT:    bx lr
33  %f = load bfloat, ptr %p, align 2
34  ret bfloat %f
35}
36
; Stores the bfloat argument %f to %p, using the default calling convention.
; The expected output reads the pointer from r0 and the bfloat from r1: the
; BASE checks expect a plain strh of r1, while the FULLFP16 checks expect r1 to
; be moved into s0 with vmov.f16 and then stored with vstr.16.
37define void @bf_store_soft(ptr %p, bfloat %f) {
38; BASE-LABEL: bf_store_soft:
39; BASE:       @ %bb.0:
40; BASE-NEXT:    strh r1, [r0]
41; BASE-NEXT:    bx lr
42;
43; FULLFP16-LABEL: bf_store_soft:
44; FULLFP16:       @ %bb.0:
45; FULLFP16-NEXT:    vmov.f16 s0, r1
46; FULLFP16-NEXT:    vstr.16 s0, [r0]
47; FULLFP16-NEXT:    bx lr
48  store bfloat %f, ptr %p, align 2
49  ret void
50}
51
; Stores the bfloat argument %f to %p, declared with the arm_aapcs_vfpcc
; calling convention. The expected output reads the pointer from r0 and the
; bfloat from s0: the BASE checks expect s0 to be moved to r1 and stored with
; strh, while the FULLFP16 checks expect a single vstr.16 of s0.
52define arm_aapcs_vfpcc void @bf_store_hard(ptr %p, bfloat %f) {
53; BASE-LABEL: bf_store_hard:
54; BASE:       @ %bb.0:
55; BASE-NEXT:    vmov r1, s0
56; BASE-NEXT:    strh r1, [r0]
57; BASE-NEXT:    bx lr
58;
59; FULLFP16-LABEL: bf_store_hard:
60; FULLFP16:       @ %bb.0:
61; FULLFP16-NEXT:    vstr.16 s0, [r0]
62; FULLFP16-NEXT:    bx lr
63  store bfloat %f, ptr %p, align 2
64  ret void
65}
66
; Reinterprets the bfloat argument as an i16 (bitcast) and zero-extends it to
; i32, using the default calling convention. The expected output takes the
; argument in r0 and returns in r0: the BASE checks expect a single uxth,
; while the FULLFP16 checks expect a round trip through s0 using vmov.f16 in
; both directions.
67define i32 @bf_to_int_soft(bfloat %f) {
68; BASE-LABEL: bf_to_int_soft:
69; BASE:       @ %bb.0:
70; BASE-NEXT:    uxth r0, r0
71; BASE-NEXT:    bx lr
72;
73; FULLFP16-LABEL: bf_to_int_soft:
74; FULLFP16:       @ %bb.0:
75; FULLFP16-NEXT:    vmov.f16 s0, r0
76; FULLFP16-NEXT:    vmov.f16 r0, s0
77; FULLFP16-NEXT:    bx lr
78  %h = bitcast bfloat %f to i16
79  %w = zext i16 %h to i32
80  ret i32 %w
81}
82
; Reinterprets the bfloat argument as an i16 (bitcast) and zero-extends it to
; i32, declared with the arm_aapcs_vfpcc calling convention. The expected
; output takes the argument in s0 and returns in r0: the BASE checks expect a
; vmov to r0 followed by uxth, while the FULLFP16 checks expect a single
; vmov.f16 from s0 to r0.
83define arm_aapcs_vfpcc i32 @bf_to_int_hard(bfloat %f) {
84; BASE-LABEL: bf_to_int_hard:
85; BASE:       @ %bb.0:
86; BASE-NEXT:    vmov r0, s0
87; BASE-NEXT:    uxth r0, r0
88; BASE-NEXT:    bx lr
89;
90; FULLFP16-LABEL: bf_to_int_hard:
91; FULLFP16:       @ %bb.0:
92; FULLFP16-NEXT:    vmov.f16 r0, s0
93; FULLFP16-NEXT:    bx lr
94  %h = bitcast bfloat %f to i16
95  %w = zext i16 %h to i32
96  ret i32 %w
97}
98
; Truncates the i32 argument to i16 and reinterprets those bits as a bfloat
; (bitcast), using the default calling convention. The expected output takes
; the argument in r0 and returns in r0. Without +fullfp16 the value is expected
; to go through a 4-byte stack slot (strh then ldrh at [sp, #2]); the ARM and
; Thumb spellings of that sequence differ, so they are checked under the
; separate BASE-ARM and BASE-THUMB prefixes. The FULLFP16 checks instead expect
; a vmov.f16 into s0 followed by a vmov back to r0.
99define bfloat @bf_from_int_soft(i32 %w) {
100; BASE-ARM-LABEL: bf_from_int_soft:
101; BASE-ARM:       @ %bb.0:
102; BASE-ARM-NEXT:    .pad #4
103; BASE-ARM-NEXT:    sub sp, sp, #4
104; BASE-ARM-NEXT:    strh r0, [sp, #2]
105; BASE-ARM-NEXT:    ldrh r0, [sp, #2]
106; BASE-ARM-NEXT:    add sp, sp, #4
107; BASE-ARM-NEXT:    bx lr
108;
109; BASE-THUMB-LABEL: bf_from_int_soft:
110; BASE-THUMB:       @ %bb.0:
111; BASE-THUMB-NEXT:    .pad #4
112; BASE-THUMB-NEXT:    sub sp, #4
113; BASE-THUMB-NEXT:    strh.w r0, [sp, #2]
114; BASE-THUMB-NEXT:    ldrh.w r0, [sp, #2]
115; BASE-THUMB-NEXT:    add sp, #4
116; BASE-THUMB-NEXT:    bx lr
117;
118; FULLFP16-LABEL: bf_from_int_soft:
119; FULLFP16:       @ %bb.0:
120; FULLFP16-NEXT:    vmov.f16 s0, r0
121; FULLFP16-NEXT:    vmov r0, s0
122; FULLFP16-NEXT:    bx lr
123  %h = trunc i32 %w to i16
124  %f = bitcast i16 %h to bfloat
125  ret bfloat %f
126}
127
; Truncates the i32 argument to i16 and reinterprets those bits as a bfloat
; (bitcast), declared with the arm_aapcs_vfpcc calling convention. The expected
; output takes the argument in r0 and returns in s0. Without +fullfp16 the
; value is expected to go through a 4-byte stack slot (strh then ldrh at
; [sp, #2]) before being moved to s0; the ARM and Thumb spellings differ, so
; they are checked under the separate BASE-ARM and BASE-THUMB prefixes. The
; FULLFP16 checks instead expect a single vmov.f16 from r0 into s0.
128define arm_aapcs_vfpcc bfloat @bf_from_int_hard(i32 %w) {
129; BASE-ARM-LABEL: bf_from_int_hard:
130; BASE-ARM:       @ %bb.0:
131; BASE-ARM-NEXT:    .pad #4
132; BASE-ARM-NEXT:    sub sp, sp, #4
133; BASE-ARM-NEXT:    strh r0, [sp, #2]
134; BASE-ARM-NEXT:    ldrh r0, [sp, #2]
135; BASE-ARM-NEXT:    vmov s0, r0
136; BASE-ARM-NEXT:    add sp, sp, #4
137; BASE-ARM-NEXT:    bx lr
138;
139; BASE-THUMB-LABEL: bf_from_int_hard:
140; BASE-THUMB:       @ %bb.0:
141; BASE-THUMB-NEXT:    .pad #4
142; BASE-THUMB-NEXT:    sub sp, #4
143; BASE-THUMB-NEXT:    strh.w r0, [sp, #2]
144; BASE-THUMB-NEXT:    ldrh.w r0, [sp, #2]
145; BASE-THUMB-NEXT:    vmov s0, r0
146; BASE-THUMB-NEXT:    add sp, #4
147; BASE-THUMB-NEXT:    bx lr
148;
149; FULLFP16-LABEL: bf_from_int_hard:
150; FULLFP16:       @ %bb.0:
151; FULLFP16-NEXT:    vmov.f16 s0, r0
152; FULLFP16-NEXT:    bx lr
153  %h = trunc i32 %w to i16
154  %f = bitcast i16 %h to bfloat
155  ret bfloat %f
156}
157
; Calls the function pointer %f twice with bfloat arguments under the default
; calling convention: first as %f(%bf, %bf), then as %f(%bf, %call). The
; function returns %call, the result of the FIRST call, so that value has to
; stay live across the second call; %call1 is unused.
;
; What the expected output pins down:
;  - bfloat arguments are passed in r0/r1 and the result comes back in r0;
;    the function pointer is kept in r4 and %bf in r5 across the calls.
;  - Both IR calls are marked `tail call`, but each is expected to be emitted
;    as `blx r4` with further code after it.
;  - Without +fullfp16 the first result is expected to be kept in a stack slot
;    ([sp, #6]) and reloaded for the return.
;  - With +fullfp16 it is expected to be kept in s16, with d8 saved and
;    restored by vpush/vpop.
; ARM and Thumb differ in the saved register set (r11 vs r7) and instruction
; spellings, so each configuration has its own check prefix.
158define bfloat @test_fncall_soft(bfloat %bf, ptr %f) {
159; BASE-ARM-LABEL: test_fncall_soft:
160; BASE-ARM:       @ %bb.0:
161; BASE-ARM-NEXT:    .save {r4, r5, r11, lr}
162; BASE-ARM-NEXT:    push {r4, r5, r11, lr}
163; BASE-ARM-NEXT:    .pad #8
164; BASE-ARM-NEXT:    sub sp, sp, #8
165; BASE-ARM-NEXT:    uxth r5, r0
166; BASE-ARM-NEXT:    mov r4, r1
167; BASE-ARM-NEXT:    mov r0, r5
168; BASE-ARM-NEXT:    mov r1, r5
169; BASE-ARM-NEXT:    blx r4
170; BASE-ARM-NEXT:    strh r0, [sp, #6]
171; BASE-ARM-NEXT:    uxth r1, r0
172; BASE-ARM-NEXT:    mov r0, r5
173; BASE-ARM-NEXT:    blx r4
174; BASE-ARM-NEXT:    ldrh r0, [sp, #6]
175; BASE-ARM-NEXT:    add sp, sp, #8
176; BASE-ARM-NEXT:    pop {r4, r5, r11, pc}
177;
178; BASE-THUMB-LABEL: test_fncall_soft:
179; BASE-THUMB:       @ %bb.0:
180; BASE-THUMB-NEXT:    .save {r4, r5, r7, lr}
181; BASE-THUMB-NEXT:    push {r4, r5, r7, lr}
182; BASE-THUMB-NEXT:    .pad #8
183; BASE-THUMB-NEXT:    sub sp, #8
184; BASE-THUMB-NEXT:    uxth r5, r0
185; BASE-THUMB-NEXT:    mov r4, r1
186; BASE-THUMB-NEXT:    mov r0, r5
187; BASE-THUMB-NEXT:    mov r1, r5
188; BASE-THUMB-NEXT:    blx r4
189; BASE-THUMB-NEXT:    strh.w r0, [sp, #6]
190; BASE-THUMB-NEXT:    uxth r1, r0
191; BASE-THUMB-NEXT:    mov r0, r5
192; BASE-THUMB-NEXT:    blx r4
193; BASE-THUMB-NEXT:    ldrh.w r0, [sp, #6]
194; BASE-THUMB-NEXT:    add sp, #8
195; BASE-THUMB-NEXT:    pop {r4, r5, r7, pc}
196;
197; FULLFP16-ARM-LABEL: test_fncall_soft:
198; FULLFP16-ARM:       @ %bb.0:
199; FULLFP16-ARM-NEXT:    .save {r4, r5, r11, lr}
200; FULLFP16-ARM-NEXT:    push {r4, r5, r11, lr}
201; FULLFP16-ARM-NEXT:    .vsave {d8}
202; FULLFP16-ARM-NEXT:    vpush {d8}
203; FULLFP16-ARM-NEXT:    vmov.f16 s0, r0
204; FULLFP16-ARM-NEXT:    mov r4, r1
205; FULLFP16-ARM-NEXT:    vmov.f16 r5, s0
206; FULLFP16-ARM-NEXT:    mov r0, r5
207; FULLFP16-ARM-NEXT:    mov r1, r5
208; FULLFP16-ARM-NEXT:    blx r4
209; FULLFP16-ARM-NEXT:    vmov.f16 s16, r0
210; FULLFP16-ARM-NEXT:    mov r0, r5
211; FULLFP16-ARM-NEXT:    vmov.f16 r1, s16
212; FULLFP16-ARM-NEXT:    blx r4
213; FULLFP16-ARM-NEXT:    vmov r0, s16
214; FULLFP16-ARM-NEXT:    vpop {d8}
215; FULLFP16-ARM-NEXT:    pop {r4, r5, r11, pc}
216;
217; FULLFP16-THUMB-LABEL: test_fncall_soft:
218; FULLFP16-THUMB:       @ %bb.0:
219; FULLFP16-THUMB-NEXT:    .save {r4, r5, r7, lr}
220; FULLFP16-THUMB-NEXT:    push {r4, r5, r7, lr}
221; FULLFP16-THUMB-NEXT:    .vsave {d8}
222; FULLFP16-THUMB-NEXT:    vpush {d8}
223; FULLFP16-THUMB-NEXT:    vmov.f16 s0, r0
224; FULLFP16-THUMB-NEXT:    mov r4, r1
225; FULLFP16-THUMB-NEXT:    vmov.f16 r5, s0
226; FULLFP16-THUMB-NEXT:    mov r0, r5
227; FULLFP16-THUMB-NEXT:    mov r1, r5
228; FULLFP16-THUMB-NEXT:    blx r4
229; FULLFP16-THUMB-NEXT:    vmov.f16 s16, r0
230; FULLFP16-THUMB-NEXT:    mov r0, r5
231; FULLFP16-THUMB-NEXT:    vmov.f16 r1, s16
232; FULLFP16-THUMB-NEXT:    blx r4
233; FULLFP16-THUMB-NEXT:    vmov r0, s16
234; FULLFP16-THUMB-NEXT:    vpop {d8}
235; FULLFP16-THUMB-NEXT:    pop {r4, r5, r7, pc}
236  %call = tail call bfloat %f(bfloat %bf, bfloat %bf)
237  %call1 = tail call bfloat %f(bfloat %bf, bfloat %call)
238  ret bfloat %call
239}
240
; Calls the function pointer %f twice with bfloat arguments, with both this
; function and the calls using the arm_aapcs_vfpcc calling convention: first
; as %f(%bf, %bf), then as %f(%bf, %call). The function returns %call, the
; result of the FIRST call, so that value has to stay live across the second
; call; %call1 is unused.
;
; What the expected output pins down:
;  - bfloat arguments are passed in s0/s1 and the result comes back in s0;
;    the function pointer arrives in r0 and is kept in r4, and %bf is kept in
;    s16 across the calls.
;  - Both IR calls are marked `tail call`, but each is expected to be emitted
;    as `blx r4` with further code after it.
;  - Without +fullfp16 the first result is expected to be kept in a stack slot
;    ([sp, #6]) and reloaded into s0 for the return, with only d8 saved.
;  - With +fullfp16 it is expected to be kept in s18, so d8 and d9 are both
;    saved and restored. ARM and Thumb share the single FULLFP16 prefix here.
241define arm_aapcs_vfpcc bfloat @test_fncall_hard(bfloat %bf, ptr %f) {
242; BASE-ARM-LABEL: test_fncall_hard:
243; BASE-ARM:       @ %bb.0:
244; BASE-ARM-NEXT:    .save {r4, lr}
245; BASE-ARM-NEXT:    push {r4, lr}
246; BASE-ARM-NEXT:    .vsave {d8}
247; BASE-ARM-NEXT:    vpush {d8}
248; BASE-ARM-NEXT:    .pad #8
249; BASE-ARM-NEXT:    sub sp, sp, #8
250; BASE-ARM-NEXT:    mov r4, r0
251; BASE-ARM-NEXT:    vmov r0, s0
252; BASE-ARM-NEXT:    uxth r0, r0
253; BASE-ARM-NEXT:    vmov s16, r0
254; BASE-ARM-NEXT:    vmov.f32 s0, s16
255; BASE-ARM-NEXT:    vmov.f32 s1, s16
256; BASE-ARM-NEXT:    blx r4
257; BASE-ARM-NEXT:    vmov r0, s0
258; BASE-ARM-NEXT:    vmov.f32 s0, s16
259; BASE-ARM-NEXT:    strh r0, [sp, #6]
260; BASE-ARM-NEXT:    uxth r0, r0
261; BASE-ARM-NEXT:    vmov s1, r0
262; BASE-ARM-NEXT:    blx r4
263; BASE-ARM-NEXT:    ldrh r0, [sp, #6]
264; BASE-ARM-NEXT:    vmov s0, r0
265; BASE-ARM-NEXT:    add sp, sp, #8
266; BASE-ARM-NEXT:    vpop {d8}
267; BASE-ARM-NEXT:    pop {r4, pc}
268;
269; BASE-THUMB-LABEL: test_fncall_hard:
270; BASE-THUMB:       @ %bb.0:
271; BASE-THUMB-NEXT:    .save {r4, lr}
272; BASE-THUMB-NEXT:    push {r4, lr}
273; BASE-THUMB-NEXT:    .vsave {d8}
274; BASE-THUMB-NEXT:    vpush {d8}
275; BASE-THUMB-NEXT:    .pad #8
276; BASE-THUMB-NEXT:    sub sp, #8
277; BASE-THUMB-NEXT:    mov r4, r0
278; BASE-THUMB-NEXT:    vmov r0, s0
279; BASE-THUMB-NEXT:    uxth r0, r0
280; BASE-THUMB-NEXT:    vmov s16, r0
281; BASE-THUMB-NEXT:    vmov.f32 s0, s16
282; BASE-THUMB-NEXT:    vmov.f32 s1, s16
283; BASE-THUMB-NEXT:    blx r4
284; BASE-THUMB-NEXT:    vmov r0, s0
285; BASE-THUMB-NEXT:    vmov.f32 s0, s16
286; BASE-THUMB-NEXT:    strh.w r0, [sp, #6]
287; BASE-THUMB-NEXT:    uxth r0, r0
288; BASE-THUMB-NEXT:    vmov s1, r0
289; BASE-THUMB-NEXT:    blx r4
290; BASE-THUMB-NEXT:    ldrh.w r0, [sp, #6]
291; BASE-THUMB-NEXT:    vmov s0, r0
292; BASE-THUMB-NEXT:    add sp, #8
293; BASE-THUMB-NEXT:    vpop {d8}
294; BASE-THUMB-NEXT:    pop {r4, pc}
295;
296; FULLFP16-LABEL: test_fncall_hard:
297; FULLFP16:       @ %bb.0:
298; FULLFP16-NEXT:    .save {r4, lr}
299; FULLFP16-NEXT:    push {r4, lr}
300; FULLFP16-NEXT:    .vsave {d8, d9}
301; FULLFP16-NEXT:    vpush {d8, d9}
302; FULLFP16-NEXT:    mov r4, r0
303; FULLFP16-NEXT:    vmov.f16 r0, s0
304; FULLFP16-NEXT:    vmov s16, r0
305; FULLFP16-NEXT:    vmov.f32 s0, s16
306; FULLFP16-NEXT:    vmov.f32 s1, s16
307; FULLFP16-NEXT:    blx r4
308; FULLFP16-NEXT:    vmov.f16 r0, s0
309; FULLFP16-NEXT:    vmov.f32 s18, s0
310; FULLFP16-NEXT:    vmov.f32 s0, s16
311; FULLFP16-NEXT:    vmov s1, r0
312; FULLFP16-NEXT:    blx r4
313; FULLFP16-NEXT:    vmov.f32 s0, s18
314; FULLFP16-NEXT:    vpop {d8, d9}
315; FULLFP16-NEXT:    pop {r4, pc}
316  %call = tail call arm_aapcs_vfpcc bfloat %f(bfloat %bf, bfloat %bf)
317  %call1 = tail call arm_aapcs_vfpcc bfloat %f(bfloat %bf, bfloat %call)
318  ret bfloat %call
319}
320