; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck -check-prefix=NOFP16 %s
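; Without fp-armv8, half values are passed and returned in GPRs, and the
; constrained fpext/fptrunc operations are lowered to calls to
; __gnu_h2f_ieee and __gnu_f2h_ieee.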

declare void @f16_user(half)
declare half @f16_result()

declare void @v2f16_user(<2 x half>)
declare <2 x half> @v2f16_result()

declare void @v4f16_user(<4 x half>)
declare <4 x half> @v4f16_result()

declare void @v8f16_user(<8 x half>)
declare <8 x half> @v8f16_result()

define void @f16_arg(half %arg, ptr %ptr) #0 {
; NOFP16-LABEL: f16_arg:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    and w0, w0, #0xffff
; NOFP16-NEXT:    mov x19, x1
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    str w0, [x19]
; NOFP16-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %fpext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
  store float %fpext, ptr %ptr
  ret void
}

define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
; NOFP16-LABEL: v2f16_arg:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 32
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w30, -32
; NOFP16-NEXT:    and w0, w0, #0xffff
; NOFP16-NEXT:    mov x19, x2
; NOFP16-NEXT:    mov w20, w1
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    and w0, w20, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    stp w21, w0, [x19]
; NOFP16-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %fpext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
  store <2 x float> %fpext, ptr %ptr
  ret void
}

define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
; NOFP16-LABEL: v3f16_arg:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; NOFP16-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 48
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w22, -32
; NOFP16-NEXT:    .cfi_offset w30, -48
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    and w0, w1, #0xffff
; NOFP16-NEXT:    mov x19, x3
; NOFP16-NEXT:    mov w20, w2
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w22, w0
; NOFP16-NEXT:    and w0, w21, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w8, w0
; NOFP16-NEXT:    and w0, w20, #0xffff
; NOFP16-NEXT:    orr x21, x8, x22, lsl #32
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    str x21, [x19]
; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    str w0, [x19, #8]
; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
  %fpext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
  store <3 x float> %fpext, ptr %ptr
  ret void
}

define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
; NOFP16-LABEL: v4f16_arg:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
; NOFP16-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 48
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w22, -32
; NOFP16-NEXT:    .cfi_offset w23, -40
; NOFP16-NEXT:    .cfi_offset w30, -48
; NOFP16-NEXT:    and w0, w0, #0xffff
; NOFP16-NEXT:    mov x19, x4
; NOFP16-NEXT:    mov w20, w3
; NOFP16-NEXT:    mov w21, w2
; NOFP16-NEXT:    mov w22, w1
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w23, w0
; NOFP16-NEXT:    and w0, w22, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w22, w0
; NOFP16-NEXT:    and w0, w21, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    and w0, w20, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    stp w21, w0, [x19, #8]
; NOFP16-NEXT:    stp w23, w22, [x19]
; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    ldp x30, x23, [sp], #48 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %fpext = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %arg, metadata !"fpexcept.strict")
  store <4 x float> %fpext, ptr %ptr
  ret void
}

define half @f16_return(float %arg) #0 {
; NOFP16-LABEL: f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
  %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %fptrunc
}

define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
; NOFP16-LABEL: v2f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 32
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w30, -32
; NOFP16-NEXT:    mov w19, w0
; NOFP16-NEXT:    mov w0, w1
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w20, w0
; NOFP16-NEXT:    mov w0, w19
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w1, w20
; NOFP16-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
  %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %fptrunc
}

define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
; NOFP16-LABEL: v3f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 32
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w30, -32
; NOFP16-NEXT:    mov w20, w0
; NOFP16-NEXT:    mov w0, w2
; NOFP16-NEXT:    mov w19, w1
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    mov w0, w19
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w19, w0
; NOFP16-NEXT:    mov w0, w20
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w1, w19
; NOFP16-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    mov w2, w21
; NOFP16-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x half> %fptrunc
}

define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
; NOFP16-LABEL: v4f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; NOFP16-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 48
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w22, -32
; NOFP16-NEXT:    .cfi_offset w30, -48
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    mov w0, w3
; NOFP16-NEXT:    mov w19, w2
; NOFP16-NEXT:    mov w20, w1
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w22, w0
; NOFP16-NEXT:    mov w0, w19
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w19, w0
; NOFP16-NEXT:    mov w0, w20
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w20, w0
; NOFP16-NEXT:    mov w0, w21
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w1, w20
; NOFP16-NEXT:    mov w2, w19
; NOFP16-NEXT:    mov w3, w22
; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
  %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x half> %fptrunc
}

; FIXME: The outgoing f16/v2f16 argument and return tests below are still disabled:
; define void @outgoing_f16_arg(ptr %ptr) #0 {
;   %val = load half, ptr %ptr
;   call void @f16_user(half %val)
;   ret void
; }

; define void @outgoing_v2f16_arg(ptr %ptr) #0 {
;   %val = load <2 x half>, ptr %ptr
;   call void @v2f16_user(<2 x half> %val)
;   ret void
; }

; define void @outgoing_f16_return(ptr %ptr) #0 {
;   %val = call half @f16_result()
;   store half %val, ptr %ptr
;   ret void
; }

; define void @outgoing_v2f16_return(ptr %ptr) #0 {
;   %val = call <2 x half> @v2f16_result()
;   store <2 x half> %val, ptr %ptr
;   ret void
; }

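; Vector half results come back in GPRs, one element per register, and are
; stored to memory element by element.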
define void @outgoing_v4f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v4f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    mov x19, x0
; NOFP16-NEXT:    bl v4f16_result
; NOFP16-NEXT:    strh w2, [x19, #4]
; NOFP16-NEXT:    strh w3, [x19, #6]
; NOFP16-NEXT:    strh w1, [x19, #2]
; NOFP16-NEXT:    strh w0, [x19]
; NOFP16-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %val = call <4 x half> @v4f16_result() #0
  store <4 x half> %val, ptr %ptr
  ret void
}

define void @outgoing_v8f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v8f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    mov x19, x0
; NOFP16-NEXT:    bl v8f16_result
; NOFP16-NEXT:    strh w5, [x19, #10]
; NOFP16-NEXT:    strh w7, [x19, #14]
; NOFP16-NEXT:    strh w6, [x19, #12]
; NOFP16-NEXT:    strh w4, [x19, #8]
; NOFP16-NEXT:    strh w3, [x19, #6]
; NOFP16-NEXT:    strh w2, [x19, #4]
; NOFP16-NEXT:    strh w1, [x19, #2]
; NOFP16-NEXT:    strh w0, [x19]
; NOFP16-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %val = call <8 x half> @v8f16_result() #0
  store <8 x half> %val, ptr %ptr
  ret void
}

define half @call_split_type_used_outside_block_v8f16() #0 {
; NOFP16-LABEL: call_split_type_used_outside_block_v8f16:
; NOFP16:       // %bb.0: // %bb0
; NOFP16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    bl v8f16_result
; NOFP16-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
bb0:
  %split.ret.type = call <8 x half> @v8f16_result() #0
  br label %bb1

bb1:
  %extract = extractelement <8 x half> %split.ret.type, i32 0
  ret half %extract
}

declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0
declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0

declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0
declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0

attributes #0 = { strictfp }