xref: /llvm-project/llvm/test/CodeGen/ARM/musttail.ll (revision 376d7b27fa3de4f72c2a3cec4f941c1ca3f1d7f2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=armv7a-none-eabi %s -o - | FileCheck %s
3
; External callee with six i32 parameters. The tail-call tests below pass it
; the constants 1-6.
4declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5)
5
; Caller and callee both take six i32 parameters. The `tail` call is expected
; to become a plain branch (`b`): arguments 5 and 6 are stored to [sp] and
; [sp, #4], arguments 1-4 are placed in r0-r3, and no frame is set up.
6define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32  %4, i32  %5) {
7; CHECK-LABEL: many_args_tail:
8; CHECK:       @ %bb.0:
9; CHECK-NEXT:    mov r0, #5
10; CHECK-NEXT:    mov r1, #2
11; CHECK-NEXT:    str r0, [sp]
12; CHECK-NEXT:    mov r0, #6
13; CHECK-NEXT:    str r0, [sp, #4]
14; CHECK-NEXT:    mov r0, #1
15; CHECK-NEXT:    mov r2, #3
16; CHECK-NEXT:    mov r3, #4
17; CHECK-NEXT:    b many_args_callee
  ; The incoming parameters are unused; only constants are passed on.
18  %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
19  ret i32 %ret
20}
21
; Same signature and arguments as @many_args_tail, but the call is marked
; `musttail`. The expected instruction sequence is identical: stores to [sp]
; and [sp, #4] for arguments 5 and 6, then a branch to the callee.
22define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32  %4, i32  %5) {
23; CHECK-LABEL: many_args_musttail:
24; CHECK:       @ %bb.0:
25; CHECK-NEXT:    mov r0, #5
26; CHECK-NEXT:    mov r1, #2
27; CHECK-NEXT:    str r0, [sp]
28; CHECK-NEXT:    mov r0, #6
29; CHECK-NEXT:    str r0, [sp, #4]
30; CHECK-NEXT:    mov r0, #1
31; CHECK-NEXT:    mov r2, #3
32; CHECK-NEXT:    mov r3, #4
33; CHECK-NEXT:    b many_args_callee
  ; The musttail call is immediately followed by a ret of its result.
34  %ret = musttail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
35  ret i32 %ret
36}
37
38; This function has more arguments than its tail-callee. This isn't valid for
39; the musttail attribute, but can still be tail-called as a non-guaranteed
40; optimisation, because the outgoing arguments to @many_args_callee fit in the
41; stack space allocated by the caller of @more_args_tail.
42define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32  %4, i32 %5, i32 %6) {
43; CHECK-LABEL: more_args_tail:
44; CHECK:       @ %bb.0:
45; CHECK-NEXT:    mov r0, #5
46; CHECK-NEXT:    mov r1, #2
47; CHECK-NEXT:    str r0, [sp]
48; CHECK-NEXT:    mov r0, #6
49; CHECK-NEXT:    str r0, [sp, #4]
50; CHECK-NEXT:    mov r0, #1
51; CHECK-NEXT:    mov r2, #3
52; CHECK-NEXT:    mov r3, #4
53; CHECK-NEXT:    b many_args_callee
  ; Seven incoming i32 parameters, six outgoing arguments. The expected code
  ; is still a branch, with arguments 5 and 6 stored to [sp] and [sp, #4].
54  %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
55  ret i32 %ret
56}
57
58; Again, this isn't valid for musttail, but can be tail-called in practice
59; because the stack size is the same.
60define i32 @different_args_tail(i64 %0, i64 %1, i64 %2) {
61; CHECK-LABEL: different_args_tail:
62; CHECK:       @ %bb.0:
63; CHECK-NEXT:    mov r0, #5
64; CHECK-NEXT:    mov r1, #2
65; CHECK-NEXT:    str r0, [sp]
66; CHECK-NEXT:    mov r0, #6
67; CHECK-NEXT:    str r0, [sp, #4]
68; CHECK-NEXT:    mov r0, #1
69; CHECK-NEXT:    mov r2, #3
70; CHECK-NEXT:    mov r3, #4
71; CHECK-NEXT:    b many_args_callee
  ; The caller takes three i64 parameters while the callee takes six i32
  ; arguments. The expected code matches the other tail-called cases: two
  ; stores at [sp] and [sp, #4] followed by a branch.
72  %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
73  ret i32 %ret
74}
75
76; Here, the caller requires less stack space for its arguments than the
77; callee, so it would not be valid to do a tail-call.
78define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32  %4) {
79; CHECK-LABEL: fewer_args_tail:
80; CHECK:       @ %bb.0:
81; CHECK-NEXT:    .save {r11, lr}
82; CHECK-NEXT:    push {r11, lr}
83; CHECK-NEXT:    .pad #8
84; CHECK-NEXT:    sub sp, sp, #8
85; CHECK-NEXT:    mov r1, #6
86; CHECK-NEXT:    mov r0, #5
87; CHECK-NEXT:    strd r0, r1, [sp]
88; CHECK-NEXT:    mov r0, #1
89; CHECK-NEXT:    mov r1, #2
90; CHECK-NEXT:    mov r2, #3
91; CHECK-NEXT:    mov r3, #4
92; CHECK-NEXT:    bl many_args_callee
93; CHECK-NEXT:    add sp, sp, #8
94; CHECK-NEXT:    pop {r11, pc}
  ; Five incoming i32 parameters versus six outgoing arguments. Despite the
  ; `tail` marker, the expected code is an ordinary call: push {r11, lr},
  ; reserve 8 bytes for arguments 5 and 6 (written with strd), `bl`, then
  ; release the 8 bytes and return through pop {r11, pc}.
95  %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
96  ret i32 %ret
97}
98
; External callee that returns a { double, double } through an 8-byte-aligned
; sret pointer parameter.
99declare void @sret_callee(ptr sret({ double, double }) align 8)
100
101; Functions which return by sret can be tail-called because the incoming sret
102; pointer gets passed through to the callee.
103define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) {
104; CHECK-LABEL: sret_caller_tail:
105; CHECK:       @ %bb.0: @ %entry
106; CHECK-NEXT:    b sret_callee
107entry:
  ; %result is forwarded unchanged as the callee's sret pointer; the expected
  ; code is a single branch with no other instructions.
108  tail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
109  ret void
110}
111
; Same as @sret_caller_tail, but the call is marked `musttail`. The expected
; code is again a single branch to sret_callee.
112define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) {
113; CHECK-LABEL: sret_caller_musttail:
114; CHECK:       @ %bb.0: @ %entry
115; CHECK-NEXT:    b sret_callee
116entry:
  ; The incoming sret pointer %result is passed straight through.
117  musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
118  ret void
119}
120
121; Clang only uses byval for arguments of 65 bytes or larger, but we test with a
122; 20 byte struct to keep the tests more readable. This size was chosen to still
123; make sure that it will be split between registers and the stack, to test all
124; of the interesting code paths in the backend.
; %twenty_bytes wraps an array of five i32 values (20 bytes in total).
125%twenty_bytes = type { [5 x i32] }
; External callee taking one %twenty_bytes struct by value (byval, align 4).
126declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
127
128; Functions with byval parameters can be tail-called, because the value is
129; actually passed in registers and the stack in the same way for the caller and
130; callee. Within @large_caller the first 16 bytes of the argument are spilled
131; to the local stack frame, but for the tail-call they are passed in r0-r3, so
132; it's safe to de-allocate that memory before the call.
133; TODO: The SUB and STM instructions are unnecessary and could be optimised
134; out, but the behaviour of this is still correct.
; NOTE(review): the byval parameter uses the legacy typed-pointer spelling
; `%twenty_bytes*`, while the sret tests in this file use `ptr`; consider
; unifying the spelling.
135define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
136; CHECK-LABEL: large_caller:
137; CHECK:       @ %bb.0: @ %entry
138; CHECK-NEXT:    .pad #16
139; CHECK-NEXT:    sub sp, sp, #16
140; CHECK-NEXT:    stm sp!, {r0, r1, r2, r3}
141; CHECK-NEXT:    b large_callee
142entry:
  ; Forward the incoming byval argument %a unchanged. The expected code
  ; reserves 16 bytes, stores r0-r3 with the writeback form `stm sp!`, and
  ; branches to the callee without reloading any registers.
143  musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
144  ret void
145}
146
147; As above, but with some inline asm to test that the arguments in r0-r3 are
148; re-loaded before the call.
149define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
150; CHECK-LABEL: large_caller_check_regs:
151; CHECK:       @ %bb.0: @ %entry
152; CHECK-NEXT:    .pad #16
153; CHECK-NEXT:    sub sp, sp, #16
154; CHECK-NEXT:    stm sp, {r0, r1, r2, r3}
155; CHECK-NEXT:    @APP
156; CHECK-NEXT:    @NO_APP
157; CHECK-NEXT:    pop {r0, r1, r2, r3}
158; CHECK-NEXT:    b large_callee
159entry:
  ; Empty inline asm whose constraint string lists r0-r3 as clobbered, so the
  ; values stored by `stm sp` cannot be assumed to survive in registers.
160  tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
  ; Expected: r0-r3 are restored with `pop {r0, r1, r2, r3}` (which also
  ; releases the 16 bytes) immediately before the branch.
161  musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
162  ret void
163}
164
165; The IR for this one looks dodgy, because it has an alloca passed to a
166; musttail function, but it is passed as a byval argument, so will be copied
167; into the stack space allocated by @large_caller_new_value's caller, so is
168; valid.
169define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
170; CHECK-LABEL: large_caller_new_value:
171; CHECK:       @ %bb.0: @ %entry
172; CHECK-NEXT:    .pad #36
173; CHECK-NEXT:    sub sp, sp, #36
174; CHECK-NEXT:    add r12, sp, #20
175; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
176; CHECK-NEXT:    mov r0, #4
177; CHECK-NEXT:    add r1, sp, #36
178; CHECK-NEXT:    str r0, [sp, #16]
179; CHECK-NEXT:    mov r0, #3
180; CHECK-NEXT:    str r0, [sp, #12]
181; CHECK-NEXT:    mov r0, #2
182; CHECK-NEXT:    str r0, [sp, #8]
183; CHECK-NEXT:    mov r0, #1
184; CHECK-NEXT:    str r0, [sp, #4]
185; CHECK-NEXT:    mov r0, #0
186; CHECK-NEXT:    str r0, [sp]
187; CHECK-NEXT:    mov r0, sp
188; CHECK-NEXT:    add r0, r0, #16
189; CHECK-NEXT:    mov r3, #3
190; CHECK-NEXT:    ldr r2, [r0], #4
191; CHECK-NEXT:    str r2, [r1], #4
192; CHECK-NEXT:    mov r0, #0
193; CHECK-NEXT:    mov r1, #1
194; CHECK-NEXT:    mov r2, #2
195; CHECK-NEXT:    add sp, sp, #36
196; CHECK-NEXT:    b large_callee
197entry:
  ; %y is a local %twenty_bytes filled with the values 0, 1, 2, 3, 4 at byte
  ; offsets 0, 4, 8, 12 and 16.
198  %y = alloca %twenty_bytes, align 4
199  store i32 0, ptr %y, align 4
200  %0 = getelementptr inbounds i8, ptr %y, i32 4
201  store i32 1, ptr %0, align 4
202  %1 = getelementptr inbounds i8, ptr %y, i32 8
203  store i32 2, ptr %1, align 4
204  %2 = getelementptr inbounds i8, ptr %y, i32 12
205  store i32 3, ptr %2, align 4
206  %3 = getelementptr inbounds i8, ptr %y, i32 16
207  store i32 4, ptr %3, align 4
  ; Pass the local (not the incoming %a) by value. The expected code puts the
  ; first four words in r0-r3 as immediates 0-3, copies the fifth word from
  ; [sp, #16] to [sp, #36] with an ldr/str pair, and releases the 36-byte
  ; frame before branching.
208  musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %y)
209  ret void
210}
211
; External callee taking two %twenty_bytes structs by value.
212declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4)
; Forwards its two byval arguments to @two_byvals_callee in swapped order.
; In the expected code, %a (5 words from sp+56) is copied to a temporary at
; sp and %b (5 words from sp+76) to a temporary at sp+20. The outgoing
; arguments are then built from those temporaries: r0-r3 and the word at
; sp+72 come from the %b copy, and the 5 words at sp+76 from the %a copy.
213define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
214; CHECK-LABEL: swap_byvals:
215; CHECK:       @ %bb.0: @ %entry
216; CHECK-NEXT:    .pad #16
217; CHECK-NEXT:    sub sp, sp, #16
218; CHECK-NEXT:    .save {r4, r5, r11, lr}
219; CHECK-NEXT:    push {r4, r5, r11, lr}
220; CHECK-NEXT:    .pad #40
221; CHECK-NEXT:    sub sp, sp, #40
222; CHECK-NEXT:    add r12, sp, #56
223; CHECK-NEXT:    add lr, sp, #20
224; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
225; CHECK-NEXT:    add r0, sp, #56
226; CHECK-NEXT:    mov r12, sp
227; CHECK-NEXT:    ldr r1, [r0], #4
228; CHECK-NEXT:    mov r2, r12
229; CHECK-NEXT:    str r1, [r2], #4
230; CHECK-NEXT:    add r3, sp, #20
231; CHECK-NEXT:    ldr r1, [r0], #4
232; CHECK-NEXT:    add r4, sp, #76
233; CHECK-NEXT:    str r1, [r2], #4
234; CHECK-NEXT:    ldr r1, [r0], #4
235; CHECK-NEXT:    str r1, [r2], #4
236; CHECK-NEXT:    ldr r1, [r0], #4
237; CHECK-NEXT:    str r1, [r2], #4
238; CHECK-NEXT:    ldr r1, [r0], #4
239; CHECK-NEXT:    add r0, sp, #76
240; CHECK-NEXT:    str r1, [r2], #4
241; CHECK-NEXT:    mov r2, lr
242; CHECK-NEXT:    ldr r1, [r0], #4
243; CHECK-NEXT:    str r1, [r2], #4
244; CHECK-NEXT:    ldr r1, [r0], #4
245; CHECK-NEXT:    str r1, [r2], #4
246; CHECK-NEXT:    ldr r1, [r0], #4
247; CHECK-NEXT:    str r1, [r2], #4
248; CHECK-NEXT:    ldr r1, [r0], #4
249; CHECK-NEXT:    str r1, [r2], #4
250; CHECK-NEXT:    ldr r1, [r0], #4
251; CHECK-NEXT:    str r1, [r2], #4
252; CHECK-NEXT:    ldm r3, {r0, r1, r2, r3}
253; CHECK-NEXT:    ldr r5, [r12], #4
254; CHECK-NEXT:    str r5, [r4], #4
255; CHECK-NEXT:    ldr r5, [r12], #4
256; CHECK-NEXT:    str r5, [r4], #4
257; CHECK-NEXT:    ldr r5, [r12], #4
258; CHECK-NEXT:    str r5, [r4], #4
259; CHECK-NEXT:    ldr r5, [r12], #4
260; CHECK-NEXT:    str r5, [r4], #4
261; CHECK-NEXT:    ldr r5, [r12], #4
262; CHECK-NEXT:    str r5, [r4], #4
263; CHECK-NEXT:    add r5, lr, #16
264; CHECK-NEXT:    add r12, sp, #72
265; CHECK-NEXT:    ldr r4, [r5], #4
266; CHECK-NEXT:    str r4, [r12], #4
267; CHECK-NEXT:    add sp, sp, #40
268; CHECK-NEXT:    pop {r4, r5, r11, lr}
269; CHECK-NEXT:    add sp, sp, #16
270; CHECK-NEXT:    b two_byvals_callee
271entry:
  ; Note the argument order: %b is passed first and %a second.
272  musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a)
273  ret void
274}
275
276; A forwarded byval arg, but at a different offset on the stack, so it needs to
277; be copied to the local stack frame first. This can't be musttail because of
278; the different signatures, but is still tail-called as an optimisation.
279declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
280define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
281; CHECK-LABEL: shift_byval:
282; CHECK:       @ %bb.0: @ %entry
283; CHECK-NEXT:    .pad #12
284; CHECK-NEXT:    sub sp, sp, #12
285; CHECK-NEXT:    .save {r4, lr}
286; CHECK-NEXT:    push {r4, lr}
287; CHECK-NEXT:    .pad #20
288; CHECK-NEXT:    sub sp, sp, #20
289; CHECK-NEXT:    add r0, sp, #28
290; CHECK-NEXT:    add lr, sp, #40
291; CHECK-NEXT:    stm r0, {r1, r2, r3}
292; CHECK-NEXT:    add r0, sp, #28
293; CHECK-NEXT:    mov r1, sp
294; CHECK-NEXT:    ldr r2, [r0], #4
295; CHECK-NEXT:    add r12, r1, #16
296; CHECK-NEXT:    str r2, [r1], #4
297; CHECK-NEXT:    ldr r2, [r0], #4
298; CHECK-NEXT:    str r2, [r1], #4
299; CHECK-NEXT:    ldr r2, [r0], #4
300; CHECK-NEXT:    str r2, [r1], #4
301; CHECK-NEXT:    ldr r2, [r0], #4
302; CHECK-NEXT:    str r2, [r1], #4
303; CHECK-NEXT:    ldr r2, [r0], #4
304; CHECK-NEXT:    str r2, [r1], #4
305; CHECK-NEXT:    ldm sp, {r0, r1, r2, r3}
306; CHECK-NEXT:    ldr r4, [r12], #4
307; CHECK-NEXT:    str r4, [lr], #4
308; CHECK-NEXT:    add sp, sp, #20
309; CHECK-NEXT:    pop {r4, lr}
310; CHECK-NEXT:    add sp, sp, #12
311; CHECK-NEXT:    b shift_byval_callee
312entry:
  ; %b follows an i32 parameter in the caller but is the callee's only
  ; argument. The expected code stores r1-r3 at sp+28 and copies the five
  ; words of %b from sp+28 to a temporary at sp. It then reloads the first
  ; four words into r0-r3 with ldm and stores the fifth word to sp+40
  ; before unwinding the frame and branching.
313  tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b)
314  ret void
315}
316
317; A global object passed to a byval argument, so it must be copied, but doesn't
318; need a stack temporary.
319@large_global = external global %twenty_bytes
320define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
321; CHECK-LABEL: large_caller_from_global:
322; CHECK:       @ %bb.0: @ %entry
323; CHECK-NEXT:    .pad #16
324; CHECK-NEXT:    sub sp, sp, #16
325; CHECK-NEXT:    .save {r4, lr}
326; CHECK-NEXT:    push {r4, lr}
327; CHECK-NEXT:    add r12, sp, #8
328; CHECK-NEXT:    add lr, sp, #24
329; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
330; CHECK-NEXT:    movw r3, :lower16:large_global
331; CHECK-NEXT:    movt r3, :upper16:large_global
332; CHECK-NEXT:    add r12, r3, #16
333; CHECK-NEXT:    ldm r3, {r0, r1, r2, r3}
334; CHECK-NEXT:    ldr r4, [r12], #4
335; CHECK-NEXT:    str r4, [lr], #4
336; CHECK-NEXT:    pop {r4, lr}
337; CHECK-NEXT:    add sp, sp, #16
338; CHECK-NEXT:    b large_callee
339entry:
  ; The byval source is @large_global, not the incoming %a. The expected code
  ; materialises the global's address with movw/movt, loads its first four
  ; words into r0-r3 with ldm, and copies the fifth word (offset 16) through
  ; r4 to sp+24 before restoring r4/lr and branching.
340  musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global)
341  ret void
342}
343