; xref: /llvm-project/llvm/test/CodeGen/ARM/fastcc-tailcall.ll (revision d88f96dff3f192fc0c1bf57f7810b95a709b3591)
; RUN: llc -verify-machineinstrs < %s -mtriple=thumbv7k-apple-watchos -tailcallopt | FileCheck %s

; Codegen test for fastcc calls compiled with -tailcallopt for Thumb2 watchOS.
; The caller_toN_fromM functions are named after the number of stack-argument
; bytes the callee takes (N) and the number the caller itself received (M).

; Callees. The leading [4 x i32] parameter is padding that presumably fills
; r0-r3 so that the remaining parameters are passed on the stack; the numeric
; suffix is the number of stack-argument bytes.
declare fastcc void @callee_stack0()
declare fastcc void @callee_stack4([4 x i32], i32)
declare fastcc void @callee_stack20([4 x i32], [5 x i32])
; Not referenced by any function visible in this file.
declare extern_weak fastcc void @callee_weak()

; No stack arguments on either side: the tail call must become a plain branch,
; with no add or sub emitted between the function label and that branch.
define fastcc void @caller_to0_from0() nounwind {
; CHECK-LABEL: _caller_to0_from0:

  tail call fastcc void @callee_stack0()
  ret void
; CHECK-NOT: add
; CHECK-NOT: sub
; CHECK: b.w _callee_stack0
}

; The caller received one i32 on the stack but the callee takes none, so the
; caller must pop its own incoming argument area (checked as 16 bytes,
; presumably the 4-byte argument rounded up to the stack alignment) immediately
; before branching.
define fastcc void @caller_to0_from4([4 x i32], i32) {
; CHECK-LABEL: _caller_to0_from4:

  tail call fastcc void @callee_stack0()
  ret void

; CHECK: add sp, #16
; CHECK-NEXT: b.w _callee_stack0
}

; The caller has no incoming stack arguments to reuse, so space for the
; outgoing one has to be carved out below the incoming stack pointer.
define fastcc void @caller_to4_from0() {
; CHECK-LABEL: _caller_to4_from0:

; Key point is that the "42" should go #16 below the incoming stack pointer,
; and that SP is not raised again before the branch: the callee owns the
; argument area from then on.
  tail call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK: sub sp, #16
; CHECK: movs [[TMP:r[0-9]+]], #42
; CHECK: str [[TMP]], [sp]
; CHECK-NOT: add sp
; CHECK: b.w _callee_stack4

}

; Caller and callee both take a single i32 on the stack, so the incoming
; argument area can be reused in place.
define fastcc void @caller_to4_from4([4 x i32], i32 %a) {
; CHECK-LABEL: _caller_to4_from4:
; CHECK-NOT: sub sp
; Key point is that the outgoing "42" should be stored at SP as it was on
; entry, i.e. over the slot that held the (unused) incoming %a, with no SP
; adjustment in either direction.
  tail call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK: str {{r[0-9]+}}, [sp]
; CHECK-NOT: add sp
; CHECK-NEXT: b.w _callee_stack4
}

; The callee needs more stack-argument space (five i32s) than the caller
; received (one i32).
define fastcc void @caller_to20_from4([4 x i32], i32 %a) {
; CHECK-LABEL: _caller_to20_from4:
; CHECK: sub sp, #16

; Important point is that the call reuses the "dead" incoming argument space:
; SP is lowered once by 16 and the five words are stored at [sp]..[sp, #16],
; so the last one lands exactly on the slot that held %a on entry. Nothing may
; move SP again before the branch, because the callee is the one that
; deallocates this area.
  tail call fastcc void @callee_stack20([4 x i32] undef, [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5])

; CHECK: str {{.*}}, [sp]
; CHECK: str {{.*}}, [sp, #4]
; CHECK: str {{.*}}, [sp, #8]
; CHECK: str {{.*}}, [sp, #12]
; CHECK: str {{.*}}, [sp, #16]
; CHECK-NOT: add sp
; CHECK-NOT: sub sp
; CHECK: b.w _callee_stack20
  ret void
}


; The caller received more stack-argument space (three i64s) than the callee
; needs (one i32), so part of the incoming area must be released.
define fastcc void @caller_to4_from24([4 x i32], i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: _caller_to4_from24:


; Key point is that the outgoing "42" should be stored #16 above SP on entry;
; SP is then raised by 16 so that the callee finds its argument at [sp].
  tail call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK: str {{.*}}, [sp, #16]
; CHECK: add sp, #16
; CHECK-NEXT: b.w _callee_stack4
}


; Incoming and outgoing stack-argument areas are the same size and the incoming
; %a is forwarded unchanged, so the area is rewritten in place without moving
; SP.
define fastcc void @caller_to20_from20([4 x i32], [5 x i32] %a) {
; CHECK-LABEL: _caller_to20_from20:
; CHECK-NOT: add sp,
; CHECK-NOT: sub sp,

; Here we want to make sure that all the loads of %a happen before any of the
; stores: the source and destination slots overlap, so an early store would
; clobber a part of %a that has not been read yet.
  tail call fastcc void @callee_stack20([4 x i32] undef, [5 x i32] %a)
  ret void

  ; If these ever get interleaved make sure aliasing slots don't clobber each
  ; other.
; CHECK: ldrd {{.*}}, {{.*}}, [sp, #12]
; CHECK: ldm.w sp,
; CHECK: stm.w
; CHECK: strd
; CHECK-NEXT: b.w _callee_stack20
}

; The "disable-tail-calls"="true" attribute must win over -tailcallopt: the
; call is emitted as an ordinary bl and the function then returns by itself.
define fastcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" {
; CHECK-LABEL: _disable_tail_calls:

  tail call fastcc void @callee_stack0()
  ret void

; CHECK: bl _callee_stack0
; Thumb has no "ret" mnemonic, so a check for that text could only ever match
; by accident inside a later symbol name. The return after a bl is expected to
; be a pop that loads pc.
; CHECK: pop {{.*}}pc
}

; An ordinary return from a function that received a stack argument: the
; function pops its own 16-byte incoming argument area before returning.
define fastcc void @normal_ret_with_stack([4 x i32], i32 %a) {
; The function label uses a label check so that the checks of the preceding
; function cannot run on into this one.
; CHECK-LABEL: _normal_ret_with_stack:
; CHECK: add sp, #16
; CHECK: bx lr
  ret void
}

; Not referenced by any function visible in this file.
declare { [2 x float] } @get_vec2()

; A caller using the default (C) calling convention makes two ordinary,
; non-tail calls to a fastcc callee that takes a stack argument.
;
; NOTE(review): every directive in this function uses the COMMON prefix, which
; the FileCheck invocation at the top of this file does not enable, so none of
; them is currently verified. Confirm the expected sequence against real llc
; output before moving them to the active prefix.
define void @fromC_totail() {
; COMMON-LABEL: _fromC_totail:
; COMMON: push {r4, lr}
; COMMON: sub sp, #8

; COMMON-NOT: sub sp,
; COMMON: movs [[TMP:r[0-9]+]], #42
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
  ; We must reset the stack to where it was before the call by undoing its extra stack pop.
; COMMON: sub sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
; COMMON: sub sp, #16

  call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void
}

; Same as fromC_totail, but with a variable-sized alloca in the caller.
;
; NOTE(review): every directive in this function uses the COMMON prefix, which
; the FileCheck invocation at the top of this file does not enable, so none of
; them is currently verified. Confirm the expected sequence against real llc
; output before moving them to the active prefix.
define void @fromC_totail_noreservedframe(i32 %len) {
; COMMON-LABEL: _fromC_totail_noreservedframe:
; COMMON: sub.w sp, sp, r{{.*}}

; COMMON: movs [[TMP:r[0-9]+]], #42
  ; Note stack is subtracted here to allocate space for arg
; COMMON: sub sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
  ; And here.
; COMMON: sub sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
  ; But not restored here because callee_stack4 did that for us.
; COMMON-NOT: sub sp,

  ; Variable sized allocation prevents reserving frame at start of function so each call must allocate any stack space it needs.
  %var = alloca i32, i32 %len

  call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void
}

; Default (C) calling-convention counterpart of callee_stack4: same parameter
; list, but declared without fastcc.
declare void @Ccallee_stack4([4 x i32], i32)

; A fastcc caller makes two ordinary calls to a callee that uses the default
; (C) calling convention and takes a stack argument.
;
; NOTE(review): every directive in this function uses the COMMON prefix, which
; the FileCheck invocation at the top of this file does not enable, so none of
; them is currently verified. Confirm the expected sequence against real llc
; output before moving them to the active prefix.
define fastcc void @fromtail_toC() {
; COMMON-LABEL: _fromtail_toC:
; COMMON: push {r4, lr}
; COMMON: sub sp, #8

; COMMON-NOT: sub sp,
; COMMON: movs [[TMP:r[0-9]+]], #42
; COMMON: str [[TMP]], [sp]
; COMMON: bl _Ccallee_stack4
  ; C callees will return with the stack exactly where we left it, so we mustn't try to fix anything.
; COMMON-NOT: add sp,
; COMMON-NOT: sub sp,
; COMMON: str [[TMP]], [sp]{{$}}
; COMMON: bl _Ccallee_stack4
; COMMON-NOT: sub sp,

  call void @Ccallee_stack4([4 x i32] undef, i32 42)
  call void @Ccallee_stack4([4 x i32] undef, i32 42)
  ret void
}