xref: /llvm-project/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll (revision 31d6a572579a5d1d9ae14a1a9d4ffbdb1b098e49)
1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s --check-prefix=DARWIN
3; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-windows -global-isel -o - 2>&1 | FileCheck %s --check-prefix=WINDOWS
4
; Baseline case: a direct call to a void function with no arguments, in tail
; position. Both targets are expected to emit a single TCRETURNdi and nothing
; else; only the call-preserved register mask differs between them.
5declare void @simple_fn()
6define void @tail_call() {
7  ; DARWIN-LABEL: name: tail_call
8  ; DARWIN: bb.1 (%ir-block.0):
9  ; DARWIN-NEXT:   TCRETURNdi @simple_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
10  ; WINDOWS-LABEL: name: tail_call
11  ; WINDOWS: bb.1 (%ir-block.0):
12  ; WINDOWS-NEXT:   TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
13  tail call void @simple_fn()
14  ret void
15}
16
17; We should get a TCRETURNri here.
18; FIXME: We don't need the COPY.
; The callee is the function pointer argument %func, which arrives in $x0. The
; expected output copies it into a virtual register of class tcgpr64 and uses
; that register as the TCRETURNri target.
19define void @indirect_tail_call(ptr %func) {
20  ; DARWIN-LABEL: name: indirect_tail_call
21  ; DARWIN: bb.1 (%ir-block.0):
22  ; DARWIN-NEXT:   liveins: $x0
23  ; DARWIN-NEXT: {{  $}}
24  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
25  ; DARWIN-NEXT:   TCRETURNri [[COPY]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp
26  ; WINDOWS-LABEL: name: indirect_tail_call
27  ; WINDOWS: bb.1 (%ir-block.0):
28  ; WINDOWS-NEXT:   liveins: $x0
29  ; WINDOWS-NEXT: {{  $}}
30  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
31  ; WINDOWS-NEXT:   TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
32  tail call void %func()
33  ret void
34}
35
; Verify that a register-passed argument is forwarded to the tail callee: the
; incoming i32 is read from $w0, copied back into $w0, and $w0 is listed as an
; implicit use on the TCRETURNdi.
36declare void @outgoing_args_fn(i32)
37define void @test_outgoing_args(i32 %a) {
38  ; DARWIN-LABEL: name: test_outgoing_args
39  ; DARWIN: bb.1 (%ir-block.0):
40  ; DARWIN-NEXT:   liveins: $w0
41  ; DARWIN-NEXT: {{  $}}
42  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
43  ; DARWIN-NEXT:   $w0 = COPY [[COPY]](s32)
44  ; DARWIN-NEXT:   TCRETURNdi @outgoing_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0
45  ; WINDOWS-LABEL: name: test_outgoing_args
46  ; WINDOWS: bb.1 (%ir-block.0):
47  ; WINDOWS-NEXT:   liveins: $w0
48  ; WINDOWS-NEXT: {{  $}}
49  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
50  ; WINDOWS-NEXT:   $w0 = COPY [[COPY]](s32)
51  ; WINDOWS-NEXT:   TCRETURNdi @outgoing_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
52  tail call void @outgoing_args_fn(i32 %a)
53  ret void
54}
55
56; Verify that we create frame indices for memory arguments in tail calls.
57; We get a bunch of copies here which are unused and thus eliminated. So, let's
58; just focus on what matters, which is that we get a G_FRAME_INDEX.
; The unnamed [8 x <2 x double>] parameter occupies $q0-$q7, so %arg arrives in
; memory (%fixed-stack.0). The expected output loads it through a
; G_FRAME_INDEX and passes it to the callee in $d0.
59declare void @outgoing_stack_args_fn(<4 x half>)
60define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
61  ; DARWIN-LABEL: name: test_outgoing_stack_args
62  ; DARWIN: bb.1 (%ir-block.1):
63  ; DARWIN-NEXT:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
64  ; DARWIN-NEXT: {{  $}}
65  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
66  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
67  ; DARWIN-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
68  ; DARWIN-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
69  ; DARWIN-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
70  ; DARWIN-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
71  ; DARWIN-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
72  ; DARWIN-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
73  ; DARWIN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
74  ; DARWIN-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
75  ; DARWIN-NEXT:   $d0 = COPY [[LOAD]](<4 x s16>)
76  ; DARWIN-NEXT:   TCRETURNdi @outgoing_stack_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $d0
77  ; WINDOWS-LABEL: name: test_outgoing_stack_args
78  ; WINDOWS: bb.1 (%ir-block.1):
79  ; WINDOWS-NEXT:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
80  ; WINDOWS-NEXT: {{  $}}
81  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
82  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
83  ; WINDOWS-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
84  ; WINDOWS-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
85  ; WINDOWS-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
86  ; WINDOWS-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
87  ; WINDOWS-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
88  ; WINDOWS-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
89  ; WINDOWS-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
90  ; WINDOWS-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
91  ; WINDOWS-NEXT:   $d0 = COPY [[LOAD]](<4 x s16>)
92  ; WINDOWS-NEXT:   TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
93  tail call void @outgoing_stack_args_fn(<4 x half> %arg)
94  ret void
95}
96
97; Verify that we don't tail call when we cannot fit arguments on the caller's
98; stack.
; The caller takes no parameters, while the callee's ninth and tenth arguments
; (i8 and i16) do not fit in $x0-$x7 and are stored to the outgoing stack area.
; Both targets are expected to emit an ordinary BL followed by RET_ReallyLR.
; The stack layouts differ: on Darwin the two values sit at offsets 0 and 2 of
; a 4-byte area, on Windows at offsets 0 and 8 of a 16-byte area.
99declare i32 @too_big_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s)
100define i32 @test_too_big_stack() {
101  ; DARWIN-LABEL: name: test_too_big_stack
102  ; DARWIN: bb.1.entry:
103  ; DARWIN-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
104  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
105  ; DARWIN-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
106  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 4, 0, implicit-def $sp, implicit $sp
107  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
108  ; DARWIN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
109  ; DARWIN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
110  ; DARWIN-NEXT:   G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
111  ; DARWIN-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
112  ; DARWIN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
113  ; DARWIN-NEXT:   G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
114  ; DARWIN-NEXT:   $x0 = COPY [[DEF]](s64)
115  ; DARWIN-NEXT:   $x1 = COPY [[DEF]](s64)
116  ; DARWIN-NEXT:   $x2 = COPY [[DEF]](s64)
117  ; DARWIN-NEXT:   $x3 = COPY [[DEF]](s64)
118  ; DARWIN-NEXT:   $x4 = COPY [[DEF]](s64)
119  ; DARWIN-NEXT:   $x5 = COPY [[DEF]](s64)
120  ; DARWIN-NEXT:   $x6 = COPY [[DEF]](s64)
121  ; DARWIN-NEXT:   $x7 = COPY [[DEF]](s64)
122  ; DARWIN-NEXT:   BL @too_big_stack, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
123  ; DARWIN-NEXT:   ADJCALLSTACKUP 4, 0, implicit-def $sp, implicit $sp
124  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
125  ; DARWIN-NEXT:   $w0 = COPY [[COPY1]](s32)
126  ; DARWIN-NEXT:   RET_ReallyLR implicit $w0
127  ; WINDOWS-LABEL: name: test_too_big_stack
128  ; WINDOWS: bb.1.entry:
129  ; WINDOWS-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
130  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
131  ; WINDOWS-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
132  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
133  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
134  ; WINDOWS-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
135  ; WINDOWS-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
136  ; WINDOWS-NEXT:   G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
137  ; WINDOWS-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
138  ; WINDOWS-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
139  ; WINDOWS-NEXT:   G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
140  ; WINDOWS-NEXT:   $x0 = COPY [[DEF]](s64)
141  ; WINDOWS-NEXT:   $x1 = COPY [[DEF]](s64)
142  ; WINDOWS-NEXT:   $x2 = COPY [[DEF]](s64)
143  ; WINDOWS-NEXT:   $x3 = COPY [[DEF]](s64)
144  ; WINDOWS-NEXT:   $x4 = COPY [[DEF]](s64)
145  ; WINDOWS-NEXT:   $x5 = COPY [[DEF]](s64)
146  ; WINDOWS-NEXT:   $x6 = COPY [[DEF]](s64)
147  ; WINDOWS-NEXT:   $x7 = COPY [[DEF]](s64)
148  ; WINDOWS-NEXT:   BL @too_big_stack, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
149  ; WINDOWS-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
150  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
151  ; WINDOWS-NEXT:   $w0 = COPY [[COPY1]](s32)
152  ; WINDOWS-NEXT:   RET_ReallyLR implicit $w0
153entry:
154  %call = tail call i32 @too_big_stack(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9)
155  ret i32 %call
156}
157
158; Callees with nonvoid return types can be tail called when the caller returns
159; the call's result unchanged: the checks below expect a bare TCRETURNdi with
160; no COPYs of the return value after the call.
161declare i32 @nonvoid_ret()
162define i32 @test_nonvoid_ret() {
163  ; DARWIN-LABEL: name: test_nonvoid_ret
164  ; DARWIN: bb.1 (%ir-block.0):
165  ; DARWIN-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
166  ; WINDOWS-LABEL: name: test_nonvoid_ret
167  ; WINDOWS: bb.1 (%ir-block.0):
168  ; WINDOWS-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
169  %call = tail call i32 @nonvoid_ret()
170  ret i32 %call
171}
172
; Tail call to a variadic callee, passing only its three fixed arguments.
; Both targets are expected to tail call. They differ in where the double
; goes: $d0 on Darwin, a general-purpose register ($x1) on Windows, which in
; turn shifts the i64 from $x1 to $x2.
173declare void @varargs(i32, double, i64, ...)
174define void @test_varargs() {
175  ; DARWIN-LABEL: name: test_varargs
176  ; DARWIN: bb.1 (%ir-block.0):
177  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
178  ; DARWIN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
179  ; DARWIN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
180  ; DARWIN-NEXT:   $w0 = COPY [[C]](s32)
181  ; DARWIN-NEXT:   $d0 = COPY [[C1]](s64)
182  ; DARWIN-NEXT:   $x1 = COPY [[C2]](s64)
183  ; DARWIN-NEXT:   TCRETURNdi @varargs, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
184  ; WINDOWS-LABEL: name: test_varargs
185  ; WINDOWS: bb.1 (%ir-block.0):
186  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
187  ; WINDOWS-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
188  ; WINDOWS-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
189  ; WINDOWS-NEXT:   $w0 = COPY [[C]](s32)
190  ; WINDOWS-NEXT:   $x1 = COPY [[C1]](s64)
191  ; WINDOWS-NEXT:   $x2 = COPY [[C2]](s64)
192  ; WINDOWS-NEXT:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2
193  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
194  ret void
195}
196
197; Darwin should not tail call here, because the last parameter to @varargs is
198; not fixed. So, it's passed on the stack, which will make us not fit. On
199; Windows, it's passed in a register, so it's safe to tail call.
; The variadic argument is the trailing i64 314. In the expected Darwin output
; it is stored at offset 0 of an 8-byte outgoing area and the call is a BL; in
; the expected Windows output it is copied into $x3 ahead of a TCRETURNdi.
200define void @test_varargs_2() {
201
202  ; DARWIN-LABEL: name: test_varargs_2
203  ; DARWIN: bb.1 (%ir-block.0):
204  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
205  ; DARWIN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
206  ; DARWIN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
207  ; DARWIN-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
208  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
209  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
210  ; DARWIN-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
211  ; DARWIN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
212  ; DARWIN-NEXT:   G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
213  ; DARWIN-NEXT:   $w0 = COPY [[C]](s32)
214  ; DARWIN-NEXT:   $d0 = COPY [[C1]](s64)
215  ; DARWIN-NEXT:   $x1 = COPY [[C2]](s64)
216  ; DARWIN-NEXT:   BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
217  ; DARWIN-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
218  ; DARWIN-NEXT:   RET_ReallyLR
219  ; WINDOWS-LABEL: name: test_varargs_2
220  ; WINDOWS: bb.1 (%ir-block.0):
221  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
222  ; WINDOWS-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
223  ; WINDOWS-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
224  ; WINDOWS-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
225  ; WINDOWS-NEXT:   $w0 = COPY [[C]](s32)
226  ; WINDOWS-NEXT:   $x1 = COPY [[C1]](s64)
227  ; WINDOWS-NEXT:   $x2 = COPY [[C2]](s64)
228  ; WINDOWS-NEXT:   $x3 = COPY [[C3]](s64)
229  ; WINDOWS-NEXT:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
230  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
231  ret void
232}
233
234; Same deal here, even though we have enough room to fit. On Darwin, we'll pass
235; the last argument to @varargs on the stack. We don't allow tail calling
236; varargs arguments that are on the stack.
; Unlike the parameterless caller in the previous test, this caller has an
; incoming stack argument of its own (%arg, loaded from %fixed-stack.0). The
; expected Darwin output is still a BL with the variadic i64 314 stored to an
; 8-byte outgoing area; the expected Windows output passes it in $x3 and tail
; calls.
237define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {
238
239  ; DARWIN-LABEL: name: test_varargs_3
240  ; DARWIN: bb.1 (%ir-block.1):
241  ; DARWIN-NEXT:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
242  ; DARWIN-NEXT: {{  $}}
243  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
244  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
245  ; DARWIN-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
246  ; DARWIN-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
247  ; DARWIN-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
248  ; DARWIN-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
249  ; DARWIN-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
250  ; DARWIN-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
251  ; DARWIN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
252  ; DARWIN-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
253  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
254  ; DARWIN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
255  ; DARWIN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
256  ; DARWIN-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
257  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
258  ; DARWIN-NEXT:   [[COPY8:%[0-9]+]]:_(p0) = COPY $sp
259  ; DARWIN-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
260  ; DARWIN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
261  ; DARWIN-NEXT:   G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
262  ; DARWIN-NEXT:   $w0 = COPY [[C]](s32)
263  ; DARWIN-NEXT:   $d0 = COPY [[C1]](s64)
264  ; DARWIN-NEXT:   $x1 = COPY [[C2]](s64)
265  ; DARWIN-NEXT:   BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
266  ; DARWIN-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
267  ; DARWIN-NEXT:   RET_ReallyLR
268  ; WINDOWS-LABEL: name: test_varargs_3
269  ; WINDOWS: bb.1 (%ir-block.1):
270  ; WINDOWS-NEXT:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
271  ; WINDOWS-NEXT: {{  $}}
272  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
273  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
274  ; WINDOWS-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
275  ; WINDOWS-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
276  ; WINDOWS-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
277  ; WINDOWS-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
278  ; WINDOWS-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
279  ; WINDOWS-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
280  ; WINDOWS-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
281  ; WINDOWS-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
282  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
283  ; WINDOWS-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
284  ; WINDOWS-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
285  ; WINDOWS-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
286  ; WINDOWS-NEXT:   $w0 = COPY [[C]](s32)
287  ; WINDOWS-NEXT:   $x1 = COPY [[C1]](s64)
288  ; WINDOWS-NEXT:   $x2 = COPY [[C2]](s64)
289  ; WINDOWS-NEXT:   $x3 = COPY [[C3]](s64)
290  ; WINDOWS-NEXT:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
291  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
292  ret void
293}
294
295; Unsupported calling convention for tail calls. Make sure we never tail call
296; it.
; The callee and the call site both use ghccc. The expected output on both
; targets is an ordinary BL bracketed by ADJCALLSTACKDOWN/ADJCALLSTACKUP, with
; the csr_aarch64_noregs register mask, followed by RET_ReallyLR.
297declare ghccc void @bad_call_conv_fn()
298define void @test_bad_call_conv() {
299  ; DARWIN-LABEL: name: test_bad_call_conv
300  ; DARWIN: bb.1 (%ir-block.0):
301  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
302  ; DARWIN-NEXT:   BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
303  ; DARWIN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
304  ; DARWIN-NEXT:   RET_ReallyLR
305  ; WINDOWS-LABEL: name: test_bad_call_conv
306  ; WINDOWS: bb.1 (%ir-block.0):
307  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
308  ; WINDOWS-NEXT:   BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
309  ; WINDOWS-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
310  ; WINDOWS-NEXT:   RET_ReallyLR
311  tail call ghccc void @bad_call_conv_fn()
312  ret void
313}
314
315; Shouldn't tail call when the caller has byval arguments.
; The call itself is the same argument-free call to @simple_fn that @tail_call
; makes; here the caller's byval parameter (materialized from %fixed-stack.0)
; is what differs, and the expected output is a BL plus RET_ReallyLR.
316define void @test_byval(ptr byval(i8) %ptr) {
317  ; DARWIN-LABEL: name: test_byval
318  ; DARWIN: bb.1 (%ir-block.0):
319  ; DARWIN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
320  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
321  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
322  ; DARWIN-NEXT:   BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
323  ; DARWIN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
324  ; DARWIN-NEXT:   RET_ReallyLR
325  ; WINDOWS-LABEL: name: test_byval
326  ; WINDOWS: bb.1 (%ir-block.0):
327  ; WINDOWS-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
328  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
329  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
330  ; WINDOWS-NEXT:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
331  ; WINDOWS-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
332  ; WINDOWS-NEXT:   RET_ReallyLR
333  tail call void @simple_fn()
334  ret void
335}
336
337; Shouldn't tail call when the caller has inreg arguments.
; The call itself is the same argument-free call to @simple_fn that @tail_call
; makes; here the caller's inreg parameter (received in $x0) is what differs,
; and the expected output is a BL plus RET_ReallyLR.
338define void @test_inreg(ptr inreg %ptr) {
339  ; DARWIN-LABEL: name: test_inreg
340  ; DARWIN: bb.1 (%ir-block.0):
341  ; DARWIN-NEXT:   liveins: $x0
342  ; DARWIN-NEXT: {{  $}}
343  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
344  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
345  ; DARWIN-NEXT:   BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
346  ; DARWIN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
347  ; DARWIN-NEXT:   RET_ReallyLR
348  ; WINDOWS-LABEL: name: test_inreg
349  ; WINDOWS: bb.1 (%ir-block.0):
350  ; WINDOWS-NEXT:   liveins: $x0
351  ; WINDOWS-NEXT: {{  $}}
352  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
353  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
354  ; WINDOWS-NEXT:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
355  ; WINDOWS-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
356  ; WINDOWS-NEXT:   RET_ReallyLR
357  tail call void @simple_fn()
358  ret void
359}
360
; The caller uses the default calling convention while the callee and the call
; site are fastcc. The expected output on both targets is still a TCRETURNdi,
; i.e. this caller/callee convention mismatch does not block the tail call.
361declare fastcc void @fast_fn()
362define void @test_mismatched_caller() {
363  ; DARWIN-LABEL: name: test_mismatched_caller
364  ; DARWIN: bb.1 (%ir-block.0):
365  ; DARWIN-NEXT:   TCRETURNdi @fast_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
366  ; WINDOWS-LABEL: name: test_mismatched_caller
367  ; WINDOWS: bb.1 (%ir-block.0):
368  ; WINDOWS-NEXT:   TCRETURNdi @fast_fn, 0, csr_aarch64_aapcs, implicit $sp
369  tail call fastcc void @fast_fn()
370  ret void
371}
372
373; Verify that lifetime markers and llvm.assume don't impact tail calling.
; llvm.assume case: the call result only feeds an icmp consumed by llvm.assume,
; and the caller returns void. The expected output contains just the
; TCRETURNdi; nothing for the icmp or the assume appears in it.
374declare void @llvm.assume(i1)
375define void @test_assume() local_unnamed_addr {
376  ; DARWIN-LABEL: name: test_assume
377  ; DARWIN: bb.1.entry:
378  ; DARWIN-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
379  ; WINDOWS-LABEL: name: test_assume
380  ; WINDOWS: bb.1.entry:
381  ; WINDOWS-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
382entry:
383  %x = tail call i32 @nonvoid_ret()
384  %y = icmp ne i32 %x, 0
385  tail call void @llvm.assume(i1 %y)
386  ret void
387}
388
; Lifetime-marker case: a lifetime.end (and an unused icmp) sit between the
; tail call and the ret. The expected output still ends in a TCRETURNdi; only
; the G_FRAME_INDEX for the alloca and the LIFETIME_START precede it, and
; nothing for the lifetime.end or the icmp appears.
389declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
390declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
391define void @test_lifetime() local_unnamed_addr {
392  ; DARWIN-LABEL: name: test_lifetime
393  ; DARWIN: bb.1.entry:
394  ; DARWIN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
395  ; DARWIN-NEXT:   LIFETIME_START %stack.0.t
396  ; DARWIN-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
397  ; WINDOWS-LABEL: name: test_lifetime
398  ; WINDOWS: bb.1.entry:
399  ; WINDOWS-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
400  ; WINDOWS-NEXT:   LIFETIME_START %stack.0.t
401  ; WINDOWS-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
402entry:
403  %t = alloca i8, align 1
404  call void @llvm.lifetime.start.p0(i64 1, ptr %t)
405  %x = tail call i32 @nonvoid_ret()
406  %y = icmp ne i32 %x, 0
407  tail call void @llvm.lifetime.end.p0(i64 1, ptr %t)
408  ret void
409}
410
411; We can tail call when the callee swiftself is the same as the caller one.
412; It would be nice to move this to swiftself.ll, but it's important to verify
413; that we get the COPY that makes this safe in the first place.
; Expected sequence: the incoming swiftself value is copied out of $x20, the
; (non-tail) call to @pluto produces the callee pointer in $x0, the saved value
; is copied back into $x20, and the indirect tail call is a TCRETURNri that
; lists $x20 as an implicit use.
414declare ptr @pluto()
415define hidden swiftcc i64 @swiftself_indirect_tail(ptr swiftself %arg) {
416  ; DARWIN-LABEL: name: swiftself_indirect_tail
417  ; DARWIN: bb.1 (%ir-block.0):
418  ; DARWIN-NEXT:   liveins: $x20
419  ; DARWIN-NEXT: {{  $}}
420  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x20
421  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
422  ; DARWIN-NEXT:   BL @pluto, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
423  ; DARWIN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
424  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
425  ; DARWIN-NEXT:   $x20 = COPY [[COPY]](p0)
426  ; DARWIN-NEXT:   TCRETURNri [[COPY1]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x20
427  ; WINDOWS-LABEL: name: swiftself_indirect_tail
428  ; WINDOWS: bb.1 (%ir-block.0):
429  ; WINDOWS-NEXT:   liveins: $x20
430  ; WINDOWS-NEXT: {{  $}}
431  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x20
432  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
433  ; WINDOWS-NEXT:   BL @pluto, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
434  ; WINDOWS-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
435  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
436  ; WINDOWS-NEXT:   $x20 = COPY [[COPY]](p0)
437  ; WINDOWS-NEXT:   TCRETURNri [[COPY1]](p0), 0, csr_aarch64_aapcs, implicit $sp, implicit $x20
438  %tmp = call ptr @pluto()
439  %tmp2 = tail call swiftcc i64 %tmp(ptr swiftself %arg)
440  ret i64 %tmp2
441}
442
443; Verify that we can tail call musttail callees.
; The musttail call passes a null pointer rather than forwarding the caller's
; own argument: the expected output materializes it as a p0 G_CONSTANT 0,
; copies it into $x0, and emits a TCRETURNdi with $x0 as an implicit use.
444declare void @must_callee(ptr)
445define void @foo(ptr) {
446  ; DARWIN-LABEL: name: foo
447  ; DARWIN: bb.1 (%ir-block.1):
448  ; DARWIN-NEXT:   liveins: $x0
449  ; DARWIN-NEXT: {{  $}}
450  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
451  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
452  ; DARWIN-NEXT:   $x0 = COPY [[C]](p0)
453  ; DARWIN-NEXT:   TCRETURNdi @must_callee, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0
454  ; WINDOWS-LABEL: name: foo
455  ; WINDOWS: bb.1 (%ir-block.1):
456  ; WINDOWS-NEXT:   liveins: $x0
457  ; WINDOWS-NEXT: {{  $}}
458  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
459  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
460  ; WINDOWS-NEXT:   $x0 = COPY [[C]](p0)
461  ; WINDOWS-NEXT:   TCRETURNdi @must_callee, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0
462  musttail call void @must_callee(ptr null)
463  ret void
464}
465
466; Verify we emit a tail call with a type that requires splitting into
467; multiple registers.
; The <16 x half> argument arrives as two <8 x s16> halves in $q0 and $q1. The
; expected output concatenates them into a <16 x s16>, unmerges that value back
; into two halves for the outgoing $q0/$q1, and emits a TCRETURNdi with both
; registers as implicit uses.
468declare void @outgoing_v16f16(<16 x half>)
469define void @test_tail_call_outgoing_v16f16(<16 x half> %arg) {
470  ; DARWIN-LABEL: name: test_tail_call_outgoing_v16f16
471  ; DARWIN: bb.1 (%ir-block.0):
472  ; DARWIN-NEXT:   liveins: $q0, $q1
473  ; DARWIN-NEXT: {{  $}}
474  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
475  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
476  ; DARWIN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
477  ; DARWIN-NEXT:   [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
478  ; DARWIN-NEXT:   $q0 = COPY [[UV]](<8 x s16>)
479  ; DARWIN-NEXT:   $q1 = COPY [[UV1]](<8 x s16>)
480  ; DARWIN-NEXT:   TCRETURNdi @outgoing_v16f16, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
481  ; WINDOWS-LABEL: name: test_tail_call_outgoing_v16f16
482  ; WINDOWS: bb.1 (%ir-block.0):
483  ; WINDOWS-NEXT:   liveins: $q0, $q1
484  ; WINDOWS-NEXT: {{  $}}
485  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
486  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
487  ; WINDOWS-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
488  ; WINDOWS-NEXT:   [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
489  ; WINDOWS-NEXT:   $q0 = COPY [[UV]](<8 x s16>)
490  ; WINDOWS-NEXT:   $q1 = COPY [[UV1]](<8 x s16>)
491  ; WINDOWS-NEXT:   TCRETURNdi @outgoing_v16f16, 0, csr_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
492  tail call void @outgoing_v16f16(<16 x half> %arg)
493  ret void
494}
495