; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s --check-prefix=DARWIN
; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-windows -global-isel -o - 2>&1 | FileCheck %s --check-prefix=WINDOWS

; Simplest case: a direct tail call to a void function taking no arguments.
; Both targets are expected to emit a TCRETURNdi; only the register mask
; differs between them.
declare void @simple_fn()
define void @tail_call() {
  ; DARWIN-LABEL: name: tail_call
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: TCRETURNdi @simple_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
  ; WINDOWS-LABEL: name: tail_call
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
  tail call void @simple_fn()
  ret void
}

; We should get a TCRETURNri here.
; FIXME: We don't need the COPY.
define void @indirect_tail_call(ptr %func) {
  ; DARWIN-LABEL: name: indirect_tail_call
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: liveins: $x0
  ; DARWIN-NEXT: {{ $}}
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
  ; DARWIN-NEXT: TCRETURNri [[COPY]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp
  ; WINDOWS-LABEL: name: indirect_tail_call
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: liveins: $x0
  ; WINDOWS-NEXT: {{ $}}
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
  ; WINDOWS-NEXT: TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
  tail call void %func()
  ret void
}

; A single i32 argument is forwarded in $w0 and the call is still lowered to a
; TCRETURNdi, with $w0 listed as an implicit use.
declare void @outgoing_args_fn(i32)
define void @test_outgoing_args(i32 %a) {
  ; DARWIN-LABEL: name: test_outgoing_args
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: liveins: $w0
  ; DARWIN-NEXT: {{ $}}
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
  ; DARWIN-NEXT: $w0 = COPY [[COPY]](s32)
  ; DARWIN-NEXT: TCRETURNdi @outgoing_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0
  ; WINDOWS-LABEL: name: test_outgoing_args
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: liveins: $w0
  ; WINDOWS-NEXT: {{ $}}
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
  ; WINDOWS-NEXT: $w0 = COPY [[COPY]](s32)
  ; WINDOWS-NEXT: TCRETURNdi @outgoing_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
  tail call void @outgoing_args_fn(i32 %a)
  ret void
}

; Verify that we create frame indices for memory arguments in tail calls.
; We get a bunch of copies here which are unused and thus eliminated. So, let's
; just focus on what matters, which is that we get a G_FRAME_INDEX.
declare void @outgoing_stack_args_fn(<4 x half>)
define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
  ; DARWIN-LABEL: name: test_outgoing_stack_args
  ; DARWIN: bb.1 (%ir-block.1):
  ; DARWIN-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
  ; DARWIN-NEXT: {{ $}}
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; DARWIN-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; DARWIN-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
  ; DARWIN-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
  ; DARWIN-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
  ; DARWIN-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
  ; DARWIN-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
  ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; DARWIN-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
  ; DARWIN-NEXT: $d0 = COPY [[LOAD]](<4 x s16>)
  ; DARWIN-NEXT: TCRETURNdi @outgoing_stack_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $d0
  ; WINDOWS-LABEL: name: test_outgoing_stack_args
  ; WINDOWS: bb.1 (%ir-block.1):
  ; WINDOWS-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
  ; WINDOWS-NEXT: {{ $}}
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; WINDOWS-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; WINDOWS-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
  ; WINDOWS-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
  ; WINDOWS-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
  ; WINDOWS-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
  ; WINDOWS-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
  ; WINDOWS-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; WINDOWS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
  ; WINDOWS-NEXT: $d0 = COPY [[LOAD]](<4 x s16>)
  ; WINDOWS-NEXT: TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
  tail call void @outgoing_stack_args_fn(<4 x half> %arg)
  ret void
}

; Verify that we don't tail call when we cannot fit arguments on the caller's
; stack.
declare i32 @too_big_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s)
define i32 @test_too_big_stack() {
  ; DARWIN-LABEL: name: test_too_big_stack
  ; DARWIN: bb.1.entry:
  ; DARWIN-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
  ; DARWIN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
  ; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
  ; DARWIN-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
  ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
  ; DARWIN-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
  ; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
  ; DARWIN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
  ; DARWIN-NEXT: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
  ; DARWIN-NEXT: $x0 = COPY [[DEF]](s64)
  ; DARWIN-NEXT: $x1 = COPY [[DEF]](s64)
  ; DARWIN-NEXT: $x2 = COPY [[DEF]](s64)
  ; DARWIN-NEXT: $x3 = COPY [[DEF]](s64)
  ; DARWIN-NEXT: $x4 = COPY [[DEF]](s64)
  ; DARWIN-NEXT: $x5 = COPY [[DEF]](s64)
  ; DARWIN-NEXT: $x6 = COPY [[DEF]](s64)
  ; DARWIN-NEXT: $x7 = COPY [[DEF]](s64)
  ; DARWIN-NEXT: BL @too_big_stack, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
  ; DARWIN-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
  ; DARWIN-NEXT: $w0 = COPY [[COPY1]](s32)
  ; DARWIN-NEXT: RET_ReallyLR implicit $w0
  ; WINDOWS-LABEL: name: test_too_big_stack
  ; WINDOWS: bb.1.entry:
  ; WINDOWS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
  ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
  ; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
  ; WINDOWS-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
  ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; WINDOWS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
  ; WINDOWS-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
  ; WINDOWS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; WINDOWS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
  ; WINDOWS-NEXT: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
  ; WINDOWS-NEXT: $x0 = COPY [[DEF]](s64)
  ; WINDOWS-NEXT: $x1 = COPY [[DEF]](s64)
  ; WINDOWS-NEXT: $x2 = COPY [[DEF]](s64)
  ; WINDOWS-NEXT: $x3 = COPY [[DEF]](s64)
  ; WINDOWS-NEXT: $x4 = COPY [[DEF]](s64)
  ; WINDOWS-NEXT: $x5 = COPY [[DEF]](s64)
  ; WINDOWS-NEXT: $x6 = COPY [[DEF]](s64)
  ; WINDOWS-NEXT: $x7 = COPY [[DEF]](s64)
  ; WINDOWS-NEXT: BL @too_big_stack, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
  ; WINDOWS-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
  ; WINDOWS-NEXT: $w0 = COPY [[COPY1]](s32)
  ; WINDOWS-NEXT: RET_ReallyLR implicit $w0
entry:
  %call = tail call i32 @too_big_stack(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9)
  ret i32 %call
}

; A callee with a non-void return type is tail called when the caller returns
; the call's result directly: the assertions below expect a TCRETURNdi with no
; COPYs after the call.
declare i32 @nonvoid_ret()
define i32 @test_nonvoid_ret() {
  ; DARWIN-LABEL: name: test_nonvoid_ret
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
  ; WINDOWS-LABEL: name: test_nonvoid_ret
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
  %call = tail call i32 @nonvoid_ret()
  ret i32 %call
}

declare void @varargs(i32, double, i64, ...)
; Every argument of this call corresponds to a fixed parameter of @varargs and
; is passed in a register, so both targets tail call. On Windows the double is
; passed in $x1 rather than $d0.
define void @test_varargs() {
  ; DARWIN-LABEL: name: test_varargs
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
  ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
  ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
  ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
  ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
  ; DARWIN-NEXT: TCRETURNdi @varargs, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
  ; WINDOWS-LABEL: name: test_varargs
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
  ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
  ; WINDOWS-NEXT: $w0 = COPY [[C]](s32)
  ; WINDOWS-NEXT: $x1 = COPY [[C1]](s64)
  ; WINDOWS-NEXT: $x2 = COPY [[C2]](s64)
  ; WINDOWS-NEXT: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2
  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
  ret void
}

; Darwin should not tail call here, because the last parameter to @varargs is
; not fixed. So, it's passed on the stack, which will make us not fit. On
; Windows, it's passed in a register, so it's safe to tail call.
; The fourth argument (i64 314) is variadic. The Darwin assertions expect it to
; be stored to the outgoing stack area followed by a regular BL; the Windows
; assertions expect it in $x3 followed by a TCRETURNdi.
define void @test_varargs_2() {

  ; DARWIN-LABEL: name: test_varargs_2
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
  ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
  ; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
  ; DARWIN-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
  ; DARWIN-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
  ; DARWIN-NEXT: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
  ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
  ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
  ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
  ; DARWIN-NEXT: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
  ; DARWIN-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: RET_ReallyLR
  ; WINDOWS-LABEL: name: test_varargs_2
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
  ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
  ; WINDOWS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
  ; WINDOWS-NEXT: $w0 = COPY [[C]](s32)
  ; WINDOWS-NEXT: $x1 = COPY [[C1]](s64)
  ; WINDOWS-NEXT: $x2 = COPY [[C2]](s64)
  ; WINDOWS-NEXT: $x3 = COPY [[C3]](s64)
  ; WINDOWS-NEXT: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
  ret void
}

; Same deal here, even though we have enough room to fit. On Darwin, we'll pass
; the last argument to @varargs on the stack.
; We don't allow tail calling
; varargs arguments that are on the stack.
; The caller here has an incoming stack argument of its own (%arg, loaded from
; %fixed-stack.0); the Darwin assertions still expect a regular BL.
define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {

  ; DARWIN-LABEL: name: test_varargs_3
  ; DARWIN: bb.1 (%ir-block.1):
  ; DARWIN-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
  ; DARWIN-NEXT: {{ $}}
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; DARWIN-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; DARWIN-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
  ; DARWIN-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
  ; DARWIN-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
  ; DARWIN-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
  ; DARWIN-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
  ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; DARWIN-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
  ; DARWIN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
  ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
  ; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
  ; DARWIN-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $sp
  ; DARWIN-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
  ; DARWIN-NEXT: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
  ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
  ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
  ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
  ; DARWIN-NEXT: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
  ; DARWIN-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: RET_ReallyLR
  ; WINDOWS-LABEL: name: test_varargs_3
  ; WINDOWS: bb.1 (%ir-block.1):
  ; WINDOWS-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
  ; WINDOWS-NEXT: {{ $}}
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; WINDOWS-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; WINDOWS-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
  ; WINDOWS-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
  ; WINDOWS-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
  ; WINDOWS-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
  ; WINDOWS-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
  ; WINDOWS-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; WINDOWS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
  ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
  ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
  ; WINDOWS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
  ; WINDOWS-NEXT: $w0 = COPY [[C]](s32)
  ; WINDOWS-NEXT: $x1 = COPY [[C1]](s64)
  ; WINDOWS-NEXT: $x2 = COPY [[C2]](s64)
  ; WINDOWS-NEXT: $x3 = COPY [[C3]](s64)
  ; WINDOWS-NEXT: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
  ret void
}

; Unsupported calling convention for tail calls. Make sure we never tail call
; it.
; The ghccc call below is expected to be lowered to a regular BL (with the
; csr_aarch64_noregs mask) followed by a return on both targets.
declare ghccc void @bad_call_conv_fn()
define void @test_bad_call_conv() {
  ; DARWIN-LABEL: name: test_bad_call_conv
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
  ; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: RET_ReallyLR
  ; WINDOWS-LABEL: name: test_bad_call_conv
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
  ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: RET_ReallyLR
  tail call ghccc void @bad_call_conv_fn()
  ret void
}

; Shouldn't tail call when the caller has byval arguments.
define void @test_byval(ptr byval(i8) %ptr) {
  ; DARWIN-LABEL: name: test_byval
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
  ; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
  ; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: RET_ReallyLR
  ; WINDOWS-LABEL: name: test_byval
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
  ; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
  ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: RET_ReallyLR
  tail call void @simple_fn()
  ret void
}

; Shouldn't tail call when the caller has inreg arguments.
define void @test_inreg(ptr inreg %ptr) {
  ; DARWIN-LABEL: name: test_inreg
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: liveins: $x0
  ; DARWIN-NEXT: {{ $}}
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
  ; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: RET_ReallyLR
  ; WINDOWS-LABEL: name: test_inreg
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: liveins: $x0
  ; WINDOWS-NEXT: {{ $}}
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
  ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: RET_ReallyLR
  tail call void @simple_fn()
  ret void
}

; The caller uses the default calling convention while the callee is fastcc.
; The assertions expect the call to be tail called (TCRETURNdi) on both targets
; despite the mismatch.
declare fastcc void @fast_fn()
define void @test_mismatched_caller() {
  ; DARWIN-LABEL: name: test_mismatched_caller
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: TCRETURNdi @fast_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
  ; WINDOWS-LABEL: name: test_mismatched_caller
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: TCRETURNdi @fast_fn, 0, csr_aarch64_aapcs, implicit $sp
  tail call fastcc void @fast_fn()
  ret void
}

; Verify that lifetime markers and llvm.assume don't impact tail calling.
; The result of the call to @nonvoid_ret only feeds an llvm.assume; the
; assertions expect nothing but a TCRETURNdi in the block.
declare void @llvm.assume(i1)
define void @test_assume() local_unnamed_addr {
  ; DARWIN-LABEL: name: test_assume
  ; DARWIN: bb.1.entry:
  ; DARWIN-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
  ; WINDOWS-LABEL: name: test_assume
  ; WINDOWS: bb.1.entry:
  ; WINDOWS-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
entry:
  %x = tail call i32 @nonvoid_ret()
  %y = icmp ne i32 %x, 0
  tail call void @llvm.assume(i1 %y)
  ret void
}

; A lifetime.end marker sits between the call and the return. The assertions
; expect the LIFETIME_START to be kept and the call to be emitted as a
; TCRETURNdi, with no LIFETIME_END after it.
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
define void @test_lifetime() local_unnamed_addr {
  ; DARWIN-LABEL: name: test_lifetime
  ; DARWIN: bb.1.entry:
  ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
  ; DARWIN-NEXT: LIFETIME_START %stack.0.t
  ; DARWIN-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
  ; WINDOWS-LABEL: name: test_lifetime
  ; WINDOWS: bb.1.entry:
  ; WINDOWS-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
  ; WINDOWS-NEXT: LIFETIME_START %stack.0.t
  ; WINDOWS-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
entry:
  %t = alloca i8, align 1
  call void @llvm.lifetime.start.p0(i64 1, ptr %t)
  %x = tail call i32 @nonvoid_ret()
  %y = icmp ne i32 %x, 0
  tail call void @llvm.lifetime.end.p0(i64 1, ptr %t)
  ret void
}

; We can tail call when the callee swiftself is the same as the caller one.
; It would be nice to move this to swiftself.ll, but it's important to verify
; that we get the COPY that makes this safe in the first place.
; @pluto returns the function pointer that is then tail called indirectly.
declare ptr @pluto()
; The incoming swiftself value is copied out of $x20 on entry and copied back
; into $x20 after the (non-tail) call to @pluto, immediately before the
; TCRETURNri.
define hidden swiftcc i64 @swiftself_indirect_tail(ptr swiftself %arg) {
  ; DARWIN-LABEL: name: swiftself_indirect_tail
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: liveins: $x20
  ; DARWIN-NEXT: {{ $}}
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x20
  ; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: BL @pluto, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
  ; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
  ; DARWIN-NEXT: $x20 = COPY [[COPY]](p0)
  ; DARWIN-NEXT: TCRETURNri [[COPY1]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x20
  ; WINDOWS-LABEL: name: swiftself_indirect_tail
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: liveins: $x20
  ; WINDOWS-NEXT: {{ $}}
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x20
  ; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: BL @pluto, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
  ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
  ; WINDOWS-NEXT: $x20 = COPY [[COPY]](p0)
  ; WINDOWS-NEXT: TCRETURNri [[COPY1]](p0), 0, csr_aarch64_aapcs, implicit $sp, implicit $x20
  %tmp = call ptr @pluto()
  %tmp2 = tail call swiftcc i64 %tmp(ptr swiftself %arg)
  ret i64 %tmp2
}

; Verify that we can tail call musttail callees.
; The caller's own pointer argument is unnamed and unused; the musttail call
; passes null, which shows up as a G_CONSTANT copied into $x0 before the
; TCRETURNdi.
declare void @must_callee(ptr)
define void @foo(ptr) {
  ; DARWIN-LABEL: name: foo
  ; DARWIN: bb.1 (%ir-block.1):
  ; DARWIN-NEXT: liveins: $x0
  ; DARWIN-NEXT: {{ $}}
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; DARWIN-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
  ; DARWIN-NEXT: $x0 = COPY [[C]](p0)
  ; DARWIN-NEXT: TCRETURNdi @must_callee, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0
  ; WINDOWS-LABEL: name: foo
  ; WINDOWS: bb.1 (%ir-block.1):
  ; WINDOWS-NEXT: liveins: $x0
  ; WINDOWS-NEXT: {{ $}}
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
  ; WINDOWS-NEXT: $x0 = COPY [[C]](p0)
  ; WINDOWS-NEXT: TCRETURNdi @must_callee, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0
  musttail call void @must_callee(ptr null)
  ret void
}

; Verify we emit a tail call with a type that requires splitting into
; multiple registers.
; The <16 x half> argument arrives in $q0 and $q1, is concatenated, and is then
; unmerged back into $q0 and $q1 for the outgoing call.
declare void @outgoing_v16f16(<16 x half>)
define void @test_tail_call_outgoing_v16f16(<16 x half> %arg) {
  ; DARWIN-LABEL: name: test_tail_call_outgoing_v16f16
  ; DARWIN: bb.1 (%ir-block.0):
  ; DARWIN-NEXT: liveins: $q0, $q1
  ; DARWIN-NEXT: {{ $}}
  ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
  ; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
  ; DARWIN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
  ; DARWIN-NEXT: [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
  ; DARWIN-NEXT: $q0 = COPY [[UV]](<8 x s16>)
  ; DARWIN-NEXT: $q1 = COPY [[UV1]](<8 x s16>)
  ; DARWIN-NEXT: TCRETURNdi @outgoing_v16f16, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
  ; WINDOWS-LABEL: name: test_tail_call_outgoing_v16f16
  ; WINDOWS: bb.1 (%ir-block.0):
  ; WINDOWS-NEXT: liveins: $q0, $q1
  ; WINDOWS-NEXT: {{ $}}
  ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
  ; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
  ; WINDOWS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
  ; WINDOWS-NEXT: [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
  ; WINDOWS-NEXT: $q0 = COPY [[UV]](<8 x s16>)
  ; WINDOWS-NEXT: $q1 = COPY [[UV1]](<8 x s16>)
  ; WINDOWS-NEXT: TCRETURNdi @outgoing_v16f16, 0, csr_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
  tail call void @outgoing_v16f16(<16 x half> %arg)
  ret void
}