; RUN: llc -verify-machineinstrs < %s -mtriple=thumbv7k-apple-watchos -tailcallopt | FileCheck %s

; Exercises fastcc calls under -tailcallopt. The caller_toN_fromM functions
; tail-call a callee that takes N bytes of stack arguments from a caller that
; itself received M bytes of stack arguments; the remaining functions cover
; non-tail calls.

declare fastcc void @callee_stack0()
declare fastcc void @callee_stack4([4 x i32], i32)
declare fastcc void @callee_stack20([4 x i32], [5 x i32])
declare extern_weak fastcc void @callee_weak()

define fastcc void @caller_to0_from0() nounwind {
; CHECK-LABEL: _caller_to0_from0:

  tail call fastcc void @callee_stack0()
  ret void
; CHECK-NOT: add
; CHECK-NOT: sub
; CHECK: b.w _callee_stack0
}

define fastcc void @caller_to0_from4([4 x i32], i32) {
; CHECK-LABEL: _caller_to0_from4:

  tail call fastcc void @callee_stack0()
  ret void

; CHECK: add sp, #16
; CHECK-NEXT: b.w _callee_stack0
}

define fastcc void @caller_to4_from0() {
; Key point is that the "42" should go #16 below incoming stack
; pointer (we didn't have arg space to reuse).
  tail call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK-LABEL: _caller_to4_from0:
; CHECK: sub sp, #16
; CHECK: movs [[TMP:r[0-9]+]], #42
; CHECK: str [[TMP]], [sp]
; CHECK-NOT: add sp
; CHECK: b.w _callee_stack4

}

define fastcc void @caller_to4_from4([4 x i32], i32 %a) {
; CHECK-LABEL: _caller_to4_from4:
; CHECK-NOT: sub sp
; Key point is that the outgoing argument should be stored at SP on entry
; (the slot "%a" arrived in), with no stack adjustment before the branch.
  tail call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK: str {{r[0-9]+}}, [sp]
; CHECK-NOT: add sp
; CHECK-NEXT: b.w _callee_stack4
}

define fastcc void @caller_to20_from4([4 x i32], i32 %a) {
; CHECK-LABEL: _caller_to20_from4:
; CHECK: sub sp, #16

; Important point is that the call reuses the "dead" argument space
; above %a on the stack. If it tries to go below incoming-SP then the
; callee will not deallocate the space, even in fastcc.
  tail call fastcc void @callee_stack20([4 x i32] undef, [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5])

; CHECK: str {{.*}}, [sp]
; CHECK: str {{.*}}, [sp, #4]
; CHECK: str {{.*}}, [sp, #8]
; CHECK: str {{.*}}, [sp, #12]
; CHECK: str {{.*}}, [sp, #16]
; CHECK-NOT: add sp
; CHECK-NOT: sub sp
; CHECK: b.w _callee_stack20
  ret void
}


define fastcc void @caller_to4_from24([4 x i32], i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: _caller_to4_from24:


; Key point is that the outgoing argument should be stored #16 above SP on
; entry, and SP is then raised by #16 before the branch.
  tail call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK: str {{.*}}, [sp, #16]
; CHECK: add sp, #16
; CHECK-NEXT: b.w _callee_stack4
}


define fastcc void @caller_to20_from20([4 x i32], [5 x i32] %a) {
; CHECK-LABEL: _caller_to20_from20:
; CHECK-NOT: add sp,
; CHECK-NOT: sub sp,

; Here we want to make sure that all loads of %a happen before the stores:
; otherwise part of %a will be wrongly clobbered.
  tail call fastcc void @callee_stack20([4 x i32] undef, [5 x i32] %a)
  ret void

  ; If these ever get interleaved make sure aliasing slots don't clobber each
  ; other.
; CHECK: ldrd {{.*}}, {{.*}}, [sp, #12]
; CHECK: ldm.w sp,
; CHECK: stm.w
; CHECK: strd
; CHECK-NEXT: b.w _callee_stack20
}

define fastcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" {
; CHECK-LABEL: _disable_tail_calls:

  tail call fastcc void @callee_stack0()
  ret void

; NOTE(review): "ret" is not a Thumb mnemonic; the second directive below
; presumably only matches inside a later symbol name such as
; _normal_ret_with_stack. Confirm the real return sequence and tighten it.
; CHECK: bl _callee_stack0
; CHECK: ret
}

define fastcc void @normal_ret_with_stack([4 x i32], i32 %a) {
; CHECK: _normal_ret_with_stack:
; CHECK: add sp, #16
; CHECK: bx lr
  ret void
}

declare { [2 x float] } @get_vec2()

; NOTE(review): the RUN line at the top of this file does not pass a
; check-prefix option to FileCheck, so the COMMON-prefixed directives in the
; functions below are not verified at present. Confirm the expected code
; generation before enabling that prefix.

define void @fromC_totail() {
; COMMON-LABEL: _fromC_totail:
; COMMON: push {r4, lr}
; COMMON: sub sp, #8

; COMMON-NOT: sub sp,
; COMMON: movs [[TMP:r[0-9]+]], #42
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
  ; We must reset the stack to where it was before the call by undoing its extra stack pop.
; COMMON: sub sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
; COMMON: sub sp, #16

  call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void
}

define void @fromC_totail_noreservedframe(i32 %len) {
; COMMON-LABEL: _fromC_totail_noreservedframe:
; COMMON: sub.w sp, sp, r{{.*}}

; COMMON: movs [[TMP:r[0-9]+]], #42
  ; Note stack is subtracted here to allocate space for arg
; COMMON: sub.w sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
  ; And here.
; COMMON: sub sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
  ; But not restored here because callee_stack4 did that for us.
; COMMON-NOT: sub sp,

  ; Variable sized allocation prevents reserving frame at start of function so each call must allocate any stack space it needs.
  %var = alloca i32, i32 %len

  call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  call fastcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void
}

declare void @Ccallee_stack4([4 x i32], i32)

define fastcc void @fromtail_toC() {
; COMMON-LABEL: _fromtail_toC:
; COMMON: push {r4, lr}
; COMMON: sub sp, #8

; COMMON-NOT: sub sp,
; COMMON: movs [[TMP:r[0-9]+]], #42
; COMMON: str [[TMP]], [sp]
; COMMON: bl _Ccallee_stack4
  ; C callees will return with the stack exactly where we left it, so we mustn't try to fix anything.
; COMMON-NOT: add sp,
; COMMON-NOT: sub sp,
; COMMON: str [[TMP]], [sp]{{$}}
; COMMON: bl _Ccallee_stack4
; COMMON-NOT: sub sp,

  call void @Ccallee_stack4([4 x i32] undef, i32 42)
  call void @Ccallee_stack4([4 x i32] undef, i32 42)
  ret void
}