; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7a-none-eabi %s -o - | FileCheck %s

; Six i32 arguments: the expected code below passes the first four in r0-r3
; and stores the last two to the stack.
declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5)

; Caller and callee have identical prototypes, so the two stack-passed
; arguments (5 and 6) are written to [sp] and [sp, #4] without adjusting sp,
; and the call is emitted as a plain branch.
define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; CHECK-LABEL: many_args_tail:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #5
; CHECK-NEXT:    mov r1, #2
; CHECK-NEXT:    str r0, [sp]
; CHECK-NEXT:    mov r0, #6
; CHECK-NEXT:    str r0, [sp, #4]
; CHECK-NEXT:    mov r0, #1
; CHECK-NEXT:    mov r2, #3
; CHECK-NEXT:    mov r3, #4
; CHECK-NEXT:    b many_args_callee
  %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %ret
}

; The same call marked musttail, which makes the tail call mandatory rather
; than an optimisation. The expected code is identical to @many_args_tail.
define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; CHECK-LABEL: many_args_musttail:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #5
; CHECK-NEXT:    mov r1, #2
; CHECK-NEXT:    str r0, [sp]
; CHECK-NEXT:    mov r0, #6
; CHECK-NEXT:    str r0, [sp, #4]
; CHECK-NEXT:    mov r0, #1
; CHECK-NEXT:    mov r2, #3
; CHECK-NEXT:    mov r3, #4
; CHECK-NEXT:    b many_args_callee
  %ret = musttail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %ret
}

; This function has more arguments than its tail-callee. This isn't valid for
; the musttail attribute, but can still be tail-called as a non-guaranteed
; optimisation, because the outgoing arguments to @many_args_callee fit in the
; stack space allocated by the caller of @more_args_tail.
define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) {
; CHECK-LABEL: more_args_tail:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #5
; CHECK-NEXT:    mov r1, #2
; CHECK-NEXT:    str r0, [sp]
; CHECK-NEXT:    mov r0, #6
; CHECK-NEXT:    str r0, [sp, #4]
; CHECK-NEXT:    mov r0, #1
; CHECK-NEXT:    mov r2, #3
; CHECK-NEXT:    mov r3, #4
; CHECK-NEXT:    b many_args_callee
  ; Seven incoming i32 arguments versus six outgoing ones; the expected code
  ; still writes the stack-passed values in place and branches.
  %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %ret
}

; Again, this isn't valid for musttail, but can be tail-called in practice
; because the stack size is the same.
define i32 @different_args_tail(i64 %0, i64 %1, i64 %2) {
; CHECK-LABEL: different_args_tail:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r0, #5
; CHECK-NEXT:    mov r1, #2
; CHECK-NEXT:    str r0, [sp]
; CHECK-NEXT:    mov r0, #6
; CHECK-NEXT:    str r0, [sp, #4]
; CHECK-NEXT:    mov r0, #1
; CHECK-NEXT:    mov r2, #3
; CHECK-NEXT:    mov r3, #4
; CHECK-NEXT:    b many_args_callee
  %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %ret
}

; Here, the caller requires less stack space for its arguments than the
; callee, so it would not be valid to do a tail-call.
define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
; CHECK-LABEL: fewer_args_tail:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, sp, #8
; CHECK-NEXT:    mov r1, #6
; CHECK-NEXT:    mov r0, #5
; CHECK-NEXT:    strd r0, r1, [sp]
; CHECK-NEXT:    mov r0, #1
; CHECK-NEXT:    mov r1, #2
; CHECK-NEXT:    mov r2, #3
; CHECK-NEXT:    mov r3, #4
; CHECK-NEXT:    bl many_args_callee
; CHECK-NEXT:    add sp, sp, #8
; CHECK-NEXT:    pop {r11, pc}
  ; Marked 'tail', but expected to be emitted as an ordinary call (bl) that
  ; allocates its own 8 bytes of outgoing argument space.
  %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
  ret i32 %ret
}

declare void @sret_callee(ptr sret({ double, double }) align 8)

; Functions which return by sret can be tail-called because the incoming sret
; pointer gets passed through to the callee.
define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) {
; CHECK-LABEL: sret_caller_tail:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    b sret_callee
entry:
  tail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
  ret void
}

; The same sret forwarding with a guaranteed (musttail) call; the expected
; code is again a single branch.
define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) {
; CHECK-LABEL: sret_caller_musttail:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    b sret_callee
entry:
  musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
  ret void
}

; Clang only uses byval for arguments of 65 bytes or larger, but we test with a
; 20 byte struct to keep the tests more readable. This size was chosen to still
; make sure that it will be split between registers and the stack, to test all
; of the interesting code paths in the backend.
%twenty_bytes = type { [5 x i32] }
declare void @large_callee(ptr byval(%twenty_bytes) align 4)

; Functions with byval parameters can be tail-called, because the value is
; actually passed in registers and the stack in the same way for the caller and
; callee. Within @large_caller the first 16 bytes of the argument are spilled
; to the local stack frame, but for the tail-call they are passed in r0-r3, so
; it's safe to de-allocate that memory before the call.
; TODO: The SUB and STM instructions are unnecessary and could be optimised
; out, but the behaviour of this is still correct.
define void @large_caller(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    stm sp!, {r0, r1, r2, r3}
; CHECK-NEXT:    b large_callee
entry:
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}

; As above, but with some inline asm to test that the arguments in r0-r3 are
; re-loaded before the call.
define void @large_caller_check_regs(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_check_regs:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    stm sp, {r0, r1, r2, r3}
; CHECK-NEXT:    @APP
; CHECK-NEXT:    @NO_APP
; CHECK-NEXT:    pop {r0, r1, r2, r3}
; CHECK-NEXT:    b large_callee
entry:
  ; Empty asm whose only effect is to clobber r0-r3.
  tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}

; The IR for this one looks dodgy, because it has an alloca passed to a
; musttail function, but it is passed as a byval argument, so will be copied
; into the stack space allocated by @large_caller_new_value's caller, so is
; valid.
define void @large_caller_new_value(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_new_value:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #36
; CHECK-NEXT:    sub sp, sp, #36
; CHECK-NEXT:    add r12, sp, #20
; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
; CHECK-NEXT:    mov r0, #4
; CHECK-NEXT:    add r1, sp, #36
; CHECK-NEXT:    str r0, [sp, #16]
; CHECK-NEXT:    mov r0, #3
; CHECK-NEXT:    str r0, [sp, #12]
; CHECK-NEXT:    mov r0, #2
; CHECK-NEXT:    str r0, [sp, #8]
; CHECK-NEXT:    mov r0, #1
; CHECK-NEXT:    str r0, [sp, #4]
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    str r0, [sp]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    add r0, r0, #16
; CHECK-NEXT:    mov r3, #3
; CHECK-NEXT:    ldr r2, [r0], #4
; CHECK-NEXT:    str r2, [r1], #4
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    mov r1, #1
; CHECK-NEXT:    mov r2, #2
; CHECK-NEXT:    add sp, sp, #36
; CHECK-NEXT:    b large_callee
entry:
  ; Build a fresh 20-byte value {0, 1, 2, 3, 4} in a local alloca.
  %y = alloca %twenty_bytes, align 4
  store i32 0, ptr %y, align 4
  %0 = getelementptr inbounds i8, ptr %y, i32 4
  store i32 1, ptr %0, align 4
  %1 = getelementptr inbounds i8, ptr %y, i32 8
  store i32 2, ptr %1, align 4
  %2 = getelementptr inbounds i8, ptr %y, i32 12
  store i32 3, ptr %2, align 4
  %3 = getelementptr inbounds i8, ptr %y, i32 16
  store i32 4, ptr %3, align 4
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %y)
  ret void
}

declare void @two_byvals_callee(ptr byval(%twenty_bytes) align 4, ptr byval(%twenty_bytes) align 4)

; Forwards both byval arguments to the callee in swapped order. The expected
; code first copies each argument into a temporary in the local frame and only
; then writes them to their final locations, presumably because each
; argument's destination overlaps the other argument's source.
define void @swap_byvals(ptr byval(%twenty_bytes) align 4 %a, ptr byval(%twenty_bytes) align 4 %b) {
; CHECK-LABEL: swap_byvals:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    .pad #40
; CHECK-NEXT:    sub sp, sp, #40
; CHECK-NEXT:    add r12, sp, #56
; CHECK-NEXT:    add lr, sp, #20
; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
; CHECK-NEXT:    add r0, sp, #56
; CHECK-NEXT:    mov r12, sp
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    mov r2, r12
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    add r3, sp, #20
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    add r4, sp, #76
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    add r0, sp, #76
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    mov r2, lr
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    ldr r1, [r0], #4
; CHECK-NEXT:    str r1, [r2], #4
; CHECK-NEXT:    ldm r3, {r0, r1, r2, r3}
; CHECK-NEXT:    ldr r5, [r12], #4
; CHECK-NEXT:    str r5, [r4], #4
; CHECK-NEXT:    ldr r5, [r12], #4
; CHECK-NEXT:    str r5, [r4], #4
; CHECK-NEXT:    ldr r5, [r12], #4
; CHECK-NEXT:    str r5, [r4], #4
; CHECK-NEXT:    ldr r5, [r12], #4
; CHECK-NEXT:    str r5, [r4], #4
; CHECK-NEXT:    ldr r5, [r12], #4
; CHECK-NEXT:    str r5, [r4], #4
; CHECK-NEXT:    add r5, lr, #16
; CHECK-NEXT:    add r12, sp, #72
; CHECK-NEXT:    ldr r4, [r5], #4
; CHECK-NEXT:    str r4, [r12], #4
; CHECK-NEXT:    add sp, sp, #40
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    b two_byvals_callee
entry:
  musttail call void @two_byvals_callee(ptr byval(%twenty_bytes) align 4 %b, ptr byval(%twenty_bytes) align 4 %a)
  ret void
}

; A forwarded byval arg, but at a different offset on the stack, so it needs to
; be copied to the local stack frame first. This can't be musttail because of
; the different signatures, but is still tail-called as an optimisation.
declare void @shift_byval_callee(ptr byval(%twenty_bytes) align 4)
define void @shift_byval(i32 %a, ptr byval(%twenty_bytes) align 4 %b) {
; CHECK-LABEL: shift_byval:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #12
; CHECK-NEXT:    sub sp, sp, #12
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .pad #20
; CHECK-NEXT:    sub sp, sp, #20
; CHECK-NEXT:    add r0, sp, #28
; CHECK-NEXT:    add lr, sp, #40
; CHECK-NEXT:    stm r0, {r1, r2, r3}
; CHECK-NEXT:    add r0, sp, #28
; CHECK-NEXT:    mov r1, sp
; CHECK-NEXT:    ldr r2, [r0], #4
; CHECK-NEXT:    add r12, r1, #16
; CHECK-NEXT:    str r2, [r1], #4
; CHECK-NEXT:    ldr r2, [r0], #4
; CHECK-NEXT:    str r2, [r1], #4
; CHECK-NEXT:    ldr r2, [r0], #4
; CHECK-NEXT:    str r2, [r1], #4
; CHECK-NEXT:    ldr r2, [r0], #4
; CHECK-NEXT:    str r2, [r1], #4
; CHECK-NEXT:    ldr r2, [r0], #4
; CHECK-NEXT:    str r2, [r1], #4
; CHECK-NEXT:    ldm sp, {r0, r1, r2, r3}
; CHECK-NEXT:    ldr r4, [r12], #4
; CHECK-NEXT:    str r4, [lr], #4
; CHECK-NEXT:    add sp, sp, #20
; CHECK-NEXT:    pop {r4, lr}
; CHECK-NEXT:    add sp, sp, #12
; CHECK-NEXT:    b shift_byval_callee
entry:
  tail call void @shift_byval_callee(ptr byval(%twenty_bytes) align 4 %b)
  ret void
}

; A global object passed to a byval argument, so it must be copied, but doesn't
; need a stack temporary.
@large_global = external global %twenty_bytes
define void @large_caller_from_global(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_from_global:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    add r12, sp, #8
; CHECK-NEXT:    add lr, sp, #24
; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
; CHECK-NEXT:    movw r3, :lower16:large_global
; CHECK-NEXT:    movt r3, :upper16:large_global
; CHECK-NEXT:    add r12, r3, #16
; CHECK-NEXT:    ldm r3, {r0, r1, r2, r3}
; CHECK-NEXT:    ldr r4, [r12], #4
; CHECK-NEXT:    str r4, [lr], #4
; CHECK-NEXT:    pop {r4, lr}
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    b large_callee
entry:
  ; The incoming %a is ignored; the callee receives a copy of @large_global.
  ; In the expected code the first 16 bytes are loaded straight from the
  ; global into r0-r3 and the final word is stored to the stack.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 @large_global)
  ret void
}