; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s
; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s

; Tests for prolog sequences for stack probing, when using a 64KiB stack guard.
;
; Both invocations above are verified against the same (default) check prefix,
; so the run with -global-isel is expected to produce exactly the same
; prologue/epilogue as the run without it.
; NOTE(review): -enable-post-misched=false presumably keeps the expected
; instruction order stable; confirm before changing the command lines.
;
; Every function takes a pointer %out, creates a single alloca of the size
; (and, for the last three, the alignment) encoded in its name, stores the
; alloca's address through %out and returns. All functions use attribute
; group #0, defined at the end of the file.

; 64k bytes is the largest frame we can probe in one go.
; Expected code: one 64KiB SP decrement followed by a single store of xzr to
; [sp] as the probe.
define void @static_65536(ptr %out) #0 {
; CHECK-LABEL: static_65536:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 65536, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 64k+16 bytes, still needs just one probe.
; The expected probe is a post-indexed store of xzr with a #-16 writeback, so
; the probing instruction itself applies the remaining 16-byte SP adjustment
; (the CFA offset moves from 65552 to 65568 right after it).
define void @static_65552(ptr %out) #0 {
; CHECK-LABEL: static_65552:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp], #-16
; CHECK-NEXT: .cfi_def_cfa_offset 65568
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 65552, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 64k+1024 bytes, the largest frame which needs just one probe.
; Expected code: the 64KiB step is probed, the trailing 1024-byte SP
; adjustment is not followed by a probe.
define void @static_66560(ptr %out) #0 {
; CHECK-LABEL: static_66560:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 66576
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 66560, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 64k+1024+16 bytes, the smallest frame which needs two probes.
; Expected code: the 64KiB step is probed, and the trailing 1040-byte SP
; adjustment is followed by a second probe.
define void @static_66576(ptr %out) #0 {
; CHECK-LABEL: static_66576:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 66592
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 1056
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 66576, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 2*64k+1024, the largest frame needing two probes.
; Expected code: two probed 64KiB steps, then an unprobed 1024-byte
; adjustment.
define void @static_132096(ptr %out) #0 {
; CHECK-LABEL: static_132096:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 131088
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 132112
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #32, lsl #12 // =131072
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 132096, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*64k-16, the largest frame probed without a loop.
; Expected code: four probed 64KiB steps in straight-line code, then the
; remaining 65520 bytes (61440 + 4080, split over two sub instructions)
; followed by a final probe.
define void @static_327664(ptr %out) #0 {
; CHECK-LABEL: static_327664:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 131088
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 196624
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 262160
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #15, lsl #12 // =61440
; CHECK-NEXT: .cfi_def_cfa_offset 323600
; CHECK-NEXT: sub sp, sp, #4080
; CHECK-NEXT: .cfi_def_cfa_offset 327680
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #79, lsl #12 // =323584
; CHECK-NEXT: .cfi_def_cfa_offset 4096
; CHECK-NEXT: add sp, sp, #4080
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 327664, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*64k, smallest frame probed with a loop.
; Expected code: x9 is set to the final SP value, then a loop lowers SP by
; 64KiB and probes on each iteration until SP equals x9. While the loop runs
; the CFA is described relative to w9; afterwards it switches back to wsp.
define void @static_327680(ptr %out) #0 {
; CHECK-LABEL: static_327680:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa w9, 327696
; CHECK-NEXT: .LBB6_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB6_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 327680, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*64k+1024, large enough to use a loop, but not a multiple of 64KiB
; so has a remainder, but no extra probe.
; Expected code: the probing loop covers 5*64KiB; the 1024-byte remainder is
; allocated after the loop without a further probe.
define void @static_328704(ptr %out) #0 {
; CHECK-LABEL: static_328704:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa w9, 327696
; CHECK-NEXT: .LBB7_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB7_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 328720
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 328704, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*64k+1040, large enough to use a loop, has a remainder and
; an extra probe.
; Expected code: the probing loop covers 5*64KiB; the 1040-byte remainder is
; allocated after the loop and followed by one more probe.
define void @static_328720(ptr %out) #0 {
; CHECK-LABEL: static_328720:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa w9, 327696
; CHECK-NEXT: .LBB8_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB8_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 328736
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa_offset 1056
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 328720, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; A small allocation, but with a very large alignment requirement. We do this
; by moving SP far enough that a sufficiently-aligned block will exist
; somewhere in the stack frame, so must probe the whole of that larger SP move.
; Expected code: x29/x30 are saved and x29 is set up as the frame pointer.
; The target SP is computed in x9 as (sp - 131056) rounded down to a 128KiB
; boundary. The loop lowers SP by 64KiB per iteration and probes with a store
; while SP is still above x9; on exit SP is set to x9 and probed with a load
; into xzr. The epilogue restores SP from x29.
define void @static_16_align_131072(ptr %out) #0 {
; CHECK-LABEL: static_16_align_131072:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #31, lsl #12 // =126976
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and x9, x9, #0xfffffffffffe0000
; CHECK-NEXT: .LBB9_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.le .LBB9_3
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: // in Loop: Header=BB9_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB9_1
; CHECK-NEXT: .LBB9_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 16, align 131072
  store ptr %v, ptr %out, align 8
  ret void
}

; A small allocation, but with a very large alignment requirement which
; is nevertheless small enough as to not need a loop.
; Expected code: SP is set directly to (sp - 8176) rounded down to an 8KiB
; boundary and probed once with a store; the epilogue restores SP from x29.
define void @static_16_align_8192(ptr %out) #0 {
; CHECK-LABEL: static_16_align_8192:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and sp, x9, #0xffffffffffffe000
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 16, align 8192
  store ptr %v, ptr %out, align 8
  ret void
}

; A large allocation with a very large alignment requirement which
; is nevertheless small enough as to not need a loop.
; Expected code: SP is set directly to (sp - 32752) rounded down to a 32KiB
; boundary and probed once with a store; the epilogue restores SP from x29.
define void @static_32752_align_32k(ptr %out) #0 {
; CHECK-LABEL: static_32752_align_32k:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #7, lsl #12 // =28672
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and sp, x9, #0xffffffffffff8000
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i8, i64 32752, align 32768
  store ptr %v, ptr %out, align 8
  ret void
}

; Attribute group shared by every function in this file: asynchronous unwind
; tables, inline stack probing with a 65536-byte probe size, and no requested
; frame pointer. The three over-aligned tests above still expect x29 to be set
; up and used to restore SP.
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="65536" "frame-pointer"="none" }