; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s
; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s

; Tests for prolog sequences for stack probing, when using a 4KiB stack guard.

; The stack probing parameters in function attributes take precedence over
; ones in the module flags.

; Small stack frame, no probing required.
define void @static_64(ptr %out) #0 {
; CHECK-LABEL: static_64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #64
; CHECK-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #64
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 64, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; At 256 bytes we start to always create a frame pointer. No frame smaller than
; this needs a probe, so we can use the saving of at least one CSR as a probe
; at the top of our frame.
define void @static_256(ptr %out) #0 {
; CHECK-LABEL: static_256:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #272
; CHECK-NEXT:    .cfi_def_cfa_offset 272
; CHECK-NEXT:    str x29, [sp, #256] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #272
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 256, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; At 1024 bytes, this is the largest frame which doesn't need probing.
define void @static_1024(ptr %out) #0 {
; CHECK-LABEL: static_1024:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 1040
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 1024, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; At 1024+16 bytes, this is the smallest frame which needs probing.
define void @static_1040(ptr %out) #0 {
; CHECK-LABEL: static_1040:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 1056
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 1040, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 4k bytes is the largest frame we can probe in one go.
define void @static_4096(ptr %out) #0 {
; CHECK-LABEL: static_4096:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4112
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 4096, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 4k+16 bytes, still needs just one probe.
define void @static_4112(ptr %out) #0 {
; CHECK-LABEL: static_4112:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4112
; CHECK-NEXT:    str xzr, [sp], #-16
; CHECK-NEXT:    .cfi_def_cfa_offset 4128
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 4112, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 4k+1024 bytes, the largest frame which needs just one probe.
define void @static_5120(ptr %out) #0 {
; CHECK-LABEL: static_5120:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4112
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 5136
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 1040
; CHECK-NEXT:    add sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 5120, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 4k+1024+16, the smallest frame which needs two probes.
define void @static_5136(ptr %out) #0 {
; CHECK-LABEL: static_5136:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4112
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 5152
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 1056
; CHECK-NEXT:    add sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 5136, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 2*4k+1024, the largest frame needing two probes
define void @static_9216(ptr %out) #0 {
; CHECK-LABEL: static_9216:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4112
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 8208
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 9232
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #2, lsl #12 // =8192
; CHECK-NEXT:    .cfi_def_cfa_offset 1040
; CHECK-NEXT:    add sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 9216, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*4k-16, the largest frame probed without a loop
define void @static_20464(ptr %out) #0 {
; CHECK-LABEL: static_20464:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4112
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 8208
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 12304
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    .cfi_def_cfa_offset 16400
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #4080
; CHECK-NEXT:    .cfi_def_cfa_offset 20480
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #4, lsl #12 // =16384
; CHECK-NEXT:    .cfi_def_cfa_offset 4096
; CHECK-NEXT:    add sp, sp, #4080
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 20464, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*4k, the smallest frame probed with a loop
define void @static_20480(ptr %out) #0 {
; CHECK-LABEL: static_20480:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #5, lsl #12 // =20480
; CHECK-NEXT:    .cfi_def_cfa w9, 20496
; CHECK-NEXT:  .LBB10_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    b.ne .LBB10_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    .cfi_def_cfa_register wsp
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #5, lsl #12 // =20480
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 20480, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*4k + 1024, large enough to use a loop, but not a multiple of 4KiB
; so has a remainder, but no extra probe.
define void @static_21504(ptr %out) #0 {
; CHECK-LABEL: static_21504:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #5, lsl #12 // =20480
; CHECK-NEXT:    .cfi_def_cfa w9, 20496
; CHECK-NEXT:  .LBB11_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    b.ne .LBB11_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    .cfi_def_cfa_register wsp
; CHECK-NEXT:    sub sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 21520
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #5, lsl #12 // =20480
; CHECK-NEXT:    .cfi_def_cfa_offset 1040
; CHECK-NEXT:    add sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 21504, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*4k+1040, large enough to use a loop, has a remainder and
; an extra probe.
define void @static_21520(ptr %out) #0 {
; CHECK-LABEL: static_21520:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #5, lsl #12 // =20480
; CHECK-NEXT:    .cfi_def_cfa w9, 20496
; CHECK-NEXT:  .LBB12_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    b.ne .LBB12_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    .cfi_def_cfa_register wsp
; CHECK-NEXT:    sub sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 21536
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #5, lsl #12 // =20480
; CHECK-NEXT:    .cfi_def_cfa_offset 1056
; CHECK-NEXT:    add sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 21520, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; A small allocation, but with a very large alignment requirement. We do this
; by moving SP far enough that a sufficiently-aligned block will exist
; somewhere in the stack frame, so must probe the whole of that larger SP move.
define void @static_16_align_8192(ptr %out) #0 {
; CHECK-LABEL: static_16_align_8192:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    sub x9, x9, #4080
; CHECK-NEXT:    and x9, x9, #0xffffffffffffe000
; CHECK-NEXT:  .LBB13_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    b.le .LBB13_3
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    // in Loop: Header=BB13_1 Depth=1
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    b .LBB13_1
; CHECK-NEXT:  .LBB13_3: // %entry
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    ldr xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    .cfi_def_cfa wsp, 16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 16, align 8192
  store ptr %v, ptr %out, align 8
  ret void
}

; A small allocation with a very large alignment requirement, but
; nevertheless small enough as to not need a loop.
define void @static_16_align_2048(ptr %out) #0 {
; CHECK-LABEL: static_16_align_2048:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #2032
; CHECK-NEXT:    and sp, x9, #0xfffffffffffff800
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    .cfi_def_cfa wsp, 16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 16, align 2048
  store ptr %v, ptr %out, align 8
  ret void
}

; A large(-ish) allocation with a very large alignment requirement, but
; nevertheless small enough as to not need a loop.
define void @static_2032_align_2048(ptr %out) #0 {
; CHECK-LABEL: static_2032_align_2048:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #2032
; CHECK-NEXT:    and sp, x9, #0xfffffffffffff800
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    .cfi_def_cfa wsp, 16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 2032, align 2048
  store ptr %v, ptr %out, align 8
  ret void
}

; Test stack probing is enabled by module flags
define void @static_9232(ptr %out) uwtable(async) {
; CHECK-LABEL: static_9232:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #2, lsl #12 // =8192
; CHECK-NEXT:    .cfi_def_cfa_offset 8208
; CHECK-NEXT:    sub sp, sp, #800
; CHECK-NEXT:    .cfi_def_cfa_offset 9008
; CHECK-NEXT:    str xzr, [sp], #-240
; CHECK-NEXT:    .cfi_def_cfa_offset 9248
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #2, lsl #12 // =8192
; CHECK-NEXT:    .cfi_def_cfa_offset 1056
; CHECK-NEXT:    add sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 9232, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; Test for a tight upper bound on the amount of stack adjustment
; due to stack realignment. No probes should appear.
define void @static_1008(ptr %out) #0 {
; CHECK-LABEL: static_1008:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #1008
; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    .cfi_def_cfa wsp, 16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  ; 1008 bytes with 32-byte alignment: the over-alignment is what makes the
  ; prologue realign SP (the "and sp, x9, ..." in the expected output above).
  %v = alloca i8, i32 1008, align 32
  store ptr %v, ptr %out, align 8
  ret void
}

; Attribute group used by every function that carries "#0": async unwind
; tables, "probe-stack"="inline-asm", a 4096-byte "stack-probe-size", and
; "frame-pointer"="none".
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" }

; Module-level probing settings. @static_9232 is declared without #0, so it is
; the function in this file that relies on these flags rather than on
; per-function attributes.
!llvm.module.flags = !{!0, !1}

!0 = !{i32 4, !"probe-stack", !"inline-asm"}
!1 = !{i32 8, !"stack-probe-size", i32 9000}