xref: /llvm-project/llvm/test/CodeGen/AArch64/stack-probing.ll (revision 3d18c8cd265c0c0bf1d85226c4770a2dd0f86e8f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s
3; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s
4
5; Tests for prolog sequences for stack probing, when using a 4KiB stack guard.
6
7; The stack probing parameters in function attributes take precedence over
8; ones in the module flags.
9
10; Small stack frame, no probing required.
; The 64-byte local is allocated with a single SP adjustment, and the expected
; code contains no store into the newly allocated stack memory.
11define void @static_64(ptr %out) #0 {
12; CHECK-LABEL: static_64:
13; CHECK:       // %bb.0: // %entry
14; CHECK-NEXT:    sub sp, sp, #64
15; CHECK-NEXT:    .cfi_def_cfa_offset 64
16; CHECK-NEXT:    mov x8, sp
17; CHECK-NEXT:    str x8, [x0]
18; CHECK-NEXT:    add sp, sp, #64
19; CHECK-NEXT:    .cfi_def_cfa_offset 0
20; CHECK-NEXT:    ret
21entry:
22  %v = alloca i8, i64 64, align 1
23  store ptr %v, ptr %out, align 8 ; publish the local's address (presumably keeps the alloca from being removed)
24  ret void
25}
26
27; At 256 bytes we start to always create a frame pointer. No frame smaller than
28; this needs a probe, so we can use the saving of at least one CSR as a probe
29; at the top of our frame.
; The expected code allocates 272 bytes in one step and spills x29 at
; [sp, #256], directly above the 256-byte local; no separate probe store is
; emitted.
; NOTE(review): x29 is saved but never set up as a frame pointer (there is no
; `mov x29, sp`), which is consistent with "frame-pointer"="none" in #0.
30define void @static_256(ptr %out) #0 {
31; CHECK-LABEL: static_256:
32; CHECK:       // %bb.0: // %entry
33; CHECK-NEXT:    sub sp, sp, #272
34; CHECK-NEXT:    .cfi_def_cfa_offset 272
35; CHECK-NEXT:    str x29, [sp, #256] // 8-byte Folded Spill
36; CHECK-NEXT:    .cfi_offset w29, -16
37; CHECK-NEXT:    mov x8, sp
38; CHECK-NEXT:    str x8, [x0]
39; CHECK-NEXT:    add sp, sp, #272
40; CHECK-NEXT:    .cfi_def_cfa_offset 0
41; CHECK-NEXT:    .cfi_restore w29
42; CHECK-NEXT:    ret
43entry:
44  %v = alloca i8, i64 256, align 1
45  store ptr %v, ptr %out, align 8
46  ret void
47}
48
49; At 1024 bytes, this is the largest frame which doesn't need probing.
; x29 is pushed first with a pre-indexed store, then the 1024 bytes of locals
; are allocated with a single SP adjustment and no probe store.
50define void @static_1024(ptr %out) #0 {
51; CHECK-LABEL: static_1024:
52; CHECK:       // %bb.0: // %entry
53; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
54; CHECK-NEXT:    .cfi_def_cfa_offset 16
55; CHECK-NEXT:    .cfi_offset w29, -16
56; CHECK-NEXT:    sub sp, sp, #1024
57; CHECK-NEXT:    .cfi_def_cfa_offset 1040
58; CHECK-NEXT:    mov x8, sp
59; CHECK-NEXT:    str x8, [x0]
60; CHECK-NEXT:    add sp, sp, #1024
61; CHECK-NEXT:    .cfi_def_cfa_offset 16
62; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
63; CHECK-NEXT:    .cfi_def_cfa_offset 0
64; CHECK-NEXT:    .cfi_restore w29
65; CHECK-NEXT:    ret
66entry:
67  %v = alloca i8, i64 1024, align 1
68  store ptr %v, ptr %out, align 8
69  ret void
70}
71
72; At 1024+16 bytes, this is the smallest frame which needs probing.
; The probe is the `str xzr, [sp]` issued immediately after the single
; 1040-byte SP adjustment.
73define void @static_1040(ptr %out) #0 {
74; CHECK-LABEL: static_1040:
75; CHECK:       // %bb.0: // %entry
76; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
77; CHECK-NEXT:    .cfi_def_cfa_offset 16
78; CHECK-NEXT:    .cfi_offset w29, -16
79; CHECK-NEXT:    sub sp, sp, #1040
80; CHECK-NEXT:    .cfi_def_cfa_offset 1056
81; CHECK-NEXT:    str xzr, [sp]
82; CHECK-NEXT:    mov x8, sp
83; CHECK-NEXT:    str x8, [x0]
84; CHECK-NEXT:    add sp, sp, #1040
85; CHECK-NEXT:    .cfi_def_cfa_offset 16
86; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
87; CHECK-NEXT:    .cfi_def_cfa_offset 0
88; CHECK-NEXT:    .cfi_restore w29
89; CHECK-NEXT:    ret
90entry:
91  %v = alloca i8, i64 1040, align 1
92  store ptr %v, ptr %out, align 8
93  ret void
94}
95
96; 4k bytes is the largest frame we can probe in one go.
; Expected: one 4096-byte SP decrement followed by exactly one probe store.
97define void @static_4096(ptr %out) #0 {
98; CHECK-LABEL: static_4096:
99; CHECK:       // %bb.0: // %entry
100; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
101; CHECK-NEXT:    .cfi_def_cfa_offset 16
102; CHECK-NEXT:    .cfi_offset w29, -16
103; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
104; CHECK-NEXT:    .cfi_def_cfa_offset 4112
105; CHECK-NEXT:    str xzr, [sp]
106; CHECK-NEXT:    mov x8, sp
107; CHECK-NEXT:    str x8, [x0]
108; CHECK-NEXT:    add sp, sp, #1, lsl #12 // =4096
109; CHECK-NEXT:    .cfi_def_cfa_offset 16
110; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
111; CHECK-NEXT:    .cfi_def_cfa_offset 0
112; CHECK-NEXT:    .cfi_restore w29
113; CHECK-NEXT:    ret
114entry:
115  %v = alloca i8, i64 4096, align 1
116  store ptr %v, ptr %out, align 8
117  ret void
118}
119
120; 4k+16 bytes, still needs just one probe.
; The first 4096 bytes are allocated with a plain SP decrement. The probe is a
; post-indexed store (`str xzr, [sp], #-16`) that also lowers SP by the
; remaining 16 bytes, so the CFA offset goes 4112 -> 4128 with no second probe.
; The epilogue undoes the allocation in two steps (4096, then 16).
121define void @static_4112(ptr %out) #0 {
122; CHECK-LABEL: static_4112:
123; CHECK:       // %bb.0: // %entry
124; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
125; CHECK-NEXT:    .cfi_def_cfa_offset 16
126; CHECK-NEXT:    .cfi_offset w29, -16
127; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
128; CHECK-NEXT:    .cfi_def_cfa_offset 4112
129; CHECK-NEXT:    str xzr, [sp], #-16
130; CHECK-NEXT:    .cfi_def_cfa_offset 4128
131; CHECK-NEXT:    mov x8, sp
132; CHECK-NEXT:    str x8, [x0]
133; CHECK-NEXT:    add sp, sp, #1, lsl #12 // =4096
134; CHECK-NEXT:    .cfi_def_cfa_offset 32
135; CHECK-NEXT:    add sp, sp, #16
136; CHECK-NEXT:    .cfi_def_cfa_offset 16
137; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
138; CHECK-NEXT:    .cfi_def_cfa_offset 0
139; CHECK-NEXT:    .cfi_restore w29
140; CHECK-NEXT:    ret
141entry:
142  %v = alloca i8, i64 4112, align 1
143  store ptr %v, ptr %out, align 8
144  ret void
145}
146
147; 4k+1024 bytes, the largest frame which needs just one probe.
; Expected: 4096 bytes allocated and probed, then the remaining 1024 bytes
; allocated with no further probe.
148define void @static_5120(ptr %out) #0 {
149; CHECK-LABEL: static_5120:
150; CHECK:       // %bb.0: // %entry
151; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
152; CHECK-NEXT:    .cfi_def_cfa_offset 16
153; CHECK-NEXT:    .cfi_offset w29, -16
154; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
155; CHECK-NEXT:    .cfi_def_cfa_offset 4112
156; CHECK-NEXT:    str xzr, [sp]
157; CHECK-NEXT:    sub sp, sp, #1024
158; CHECK-NEXT:    .cfi_def_cfa_offset 5136
159; CHECK-NEXT:    mov x8, sp
160; CHECK-NEXT:    str x8, [x0]
161; CHECK-NEXT:    add sp, sp, #1, lsl #12 // =4096
162; CHECK-NEXT:    .cfi_def_cfa_offset 1040
163; CHECK-NEXT:    add sp, sp, #1024
164; CHECK-NEXT:    .cfi_def_cfa_offset 16
165; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
166; CHECK-NEXT:    .cfi_def_cfa_offset 0
167; CHECK-NEXT:    .cfi_restore w29
168; CHECK-NEXT:    ret
169entry:
170  %v = alloca i8, i64 5120, align 1
171  store ptr %v, ptr %out, align 8
172  ret void
173}
174
175; 4k+1024+16, the smallest frame which needs two probes.
; Expected: one probe after the 4096-byte decrement and a second probe after
; the remaining 1040-byte decrement.
176define void @static_5136(ptr %out) #0 {
177; CHECK-LABEL: static_5136:
178; CHECK:       // %bb.0: // %entry
179; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
180; CHECK-NEXT:    .cfi_def_cfa_offset 16
181; CHECK-NEXT:    .cfi_offset w29, -16
182; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
183; CHECK-NEXT:    .cfi_def_cfa_offset 4112
184; CHECK-NEXT:    str xzr, [sp]
185; CHECK-NEXT:    sub sp, sp, #1040
186; CHECK-NEXT:    .cfi_def_cfa_offset 5152
187; CHECK-NEXT:    str xzr, [sp]
188; CHECK-NEXT:    mov x8, sp
189; CHECK-NEXT:    str x8, [x0]
190; CHECK-NEXT:    add sp, sp, #1, lsl #12 // =4096
191; CHECK-NEXT:    .cfi_def_cfa_offset 1056
192; CHECK-NEXT:    add sp, sp, #1040
193; CHECK-NEXT:    .cfi_def_cfa_offset 16
194; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
195; CHECK-NEXT:    .cfi_def_cfa_offset 0
196; CHECK-NEXT:    .cfi_restore w29
197; CHECK-NEXT:    ret
198entry:
199  %v = alloca i8, i64 5136, align 1
200  store ptr %v, ptr %out, align 8
201  ret void
202}
203
204; 2*4k+1024, the largest frame needing two probes
; Expected: two 4096-byte decrements, each followed by a probe store, then the
; remaining 1024 bytes allocated without a probe. The epilogue releases the
; two 4096-byte blocks with a single 8192-byte add.
205define void @static_9216(ptr %out) #0 {
206; CHECK-LABEL: static_9216:
207; CHECK:       // %bb.0: // %entry
208; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
209; CHECK-NEXT:    .cfi_def_cfa_offset 16
210; CHECK-NEXT:    .cfi_offset w29, -16
211; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
212; CHECK-NEXT:    .cfi_def_cfa_offset 4112
213; CHECK-NEXT:    str xzr, [sp]
214; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
215; CHECK-NEXT:    .cfi_def_cfa_offset 8208
216; CHECK-NEXT:    str xzr, [sp]
217; CHECK-NEXT:    sub sp, sp, #1024
218; CHECK-NEXT:    .cfi_def_cfa_offset 9232
219; CHECK-NEXT:    mov x8, sp
220; CHECK-NEXT:    str x8, [x0]
221; CHECK-NEXT:    add sp, sp, #2, lsl #12 // =8192
222; CHECK-NEXT:    .cfi_def_cfa_offset 1040
223; CHECK-NEXT:    add sp, sp, #1024
224; CHECK-NEXT:    .cfi_def_cfa_offset 16
225; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
226; CHECK-NEXT:    .cfi_def_cfa_offset 0
227; CHECK-NEXT:    .cfi_restore w29
228; CHECK-NEXT:    ret
229entry:
230  %v = alloca i8, i64 9216, align 1
231  store ptr %v, ptr %out, align 8
232  ret void
233}
234
235; 5*4k-16, the largest frame probed without a loop
; Expected: a fully unrolled sequence of four 4096-byte decrements and one
; 4080-byte decrement, each followed by its own probe store (five probes).
236define void @static_20464(ptr %out) #0 {
237; CHECK-LABEL: static_20464:
238; CHECK:       // %bb.0: // %entry
239; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
240; CHECK-NEXT:    .cfi_def_cfa_offset 16
241; CHECK-NEXT:    .cfi_offset w29, -16
242; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
243; CHECK-NEXT:    .cfi_def_cfa_offset 4112
244; CHECK-NEXT:    str xzr, [sp]
245; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
246; CHECK-NEXT:    .cfi_def_cfa_offset 8208
247; CHECK-NEXT:    str xzr, [sp]
248; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
249; CHECK-NEXT:    .cfi_def_cfa_offset 12304
250; CHECK-NEXT:    str xzr, [sp]
251; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
252; CHECK-NEXT:    .cfi_def_cfa_offset 16400
253; CHECK-NEXT:    str xzr, [sp]
254; CHECK-NEXT:    sub sp, sp, #4080
255; CHECK-NEXT:    .cfi_def_cfa_offset 20480
256; CHECK-NEXT:    str xzr, [sp]
257; CHECK-NEXT:    mov x8, sp
258; CHECK-NEXT:    str x8, [x0]
259; CHECK-NEXT:    add sp, sp, #4, lsl #12 // =16384
260; CHECK-NEXT:    .cfi_def_cfa_offset 4096
261; CHECK-NEXT:    add sp, sp, #4080
262; CHECK-NEXT:    .cfi_def_cfa_offset 16
263; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
264; CHECK-NEXT:    .cfi_def_cfa_offset 0
265; CHECK-NEXT:    .cfi_restore w29
266; CHECK-NEXT:    ret
267entry:
268  %v = alloca i8, i64 20464, align 1
269  store ptr %v, ptr %out, align 8
270  ret void
271}
272
273; 5*4k, the smallest frame probed with a loop
; Expected: x9 holds the target SP (sp - 20480). The loop lowers SP by 4096,
; probes with `str xzr, [sp]`, and repeats until SP equals x9. While the loop
; runs the CFA is described relative to x9 (`.cfi_def_cfa w9, 20496`); after
; the loop it is switched back to wsp.
274define void @static_20480(ptr %out) #0 {
275; CHECK-LABEL: static_20480:
276; CHECK:       // %bb.0: // %entry
277; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
278; CHECK-NEXT:    .cfi_def_cfa_offset 16
279; CHECK-NEXT:    .cfi_offset w29, -16
280; CHECK-NEXT:    sub x9, sp, #5, lsl #12 // =20480
281; CHECK-NEXT:    .cfi_def_cfa w9, 20496
282; CHECK-NEXT:  .LBB10_1: // %entry
283; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
284; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
285; CHECK-NEXT:    str xzr, [sp]
286; CHECK-NEXT:    cmp sp, x9
287; CHECK-NEXT:    b.ne .LBB10_1
288; CHECK-NEXT:  // %bb.2: // %entry
289; CHECK-NEXT:    .cfi_def_cfa_register wsp
290; CHECK-NEXT:    mov x8, sp
291; CHECK-NEXT:    str x8, [x0]
292; CHECK-NEXT:    add sp, sp, #5, lsl #12 // =20480
293; CHECK-NEXT:    .cfi_def_cfa_offset 16
294; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
295; CHECK-NEXT:    .cfi_def_cfa_offset 0
296; CHECK-NEXT:    .cfi_restore w29
297; CHECK-NEXT:    ret
298entry:
299  %v = alloca i8, i64 20480, align 1
300  store ptr %v, ptr %out, align 8
301  ret void
302}
303
304; 5*4k + 1024, large enough to use a loop, but not a multiple of 4KiB
305; so has a remainder, but no extra probe.
; Expected: the probing loop covers the first 20480 bytes; the 1024-byte
; remainder is then allocated with a plain SP decrement and no probe store.
306define void @static_21504(ptr %out) #0 {
307; CHECK-LABEL: static_21504:
308; CHECK:       // %bb.0: // %entry
309; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
310; CHECK-NEXT:    .cfi_def_cfa_offset 16
311; CHECK-NEXT:    .cfi_offset w29, -16
312; CHECK-NEXT:    sub x9, sp, #5, lsl #12 // =20480
313; CHECK-NEXT:    .cfi_def_cfa w9, 20496
314; CHECK-NEXT:  .LBB11_1: // %entry
315; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
316; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
317; CHECK-NEXT:    str xzr, [sp]
318; CHECK-NEXT:    cmp sp, x9
319; CHECK-NEXT:    b.ne .LBB11_1
320; CHECK-NEXT:  // %bb.2: // %entry
321; CHECK-NEXT:    .cfi_def_cfa_register wsp
322; CHECK-NEXT:    sub sp, sp, #1024
323; CHECK-NEXT:    .cfi_def_cfa_offset 21520
324; CHECK-NEXT:    mov x8, sp
325; CHECK-NEXT:    str x8, [x0]
326; CHECK-NEXT:    add sp, sp, #5, lsl #12 // =20480
327; CHECK-NEXT:    .cfi_def_cfa_offset 1040
328; CHECK-NEXT:    add sp, sp, #1024
329; CHECK-NEXT:    .cfi_def_cfa_offset 16
330; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
331; CHECK-NEXT:    .cfi_def_cfa_offset 0
332; CHECK-NEXT:    .cfi_restore w29
333; CHECK-NEXT:    ret
334entry:
335  %v = alloca i8, i64 21504, align 1
336  store ptr %v, ptr %out, align 8
337  ret void
338}
339
340; 5*4k+1040, large enough to use a loop, has a remainder and
341; an extra probe.
; Expected: the probing loop covers the first 20480 bytes; the 1040-byte
; remainder is then allocated and followed by one more `str xzr, [sp]` probe.
342define void @static_21520(ptr %out) #0 {
343; CHECK-LABEL: static_21520:
344; CHECK:       // %bb.0: // %entry
345; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
346; CHECK-NEXT:    .cfi_def_cfa_offset 16
347; CHECK-NEXT:    .cfi_offset w29, -16
348; CHECK-NEXT:    sub x9, sp, #5, lsl #12 // =20480
349; CHECK-NEXT:    .cfi_def_cfa w9, 20496
350; CHECK-NEXT:  .LBB12_1: // %entry
351; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
352; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
353; CHECK-NEXT:    str xzr, [sp]
354; CHECK-NEXT:    cmp sp, x9
355; CHECK-NEXT:    b.ne .LBB12_1
356; CHECK-NEXT:  // %bb.2: // %entry
357; CHECK-NEXT:    .cfi_def_cfa_register wsp
358; CHECK-NEXT:    sub sp, sp, #1040
359; CHECK-NEXT:    .cfi_def_cfa_offset 21536
360; CHECK-NEXT:    str xzr, [sp]
361; CHECK-NEXT:    mov x8, sp
362; CHECK-NEXT:    str x8, [x0]
363; CHECK-NEXT:    add sp, sp, #5, lsl #12 // =20480
364; CHECK-NEXT:    .cfi_def_cfa_offset 1056
365; CHECK-NEXT:    add sp, sp, #1040
366; CHECK-NEXT:    .cfi_def_cfa_offset 16
367; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
368; CHECK-NEXT:    .cfi_def_cfa_offset 0
369; CHECK-NEXT:    .cfi_restore w29
370; CHECK-NEXT:    ret
371entry:
372  %v = alloca i8, i64 21520, align 1
373  store ptr %v, ptr %out, align 8
374  ret void
375}
376
377; A small allocation, but with a very large alignment requirement. We do this
378; by moving SP far enough that a sufficiently-aligned block will exist
379; somewhere in the stack frame, so must probe the whole of that larger SP move.
; Expected: x29 is set up as a frame pointer and the target SP is computed in
; x9 as (sp - 4096 - 4080) & ~0x1fff. Because the distance to the target is
; not a compile-time constant, the loop has a different shape from the
; fixed-size loops in this file: it lowers SP by 4096, exits once SP <= x9
; (`b.le`), and otherwise probes with a store and repeats. On exit SP is set
; to x9 and the final probe is a load (`ldr xzr, [sp]`). The epilogue restores
; SP from x29 instead of adding back a constant.
380define void @static_16_align_8192(ptr %out) #0 {
381; CHECK-LABEL: static_16_align_8192:
382; CHECK:       // %bb.0: // %entry
383; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
384; CHECK-NEXT:    .cfi_def_cfa_offset 16
385; CHECK-NEXT:    mov x29, sp
386; CHECK-NEXT:    .cfi_def_cfa w29, 16
387; CHECK-NEXT:    .cfi_offset w30, -8
388; CHECK-NEXT:    .cfi_offset w29, -16
389; CHECK-NEXT:    sub x9, sp, #1, lsl #12 // =4096
390; CHECK-NEXT:    sub x9, x9, #4080
391; CHECK-NEXT:    and x9, x9, #0xffffffffffffe000
392; CHECK-NEXT:  .LBB13_1: // %entry
393; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
394; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
395; CHECK-NEXT:    cmp sp, x9
396; CHECK-NEXT:    b.le .LBB13_3
397; CHECK-NEXT:  // %bb.2: // %entry
398; CHECK-NEXT:    // in Loop: Header=BB13_1 Depth=1
399; CHECK-NEXT:    str xzr, [sp]
400; CHECK-NEXT:    b .LBB13_1
401; CHECK-NEXT:  .LBB13_3: // %entry
402; CHECK-NEXT:    mov sp, x9
403; CHECK-NEXT:    ldr xzr, [sp]
404; CHECK-NEXT:    mov x8, sp
405; CHECK-NEXT:    str x8, [x0]
406; CHECK-NEXT:    mov sp, x29
407; CHECK-NEXT:    .cfi_def_cfa wsp, 16
408; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
409; CHECK-NEXT:    .cfi_def_cfa_offset 0
410; CHECK-NEXT:    .cfi_restore w30
411; CHECK-NEXT:    .cfi_restore w29
412; CHECK-NEXT:    ret
413entry:
414  %v = alloca i8, i64 16, align 8192
415  store ptr %v, ptr %out, align 8
416  ret void
417}
418
419; A small allocation with a very large alignment requirement, but
420; nevertheless small enough as to not need a loop.
; Expected: x9 = sp - 2032, then SP = x9 & ~0x7ff (2048-byte alignment),
; followed by a single probe store. SP is restored from the frame pointer x29
; in the epilogue.
421define void @static_16_align_2048(ptr %out) #0 {
422; CHECK-LABEL: static_16_align_2048:
423; CHECK:       // %bb.0: // %entry
424; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
425; CHECK-NEXT:    .cfi_def_cfa_offset 16
426; CHECK-NEXT:    mov x29, sp
427; CHECK-NEXT:    .cfi_def_cfa w29, 16
428; CHECK-NEXT:    .cfi_offset w30, -8
429; CHECK-NEXT:    .cfi_offset w29, -16
430; CHECK-NEXT:    sub x9, sp, #2032
431; CHECK-NEXT:    and sp, x9, #0xfffffffffffff800
432; CHECK-NEXT:    str xzr, [sp]
433; CHECK-NEXT:    mov x8, sp
434; CHECK-NEXT:    str x8, [x0]
435; CHECK-NEXT:    mov sp, x29
436; CHECK-NEXT:    .cfi_def_cfa wsp, 16
437; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
438; CHECK-NEXT:    .cfi_def_cfa_offset 0
439; CHECK-NEXT:    .cfi_restore w30
440; CHECK-NEXT:    .cfi_restore w29
441; CHECK-NEXT:    ret
442entry:
443  %v = alloca i8, i64 16, align 2048
444  store ptr %v, ptr %out, align 8
445  ret void
446}
447
448; A large(-ish) allocation with a very large alignment requirement, but
449; nevertheless small enough as to not need a loop.
; The expected instruction sequence is identical to @static_16_align_2048:
; subtract 2032, align SP down to 2048 bytes, then issue a single probe store.
450define void @static_2032_align_2048(ptr %out) #0 {
451; CHECK-LABEL: static_2032_align_2048:
452; CHECK:       // %bb.0: // %entry
453; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
454; CHECK-NEXT:    .cfi_def_cfa_offset 16
455; CHECK-NEXT:    mov x29, sp
456; CHECK-NEXT:    .cfi_def_cfa w29, 16
457; CHECK-NEXT:    .cfi_offset w30, -8
458; CHECK-NEXT:    .cfi_offset w29, -16
459; CHECK-NEXT:    sub x9, sp, #2032
460; CHECK-NEXT:    and sp, x9, #0xfffffffffffff800
461; CHECK-NEXT:    str xzr, [sp]
462; CHECK-NEXT:    mov x8, sp
463; CHECK-NEXT:    str x8, [x0]
464; CHECK-NEXT:    mov sp, x29
465; CHECK-NEXT:    .cfi_def_cfa wsp, 16
466; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
467; CHECK-NEXT:    .cfi_def_cfa_offset 0
468; CHECK-NEXT:    .cfi_restore w30
469; CHECK-NEXT:    .cfi_restore w29
470; CHECK-NEXT:    ret
471entry:
472  %v = alloca i8, i64 2032, align 2048
473  store ptr %v, ptr %out, align 8
474  ret void
475}
476
477; Test stack probing is enabled by module flags
; This is the only function in the file that does not use attribute group #0,
; so its probing parameters come from the module flags at the end of the file
; ("probe-stack"="inline-asm", "stack-probe-size"=9000).
; Expected: 8192 + 800 = 8992 bytes are allocated without an intermediate
; probe, then a post-indexed store (`str xzr, [sp], #-240`) probes and also
; allocates the remaining 240 bytes, for a total of 9232.
; NOTE(review): 8992 looks like the 9000-byte probe size rounded down to a
; multiple of 16 -- confirm against the backend.
478define void @static_9232(ptr %out) uwtable(async) {
479; CHECK-LABEL: static_9232:
480; CHECK:       // %bb.0: // %entry
481; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
482; CHECK-NEXT:    .cfi_def_cfa_offset 16
483; CHECK-NEXT:    .cfi_offset w29, -16
484; CHECK-NEXT:    sub sp, sp, #2, lsl #12 // =8192
485; CHECK-NEXT:    .cfi_def_cfa_offset 8208
486; CHECK-NEXT:    sub sp, sp, #800
487; CHECK-NEXT:    .cfi_def_cfa_offset 9008
488; CHECK-NEXT:    str xzr, [sp], #-240
489; CHECK-NEXT:    .cfi_def_cfa_offset 9248
490; CHECK-NEXT:    mov x8, sp
491; CHECK-NEXT:    str x8, [x0]
492; CHECK-NEXT:    add sp, sp, #2, lsl #12 // =8192
493; CHECK-NEXT:    .cfi_def_cfa_offset 1056
494; CHECK-NEXT:    add sp, sp, #1040
495; CHECK-NEXT:    .cfi_def_cfa_offset 16
496; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
497; CHECK-NEXT:    .cfi_def_cfa_offset 0
498; CHECK-NEXT:    .cfi_restore w29
499; CHECK-NEXT:    ret
500entry:
501  %v = alloca i8, i64 9232, align 1
502  store ptr %v, ptr %out, align 8
503  ret void
504}
505
506; Test for a tight upper bound on the amount of stack adjustment
507; due to stack realignment. No probes should appear.
; Expected: x9 = sp - 1008, then SP = x9 & ~0x1f (32-byte alignment), with no
; store to the newly allocated stack memory. Unlike the other functions in
; this file, the alloca element count here is an i32 rather than an i64.
508define void @static_1008(ptr %out) #0 {
509; CHECK-LABEL: static_1008:
510; CHECK:       // %bb.0: // %entry
511; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
512; CHECK-NEXT:    .cfi_def_cfa_offset 16
513; CHECK-NEXT:    mov x29, sp
514; CHECK-NEXT:    .cfi_def_cfa w29, 16
515; CHECK-NEXT:    .cfi_offset w30, -8
516; CHECK-NEXT:    .cfi_offset w29, -16
517; CHECK-NEXT:    sub x9, sp, #1008
518; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
519; CHECK-NEXT:    mov x8, sp
520; CHECK-NEXT:    str x8, [x0]
521; CHECK-NEXT:    mov sp, x29
522; CHECK-NEXT:    .cfi_def_cfa wsp, 16
523; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
524; CHECK-NEXT:    .cfi_def_cfa_offset 0
525; CHECK-NEXT:    .cfi_restore w30
526; CHECK-NEXT:    .cfi_restore w29
527; CHECK-NEXT:    ret
528entry:
529  %v = alloca i8, i32 1008, align 32
530  store ptr %v, ptr %out, align 8
531  ret void
532}
533
; Attribute group used by every test function except @static_9232: async
; unwind tables, inline stack probing with a 4096-byte probe size, and no
; frame pointer.
534attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" }
535
; Module-level stack probing parameters. Per the comment at the top of the
; file, function attributes take precedence over these, so they only affect
; @static_9232, which does not use #0.
; The leading i32 in each flag is its merge-behavior code
; (NOTE(review): 4 = override, 8 = min per LangRef -- confirm).
536!llvm.module.flags = !{!0, !1}
537
538!0 = !{i32 4, !"probe-stack", !"inline-asm"}
539!1 = !{i32 8, !"stack-probe-size", i32 9000}
540