xref: /llvm-project/llvm/test/CodeGen/AArch64/stack-probing-64k.ll (revision 3d18c8cd265c0c0bf1d85226c4770a2dd0f86e8f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s
3; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s
4
5; Tests for prolog sequences for stack probing, when using a 64KiB stack guard.
6
7; 64k bytes is the largest frame we can probe in one go.
8define void @static_65536(ptr %out) #0 {
9; CHECK-LABEL: static_65536:
10; CHECK:       // %bb.0: // %entry
11; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
12; CHECK-NEXT:    .cfi_def_cfa_offset 16
13; CHECK-NEXT:    .cfi_offset w29, -16
14; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
15; CHECK-NEXT:    .cfi_def_cfa_offset 65552
16; CHECK-NEXT:    str xzr, [sp]
17; CHECK-NEXT:    mov x8, sp
18; CHECK-NEXT:    str x8, [x0]
19; CHECK-NEXT:    add sp, sp, #16, lsl #12 // =65536
20; CHECK-NEXT:    .cfi_def_cfa_offset 16
21; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
22; CHECK-NEXT:    .cfi_def_cfa_offset 0
23; CHECK-NEXT:    .cfi_restore w29
24; CHECK-NEXT:    ret
25entry:
; Expected code (asserted above): one 65536-byte sp decrement followed by a
; single `str xzr, [sp]` probe; the prologue makes no further sp adjustment.
; Reserve 65536 bytes of stack (65536 x i8, 1-byte aligned).
26  %v = alloca i8, i64 65536, align 1
; Write the buffer's address through %out, which gives the alloca a use.
27  store ptr %v, ptr %out, align 8
28  ret void
29}
30
31; 64k+16 bytes, still needs just one probe.
32define void @static_65552(ptr %out) #0 {
33; CHECK-LABEL: static_65552:
34; CHECK:       // %bb.0: // %entry
35; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
36; CHECK-NEXT:    .cfi_def_cfa_offset 16
37; CHECK-NEXT:    .cfi_offset w29, -16
38; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
39; CHECK-NEXT:    .cfi_def_cfa_offset 65552
40; CHECK-NEXT:    str xzr, [sp], #-16
41; CHECK-NEXT:    .cfi_def_cfa_offset 65568
42; CHECK-NEXT:    mov x8, sp
43; CHECK-NEXT:    str x8, [x0]
44; CHECK-NEXT:    add sp, sp, #16, lsl #12 // =65536
45; CHECK-NEXT:    .cfi_def_cfa_offset 32
46; CHECK-NEXT:    add sp, sp, #16
47; CHECK-NEXT:    .cfi_def_cfa_offset 16
48; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
49; CHECK-NEXT:    .cfi_def_cfa_offset 0
50; CHECK-NEXT:    .cfi_restore w29
51; CHECK-NEXT:    ret
52entry:
; Expected code (asserted above): a 65536-byte sp decrement, then a
; post-indexed `str xzr, [sp], #-16` that writes the probe and also lowers sp
; by the remaining 16 bytes (the CFA offset goes from 65552 to 65568).
; Reserve 65552 bytes of stack (65552 x i8, 1-byte aligned).
53  %v = alloca i8, i64 65552, align 1
; Write the buffer's address through %out, which gives the alloca a use.
54  store ptr %v, ptr %out, align 8
55  ret void
56}
57
58; 64k+1024 bytes, the largest frame which needs just one probe.
59define void @static_66560(ptr %out) #0 {
60; CHECK-LABEL: static_66560:
61; CHECK:       // %bb.0: // %entry
62; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
63; CHECK-NEXT:    .cfi_def_cfa_offset 16
64; CHECK-NEXT:    .cfi_offset w29, -16
65; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
66; CHECK-NEXT:    .cfi_def_cfa_offset 65552
67; CHECK-NEXT:    str xzr, [sp]
68; CHECK-NEXT:    sub sp, sp, #1024
69; CHECK-NEXT:    .cfi_def_cfa_offset 66576
70; CHECK-NEXT:    mov x8, sp
71; CHECK-NEXT:    str x8, [x0]
72; CHECK-NEXT:    add sp, sp, #16, lsl #12 // =65536
73; CHECK-NEXT:    .cfi_def_cfa_offset 1040
74; CHECK-NEXT:    add sp, sp, #1024
75; CHECK-NEXT:    .cfi_def_cfa_offset 16
76; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
77; CHECK-NEXT:    .cfi_def_cfa_offset 0
78; CHECK-NEXT:    .cfi_restore w29
79; CHECK-NEXT:    ret
80entry:
; Expected code (asserted above): a 65536-byte sp decrement with a
; `str xzr, [sp]` probe, then a further 1024-byte decrement that is not
; followed by a second probe.
; Reserve 66560 bytes of stack (66560 x i8, 1-byte aligned).
81  %v = alloca i8, i64 66560, align 1
; Write the buffer's address through %out, which gives the alloca a use.
82  store ptr %v, ptr %out, align 8
83  ret void
84}
85
86; 64k+1024+16 bytes, the smallest frame which needs two probes.
87define void @static_66576(ptr %out) #0 {
88; CHECK-LABEL: static_66576:
89; CHECK:       // %bb.0: // %entry
90; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
91; CHECK-NEXT:    .cfi_def_cfa_offset 16
92; CHECK-NEXT:    .cfi_offset w29, -16
93; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
94; CHECK-NEXT:    .cfi_def_cfa_offset 65552
95; CHECK-NEXT:    str xzr, [sp]
96; CHECK-NEXT:    sub sp, sp, #1040
97; CHECK-NEXT:    .cfi_def_cfa_offset 66592
98; CHECK-NEXT:    str xzr, [sp]
99; CHECK-NEXT:    mov x8, sp
100; CHECK-NEXT:    str x8, [x0]
101; CHECK-NEXT:    add sp, sp, #16, lsl #12 // =65536
102; CHECK-NEXT:    .cfi_def_cfa_offset 1056
103; CHECK-NEXT:    add sp, sp, #1040
104; CHECK-NEXT:    .cfi_def_cfa_offset 16
105; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
106; CHECK-NEXT:    .cfi_def_cfa_offset 0
107; CHECK-NEXT:    .cfi_restore w29
108; CHECK-NEXT:    ret
109entry:
; Expected code (asserted above): a 65536-byte sp decrement with a
; `str xzr, [sp]` probe, then a 1040-byte decrement that is followed by a
; second `str xzr, [sp]` probe.
; Reserve 66576 bytes of stack (66576 x i8, 1-byte aligned).
110  %v = alloca i8, i64 66576, align 1
; Write the buffer's address through %out, which gives the alloca a use.
111  store ptr %v, ptr %out, align 8
112  ret void
113}
114
115; 2*64k+1024, the largest frame needing two probes.
116define void @static_132096(ptr %out) #0 {
117; CHECK-LABEL: static_132096:
118; CHECK:       // %bb.0: // %entry
119; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
120; CHECK-NEXT:    .cfi_def_cfa_offset 16
121; CHECK-NEXT:    .cfi_offset w29, -16
122; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
123; CHECK-NEXT:    .cfi_def_cfa_offset 65552
124; CHECK-NEXT:    str xzr, [sp]
125; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
126; CHECK-NEXT:    .cfi_def_cfa_offset 131088
127; CHECK-NEXT:    str xzr, [sp]
128; CHECK-NEXT:    sub sp, sp, #1024
129; CHECK-NEXT:    .cfi_def_cfa_offset 132112
130; CHECK-NEXT:    mov x8, sp
131; CHECK-NEXT:    str x8, [x0]
132; CHECK-NEXT:    add sp, sp, #32, lsl #12 // =131072
133; CHECK-NEXT:    .cfi_def_cfa_offset 1040
134; CHECK-NEXT:    add sp, sp, #1024
135; CHECK-NEXT:    .cfi_def_cfa_offset 16
136; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
137; CHECK-NEXT:    .cfi_def_cfa_offset 0
138; CHECK-NEXT:    .cfi_restore w29
139; CHECK-NEXT:    ret
140entry:
; Expected code (asserted above): two straight-line 65536-byte sp decrements,
; each followed by a `str xzr, [sp]` probe, then a 1024-byte decrement with no
; probe after it. The epilogue releases the 131072 bytes with a single add.
; Reserve 132096 bytes of stack (132096 x i8, 1-byte aligned).
141  %v = alloca i8, i64 132096, align 1
; Write the buffer's address through %out, which gives the alloca a use.
142  store ptr %v, ptr %out, align 8
143  ret void
144}
145
146; 5*64k-16, the largest frame probed without a loop.
147define void @static_327664(ptr %out) #0 {
148; CHECK-LABEL: static_327664:
149; CHECK:       // %bb.0: // %entry
150; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
151; CHECK-NEXT:    .cfi_def_cfa_offset 16
152; CHECK-NEXT:    .cfi_offset w29, -16
153; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
154; CHECK-NEXT:    .cfi_def_cfa_offset 65552
155; CHECK-NEXT:    str xzr, [sp]
156; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
157; CHECK-NEXT:    .cfi_def_cfa_offset 131088
158; CHECK-NEXT:    str xzr, [sp]
159; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
160; CHECK-NEXT:    .cfi_def_cfa_offset 196624
161; CHECK-NEXT:    str xzr, [sp]
162; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
163; CHECK-NEXT:    .cfi_def_cfa_offset 262160
164; CHECK-NEXT:    str xzr, [sp]
165; CHECK-NEXT:    sub sp, sp, #15, lsl #12 // =61440
166; CHECK-NEXT:    .cfi_def_cfa_offset 323600
167; CHECK-NEXT:    sub sp, sp, #4080
168; CHECK-NEXT:    .cfi_def_cfa_offset 327680
169; CHECK-NEXT:    str xzr, [sp]
170; CHECK-NEXT:    mov x8, sp
171; CHECK-NEXT:    str x8, [x0]
172; CHECK-NEXT:    add sp, sp, #79, lsl #12 // =323584
173; CHECK-NEXT:    .cfi_def_cfa_offset 4096
174; CHECK-NEXT:    add sp, sp, #4080
175; CHECK-NEXT:    .cfi_def_cfa_offset 16
176; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
177; CHECK-NEXT:    .cfi_def_cfa_offset 0
178; CHECK-NEXT:    .cfi_restore w29
179; CHECK-NEXT:    ret
180entry:
; Expected code (asserted above): four straight-line 65536-byte sp decrements,
; each followed by a `str xzr, [sp]` probe, then the remaining 65520 bytes as
; two decrements (61440 + 4080) followed by one more probe. No loop is emitted.
; Reserve 327664 bytes of stack (327664 x i8, 1-byte aligned).
181  %v = alloca i8, i64 327664, align 1
; Write the buffer's address through %out, which gives the alloca a use.
182  store ptr %v, ptr %out, align 8
183  ret void
184}
185
186; 5*64k, smallest frame probed with a loop.
187define void @static_327680(ptr %out) #0 {
188; CHECK-LABEL: static_327680:
189; CHECK:       // %bb.0: // %entry
190; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
191; CHECK-NEXT:    .cfi_def_cfa_offset 16
192; CHECK-NEXT:    .cfi_offset w29, -16
193; CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
194; CHECK-NEXT:    .cfi_def_cfa w9, 327696
195; CHECK-NEXT:  .LBB6_1: // %entry
196; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
197; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
198; CHECK-NEXT:    str xzr, [sp]
199; CHECK-NEXT:    cmp sp, x9
200; CHECK-NEXT:    b.ne .LBB6_1
201; CHECK-NEXT:  // %bb.2: // %entry
202; CHECK-NEXT:    .cfi_def_cfa_register wsp
203; CHECK-NEXT:    mov x8, sp
204; CHECK-NEXT:    str x8, [x0]
205; CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
206; CHECK-NEXT:    .cfi_def_cfa_offset 16
207; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
208; CHECK-NEXT:    .cfi_def_cfa_offset 0
209; CHECK-NEXT:    .cfi_restore w29
210; CHECK-NEXT:    ret
211entry:
; Expected code (asserted above): x9 is set to sp - 327680 up front, then a
; loop lowers sp by 65536 and stores xzr to [sp] until sp equals x9. The CFA is
; expressed relative to w9 while the loop runs and switched back to wsp after.
; Reserve 327680 bytes of stack (327680 x i8, 1-byte aligned).
212  %v = alloca i8, i64 327680, align 1
; Write the buffer's address through %out, which gives the alloca a use.
213  store ptr %v, ptr %out, align 8
214  ret void
215}
216
217; 5*64k+1024, large enough to use a loop, but not a multiple of 64KiB
218; so has a remainder, but no extra probe.
219define void @static_328704(ptr %out) #0 {
220; CHECK-LABEL: static_328704:
221; CHECK:       // %bb.0: // %entry
222; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
223; CHECK-NEXT:    .cfi_def_cfa_offset 16
224; CHECK-NEXT:    .cfi_offset w29, -16
225; CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
226; CHECK-NEXT:    .cfi_def_cfa w9, 327696
227; CHECK-NEXT:  .LBB7_1: // %entry
228; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
229; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
230; CHECK-NEXT:    str xzr, [sp]
231; CHECK-NEXT:    cmp sp, x9
232; CHECK-NEXT:    b.ne .LBB7_1
233; CHECK-NEXT:  // %bb.2: // %entry
234; CHECK-NEXT:    .cfi_def_cfa_register wsp
235; CHECK-NEXT:    sub sp, sp, #1024
236; CHECK-NEXT:    .cfi_def_cfa_offset 328720
237; CHECK-NEXT:    mov x8, sp
238; CHECK-NEXT:    str x8, [x0]
239; CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
240; CHECK-NEXT:    .cfi_def_cfa_offset 1040
241; CHECK-NEXT:    add sp, sp, #1024
242; CHECK-NEXT:    .cfi_def_cfa_offset 16
243; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
244; CHECK-NEXT:    .cfi_def_cfa_offset 0
245; CHECK-NEXT:    .cfi_restore w29
246; CHECK-NEXT:    ret
247entry:
; Expected code (asserted above): the probing loop covers 327680 bytes in
; 65536-byte steps (x9 holds the loop's target sp), then sp drops by the
; remaining 1024 bytes with no `str xzr, [sp]` after that final decrement.
; Reserve 328704 bytes of stack (328704 x i8, 1-byte aligned).
248  %v = alloca i8, i64 328704, align 1
; Write the buffer's address through %out, which gives the alloca a use.
249  store ptr %v, ptr %out, align 8
250  ret void
251}
252
253; 5*64k+1040, large enough to use a loop, has a remainder and
254; an extra probe.
255define void @static_328720(ptr %out) #0 {
256; CHECK-LABEL: static_328720:
257; CHECK:       // %bb.0: // %entry
258; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
259; CHECK-NEXT:    .cfi_def_cfa_offset 16
260; CHECK-NEXT:    .cfi_offset w29, -16
261; CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
262; CHECK-NEXT:    .cfi_def_cfa w9, 327696
263; CHECK-NEXT:  .LBB8_1: // %entry
264; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
265; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
266; CHECK-NEXT:    str xzr, [sp]
267; CHECK-NEXT:    cmp sp, x9
268; CHECK-NEXT:    b.ne .LBB8_1
269; CHECK-NEXT:  // %bb.2: // %entry
270; CHECK-NEXT:    .cfi_def_cfa_register wsp
271; CHECK-NEXT:    sub sp, sp, #1040
272; CHECK-NEXT:    .cfi_def_cfa_offset 328736
273; CHECK-NEXT:    str xzr, [sp]
274; CHECK-NEXT:    mov x8, sp
275; CHECK-NEXT:    str x8, [x0]
276; CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
277; CHECK-NEXT:    .cfi_def_cfa_offset 1056
278; CHECK-NEXT:    add sp, sp, #1040
279; CHECK-NEXT:    .cfi_def_cfa_offset 16
280; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
281; CHECK-NEXT:    .cfi_def_cfa_offset 0
282; CHECK-NEXT:    .cfi_restore w29
283; CHECK-NEXT:    ret
284entry:
; Expected code (asserted above): the probing loop covers 327680 bytes in
; 65536-byte steps (x9 holds the loop's target sp), then sp drops by the
; remaining 1040 bytes and one more `str xzr, [sp]` probe follows.
; Reserve 328720 bytes of stack (328720 x i8, 1-byte aligned).
285  %v = alloca i8, i64 328720, align 1
; Write the buffer's address through %out, which gives the alloca a use.
286  store ptr %v, ptr %out, align 8
287  ret void
288}
289
290; A small allocation, but with a very large alignment requirement. We do this
291; by moving SP far enough that a sufficiently-aligned block will exist
292; somewhere in the stack frame, so must probe the whole of that larger SP move.
293define void @static_16_align_131072(ptr %out) #0 {
294; CHECK-LABEL: static_16_align_131072:
295; CHECK:       // %bb.0: // %entry
296; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
297; CHECK-NEXT:    .cfi_def_cfa_offset 16
298; CHECK-NEXT:    mov x29, sp
299; CHECK-NEXT:    .cfi_def_cfa w29, 16
300; CHECK-NEXT:    .cfi_offset w30, -8
301; CHECK-NEXT:    .cfi_offset w29, -16
302; CHECK-NEXT:    sub x9, sp, #31, lsl #12 // =126976
303; CHECK-NEXT:    sub x9, x9, #4080
304; CHECK-NEXT:    and x9, x9, #0xfffffffffffe0000
305; CHECK-NEXT:  .LBB9_1: // %entry
306; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
307; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
308; CHECK-NEXT:    cmp sp, x9
309; CHECK-NEXT:    b.le .LBB9_3
310; CHECK-NEXT:  // %bb.2: // %entry
311; CHECK-NEXT:    // in Loop: Header=BB9_1 Depth=1
312; CHECK-NEXT:    str xzr, [sp]
313; CHECK-NEXT:    b .LBB9_1
314; CHECK-NEXT:  .LBB9_3: // %entry
315; CHECK-NEXT:    mov sp, x9
316; CHECK-NEXT:    ldr xzr, [sp]
317; CHECK-NEXT:    mov x8, sp
318; CHECK-NEXT:    str x8, [x0]
319; CHECK-NEXT:    mov sp, x29
320; CHECK-NEXT:    .cfi_def_cfa wsp, 16
321; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
322; CHECK-NEXT:    .cfi_def_cfa_offset 0
323; CHECK-NEXT:    .cfi_restore w30
324; CHECK-NEXT:    .cfi_restore w29
325; CHECK-NEXT:    ret
326entry:
; Expected code (asserted above): x29/x30 are saved and x29 is set to sp so the
; epilogue can restore sp from it. The target x9 is (sp - 126976 - 4080) masked
; down to a 131072-byte boundary. The loop lowers sp by 65536 per iteration,
; leaves on `b.le` once sp has reached x9, and otherwise stores xzr to [sp] and
; repeats. After the loop sp is set to x9 and `ldr xzr, [sp]` touches the final
; address.
; Reserve a 16-byte stack object aligned to 131072 bytes.
327  %v = alloca i8, i64 16, align 131072
; Write the buffer's address through %out, which gives the alloca a use.
328  store ptr %v, ptr %out, align 8
329  ret void
330}
331
332; A small allocation, but with a very large alignment requirement which
333; is nevertheless small enough as to not need a loop.
334define void @static_16_align_8192(ptr %out) #0 {
335; CHECK-LABEL: static_16_align_8192:
336; CHECK:       // %bb.0: // %entry
337; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
338; CHECK-NEXT:    .cfi_def_cfa_offset 16
339; CHECK-NEXT:    mov x29, sp
340; CHECK-NEXT:    .cfi_def_cfa w29, 16
341; CHECK-NEXT:    .cfi_offset w30, -8
342; CHECK-NEXT:    .cfi_offset w29, -16
343; CHECK-NEXT:    sub x9, sp, #1, lsl #12 // =4096
344; CHECK-NEXT:    sub x9, x9, #4080
345; CHECK-NEXT:    and sp, x9, #0xffffffffffffe000
346; CHECK-NEXT:    str xzr, [sp]
347; CHECK-NEXT:    mov x8, sp
348; CHECK-NEXT:    str x8, [x0]
349; CHECK-NEXT:    mov sp, x29
350; CHECK-NEXT:    .cfi_def_cfa wsp, 16
351; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
352; CHECK-NEXT:    .cfi_def_cfa_offset 0
353; CHECK-NEXT:    .cfi_restore w30
354; CHECK-NEXT:    .cfi_restore w29
355; CHECK-NEXT:    ret
356entry:
; Expected code (asserted above): x9 = sp - 4096 - 4080, then sp is set to x9
; masked down to an 8192-byte boundary and a single `str xzr, [sp]` probe
; follows; no loop is emitted. x29 holds the entry sp for the epilogue.
; Reserve a 16-byte stack object aligned to 8192 bytes.
357  %v = alloca i8, i64 16, align 8192
; Write the buffer's address through %out, which gives the alloca a use.
358  store ptr %v, ptr %out, align 8
359  ret void
360}
361
362; A large allocation with a very large alignment requirement which
363; is nevertheless small enough as to not need a loop.
364define void @static_32752_align_32k(ptr %out) #0 {
365; CHECK-LABEL: static_32752_align_32k:
366; CHECK:       // %bb.0: // %entry
367; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
368; CHECK-NEXT:    .cfi_def_cfa_offset 16
369; CHECK-NEXT:    mov x29, sp
370; CHECK-NEXT:    .cfi_def_cfa w29, 16
371; CHECK-NEXT:    .cfi_offset w30, -8
372; CHECK-NEXT:    .cfi_offset w29, -16
373; CHECK-NEXT:    sub x9, sp, #7, lsl #12 // =28672
374; CHECK-NEXT:    sub x9, x9, #4080
375; CHECK-NEXT:    and sp, x9, #0xffffffffffff8000
376; CHECK-NEXT:    str xzr, [sp]
377; CHECK-NEXT:    mov x8, sp
378; CHECK-NEXT:    str x8, [x0]
379; CHECK-NEXT:    mov sp, x29
380; CHECK-NEXT:    .cfi_def_cfa wsp, 16
381; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
382; CHECK-NEXT:    .cfi_def_cfa_offset 0
383; CHECK-NEXT:    .cfi_restore w30
384; CHECK-NEXT:    .cfi_restore w29
385; CHECK-NEXT:    ret
386entry:
; Expected code (asserted above): x9 = sp - 28672 - 4080 (32752 bytes in
; total), then sp is set to x9 masked down to a 32768-byte boundary and a
; single `str xzr, [sp]` probe follows; no loop is emitted. x29 holds the entry
; sp for the epilogue.
; Reserve a 32752-byte stack object aligned to 32768 bytes.
387  %v = alloca i8, i64 32752, align 32768
; Write the buffer's address through %out, which gives the alloca a use.
388  store ptr %v, ptr %out, align 8
389  ret void
390}
391
; Attribute group #0, applied to the functions above: asynchronous unwind
; tables, inline stack probing with a 65536-byte (64KiB) probe size, and no
; frame pointer requested.
392attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="65536" "frame-pointer"="none" }
393