; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV32I

; Tests copied from AArch64.

; A dynamically-sized allocation needs a loop which can handle any size at
; runtime. The final iteration of the loop will temporarily put SP below the
; target address, but this doesn't break any of the ABI constraints on the
; stack, and also doesn't probe below the target SP value.
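;
; A minimal sketch of the probe-loop shape the checks below expect, assuming
; the default 4 KiB probe interval (`lui a2, 1` materializes 4096):
;   sub sp, sp, a2        # step SP down by one probe interval
;   sd  zero, 0(sp)       # touch the new page so a guard page would fault
;   blt a0, sp, .LBB0_1   # repeat while SP is still above the target in a0
;   mv  sp, a0            # then move SP to the exact target address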
define void @dynamic(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB0_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB0_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; This function has a fixed-size stack slot and a dynamic one. The fixed-size
; slot isn't large enough that we would normally probe it, but we need to do
; so here, otherwise the gap between the CSR save and the first probe of the
; dynamic allocation could exceed the guard size when the size of the dynamic
; allocation is close to the guard size.
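;
; A sketch of the expected opening sequence (from the RV64 checks below): the
; small fixed-size part is probed immediately after being allocated, so the
; unprobed gap never exceeds one probe interval:
;   addi sp, sp, -80      # fixed part: CSR saves plus the 64-byte slot
;   sd   zero, 0(sp)      # probe it before the dynamic allocation follows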
define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; RV64I-LABEL: dynamic_fixed:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -80
; RV64I-NEXT:    .cfi_def_cfa_offset 80
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 80
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    addi a3, s0, -80
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    sd a3, 0(a1)
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a1, 1
; RV64I-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a1
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB1_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a2)
; RV64I-NEXT:    addi sp, s0, -80
; RV64I-NEXT:    .cfi_def_cfa sp, 80
; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 80
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_fixed:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -80
; RV32I-NEXT:    .cfi_def_cfa_offset 80
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 80
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    addi a1, s0, -72
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    sw a1, 0(a2)
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB1_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a3)
; RV32I-NEXT:    addi sp, s0, -80
; RV32I-NEXT:    .cfi_def_cfa sp, 80
; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 80
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v1 = alloca i8, i64 64, align 1
  store ptr %v1, ptr %out1, align 8
  %v2 = alloca i8, i64 %size, align 1
  store ptr %v2, ptr %out2, align 8
  ret void
}

; Dynamic allocation, with an alignment requirement greater than the alignment
; of SP. Done by ANDing the target SP with a constant to align it down, then
; doing the loop as normal. Note that we also re-align the stack in the
; prologue, which isn't actually needed because the only aligned allocations
; here are dynamic; this happens even without stack probing.
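;
; A minimal sketch of how the target SP is aligned down before the probe loop
; (mirroring the checks below):
;   sub  a0, sp, a0       # target = SP - round_up(%size, 16)
;   andi a0, a0, -64      # align the target down to a 64-byte boundary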
define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_align_64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -64
; RV64I-NEXT:    .cfi_def_cfa_offset 64
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    addi s0, sp, 64
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    andi sp, sp, -64
; RV64I-NEXT:    mv s1, sp
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    andi a0, a0, -64
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB2_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -64
; RV64I-NEXT:    .cfi_def_cfa sp, 64
; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 64
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_align_64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -64
; RV32I-NEXT:    .cfi_def_cfa_offset 64
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    addi s0, sp, 64
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    andi sp, sp, -64
; RV32I-NEXT:    mv s1, sp
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    andi a0, a0, -64
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB2_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -64
; RV32I-NEXT:    .cfi_def_cfa sp, 64
; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 64
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 64
  store ptr %v, ptr %out, align 8
  ret void
}

; Dynamic allocation, with an alignment greater than the stack guard size. The
; only difference from the plain dynamic allocation is the constant used for
; aligning the target SP; the loop will probe the whole allocation without
; needing to know about the alignment padding.
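;
; Worked constant for the RV64 checks below (RV32 is analogous): 8192 does not
; fit in a 12-bit ANDI immediate, so the mask is built with LUI:
;   lui a2, 1048574       # a2 = 0xffffe << 12 = -8192 (low 13 bits clear)
;   and a0, a0, a2        # align the target SP down to 8192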
define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_align_8192:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -2032
; RV64I-NEXT:    .cfi_def_cfa_offset 2032
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 2008(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    addi s0, sp, 2032
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    addi sp, sp, -2048
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    srli a2, sp, 13
; RV64I-NEXT:    slli sp, a2, 13
; RV64I-NEXT:    mv s1, sp
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    lui a2, 1048574
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB3_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -2032
; RV64I-NEXT:    .cfi_def_cfa sp, 2032
; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 2032
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_align_8192:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -2032
; RV32I-NEXT:    .cfi_def_cfa_offset 2032
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 2020(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    addi s0, sp, 2032
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    addi sp, sp, -2048
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    srli a1, sp, 13
; RV32I-NEXT:    slli sp, a1, 13
; RV32I-NEXT:    mv s1, sp
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    lui a1, 1048574
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    and a0, a0, a1
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB3_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -2032
; RV32I-NEXT:    .cfi_def_cfa sp, 2032
; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 2032
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 8192
  store ptr %v, ptr %out, align 8
  ret void
}

; If a function has variable-sized stack objects, then any function calls which
; need to pass arguments on the stack must allocate the stack space for them
; dynamically, to ensure they are at the bottom of the frame.
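;
; A sketch of the size computation from the checks below: the variable object
; is %n i32s, rounded up to the 16-byte stack alignment:
;   slli a0, a0, 2        # %n * 4 bytes
;   addi a0, a0, 15
;   andi a0, a0, -16      # round up to a multiple of 16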
define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV64I-LABEL: no_reserved_call_frame:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB4_1: # %entry
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB4_1
; RV64I-NEXT:  # %bb.2: # %entry
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    call callee_stack_args
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: no_reserved_call_frame:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    mv a1, a2
; RV32I-NEXT:    slli a0, a0, 2
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a2, 1
; RV32I-NEXT:  .LBB4_1: # %entry
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a2
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB4_1
; RV32I-NEXT:  # %bb.2: # %entry
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    call callee_stack_args
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
entry:
  %v = alloca i32, i64 %n
  call void @callee_stack_args(ptr %v, i32 %dummy)
  ret void
}

; Same as above but without a variable-sized allocation, so the reserved call
; frame can be folded into the fixed-size allocation in the prologue.
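;
; Frame-size arithmetic for the checks below (RV64 shown; RV32 is analogous),
; assuming the usual 16-byte stack alignment:
;   100 x i32 = 400 bytes for %v, + 8 bytes for the ra spill -> 408,
;   rounded up to the 416 seen in `addi sp, sp, -416`.
; Since the whole frame is static and well below the 4 KiB probe interval, no
; probe stores are emitted.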
define void @reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV64I-LABEL: reserved_call_frame:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    addi sp, sp, -416
; RV64I-NEXT:    .cfi_def_cfa_offset 416
; RV64I-NEXT:    sd ra, 408(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    addi a0, sp, 8
; RV64I-NEXT:    call callee_stack_args
; RV64I-NEXT:    ld ra, 408(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    addi sp, sp, 416
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: reserved_call_frame:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    addi sp, sp, -416
; RV32I-NEXT:    .cfi_def_cfa_offset 416
; RV32I-NEXT:    sw ra, 412(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    mv a1, a2
; RV32I-NEXT:    addi a0, sp, 12
; RV32I-NEXT:    call callee_stack_args
; RV32I-NEXT:    lw ra, 412(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    addi sp, sp, 416
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
entry:
  %v = alloca i32, i64 100
  call void @callee_stack_args(ptr %v, i32 %dummy)
  ret void
}

declare void @callee_stack_args(ptr, i32)

; Dynamic allocation of scalable vectors.
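;
; A sketch of the size computation from the checks below: one
; <vscale x 4 x float> is vscale * 16 bytes = 2 * vlenb bytes (vlenb = VLEN/8
; and vscale = VLEN/64 for RVV), so the byte size is computed as:
;   csrr a2, vlenb        # bytes per vector register
;   mul  a0, a2, a0       # vlenb * %size
;   slli a0, a0, 1        # * 2 -> %size scalable vectors in bytes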
define void @dynamic_vector(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_vector:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    csrr a2, vlenb
; RV64I-NEXT:    mul a0, a2, a0
; RV64I-NEXT:    slli a0, a0, 1
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB6_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_vector:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    csrr a1, vlenb
; RV32I-NEXT:    mul a0, a1, a0
; RV32I-NEXT:    slli a0, a0, 1
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB6_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca <vscale x 4 x float>, i64 %size, align 16
  store ptr %v, ptr %out, align 8
  ret void
}

attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }
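
; Note: "probe-stack"="inline-asm" is what requests the inline probing
; sequences checked above, and uwtable(async) keeps the full set of .cfi_*
; directives in the output so the CFI checks are meaningful.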