; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/stack-probing-rvv.ll (revision 6f53886a9a5e65136619ada7713f31942a1cc1fa)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64IV
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV32IV

; Tests adapted from AArch64.

; Test prolog sequences for stack probing when vector is involved.

; The space for vector objects needs probing in the general case, because
; the stack adjustment may happen to be too big (i.e. greater than the
; probe size).
define void @f_vector(ptr %out) #0 {
; RV64IV-LABEL: f_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 1
; RV64IV-NEXT:    .cfi_def_cfa t1, -16
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB0_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB0_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 1
; RV32IV-NEXT:    .cfi_def_cfa t1, -16
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB0_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB0_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 4 vectors of stack space.
define void @f4_vector(ptr %out) #0 {
; RV64IV-LABEL: f4_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 3
; RV64IV-NEXT:    .cfi_def_cfa t1, -64
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB1_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB1_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 3
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f4_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 3
; RV32IV-NEXT:    .cfi_def_cfa t1, -64
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB1_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB1_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 3
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 16 vectors of stack space.
; The stack adjustment is less than or equal to 16 x 256 = 4096, so
; we can allocate the locals at once.
define void @f16_vector(ptr %out) #0 {
; RV64IV-LABEL: f16_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 5
; RV64IV-NEXT:    .cfi_def_cfa t1, -256
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB2_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB2_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 5
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f16_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 5
; RV32IV-NEXT:    .cfi_def_cfa t1, -256
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB2_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB2_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 5
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  %vec5 = alloca <vscale x 4 x float>, align 16
  %vec6 = alloca <vscale x 4 x float>, align 16
  %vec7 = alloca <vscale x 4 x float>, align 16
  %vec8 = alloca <vscale x 4 x float>, align 16
  %vec9 = alloca <vscale x 4 x float>, align 16
  %vec10 = alloca <vscale x 4 x float>, align 16
  %vec11 = alloca <vscale x 4 x float>, align 16
  %vec12 = alloca <vscale x 4 x float>, align 16
  %vec13 = alloca <vscale x 4 x float>, align 16
  %vec14 = alloca <vscale x 4 x float>, align 16
  %vec15 = alloca <vscale x 4 x float>, align 16
  %vec16 = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 17 vectors of stack space.
define void @f17_vector(ptr %out) #0 {
; RV64IV-LABEL: f17_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    li a0, 34
; RV64IV-NEXT:    mul t1, t1, a0
; RV64IV-NEXT:    .cfi_def_cfa t1, -272
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB3_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB3_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    li a1, 34
; RV64IV-NEXT:    mul a0, a0, a1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f17_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    li a0, 34
; RV32IV-NEXT:    mul t1, t1, a0
; RV32IV-NEXT:    .cfi_def_cfa t1, -272
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB3_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB3_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    li a1, 34
; RV32IV-NEXT:    mul a0, a0, a1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  %vec5 = alloca <vscale x 4 x float>, align 16
  %vec6 = alloca <vscale x 4 x float>, align 16
  %vec7 = alloca <vscale x 4 x float>, align 16
  %vec8 = alloca <vscale x 4 x float>, align 16
  %vec9 = alloca <vscale x 4 x float>, align 16
  %vec10 = alloca <vscale x 4 x float>, align 16
  %vec11 = alloca <vscale x 4 x float>, align 16
  %vec12 = alloca <vscale x 4 x float>, align 16
  %vec13 = alloca <vscale x 4 x float>, align 16
  %vec14 = alloca <vscale x 4 x float>, align 16
  %vec15 = alloca <vscale x 4 x float>, align 16
  %vec16 = alloca <vscale x 4 x float>, align 16
  %vec17 = alloca <vscale x 4 x float>, align 16
  ret void
}

; A vector and a 16-byte fixed size object.
define void @f1_vector_16_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_16_arr:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    addi sp, sp, -16
; RV64IV-NEXT:    .cfi_def_cfa_offset 16
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 1
; RV64IV-NEXT:    .cfi_def_cfa t1, -16
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB4_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB4_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 16
; RV64IV-NEXT:    addi sp, sp, 16
; RV64IV-NEXT:    .cfi_def_cfa_offset 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f1_vector_16_arr:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    addi sp, sp, -16
; RV32IV-NEXT:    .cfi_def_cfa_offset 16
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 1
; RV32IV-NEXT:    .cfi_def_cfa t1, -16
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB4_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB4_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 16
; RV32IV-NEXT:    addi sp, sp, 16
; RV32IV-NEXT:    .cfi_def_cfa_offset 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 4 x float>, align 16
  %arr = alloca i8, i64 16, align 1
  ret void
}

; A large vector object and a large slot, both of which need probing.
define void @f1_vector_4096_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_4096_arr:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 4096
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 8192
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 12288
; RV64IV-NEXT:    addi sp, sp, -16
; RV64IV-NEXT:    .cfi_def_cfa_offset 12304
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 7
; RV64IV-NEXT:    .cfi_def_cfa t1, -1024
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB5_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB5_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 7
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 12304
; RV64IV-NEXT:    lui a0, 3
; RV64IV-NEXT:    addiw a0, a0, 16
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa_offset 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f1_vector_4096_arr:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 4096
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 8192
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 12288
; RV32IV-NEXT:    addi sp, sp, -16
; RV32IV-NEXT:    .cfi_def_cfa_offset 12304
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 7
; RV32IV-NEXT:    .cfi_def_cfa t1, -1024
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB5_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB5_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 7
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 12304
; RV32IV-NEXT:    lui a0, 3
; RV32IV-NEXT:    addi a0, a0, 16
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa_offset 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 256 x float>, align 16
  %arr = alloca i8, i64 12288, align 1
  ret void
}

; "probe-stack"="inline-asm" enables inline stack probing; uwtable(async)
; forces the precise CFI directives the checks above assert on.
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }