xref: /llvm-project/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll (revision 6e1ea7e5a7b6e581bf9a030b98a7f63ee2833278)
1; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
2; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
3; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=NO-SVE-CHECK
4; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK
5
; External callees declared without an attribute group. The tests visible in
; this file call them from functions whose expected output wraps the call in
; `smstop sm` / `smstart sm`.
6declare void @callee();
7declare void @fixed_callee(<4 x i32>);
8declare void @scalable_callee(<vscale x 2 x i64>);
9
; External callees declared with attribute group #0.
; NOTE(review): attribute group #0 is defined outside this view; going by the
; names it presumably marks streaming functions - confirm against the
; attribute definitions at the end of the file.
10declare void @streaming_callee() #0;
11declare void @streaming_callee_with_arg(i32) #0;
12
13; Simple example of a function with one call requiring a streaming mode change
14;
15define void @vg_unwind_simple() #0 {
; Makes a single call to @callee. The expected output below brackets that call
; with `smstop sm` / `smstart sm`, and describes where VG was saved with
; `.cfi_offset vg, ...` immediately before the `smstop sm` and
; `.cfi_restore vg` immediately after the `smstart sm`.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; marks this function as streaming - confirm.
;
; Default run: 80-byte frame. The `cntd x9` result is stored next to x30
; (`stp x30, x9, [sp, #64]`), so it lives at sp + 72, i.e. CFA - 8.
16; CHECK-LABEL: vg_unwind_simple:
17; CHECK:       // %bb.0:
18; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
19; CHECK-NEXT:    .cfi_def_cfa_offset 80
20; CHECK-NEXT:    cntd x9
21; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
22; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
23; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
24; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
25; CHECK-NEXT:    .cfi_offset w30, -16
26; CHECK-NEXT:    .cfi_offset b8, -24
27; CHECK-NEXT:    .cfi_offset b9, -32
28; CHECK-NEXT:    .cfi_offset b10, -40
29; CHECK-NEXT:    .cfi_offset b11, -48
30; CHECK-NEXT:    .cfi_offset b12, -56
31; CHECK-NEXT:    .cfi_offset b13, -64
32; CHECK-NEXT:    .cfi_offset b14, -72
33; CHECK-NEXT:    .cfi_offset b15, -80
34; CHECK-NEXT:    .cfi_offset vg, -8
35; CHECK-NEXT:    smstop sm
36; CHECK-NEXT:    bl callee
37; CHECK-NEXT:    smstart sm
38; CHECK-NEXT:    .cfi_restore vg
39; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
40; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
41; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
42; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
43; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
44; CHECK-NEXT:    .cfi_def_cfa_offset 0
45; CHECK-NEXT:    .cfi_restore w30
46; CHECK-NEXT:    .cfi_restore b8
47; CHECK-NEXT:    .cfi_restore b9
48; CHECK-NEXT:    .cfi_restore b10
49; CHECK-NEXT:    .cfi_restore b11
50; CHECK-NEXT:    .cfi_restore b12
51; CHECK-NEXT:    .cfi_restore b13
52; CHECK-NEXT:    .cfi_restore b14
53; CHECK-NEXT:    .cfi_restore b15
54; CHECK-NEXT:    ret
55;
; -frame-pointer=non-leaf run: 96-byte frame with x29 = sp + 64 and the CFA
; defined as w29 + 32. x9 gets its own slot at [sp, #80], i.e. CFA - 16.
56; FP-CHECK-LABEL: vg_unwind_simple:
57; FP-CHECK:       // %bb.0:
58; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
59; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
60; FP-CHECK-NEXT:    cntd x9
61; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
62; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
63; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
64; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
65; FP-CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
66; FP-CHECK-NEXT:    add x29, sp, #64
67; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
68; FP-CHECK-NEXT:    .cfi_offset w30, -24
69; FP-CHECK-NEXT:    .cfi_offset w29, -32
70; FP-CHECK-NEXT:    .cfi_offset b8, -40
71; FP-CHECK-NEXT:    .cfi_offset b9, -48
72; FP-CHECK-NEXT:    .cfi_offset b10, -56
73; FP-CHECK-NEXT:    .cfi_offset b11, -64
74; FP-CHECK-NEXT:    .cfi_offset b12, -72
75; FP-CHECK-NEXT:    .cfi_offset b13, -80
76; FP-CHECK-NEXT:    .cfi_offset b14, -88
77; FP-CHECK-NEXT:    .cfi_offset b15, -96
78; FP-CHECK-NEXT:    .cfi_offset vg, -16
79; FP-CHECK-NEXT:    smstop sm
80; FP-CHECK-NEXT:    bl callee
81; FP-CHECK-NEXT:    smstart sm
82; FP-CHECK-NEXT:    .cfi_restore vg
83; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
84; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
85; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
86; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
87; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
88; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
89; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
90; FP-CHECK-NEXT:    .cfi_restore w30
91; FP-CHECK-NEXT:    .cfi_restore w29
92; FP-CHECK-NEXT:    .cfi_restore b8
93; FP-CHECK-NEXT:    .cfi_restore b9
94; FP-CHECK-NEXT:    .cfi_restore b10
95; FP-CHECK-NEXT:    .cfi_restore b11
96; FP-CHECK-NEXT:    .cfi_restore b12
97; FP-CHECK-NEXT:    .cfi_restore b13
98; FP-CHECK-NEXT:    .cfi_restore b14
99; FP-CHECK-NEXT:    .cfi_restore b15
100; FP-CHECK-NEXT:    ret
101;
; Machine-outliner run: the output for this function must not reference any
; OUTLINED_FUNCTION_ symbol.
102; OUTLINER-CHECK-LABEL: vg_unwind_simple:
103; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
104;
; IR under test: one call to the attribute-less @callee, then return.
105  call void @callee();
106  ret void;
107}
108
109; As above, with an extra register clobbered by the inline asm call which
110; changes NeedsGapToAlignStack to false
111;
112define void @vg_unwind_needs_gap() #0 {
; Same single call to @callee as vg_unwind_simple, preceded by an empty inline
; asm that clobbers x20, so x20 has to be saved and restored as well. The empty
; asm shows up in the expected output as the `//APP` / `//NO_APP` pair.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; marks this function as streaming - confirm.
;
; Default run: 96-byte frame. x9 (from `cntd x9`) is paired with x30 at
; [sp, #64], so it lives at sp + 72 = CFA - 24; x20 takes [sp, #80] = CFA - 16.
113; CHECK-LABEL: vg_unwind_needs_gap:
114; CHECK:       // %bb.0:
115; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
116; CHECK-NEXT:    .cfi_def_cfa_offset 96
117; CHECK-NEXT:    cntd x9
118; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
119; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
120; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
121; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
122; CHECK-NEXT:    str x20, [sp, #80] // 8-byte Folded Spill
123; CHECK-NEXT:    .cfi_offset w20, -16
124; CHECK-NEXT:    .cfi_offset w30, -32
125; CHECK-NEXT:    .cfi_offset b8, -40
126; CHECK-NEXT:    .cfi_offset b9, -48
127; CHECK-NEXT:    .cfi_offset b10, -56
128; CHECK-NEXT:    .cfi_offset b11, -64
129; CHECK-NEXT:    .cfi_offset b12, -72
130; CHECK-NEXT:    .cfi_offset b13, -80
131; CHECK-NEXT:    .cfi_offset b14, -88
132; CHECK-NEXT:    .cfi_offset b15, -96
133; CHECK-NEXT:    //APP
134; CHECK-NEXT:    //NO_APP
135; CHECK-NEXT:    .cfi_offset vg, -24
136; CHECK-NEXT:    smstop sm
137; CHECK-NEXT:    bl callee
138; CHECK-NEXT:    smstart sm
139; CHECK-NEXT:    .cfi_restore vg
140; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
141; CHECK-NEXT:    ldr x20, [sp, #80] // 8-byte Folded Reload
142; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
143; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
144; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
145; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
146; CHECK-NEXT:    .cfi_def_cfa_offset 0
147; CHECK-NEXT:    .cfi_restore w20
148; CHECK-NEXT:    .cfi_restore w30
149; CHECK-NEXT:    .cfi_restore b8
150; CHECK-NEXT:    .cfi_restore b9
151; CHECK-NEXT:    .cfi_restore b10
152; CHECK-NEXT:    .cfi_restore b11
153; CHECK-NEXT:    .cfi_restore b12
154; CHECK-NEXT:    .cfi_restore b13
155; CHECK-NEXT:    .cfi_restore b14
156; CHECK-NEXT:    .cfi_restore b15
157; CHECK-NEXT:    ret
158;
; -frame-pointer=non-leaf run: still a 96-byte frame; x9 and x20 share one
; pair store at [sp, #80], giving VG at CFA - 16 and x20 at CFA - 8.
159; FP-CHECK-LABEL: vg_unwind_needs_gap:
160; FP-CHECK:       // %bb.0:
161; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
162; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
163; FP-CHECK-NEXT:    cntd x9
164; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
165; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
166; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
167; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
168; FP-CHECK-NEXT:    stp x9, x20, [sp, #80] // 16-byte Folded Spill
169; FP-CHECK-NEXT:    add x29, sp, #64
170; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
171; FP-CHECK-NEXT:    .cfi_offset w20, -8
172; FP-CHECK-NEXT:    .cfi_offset w30, -24
173; FP-CHECK-NEXT:    .cfi_offset w29, -32
174; FP-CHECK-NEXT:    .cfi_offset b8, -40
175; FP-CHECK-NEXT:    .cfi_offset b9, -48
176; FP-CHECK-NEXT:    .cfi_offset b10, -56
177; FP-CHECK-NEXT:    .cfi_offset b11, -64
178; FP-CHECK-NEXT:    .cfi_offset b12, -72
179; FP-CHECK-NEXT:    .cfi_offset b13, -80
180; FP-CHECK-NEXT:    .cfi_offset b14, -88
181; FP-CHECK-NEXT:    .cfi_offset b15, -96
182; FP-CHECK-NEXT:    //APP
183; FP-CHECK-NEXT:    //NO_APP
184; FP-CHECK-NEXT:    .cfi_offset vg, -16
185; FP-CHECK-NEXT:    smstop sm
186; FP-CHECK-NEXT:    bl callee
187; FP-CHECK-NEXT:    smstart sm
188; FP-CHECK-NEXT:    .cfi_restore vg
189; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
190; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
191; FP-CHECK-NEXT:    ldr x20, [sp, #88] // 8-byte Folded Reload
192; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
193; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
194; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
195; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
196; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
197; FP-CHECK-NEXT:    .cfi_restore w20
198; FP-CHECK-NEXT:    .cfi_restore w30
199; FP-CHECK-NEXT:    .cfi_restore w29
200; FP-CHECK-NEXT:    .cfi_restore b8
201; FP-CHECK-NEXT:    .cfi_restore b9
202; FP-CHECK-NEXT:    .cfi_restore b10
203; FP-CHECK-NEXT:    .cfi_restore b11
204; FP-CHECK-NEXT:    .cfi_restore b12
205; FP-CHECK-NEXT:    .cfi_restore b13
206; FP-CHECK-NEXT:    .cfi_restore b14
207; FP-CHECK-NEXT:    .cfi_restore b15
208; FP-CHECK-NEXT:    ret
209;
; Machine-outliner run: the output for this function must not reference any
; OUTLINED_FUNCTION_ symbol.
210; OUTLINER-CHECK-LABEL: vg_unwind_needs_gap:
211; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
212;
; IR under test: the empty asm exists only for its x20 clobber.
213  call void asm sideeffect "", "~{x20}"()
214  call void @callee();
215  ret void;
216}
217
218define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 {
; Forwards a fixed-width <4 x i32> argument to @fixed_callee. In the expected
; output q0 is spilled to [sp] before `smstop sm` and reloaded right after it,
; before the `bl`; the VG CFI directives still sit directly around the
; `smstop sm` ... `smstart sm` region.
; NOTE(review): presumably the spill/reload is needed because the mode switch
; does not preserve the vector register contents - confirm.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; marks this function as streaming - confirm.
;
; Default run: 96-byte frame allocated with `sub sp`; the q0 spill slot is at
; [sp], and x9 is paired with x30 at [sp, #80], so VG lives at CFA - 8.
219; CHECK-LABEL: vg_unwind_with_fixed_args:
220; CHECK:       // %bb.0:
221; CHECK-NEXT:    sub sp, sp, #96
222; CHECK-NEXT:    .cfi_def_cfa_offset 96
223; CHECK-NEXT:    cntd x9
224; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
225; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
226; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
227; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
228; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
229; CHECK-NEXT:    .cfi_offset w30, -16
230; CHECK-NEXT:    .cfi_offset b8, -24
231; CHECK-NEXT:    .cfi_offset b9, -32
232; CHECK-NEXT:    .cfi_offset b10, -40
233; CHECK-NEXT:    .cfi_offset b11, -48
234; CHECK-NEXT:    .cfi_offset b12, -56
235; CHECK-NEXT:    .cfi_offset b13, -64
236; CHECK-NEXT:    .cfi_offset b14, -72
237; CHECK-NEXT:    .cfi_offset b15, -80
238; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
239; CHECK-NEXT:    .cfi_offset vg, -8
240; CHECK-NEXT:    smstop sm
241; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
242; CHECK-NEXT:    bl fixed_callee
243; CHECK-NEXT:    smstart sm
244; CHECK-NEXT:    .cfi_restore vg
245; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
246; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
247; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
248; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
249; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
250; CHECK-NEXT:    add sp, sp, #96
251; CHECK-NEXT:    .cfi_def_cfa_offset 0
252; CHECK-NEXT:    .cfi_restore w30
253; CHECK-NEXT:    .cfi_restore b8
254; CHECK-NEXT:    .cfi_restore b9
255; CHECK-NEXT:    .cfi_restore b10
256; CHECK-NEXT:    .cfi_restore b11
257; CHECK-NEXT:    .cfi_restore b12
258; CHECK-NEXT:    .cfi_restore b13
259; CHECK-NEXT:    .cfi_restore b14
260; CHECK-NEXT:    .cfi_restore b15
261; CHECK-NEXT:    ret
262;
; -frame-pointer=non-leaf run: 112-byte frame with x29 = sp + 80 and the CFA
; defined as w29 + 32. x9 gets its own slot at [sp, #96], i.e. CFA - 16.
263; FP-CHECK-LABEL: vg_unwind_with_fixed_args:
264; FP-CHECK:       // %bb.0:
265; FP-CHECK-NEXT:    sub sp, sp, #112
266; FP-CHECK-NEXT:    .cfi_def_cfa_offset 112
267; FP-CHECK-NEXT:    cntd x9
268; FP-CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
269; FP-CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
270; FP-CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
271; FP-CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
272; FP-CHECK-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
273; FP-CHECK-NEXT:    str x9, [sp, #96] // 8-byte Folded Spill
274; FP-CHECK-NEXT:    add x29, sp, #80
275; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
276; FP-CHECK-NEXT:    .cfi_offset w30, -24
277; FP-CHECK-NEXT:    .cfi_offset w29, -32
278; FP-CHECK-NEXT:    .cfi_offset b8, -40
279; FP-CHECK-NEXT:    .cfi_offset b9, -48
280; FP-CHECK-NEXT:    .cfi_offset b10, -56
281; FP-CHECK-NEXT:    .cfi_offset b11, -64
282; FP-CHECK-NEXT:    .cfi_offset b12, -72
283; FP-CHECK-NEXT:    .cfi_offset b13, -80
284; FP-CHECK-NEXT:    .cfi_offset b14, -88
285; FP-CHECK-NEXT:    .cfi_offset b15, -96
286; FP-CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
287; FP-CHECK-NEXT:    .cfi_offset vg, -16
288; FP-CHECK-NEXT:    smstop sm
289; FP-CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
290; FP-CHECK-NEXT:    bl fixed_callee
291; FP-CHECK-NEXT:    smstart sm
292; FP-CHECK-NEXT:    .cfi_restore vg
293; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 112
294; FP-CHECK-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
295; FP-CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
296; FP-CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
297; FP-CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
298; FP-CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
299; FP-CHECK-NEXT:    add sp, sp, #112
300; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
301; FP-CHECK-NEXT:    .cfi_restore w30
302; FP-CHECK-NEXT:    .cfi_restore w29
303; FP-CHECK-NEXT:    .cfi_restore b8
304; FP-CHECK-NEXT:    .cfi_restore b9
305; FP-CHECK-NEXT:    .cfi_restore b10
306; FP-CHECK-NEXT:    .cfi_restore b11
307; FP-CHECK-NEXT:    .cfi_restore b12
308; FP-CHECK-NEXT:    .cfi_restore b13
309; FP-CHECK-NEXT:    .cfi_restore b14
310; FP-CHECK-NEXT:    .cfi_restore b15
311; FP-CHECK-NEXT:    ret
312;
; Machine-outliner run: the output for this function must not reference any
; OUTLINED_FUNCTION_ symbol.
313; OUTLINER-CHECK-LABEL: vg_unwind_with_fixed_args:
314; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
315;
; IR under test: pass the incoming vector straight through to @fixed_callee.
316  call void @fixed_callee(<4 x i32> %x);
317  ret void;
318}
319
320define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
; Forwards a scalable-vector argument to @scalable_callee after an empty
; inline asm that clobbers x28. The expected output saves a large set of Z and
; P registers in a scalable area below the fixed-size callee saves, spills z0
; before `smstop sm`, and reloads it right after, before the `bl`.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; marks this function as streaming - confirm.
;
; Default run: 32-byte fixed area (x29/x30, x9, x28) followed by
; `addvl sp, sp, #-18` for the Z/P saves and `addvl sp, sp, #-1` for the z0
; spill. With no frame pointer the CFA is described by `.cfi_escape`
; expressions (sp + 32 + N * VG). x9 is stored at [sp, #16] of the fixed area,
; i.e. CFA - 16.
321; CHECK-LABEL: vg_unwind_with_sve_args:
322; CHECK:       // %bb.0:
323; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
324; CHECK-NEXT:    .cfi_def_cfa_offset 32
325; CHECK-NEXT:    cntd x9
326; CHECK-NEXT:    stp x9, x28, [sp, #16] // 16-byte Folded Spill
327; CHECK-NEXT:    .cfi_offset w28, -8
328; CHECK-NEXT:    .cfi_offset w30, -24
329; CHECK-NEXT:    .cfi_offset w29, -32
330; CHECK-NEXT:    addvl sp, sp, #-18
331; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
332; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
333; CHECK-NEXT:    ptrue pn8.b
334; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
335; CHECK-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
336; CHECK-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
337; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
338; CHECK-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
339; CHECK-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
340; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
341; CHECK-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
342; CHECK-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
343; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
344; CHECK-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
345; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
346; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
347; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
348; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
349; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
350; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
351; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
352; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
353; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
354; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
355; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
356; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
357; CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG
358; CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 32 - 40 * VG
359; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
360; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
361; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
362; CHECK-NEXT:    addvl sp, sp, #-1
363; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
364; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
365; CHECK-NEXT:    //APP
366; CHECK-NEXT:    //NO_APP
367; CHECK-NEXT:    .cfi_offset vg, -16
368; CHECK-NEXT:    smstop sm
369; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
370; CHECK-NEXT:    bl scalable_callee
371; CHECK-NEXT:    smstart sm
372; CHECK-NEXT:    .cfi_restore vg
373; CHECK-NEXT:    addvl sp, sp, #1
374; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
375; CHECK-NEXT:    ptrue pn8.b
376; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
377; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
378; CHECK-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
379; CHECK-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
380; CHECK-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
381; CHECK-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
382; CHECK-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
383; CHECK-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
384; CHECK-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
385; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
386; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
387; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
388; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
389; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
390; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
391; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
392; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
393; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
394; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
395; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
396; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
397; CHECK-NEXT:    addvl sp, sp, #18
398; CHECK-NEXT:    .cfi_def_cfa wsp, 32
399; CHECK-NEXT:    .cfi_restore z8
400; CHECK-NEXT:    .cfi_restore z9
401; CHECK-NEXT:    .cfi_restore z10
402; CHECK-NEXT:    .cfi_restore z11
403; CHECK-NEXT:    .cfi_restore z12
404; CHECK-NEXT:    .cfi_restore z13
405; CHECK-NEXT:    .cfi_restore z14
406; CHECK-NEXT:    .cfi_restore z15
407; CHECK-NEXT:    ldr x28, [sp, #24] // 8-byte Folded Reload
408; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
409; CHECK-NEXT:    .cfi_def_cfa_offset 0
410; CHECK-NEXT:    .cfi_restore w28
411; CHECK-NEXT:    .cfi_restore w30
412; CHECK-NEXT:    .cfi_restore w29
413; CHECK-NEXT:    ret
414;
; -frame-pointer=non-leaf run: 48-byte fixed area with x29 = sp and the CFA
; defined as w29 + 48, so no `.cfi_escape` is needed for the CFA while sp
; moves by scalable amounts. x27 is saved alongside x28. x9 is stored at
; [sp, #16], i.e. CFA - 32, and the z0 spill slot is addressed from x29
; (`[x29, #-19, mul vl]`).
415; FP-CHECK-LABEL: vg_unwind_with_sve_args:
416; FP-CHECK:       // %bb.0:
417; FP-CHECK-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
418; FP-CHECK-NEXT:    .cfi_def_cfa_offset 48
419; FP-CHECK-NEXT:    cntd x9
420; FP-CHECK-NEXT:    stp x28, x27, [sp, #32] // 16-byte Folded Spill
421; FP-CHECK-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
422; FP-CHECK-NEXT:    mov x29, sp
423; FP-CHECK-NEXT:    .cfi_def_cfa w29, 48
424; FP-CHECK-NEXT:    .cfi_offset w27, -8
425; FP-CHECK-NEXT:    .cfi_offset w28, -16
426; FP-CHECK-NEXT:    .cfi_offset w30, -40
427; FP-CHECK-NEXT:    .cfi_offset w29, -48
428; FP-CHECK-NEXT:    addvl sp, sp, #-18
429; FP-CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
430; FP-CHECK-NEXT:    ptrue pn8.b
431; FP-CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
432; FP-CHECK-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
433; FP-CHECK-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
434; FP-CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
435; FP-CHECK-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
436; FP-CHECK-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
437; FP-CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
438; FP-CHECK-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
439; FP-CHECK-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
440; FP-CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
441; FP-CHECK-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
442; FP-CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
443; FP-CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
444; FP-CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
445; FP-CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
446; FP-CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
447; FP-CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
448; FP-CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
449; FP-CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
450; FP-CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
451; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
452; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
453; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
454; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
455; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
456; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
457; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
458; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
459; FP-CHECK-NEXT:    addvl sp, sp, #-1
460; FP-CHECK-NEXT:    str z0, [x29, #-19, mul vl] // 16-byte Folded Spill
461; FP-CHECK-NEXT:    //APP
462; FP-CHECK-NEXT:    //NO_APP
463; FP-CHECK-NEXT:    .cfi_offset vg, -32
464; FP-CHECK-NEXT:    smstop sm
465; FP-CHECK-NEXT:    ldr z0, [x29, #-19, mul vl] // 16-byte Folded Reload
466; FP-CHECK-NEXT:    bl scalable_callee
467; FP-CHECK-NEXT:    smstart sm
468; FP-CHECK-NEXT:    .cfi_restore vg
469; FP-CHECK-NEXT:    addvl sp, sp, #1
470; FP-CHECK-NEXT:    ptrue pn8.b
471; FP-CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
472; FP-CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
473; FP-CHECK-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
474; FP-CHECK-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
475; FP-CHECK-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
476; FP-CHECK-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
477; FP-CHECK-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
478; FP-CHECK-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
479; FP-CHECK-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
480; FP-CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
481; FP-CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
482; FP-CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
483; FP-CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
484; FP-CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
485; FP-CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
486; FP-CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
487; FP-CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
488; FP-CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
489; FP-CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
490; FP-CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
491; FP-CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
492; FP-CHECK-NEXT:    addvl sp, sp, #18
493; FP-CHECK-NEXT:    .cfi_restore z8
494; FP-CHECK-NEXT:    .cfi_restore z9
495; FP-CHECK-NEXT:    .cfi_restore z10
496; FP-CHECK-NEXT:    .cfi_restore z11
497; FP-CHECK-NEXT:    .cfi_restore z12
498; FP-CHECK-NEXT:    .cfi_restore z13
499; FP-CHECK-NEXT:    .cfi_restore z14
500; FP-CHECK-NEXT:    .cfi_restore z15
501; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 48
502; FP-CHECK-NEXT:    ldp x28, x27, [sp, #32] // 16-byte Folded Reload
503; FP-CHECK-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
504; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
505; FP-CHECK-NEXT:    .cfi_restore w27
506; FP-CHECK-NEXT:    .cfi_restore w28
507; FP-CHECK-NEXT:    .cfi_restore w30
508; FP-CHECK-NEXT:    .cfi_restore w29
509; FP-CHECK-NEXT:    ret
510;
; Machine-outliner run: the output for this function must not reference any
; OUTLINED_FUNCTION_ symbol.
511; OUTLINER-CHECK-LABEL: vg_unwind_with_sve_args:
512; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
513;
; IR under test: the empty asm exists only for its x28 clobber; the scalable
; argument is passed straight through to @scalable_callee.
514  call void asm sideeffect "", "~{x28}"()
515  call void @scalable_callee(<vscale x 2 x i64> %x);
516  ret void;
517}
518
519; This test was based on stack-probing-64k.ll and tries to test multiple uses of
520; findScratchNonCalleeSaveRegister.
521;
522define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 {
; Allocates a 327680-byte local, stores its address through %out, then calls
; @callee. In the expected output x9 is used twice in the prologue: first for
; `cntd x9` (stored as the VG save), then as the end address of the loop that
; lowers sp 4096 bytes at a time, storing xzr at each step.
; NOTE(review): attribute group #1 is defined outside this view; presumably it
; enables streaming mode and inline stack probing - confirm.
;
; Default run: 96-byte callee-save area with x9 at [sp, #80], i.e. CFA - 16.
; While the probe loop moves sp, the CFA is temporarily tracked through w9
; (`.cfi_def_cfa w9, 327776`, i.e. 327680 + 96) and switched back to wsp
; once the loop has finished.
523; CHECK-LABEL: vg_unwind_multiple_scratch_regs:
524; CHECK:       // %bb.0: // %entry
525; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
526; CHECK-NEXT:    .cfi_def_cfa_offset 96
527; CHECK-NEXT:    cntd x9
528; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
529; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
530; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
531; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
532; CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
533; CHECK-NEXT:    .cfi_offset w30, -24
534; CHECK-NEXT:    .cfi_offset w29, -32
535; CHECK-NEXT:    .cfi_offset b8, -40
536; CHECK-NEXT:    .cfi_offset b9, -48
537; CHECK-NEXT:    .cfi_offset b10, -56
538; CHECK-NEXT:    .cfi_offset b11, -64
539; CHECK-NEXT:    .cfi_offset b12, -72
540; CHECK-NEXT:    .cfi_offset b13, -80
541; CHECK-NEXT:    .cfi_offset b14, -88
542; CHECK-NEXT:    .cfi_offset b15, -96
543; CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
544; CHECK-NEXT:    .cfi_def_cfa w9, 327776
545; CHECK-NEXT:  .LBB4_1: // %entry
546; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
547; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
548; CHECK-NEXT:    cmp sp, x9
549; CHECK-NEXT:    str xzr, [sp]
550; CHECK-NEXT:    b.ne .LBB4_1
551; CHECK-NEXT:  // %bb.2: // %entry
552; CHECK-NEXT:    .cfi_def_cfa_register wsp
553; CHECK-NEXT:    mov x8, sp
554; CHECK-NEXT:    str x8, [x0]
555; CHECK-NEXT:    .cfi_offset vg, -16
556; CHECK-NEXT:    smstop sm
557; CHECK-NEXT:    bl callee
558; CHECK-NEXT:    smstart sm
559; CHECK-NEXT:    .cfi_restore vg
560; CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
561; CHECK-NEXT:    .cfi_def_cfa_offset 96
562; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
563; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
564; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
565; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
566; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
567; CHECK-NEXT:    .cfi_def_cfa_offset 0
568; CHECK-NEXT:    .cfi_restore w30
569; CHECK-NEXT:    .cfi_restore w29
570; CHECK-NEXT:    .cfi_restore b8
571; CHECK-NEXT:    .cfi_restore b9
572; CHECK-NEXT:    .cfi_restore b10
573; CHECK-NEXT:    .cfi_restore b11
574; CHECK-NEXT:    .cfi_restore b12
575; CHECK-NEXT:    .cfi_restore b13
576; CHECK-NEXT:    .cfi_restore b14
577; CHECK-NEXT:    .cfi_restore b15
578; CHECK-NEXT:    ret
579;
; -frame-pointer=non-leaf run: x29 = sp + 64 with the CFA defined as w29 + 32,
; so the probe loop needs no CFA directives. x28 is saved alongside x9
; (`stp x9, x28, [sp, #80]`), keeping VG at CFA - 16.
580; FP-CHECK-LABEL: vg_unwind_multiple_scratch_regs:
581; FP-CHECK:       // %bb.0: // %entry
582; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
583; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
584; FP-CHECK-NEXT:    cntd x9
585; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
586; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
587; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
588; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
589; FP-CHECK-NEXT:    stp x9, x28, [sp, #80] // 16-byte Folded Spill
590; FP-CHECK-NEXT:    add x29, sp, #64
591; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
592; FP-CHECK-NEXT:    .cfi_offset w28, -8
593; FP-CHECK-NEXT:    .cfi_offset w30, -24
594; FP-CHECK-NEXT:    .cfi_offset w29, -32
595; FP-CHECK-NEXT:    .cfi_offset b8, -40
596; FP-CHECK-NEXT:    .cfi_offset b9, -48
597; FP-CHECK-NEXT:    .cfi_offset b10, -56
598; FP-CHECK-NEXT:    .cfi_offset b11, -64
599; FP-CHECK-NEXT:    .cfi_offset b12, -72
600; FP-CHECK-NEXT:    .cfi_offset b13, -80
601; FP-CHECK-NEXT:    .cfi_offset b14, -88
602; FP-CHECK-NEXT:    .cfi_offset b15, -96
603; FP-CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
604; FP-CHECK-NEXT:  .LBB4_1: // %entry
605; FP-CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
606; FP-CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
607; FP-CHECK-NEXT:    cmp sp, x9
608; FP-CHECK-NEXT:    str xzr, [sp]
609; FP-CHECK-NEXT:    b.ne .LBB4_1
610; FP-CHECK-NEXT:  // %bb.2: // %entry
611; FP-CHECK-NEXT:    mov x8, sp
612; FP-CHECK-NEXT:    str x8, [x0]
613; FP-CHECK-NEXT:    .cfi_offset vg, -16
614; FP-CHECK-NEXT:    smstop sm
615; FP-CHECK-NEXT:    bl callee
616; FP-CHECK-NEXT:    smstart sm
617; FP-CHECK-NEXT:    .cfi_restore vg
618; FP-CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
619; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
620; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
621; FP-CHECK-NEXT:    ldr x28, [sp, #88] // 8-byte Folded Reload
622; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
623; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
624; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
625; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
626; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
627; FP-CHECK-NEXT:    .cfi_restore w28
628; FP-CHECK-NEXT:    .cfi_restore w30
629; FP-CHECK-NEXT:    .cfi_restore w29
630; FP-CHECK-NEXT:    .cfi_restore b8
631; FP-CHECK-NEXT:    .cfi_restore b9
632; FP-CHECK-NEXT:    .cfi_restore b10
633; FP-CHECK-NEXT:    .cfi_restore b11
634; FP-CHECK-NEXT:    .cfi_restore b12
635; FP-CHECK-NEXT:    .cfi_restore b13
636; FP-CHECK-NEXT:    .cfi_restore b14
637; FP-CHECK-NEXT:    .cfi_restore b15
638; FP-CHECK-NEXT:    ret
639;
; Machine-outliner run: the output for this function must not reference any
; OUTLINED_FUNCTION_ symbol.
640; OUTLINER-CHECK-LABEL: vg_unwind_multiple_scratch_regs:
641; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
642;
; IR under test: the 327680-byte alloca (80 * 4096) is what produces the probe
; loop above; storing its address through %out keeps the allocation live.
643entry:
644  %v = alloca i8, i64 327680, align 1
645  store ptr %v, ptr %out, align 8
646  call void @callee()
647  ret void
648}
649
650; Locally streaming functions require storing both the streaming and
651; non-streaming values of VG.
652;
; Attribute group #3 marks this function "aarch64_pstate_sm_body".
; Default run: the `rdsvl x9, #1` result shifted right by 3 is stored at
; [sp, #72] (CFA-24) and the `cntd x9` result at [sp, #80] (CFA-16). The vg
; rule starts at -16, is removed after `smstart sm`, and is set to -24
; before `smstop sm`.
; Frame-pointer run: same pattern, with the two slots at CFA-16 and CFA-8.
; Outliner run: only asserts that no OUTLINED_FUNCTION_ reference follows
; the function label.
653define void @vg_locally_streaming_fn() #3 {
654; CHECK-LABEL: vg_locally_streaming_fn:
655; CHECK:       // %bb.0:
656; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
657; CHECK-NEXT:    .cfi_def_cfa_offset 96
658; CHECK-NEXT:    rdsvl x9, #1
659; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
660; CHECK-NEXT:    lsr x9, x9, #3
661; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
662; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
663; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
664; CHECK-NEXT:    cntd x9
665; CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
666; CHECK-NEXT:    .cfi_offset vg, -16
667; CHECK-NEXT:    .cfi_offset w30, -32
668; CHECK-NEXT:    .cfi_offset b8, -40
669; CHECK-NEXT:    .cfi_offset b9, -48
670; CHECK-NEXT:    .cfi_offset b10, -56
671; CHECK-NEXT:    .cfi_offset b11, -64
672; CHECK-NEXT:    .cfi_offset b12, -72
673; CHECK-NEXT:    .cfi_offset b13, -80
674; CHECK-NEXT:    .cfi_offset b14, -88
675; CHECK-NEXT:    .cfi_offset b15, -96
676; CHECK-NEXT:    bl callee
677; CHECK-NEXT:    smstart sm
678; CHECK-NEXT:    .cfi_restore vg
679; CHECK-NEXT:    bl streaming_callee
680; CHECK-NEXT:    .cfi_offset vg, -24
681; CHECK-NEXT:    smstop sm
682; CHECK-NEXT:    bl callee
683; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
684; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
685; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
686; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
687; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
688; CHECK-NEXT:    .cfi_def_cfa_offset 0
689; CHECK-NEXT:    .cfi_restore w30
690; CHECK-NEXT:    .cfi_restore b8
691; CHECK-NEXT:    .cfi_restore b9
692; CHECK-NEXT:    .cfi_restore b10
693; CHECK-NEXT:    .cfi_restore b11
694; CHECK-NEXT:    .cfi_restore b12
695; CHECK-NEXT:    .cfi_restore b13
696; CHECK-NEXT:    .cfi_restore b14
697; CHECK-NEXT:    .cfi_restore b15
698; CHECK-NEXT:    ret
699;
700; FP-CHECK-LABEL: vg_locally_streaming_fn:
701; FP-CHECK:       // %bb.0:
702; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
703; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
704; FP-CHECK-NEXT:    rdsvl x9, #1
705; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
706; FP-CHECK-NEXT:    lsr x9, x9, #3
707; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
708; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
709; FP-CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
710; FP-CHECK-NEXT:    cntd x9
711; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
712; FP-CHECK-NEXT:    str x9, [sp, #88] // 8-byte Folded Spill
713; FP-CHECK-NEXT:    add x29, sp, #64
714; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
715; FP-CHECK-NEXT:    .cfi_offset vg, -8
716; FP-CHECK-NEXT:    .cfi_offset w30, -24
717; FP-CHECK-NEXT:    .cfi_offset w29, -32
718; FP-CHECK-NEXT:    .cfi_offset b8, -40
719; FP-CHECK-NEXT:    .cfi_offset b9, -48
720; FP-CHECK-NEXT:    .cfi_offset b10, -56
721; FP-CHECK-NEXT:    .cfi_offset b11, -64
722; FP-CHECK-NEXT:    .cfi_offset b12, -72
723; FP-CHECK-NEXT:    .cfi_offset b13, -80
724; FP-CHECK-NEXT:    .cfi_offset b14, -88
725; FP-CHECK-NEXT:    .cfi_offset b15, -96
726; FP-CHECK-NEXT:    bl callee
727; FP-CHECK-NEXT:    smstart sm
728; FP-CHECK-NEXT:    .cfi_restore vg
729; FP-CHECK-NEXT:    bl streaming_callee
730; FP-CHECK-NEXT:    .cfi_offset vg, -16
731; FP-CHECK-NEXT:    smstop sm
732; FP-CHECK-NEXT:    bl callee
733; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
734; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
735; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
736; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
737; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
738; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
739; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
740; FP-CHECK-NEXT:    .cfi_restore w30
741; FP-CHECK-NEXT:    .cfi_restore w29
742; FP-CHECK-NEXT:    .cfi_restore b8
743; FP-CHECK-NEXT:    .cfi_restore b9
744; FP-CHECK-NEXT:    .cfi_restore b10
745; FP-CHECK-NEXT:    .cfi_restore b11
746; FP-CHECK-NEXT:    .cfi_restore b12
747; FP-CHECK-NEXT:    .cfi_restore b13
748; FP-CHECK-NEXT:    .cfi_restore b14
749; FP-CHECK-NEXT:    .cfi_restore b15
750; FP-CHECK-NEXT:    ret
751;
752; OUTLINER-CHECK-LABEL: vg_locally_streaming_fn:
753; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
754;
; IR under test: a call to a callee without streaming attributes, a call
; to a streaming callee (declared with #0), then the first callee again.
755  call void @callee()
756  call void @streaming_callee()
757  call void @callee()
758  ret void
759}
760
; Streaming-compatible caller (attribute group #4) calling a streaming
; callee. The expected code calls __arm_sme_state and keeps bit 0 of the
; result in x19 (presumably the current PSTATE.SM value; see the SME ABI).
; `tbnz w19, #0` skips the `smstart sm` before the call and the
; `smstop sm` after it when that bit is set. The vg CFI rule is set before
; the conditional mode switch and removed at the join label after the call.
761define void @streaming_compatible_to_streaming() #4 {
762; CHECK-LABEL: streaming_compatible_to_streaming:
763; CHECK:       // %bb.0:
764; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
765; CHECK-NEXT:    .cfi_def_cfa_offset 96
766; CHECK-NEXT:    cntd x9
767; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
768; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
769; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
770; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
771; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
772; CHECK-NEXT:    .cfi_offset w19, -16
773; CHECK-NEXT:    .cfi_offset w30, -32
774; CHECK-NEXT:    .cfi_offset b8, -40
775; CHECK-NEXT:    .cfi_offset b9, -48
776; CHECK-NEXT:    .cfi_offset b10, -56
777; CHECK-NEXT:    .cfi_offset b11, -64
778; CHECK-NEXT:    .cfi_offset b12, -72
779; CHECK-NEXT:    .cfi_offset b13, -80
780; CHECK-NEXT:    .cfi_offset b14, -88
781; CHECK-NEXT:    .cfi_offset b15, -96
782; CHECK-NEXT:    bl __arm_sme_state
783; CHECK-NEXT:    and x19, x0, #0x1
784; CHECK-NEXT:    .cfi_offset vg, -24
785; CHECK-NEXT:    tbnz w19, #0, .LBB6_2
786; CHECK-NEXT:  // %bb.1:
787; CHECK-NEXT:    smstart sm
788; CHECK-NEXT:  .LBB6_2:
789; CHECK-NEXT:    bl streaming_callee
790; CHECK-NEXT:    tbnz w19, #0, .LBB6_4
791; CHECK-NEXT:  // %bb.3:
792; CHECK-NEXT:    smstop sm
793; CHECK-NEXT:  .LBB6_4:
794; CHECK-NEXT:    .cfi_restore vg
795; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
796; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
797; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
798; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
799; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
800; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
801; CHECK-NEXT:    .cfi_def_cfa_offset 0
802; CHECK-NEXT:    .cfi_restore w19
803; CHECK-NEXT:    .cfi_restore w30
804; CHECK-NEXT:    .cfi_restore b8
805; CHECK-NEXT:    .cfi_restore b9
806; CHECK-NEXT:    .cfi_restore b10
807; CHECK-NEXT:    .cfi_restore b11
808; CHECK-NEXT:    .cfi_restore b12
809; CHECK-NEXT:    .cfi_restore b13
810; CHECK-NEXT:    .cfi_restore b14
811; CHECK-NEXT:    .cfi_restore b15
812; CHECK-NEXT:    ret
813;
814; FP-CHECK-LABEL: streaming_compatible_to_streaming:
815; FP-CHECK:       // %bb.0:
816; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
817; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
818; FP-CHECK-NEXT:    cntd x9
819; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
820; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
821; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
822; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
823; FP-CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
824; FP-CHECK-NEXT:    add x29, sp, #64
825; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
826; FP-CHECK-NEXT:    .cfi_offset w19, -8
827; FP-CHECK-NEXT:    .cfi_offset w30, -24
828; FP-CHECK-NEXT:    .cfi_offset w29, -32
829; FP-CHECK-NEXT:    .cfi_offset b8, -40
830; FP-CHECK-NEXT:    .cfi_offset b9, -48
831; FP-CHECK-NEXT:    .cfi_offset b10, -56
832; FP-CHECK-NEXT:    .cfi_offset b11, -64
833; FP-CHECK-NEXT:    .cfi_offset b12, -72
834; FP-CHECK-NEXT:    .cfi_offset b13, -80
835; FP-CHECK-NEXT:    .cfi_offset b14, -88
836; FP-CHECK-NEXT:    .cfi_offset b15, -96
837; FP-CHECK-NEXT:    bl __arm_sme_state
838; FP-CHECK-NEXT:    and x19, x0, #0x1
839; FP-CHECK-NEXT:    .cfi_offset vg, -16
840; FP-CHECK-NEXT:    tbnz w19, #0, .LBB6_2
841; FP-CHECK-NEXT:  // %bb.1:
842; FP-CHECK-NEXT:    smstart sm
843; FP-CHECK-NEXT:  .LBB6_2:
844; FP-CHECK-NEXT:    bl streaming_callee
845; FP-CHECK-NEXT:    tbnz w19, #0, .LBB6_4
846; FP-CHECK-NEXT:  // %bb.3:
847; FP-CHECK-NEXT:    smstop sm
848; FP-CHECK-NEXT:  .LBB6_4:
849; FP-CHECK-NEXT:    .cfi_restore vg
850; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
851; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
852; FP-CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
853; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
854; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
855; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
856; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
857; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
858; FP-CHECK-NEXT:    .cfi_restore w19
859; FP-CHECK-NEXT:    .cfi_restore w30
860; FP-CHECK-NEXT:    .cfi_restore w29
861; FP-CHECK-NEXT:    .cfi_restore b8
862; FP-CHECK-NEXT:    .cfi_restore b9
863; FP-CHECK-NEXT:    .cfi_restore b10
864; FP-CHECK-NEXT:    .cfi_restore b11
865; FP-CHECK-NEXT:    .cfi_restore b12
866; FP-CHECK-NEXT:    .cfi_restore b13
867; FP-CHECK-NEXT:    .cfi_restore b14
868; FP-CHECK-NEXT:    .cfi_restore b15
869; FP-CHECK-NEXT:    ret
870;
871; OUTLINER-CHECK-LABEL: streaming_compatible_to_streaming:
872; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
873;
; IR under test: a single call to the streaming callee (declared with #0).
874  call void @streaming_callee()
875  ret void
876}
877
; Streaming-compatible caller (attribute group #4) calling a callee that
; has no streaming attributes. The expected code calls __arm_sme_state and
; keeps bit 0 of the result in x19 (presumably the current PSTATE.SM value;
; see the SME ABI). `tbz w19, #0` skips the `smstop sm` before the call and
; the `smstart sm` after it when that bit is clear. The vg CFI rule is set
; before the conditional mode switch and removed at the join label after
; the call.
878define void @streaming_compatible_to_non_streaming() #4 {
879; CHECK-LABEL: streaming_compatible_to_non_streaming:
880; CHECK:       // %bb.0:
881; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
882; CHECK-NEXT:    .cfi_def_cfa_offset 96
883; CHECK-NEXT:    cntd x9
884; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
885; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
886; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
887; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
888; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
889; CHECK-NEXT:    .cfi_offset w19, -16
890; CHECK-NEXT:    .cfi_offset w30, -32
891; CHECK-NEXT:    .cfi_offset b8, -40
892; CHECK-NEXT:    .cfi_offset b9, -48
893; CHECK-NEXT:    .cfi_offset b10, -56
894; CHECK-NEXT:    .cfi_offset b11, -64
895; CHECK-NEXT:    .cfi_offset b12, -72
896; CHECK-NEXT:    .cfi_offset b13, -80
897; CHECK-NEXT:    .cfi_offset b14, -88
898; CHECK-NEXT:    .cfi_offset b15, -96
899; CHECK-NEXT:    bl __arm_sme_state
900; CHECK-NEXT:    and x19, x0, #0x1
901; CHECK-NEXT:    .cfi_offset vg, -24
902; CHECK-NEXT:    tbz w19, #0, .LBB7_2
903; CHECK-NEXT:  // %bb.1:
904; CHECK-NEXT:    smstop sm
905; CHECK-NEXT:  .LBB7_2:
906; CHECK-NEXT:    bl callee
907; CHECK-NEXT:    tbz w19, #0, .LBB7_4
908; CHECK-NEXT:  // %bb.3:
909; CHECK-NEXT:    smstart sm
910; CHECK-NEXT:  .LBB7_4:
911; CHECK-NEXT:    .cfi_restore vg
912; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
913; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
914; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
915; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
916; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
917; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
918; CHECK-NEXT:    .cfi_def_cfa_offset 0
919; CHECK-NEXT:    .cfi_restore w19
920; CHECK-NEXT:    .cfi_restore w30
921; CHECK-NEXT:    .cfi_restore b8
922; CHECK-NEXT:    .cfi_restore b9
923; CHECK-NEXT:    .cfi_restore b10
924; CHECK-NEXT:    .cfi_restore b11
925; CHECK-NEXT:    .cfi_restore b12
926; CHECK-NEXT:    .cfi_restore b13
927; CHECK-NEXT:    .cfi_restore b14
928; CHECK-NEXT:    .cfi_restore b15
929; CHECK-NEXT:    ret
930;
931; FP-CHECK-LABEL: streaming_compatible_to_non_streaming:
932; FP-CHECK:       // %bb.0:
933; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
934; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
935; FP-CHECK-NEXT:    cntd x9
936; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
937; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
938; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
939; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
940; FP-CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
941; FP-CHECK-NEXT:    add x29, sp, #64
942; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
943; FP-CHECK-NEXT:    .cfi_offset w19, -8
944; FP-CHECK-NEXT:    .cfi_offset w30, -24
945; FP-CHECK-NEXT:    .cfi_offset w29, -32
946; FP-CHECK-NEXT:    .cfi_offset b8, -40
947; FP-CHECK-NEXT:    .cfi_offset b9, -48
948; FP-CHECK-NEXT:    .cfi_offset b10, -56
949; FP-CHECK-NEXT:    .cfi_offset b11, -64
950; FP-CHECK-NEXT:    .cfi_offset b12, -72
951; FP-CHECK-NEXT:    .cfi_offset b13, -80
952; FP-CHECK-NEXT:    .cfi_offset b14, -88
953; FP-CHECK-NEXT:    .cfi_offset b15, -96
954; FP-CHECK-NEXT:    bl __arm_sme_state
955; FP-CHECK-NEXT:    and x19, x0, #0x1
956; FP-CHECK-NEXT:    .cfi_offset vg, -16
957; FP-CHECK-NEXT:    tbz w19, #0, .LBB7_2
958; FP-CHECK-NEXT:  // %bb.1:
959; FP-CHECK-NEXT:    smstop sm
960; FP-CHECK-NEXT:  .LBB7_2:
961; FP-CHECK-NEXT:    bl callee
962; FP-CHECK-NEXT:    tbz w19, #0, .LBB7_4
963; FP-CHECK-NEXT:  // %bb.3:
964; FP-CHECK-NEXT:    smstart sm
965; FP-CHECK-NEXT:  .LBB7_4:
966; FP-CHECK-NEXT:    .cfi_restore vg
967; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
968; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
969; FP-CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
970; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
971; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
972; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
973; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
974; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
975; FP-CHECK-NEXT:    .cfi_restore w19
976; FP-CHECK-NEXT:    .cfi_restore w30
977; FP-CHECK-NEXT:    .cfi_restore w29
978; FP-CHECK-NEXT:    .cfi_restore b8
979; FP-CHECK-NEXT:    .cfi_restore b9
980; FP-CHECK-NEXT:    .cfi_restore b10
981; FP-CHECK-NEXT:    .cfi_restore b11
982; FP-CHECK-NEXT:    .cfi_restore b12
983; FP-CHECK-NEXT:    .cfi_restore b13
984; FP-CHECK-NEXT:    .cfi_restore b14
985; FP-CHECK-NEXT:    .cfi_restore b15
986; FP-CHECK-NEXT:    ret
987;
988; OUTLINER-CHECK-LABEL: streaming_compatible_to_non_streaming:
989; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
990;
; IR under test: a single call to the callee without streaming attributes.
991  call void @callee()
992  ret void
993}
994
995; If the target does not have SVE, do not emit cntd in the prologue and
996; instead spill the result returned by __arm_get_current_vg.
997; This requires preserving the argument %x as the vg value is returned
998; in X0.
999;
; Only the no-SVE run (the RUN line that passes +sme2 without +sve) and the
; outliner run have expectations for this function. In the expected
; prologue x0 is copied to x9 before `bl __arm_get_current_vg`, the
; returned x0 is spilled together with x19 at [sp, #80], and x0 is then
; restored from x9.
1000define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
1001; NO-SVE-CHECK-LABEL: streaming_compatible_no_sve:
1002; NO-SVE-CHECK:       // %bb.0:
1003; NO-SVE-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1004; NO-SVE-CHECK-NEXT:    .cfi_def_cfa_offset 96
1005; NO-SVE-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
1006; NO-SVE-CHECK-NEXT:    mov x9, x0
1007; NO-SVE-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
1008; NO-SVE-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
1009; NO-SVE-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
1010; NO-SVE-CHECK-NEXT:    bl __arm_get_current_vg
1011; NO-SVE-CHECK-NEXT:    stp x0, x19, [sp, #80] // 16-byte Folded Spill
1012; NO-SVE-CHECK-NEXT:    mov x0, x9
1013; NO-SVE-CHECK-NEXT:    add x29, sp, #64
1014; NO-SVE-CHECK-NEXT:    .cfi_def_cfa w29, 32
1015; NO-SVE-CHECK-NEXT:    .cfi_offset w19, -8
1016; NO-SVE-CHECK-NEXT:    .cfi_offset w30, -24
1017; NO-SVE-CHECK-NEXT:    .cfi_offset w29, -32
1018; NO-SVE-CHECK-NEXT:    .cfi_offset b8, -40
1019; NO-SVE-CHECK-NEXT:    .cfi_offset b9, -48
1020; NO-SVE-CHECK-NEXT:    .cfi_offset b10, -56
1021; NO-SVE-CHECK-NEXT:    .cfi_offset b11, -64
1022; NO-SVE-CHECK-NEXT:    .cfi_offset b12, -72
1023; NO-SVE-CHECK-NEXT:    .cfi_offset b13, -80
1024; NO-SVE-CHECK-NEXT:    .cfi_offset b14, -88
1025; NO-SVE-CHECK-NEXT:    .cfi_offset b15, -96
1026; NO-SVE-CHECK-NEXT:    mov w8, w0
1027; NO-SVE-CHECK-NEXT:    bl __arm_sme_state
1028; NO-SVE-CHECK-NEXT:    and x19, x0, #0x1
1029; NO-SVE-CHECK-NEXT:    .cfi_offset vg, -16
1030; NO-SVE-CHECK-NEXT:    tbnz w19, #0, .LBB8_2
1031; NO-SVE-CHECK-NEXT:  // %bb.1:
1032; NO-SVE-CHECK-NEXT:    smstart sm
1033; NO-SVE-CHECK-NEXT:  .LBB8_2:
1034; NO-SVE-CHECK-NEXT:    mov w0, w8
1035; NO-SVE-CHECK-NEXT:    bl streaming_callee_with_arg
1036; NO-SVE-CHECK-NEXT:    tbnz w19, #0, .LBB8_4
1037; NO-SVE-CHECK-NEXT:  // %bb.3:
1038; NO-SVE-CHECK-NEXT:    smstop sm
1039; NO-SVE-CHECK-NEXT:  .LBB8_4:
1040; NO-SVE-CHECK-NEXT:    .cfi_restore vg
1041; NO-SVE-CHECK-NEXT:    .cfi_def_cfa wsp, 96
1042; NO-SVE-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
1043; NO-SVE-CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
1044; NO-SVE-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1045; NO-SVE-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1046; NO-SVE-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1047; NO-SVE-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1048; NO-SVE-CHECK-NEXT:    .cfi_def_cfa_offset 0
1049; NO-SVE-CHECK-NEXT:    .cfi_restore w19
1050; NO-SVE-CHECK-NEXT:    .cfi_restore w30
1051; NO-SVE-CHECK-NEXT:    .cfi_restore w29
1052; NO-SVE-CHECK-NEXT:    .cfi_restore b8
1053; NO-SVE-CHECK-NEXT:    .cfi_restore b9
1054; NO-SVE-CHECK-NEXT:    .cfi_restore b10
1055; NO-SVE-CHECK-NEXT:    .cfi_restore b11
1056; NO-SVE-CHECK-NEXT:    .cfi_restore b12
1057; NO-SVE-CHECK-NEXT:    .cfi_restore b13
1058; NO-SVE-CHECK-NEXT:    .cfi_restore b14
1059; NO-SVE-CHECK-NEXT:    .cfi_restore b15
1060; NO-SVE-CHECK-NEXT:    ret
1061;
1062; OUTLINER-CHECK-LABEL: streaming_compatible_no_sve:
1063; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
1064;
; IR under test: forward the incoming argument to a streaming callee.
1065  call void @streaming_callee_with_arg(i32 %x)
1066  ret void
1067}
1068
1069; The algorithm that fixes up the offsets of the callee-save/restore
1070; instructions must jump over the instructions that instantiate the current
1071; 'VG' value. We must make sure that it doesn't consider any RDSVL in
1072; user-code as if it is part of the frame-setup when doing so.
; Only the no-SVE run has expectations for this function. The function is
; nounwind and the expected output contains no CFI directives. The
; `rdsvl x8, #1` / `lsr x8, x8, #3` pair is expected immediately after the
; VG spill and the restore of x0; it presumably corresponds to the
; @llvm.aarch64.sme.cntsd call in the body.
1073define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
1074; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
1075; NO-SVE-CHECK:     // %bb.0:
1076; NO-SVE-CHECK-NEXT: stp     d15, d14, [sp, #-96]!           // 16-byte Folded Spill
1077; NO-SVE-CHECK-NEXT: stp     d13, d12, [sp, #16]             // 16-byte Folded Spill
1078; NO-SVE-CHECK-NEXT: mov     x9, x0
1079; NO-SVE-CHECK-NEXT: stp     d11, d10, [sp, #32]             // 16-byte Folded Spill
1080; NO-SVE-CHECK-NEXT: stp     d9, d8, [sp, #48]               // 16-byte Folded Spill
1081; NO-SVE-CHECK-NEXT: stp     x29, x30, [sp, #64]             // 16-byte Folded Spill
1082; NO-SVE-CHECK-NEXT: bl      __arm_get_current_vg
1083; NO-SVE-CHECK-NEXT: str     x0, [sp, #80]                   // 8-byte Folded Spill
1084; NO-SVE-CHECK-NEXT: mov     x0, x9
1085; NO-SVE-CHECK-NEXT: rdsvl   x8, #1
1086; NO-SVE-CHECK-NEXT: add     x29, sp, #64
1087; NO-SVE-CHECK-NEXT: lsr     x8, x8, #3
1088; NO-SVE-CHECK-NEXT: mov     x1, x0
1089; NO-SVE-CHECK-NEXT: smstart sm
1090; NO-SVE-CHECK-NEXT: mov     x0, x8
1091; NO-SVE-CHECK-NEXT: bl      bar
1092; NO-SVE-CHECK-NEXT: smstop  sm
1093; NO-SVE-CHECK-NEXT: ldp     x29, x30, [sp, #64]             // 16-byte Folded Reload
1094; NO-SVE-CHECK-NEXT: ldp     d9, d8, [sp, #48]               // 16-byte Folded Reload
1095; NO-SVE-CHECK-NEXT: ldp     d11, d10, [sp, #32]             // 16-byte Folded Reload
1096; NO-SVE-CHECK-NEXT: ldp     d13, d12, [sp, #16]             // 16-byte Folded Reload
1097; NO-SVE-CHECK-NEXT: ldp     d15, d14, [sp], #96             // 16-byte Folded Reload
1098; NO-SVE-CHECK-NEXT: ret
; NOTE(review): %some_alloc is never read or written; presumably it only
; forces a stack object into the frame. Confirm before removing it.
; The call to @bar requests streaming mode at the call site.
1099  %some_alloc = alloca i64, align 8
1100  %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
1101  call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
1102  ret void
1103}
1104
; Callee used by @test_rdsvl_right_after_prologue. The declaration carries
; no streaming attribute; streaming mode is requested on the call itself.
1105declare void @bar(i64, i64)
1106
1107; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
1108; if the function contains a streaming-mode change.
; Attribute group #5 is "aarch64_pstate_sm_enabled" with no uwtable
; attribute. The default and frame-pointer expectations still contain the
; vg .cfi_offset/.cfi_restore pair around the mode change, as well as the
; epilogue .cfi_restore directives.
1109
1110define void @vg_unwind_noasync() #5 {
1111; CHECK-LABEL: vg_unwind_noasync:
1112; CHECK:       // %bb.0:
1113; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
1114; CHECK-NEXT:    .cfi_def_cfa_offset 80
1115; CHECK-NEXT:    cntd x9
1116; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
1117; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
1118; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
1119; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
1120; CHECK-NEXT:    .cfi_offset w30, -16
1121; CHECK-NEXT:    .cfi_offset b8, -24
1122; CHECK-NEXT:    .cfi_offset b9, -32
1123; CHECK-NEXT:    .cfi_offset b10, -40
1124; CHECK-NEXT:    .cfi_offset b11, -48
1125; CHECK-NEXT:    .cfi_offset b12, -56
1126; CHECK-NEXT:    .cfi_offset b13, -64
1127; CHECK-NEXT:    .cfi_offset b14, -72
1128; CHECK-NEXT:    .cfi_offset b15, -80
1129; CHECK-NEXT:    .cfi_offset vg, -8
1130; CHECK-NEXT:    smstop sm
1131; CHECK-NEXT:    bl callee
1132; CHECK-NEXT:    smstart sm
1133; CHECK-NEXT:    .cfi_restore vg
1134; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1135; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
1136; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1137; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1138; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
1139; CHECK-NEXT:    .cfi_def_cfa_offset 0
1140; CHECK-NEXT:    .cfi_restore w30
1141; CHECK-NEXT:    .cfi_restore b8
1142; CHECK-NEXT:    .cfi_restore b9
1143; CHECK-NEXT:    .cfi_restore b10
1144; CHECK-NEXT:    .cfi_restore b11
1145; CHECK-NEXT:    .cfi_restore b12
1146; CHECK-NEXT:    .cfi_restore b13
1147; CHECK-NEXT:    .cfi_restore b14
1148; CHECK-NEXT:    .cfi_restore b15
1149; CHECK-NEXT:    ret
1150;
1151; FP-CHECK-LABEL: vg_unwind_noasync:
1152; FP-CHECK:       // %bb.0:
1153; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1154; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
1155; FP-CHECK-NEXT:    cntd x9
1156; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
1157; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
1158; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
1159; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
1160; FP-CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
1161; FP-CHECK-NEXT:    add x29, sp, #64
1162; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
1163; FP-CHECK-NEXT:    .cfi_offset w30, -24
1164; FP-CHECK-NEXT:    .cfi_offset w29, -32
1165; FP-CHECK-NEXT:    .cfi_offset b8, -40
1166; FP-CHECK-NEXT:    .cfi_offset b9, -48
1167; FP-CHECK-NEXT:    .cfi_offset b10, -56
1168; FP-CHECK-NEXT:    .cfi_offset b11, -64
1169; FP-CHECK-NEXT:    .cfi_offset b12, -72
1170; FP-CHECK-NEXT:    .cfi_offset b13, -80
1171; FP-CHECK-NEXT:    .cfi_offset b14, -88
1172; FP-CHECK-NEXT:    .cfi_offset b15, -96
1173; FP-CHECK-NEXT:    .cfi_offset vg, -16
1174; FP-CHECK-NEXT:    smstop sm
1175; FP-CHECK-NEXT:    bl callee
1176; FP-CHECK-NEXT:    smstart sm
1177; FP-CHECK-NEXT:    .cfi_restore vg
1178; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
1179; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
1180; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1181; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1182; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1183; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1184; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
1185; FP-CHECK-NEXT:    .cfi_restore w30
1186; FP-CHECK-NEXT:    .cfi_restore w29
1187; FP-CHECK-NEXT:    .cfi_restore b8
1188; FP-CHECK-NEXT:    .cfi_restore b9
1189; FP-CHECK-NEXT:    .cfi_restore b10
1190; FP-CHECK-NEXT:    .cfi_restore b11
1191; FP-CHECK-NEXT:    .cfi_restore b12
1192; FP-CHECK-NEXT:    .cfi_restore b13
1193; FP-CHECK-NEXT:    .cfi_restore b14
1194; FP-CHECK-NEXT:    .cfi_restore b15
1195; FP-CHECK-NEXT:    ret
1196; OUTLINER-CHECK-LABEL: vg_unwind_noasync:
1197; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
1198;
; IR under test: one call to the callee without streaming attributes.
; The trailing `;` on the next two lines starts an empty comment; it is
; not part of the instruction.
1199  call void @callee();
1200  ret void;
1201}
1202
; Attribute groups referenced by the functions in this file. Every group
; except #5 requests uwtable(async). No group #2 is defined in this part of
; the file.
; #0: streaming function ("aarch64_pstate_sm_enabled").
1203attributes #0 = { "aarch64_pstate_sm_enabled" uwtable(async) }
; #1: streaming function that also sets "probe-stack"="inline-asm".
1204attributes #1 = { "probe-stack"="inline-asm" "aarch64_pstate_sm_enabled" uwtable(async) }
; #3: locally streaming function ("aarch64_pstate_sm_body").
1205attributes #3 = { "aarch64_pstate_sm_body" uwtable(async) }
; #4: streaming-compatible function ("aarch64_pstate_sm_compatible").
1206attributes #4 = { "aarch64_pstate_sm_compatible" uwtable(async) }
; #5: same as #0 but without uwtable; used by @vg_unwind_noasync.
1207attributes #5 = { "aarch64_pstate_sm_enabled" }
1208