; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefixes=CHECK,CHECK0
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=64 | FileCheck %s --check-prefixes=CHECK,CHECK64
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024

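; -aarch64-stack-hazard-size=N reserves an N-byte gap between GPR stack slots
; (GPR callee-saves and GPR-sized locals) and FPR/SVE stack slots, so the two
; kinds of accesses don't land next to each other. CHECK0, CHECK64 and
; CHECK1024 correspond to hazard sizes of 0, 64 and 1024 bytes. Plain
; non-streaming functions (see csr_d8_notsc below) get no padding.
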
define i32 @basic(i32 noundef %num) {
; CHECK-LABEL: basic:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
entry:
  ret i32 0
}

; Non-streaming functions don't need hazards
define i32 @csr_d8_notsc(i32 noundef %num) {
; CHECK-LABEL: csr_d8_notsc:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset b8, -16
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8}"() #1
  ret i32 0
}

; Very simple - doesn't require hazards
define i32 @basic_sc(i32 noundef %num) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: basic_sc:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
entry:
  ret i32 0
}

; No fpr accesses/csrs - doesn't require hazards
define i32 @nocsr_alloci64(i64 %d) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: nocsr_alloci64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x8, x0
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    str x8, [sp, #8]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
entry:
  %a = alloca i64
  store i64 %d, ptr %a
  ret i32 0
}

; No fpr accesses/csrs - doesn't require hazards
define i32 @csr_x20(i32 noundef %num) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: csr_x20:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x20, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    ldr x20, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{x20}"() #1
  ret i32 0
}

; CSR of d8. Make sure there is a gap between FPR and GPR
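; With hazard padding the d8 spill slot is kept the configured distance away
; from the GPR area: note the larger `sub sp` in the CHECK64/CHECK1024
; prologues and the offset at which d8 is spilled relative to it.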
define i32 @csr_d8(i32 noundef %num) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #144
; CHECK64-NEXT:    str d8, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 144
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldr d8, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #144
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2064
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8}"() #1
  ret i32 0
}

; Stack fpr objects.
define i32 @nocsr_allocd(double %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: nocsr_allocd:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    sub sp, sp, #16
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    str d0, [sp, #8]
; CHECK0-NEXT:    add sp, sp, #16
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: nocsr_allocd:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    .cfi_def_cfa_offset 80
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    str d0, [sp, #72]
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: nocsr_allocd:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    str d0, [sp, #1032]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca double
  store double %d, ptr %a
  ret i32 0
}

define i32 @csr_d8d9(i32 noundef %num) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8d9:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp d9, d8, [sp, #-16]! // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -8
; CHECK0-NEXT:    .cfi_offset b9, -16
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldp d9, d8, [sp], #16 // 16-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8d9:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #144
; CHECK64-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 144
; CHECK64-NEXT:    .cfi_offset b8, -72
; CHECK64-NEXT:    .cfi_offset b9, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #144
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8d9:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    stp d9, d8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_offset b8, -1048
; CHECK1024-NEXT:    .cfi_offset b9, -1056
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldp d9, d8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8},~{d9}"() #1
  ret i32 0
}

define i32 @csr_d8_allocd(double %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8_allocd:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp d8, d0, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_allocd:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #160
; CHECK64-NEXT:    stp d0, d8, [sp, #72] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 160
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #160
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_allocd:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str d0, [sp, #1032]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca double
  tail call void asm sideeffect "", "~{d8}"() #1
  store double %d, ptr %a
  ret i32 0
}

define i32 @csr_d8_alloci64(i64 %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8_alloci64:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov x8, x0
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    str x8, [sp, #8]
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_alloci64:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #160
; CHECK64-NEXT:    str d8, [sp, #80] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 160
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov x8, x0
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT:    str x8, [sp, #8]
; CHECK64-NEXT:    add sp, sp, #160
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_alloci64:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    mov x8, x0
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str x8, [sp, #8]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  tail call void asm sideeffect "", "~{d8}"() #1
  store i64 %d, ptr %a
  ret i32 0
}

; Check the frame pointer is in the right place
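; With hazard padding the frame pointer is established next to the GPR
; callee-saves, above the padding (add x29, sp, #152 in the CHECK64 run and
; add x29, sp, #1032 in the CHECK1024 run below).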
define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK0-LABEL: csr_d8_allocd_framepointer:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
; CHECK0-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    add x29, sp, #16
; CHECK0-NEXT:    .cfi_def_cfa w29, 16
; CHECK0-NEXT:    .cfi_offset w30, -8
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_offset b8, -32
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    str d0, [sp, #8]
; CHECK0-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_allocd_framepointer:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #176
; CHECK64-NEXT:    stp d0, d8, [sp, #72] // 8-byte Folded Spill
; CHECK64-NEXT:    stp x29, x30, [sp, #152] // 16-byte Folded Spill
; CHECK64-NEXT:    add x29, sp, #152
; CHECK64-NEXT:    .cfi_def_cfa w29, 24
; CHECK64-NEXT:    .cfi_offset w30, -16
; CHECK64-NEXT:    .cfi_offset w29, -24
; CHECK64-NEXT:    .cfi_offset b8, -96
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    ldr x29, [sp, #152] // 8-byte Folded Reload
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #176
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_allocd_framepointer:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    add x29, sp, #1032
; CHECK1024-NEXT:    str x30, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_def_cfa w29, 24
; CHECK1024-NEXT:    .cfi_offset w30, -16
; CHECK1024-NEXT:    .cfi_offset w29, -24
; CHECK1024-NEXT:    .cfi_offset b8, -1056
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str d0, [sp, #1032]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr x30, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    ret
entry:
  %a = alloca double
  tail call void asm sideeffect "", "~{d8}"() #1
  store double %d, ptr %a
  ret i32 0
}

; sve stack objects should live with other fpr registers
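; The scalable alloca is placed next to the d8 spill (allocated via addvl),
; with the hazard gap separating that FPR/SVE region from the GPR side of the
; frame, so no extra padding is needed between the SVE object and d8.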
define i32 @csr_d8_allocnxv4i32(i64 %d) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8_allocnxv4i32:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    str x29, [sp, #8] // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK0-NEXT:    .cfi_offset w29, -8
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    mov z0.s, #0 // =0x0
; CHECK0-NEXT:    ptrue p0.s
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp, #8] // 8-byte Folded Reload
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_allocnxv4i32:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    str d8, [sp, #-80]! // 8-byte Folded Spill
; CHECK64-NEXT:    str x29, [sp, #72] // 8-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 8 * VG
; CHECK64-NEXT:    .cfi_offset w29, -8
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    mov z0.s, #0 // =0x0
; CHECK64-NEXT:    ptrue p0.s
; CHECK64-NEXT:    add x8, sp, #64
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    ldr x29, [sp, #72] // 8-byte Folded Reload
; CHECK64-NEXT:    ldr d8, [sp], #80 // 8-byte Folded Reload
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_allocnxv4i32:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 8 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    mov z0.s, #0 // =0x0
; CHECK1024-NEXT:    ptrue p0.s
; CHECK1024-NEXT:    add x8, sp, #1024
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca <vscale x 4 x i32>
  tail call void asm sideeffect "", "~{d8}"() #1
  store <vscale x 4 x i32> zeroinitializer, ptr %a
  ret i32 0
}

define i32 @csr_x18_25_d8_15_allocdi64(i64 %d, double %e) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_x18_25_d8_15_allocdi64:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    sub sp, sp, #144
; CHECK0-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK0-NEXT:    str x25, [sp, #80] // 8-byte Folded Spill
; CHECK0-NEXT:    stp x24, x23, [sp, #96] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x22, x21, [sp, #112] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x20, x19, [sp, #128] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 144
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w20, -16
; CHECK0-NEXT:    .cfi_offset w21, -24
; CHECK0-NEXT:    .cfi_offset w22, -32
; CHECK0-NEXT:    .cfi_offset w23, -40
; CHECK0-NEXT:    .cfi_offset w24, -48
; CHECK0-NEXT:    .cfi_offset w25, -64
; CHECK0-NEXT:    .cfi_offset b8, -72
; CHECK0-NEXT:    .cfi_offset b9, -80
; CHECK0-NEXT:    .cfi_offset b10, -88
; CHECK0-NEXT:    .cfi_offset b11, -96
; CHECK0-NEXT:    .cfi_offset b12, -104
; CHECK0-NEXT:    .cfi_offset b13, -112
; CHECK0-NEXT:    .cfi_offset b14, -120
; CHECK0-NEXT:    .cfi_offset b15, -128
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    mov x8, x0
; CHECK0-NEXT:    ldp x20, x19, [sp, #128] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr x25, [sp, #80] // 8-byte Folded Reload
; CHECK0-NEXT:    ldp x22, x21, [sp, #112] // 16-byte Folded Reload
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldp x24, x23, [sp, #96] // 16-byte Folded Reload
; CHECK0-NEXT:    str x8, [sp, #88]
; CHECK0-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK0-NEXT:    str d0, [sp, #8]
; CHECK0-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT:    add sp, sp, #144
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_x18_25_d8_15_allocdi64:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #288
; CHECK64-NEXT:    stp d15, d14, [sp, #96] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d13, d12, [sp, #112] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d11, d10, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d9, d8, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x29, x25, [sp, #224] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x24, x23, [sp, #240] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x22, x21, [sp, #256] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x20, x19, [sp, #272] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 288
; CHECK64-NEXT:    .cfi_offset w19, -8
; CHECK64-NEXT:    .cfi_offset w20, -16
; CHECK64-NEXT:    .cfi_offset w21, -24
; CHECK64-NEXT:    .cfi_offset w22, -32
; CHECK64-NEXT:    .cfi_offset w23, -40
; CHECK64-NEXT:    .cfi_offset w24, -48
; CHECK64-NEXT:    .cfi_offset w25, -56
; CHECK64-NEXT:    .cfi_offset w29, -64
; CHECK64-NEXT:    .cfi_offset b8, -136
; CHECK64-NEXT:    .cfi_offset b9, -144
; CHECK64-NEXT:    .cfi_offset b10, -152
; CHECK64-NEXT:    .cfi_offset b11, -160
; CHECK64-NEXT:    .cfi_offset b12, -168
; CHECK64-NEXT:    .cfi_offset b13, -176
; CHECK64-NEXT:    .cfi_offset b14, -184
; CHECK64-NEXT:    .cfi_offset b15, -192
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    mov x8, x0
; CHECK64-NEXT:    ldp x20, x19, [sp, #272] // 16-byte Folded Reload
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldp x22, x21, [sp, #256] // 16-byte Folded Reload
; CHECK64-NEXT:    str x8, [sp, #8]
; CHECK64-NEXT:    ldp x24, x23, [sp, #240] // 16-byte Folded Reload
; CHECK64-NEXT:    str d0, [sp, #88]
; CHECK64-NEXT:    ldp x29, x25, [sp, #224] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d13, d12, [sp, #112] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d15, d14, [sp, #96] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #288
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_x18_25_d8_15_allocdi64:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1152
; CHECK1024-NEXT:    stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x25, [sp, #1096] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x24, [sp, #1104] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x23, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x22, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x21, [sp, #1128] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x20, [sp, #1136] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1144] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2208
; CHECK1024-NEXT:    .cfi_offset w19, -8
; CHECK1024-NEXT:    .cfi_offset w20, -16
; CHECK1024-NEXT:    .cfi_offset w21, -24
; CHECK1024-NEXT:    .cfi_offset w22, -32
; CHECK1024-NEXT:    .cfi_offset w23, -40
; CHECK1024-NEXT:    .cfi_offset w24, -48
; CHECK1024-NEXT:    .cfi_offset w25, -56
; CHECK1024-NEXT:    .cfi_offset w29, -64
; CHECK1024-NEXT:    .cfi_offset b8, -1096
; CHECK1024-NEXT:    .cfi_offset b9, -1104
; CHECK1024-NEXT:    .cfi_offset b10, -1112
; CHECK1024-NEXT:    .cfi_offset b11, -1120
; CHECK1024-NEXT:    .cfi_offset b12, -1128
; CHECK1024-NEXT:    .cfi_offset b13, -1136
; CHECK1024-NEXT:    .cfi_offset b14, -1144
; CHECK1024-NEXT:    .cfi_offset b15, -1152
; CHECK1024-NEXT:    mov x8, x0
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str x8, [sp, #8]
; CHECK1024-NEXT:    str d0, [sp, #1048]
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x19, [sp, #1144] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x20, [sp, #1136] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x21, [sp, #1128] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x22, [sp, #1120] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x23, [sp, #1112] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x24, [sp, #1104] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x25, [sp, #1096] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1152
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  %b = alloca double
  tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"()
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"()
  store i64 %d, ptr %a
  store double %e, ptr %b
  ret i32 0
}

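; The locally-streaming variant additionally stores the vector length values
; (rdsvl/cntd) used for the `vg` CFI offset and spills/reloads the incoming d0
; across the smstart/smstop pair; in the padded runs the double local still
; ends up next to the FPR saves while the i64 local stays near sp.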
define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarch64_pstate_sm_body" "target-features"="+sme" {
; CHECK0-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    sub sp, sp, #176
; CHECK0-NEXT:    .cfi_def_cfa_offset 176
; CHECK0-NEXT:    rdsvl x9, #1
; CHECK0-NEXT:    stp d15, d14, [sp, #48] // 16-byte Folded Spill
; CHECK0-NEXT:    lsr x9, x9, #3
; CHECK0-NEXT:    stp d13, d12, [sp, #64] // 16-byte Folded Spill
; CHECK0-NEXT:    stp d11, d10, [sp, #80] // 16-byte Folded Spill
; CHECK0-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; CHECK0-NEXT:    cntd x9
; CHECK0-NEXT:    str x9, [sp, #40] // 8-byte Folded Spill
; CHECK0-NEXT:    stp d9, d8, [sp, #96] // 16-byte Folded Spill
; CHECK0-NEXT:    str x25, [sp, #112] // 8-byte Folded Spill
; CHECK0-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_offset w19, -8
; CHECK0-NEXT:    .cfi_offset w20, -16
; CHECK0-NEXT:    .cfi_offset w21, -24
; CHECK0-NEXT:    .cfi_offset w22, -32
; CHECK0-NEXT:    .cfi_offset w23, -40
; CHECK0-NEXT:    .cfi_offset w24, -48
; CHECK0-NEXT:    .cfi_offset w25, -64
; CHECK0-NEXT:    .cfi_offset b8, -72
; CHECK0-NEXT:    .cfi_offset b9, -80
; CHECK0-NEXT:    .cfi_offset b10, -88
; CHECK0-NEXT:    .cfi_offset b11, -96
; CHECK0-NEXT:    .cfi_offset b12, -104
; CHECK0-NEXT:    .cfi_offset b13, -112
; CHECK0-NEXT:    .cfi_offset b14, -120
; CHECK0-NEXT:    .cfi_offset b15, -128
; CHECK0-NEXT:    .cfi_offset vg, -136
; CHECK0-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK0-NEXT:    smstart sm
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK0-NEXT:    str x0, [sp, #24]
; CHECK0-NEXT:    str d0, [sp, #16]
; CHECK0-NEXT:    smstop sm
; CHECK0-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr x25, [sp, #112] // 8-byte Folded Reload
; CHECK0-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d9, d8, [sp, #96] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d11, d10, [sp, #80] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d13, d12, [sp, #64] // 16-byte Folded Reload
; CHECK0-NEXT:    ldp d15, d14, [sp, #48] // 16-byte Folded Reload
; CHECK0-NEXT:    add sp, sp, #176
; CHECK0-NEXT:    .cfi_def_cfa_offset 0
; CHECK0-NEXT:    .cfi_restore w19
; CHECK0-NEXT:    .cfi_restore w20
; CHECK0-NEXT:    .cfi_restore w21
; CHECK0-NEXT:    .cfi_restore w22
; CHECK0-NEXT:    .cfi_restore w23
; CHECK0-NEXT:    .cfi_restore w24
; CHECK0-NEXT:    .cfi_restore w25
; CHECK0-NEXT:    .cfi_restore b8
; CHECK0-NEXT:    .cfi_restore b9
; CHECK0-NEXT:    .cfi_restore b10
; CHECK0-NEXT:    .cfi_restore b11
; CHECK0-NEXT:    .cfi_restore b12
; CHECK0-NEXT:    .cfi_restore b13
; CHECK0-NEXT:    .cfi_restore b14
; CHECK0-NEXT:    .cfi_restore b15
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #304
; CHECK64-NEXT:    .cfi_def_cfa_offset 304
; CHECK64-NEXT:    rdsvl x9, #1
; CHECK64-NEXT:    stp d15, d14, [sp, #112] // 16-byte Folded Spill
; CHECK64-NEXT:    lsr x9, x9, #3
; CHECK64-NEXT:    stp d13, d12, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT:    stp d11, d10, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT:    str x9, [sp, #96] // 8-byte Folded Spill
; CHECK64-NEXT:    cntd x9
; CHECK64-NEXT:    str x9, [sp, #104] // 8-byte Folded Spill
; CHECK64-NEXT:    stp d9, d8, [sp, #160] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x29, x25, [sp, #240] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x24, x23, [sp, #256] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x22, x21, [sp, #272] // 16-byte Folded Spill
; CHECK64-NEXT:    stp x20, x19, [sp, #288] // 16-byte Folded Spill
; CHECK64-NEXT:    .cfi_offset w19, -8
; CHECK64-NEXT:    .cfi_offset w20, -16
; CHECK64-NEXT:    .cfi_offset w21, -24
; CHECK64-NEXT:    .cfi_offset w22, -32
; CHECK64-NEXT:    .cfi_offset w23, -40
; CHECK64-NEXT:    .cfi_offset w24, -48
; CHECK64-NEXT:    .cfi_offset w25, -56
; CHECK64-NEXT:    .cfi_offset w29, -64
; CHECK64-NEXT:    .cfi_offset b8, -136
; CHECK64-NEXT:    .cfi_offset b9, -144
; CHECK64-NEXT:    .cfi_offset b10, -152
; CHECK64-NEXT:    .cfi_offset b11, -160
; CHECK64-NEXT:    .cfi_offset b12, -168
; CHECK64-NEXT:    .cfi_offset b13, -176
; CHECK64-NEXT:    .cfi_offset b14, -184
; CHECK64-NEXT:    .cfi_offset b15, -192
; CHECK64-NEXT:    .cfi_offset vg, -200
; CHECK64-NEXT:    str d0, [sp, #80] // 8-byte Folded Spill
; CHECK64-NEXT:    smstart sm
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    ldr d0, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT:    str x0, [sp, #8]
; CHECK64-NEXT:    str d0, [sp, #88]
; CHECK64-NEXT:    smstop sm
; CHECK64-NEXT:    ldp x20, x19, [sp, #288] // 16-byte Folded Reload
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    ldp x22, x21, [sp, #272] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x24, x23, [sp, #256] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp x29, x25, [sp, #240] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d9, d8, [sp, #160] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d11, d10, [sp, #144] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d13, d12, [sp, #128] // 16-byte Folded Reload
; CHECK64-NEXT:    ldp d15, d14, [sp, #112] // 16-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #304
; CHECK64-NEXT:    .cfi_def_cfa_offset 0
; CHECK64-NEXT:    .cfi_restore w19
; CHECK64-NEXT:    .cfi_restore w20
; CHECK64-NEXT:    .cfi_restore w21
; CHECK64-NEXT:    .cfi_restore w22
; CHECK64-NEXT:    .cfi_restore w23
; CHECK64-NEXT:    .cfi_restore w24
; CHECK64-NEXT:    .cfi_restore w25
; CHECK64-NEXT:    .cfi_restore w29
; CHECK64-NEXT:    .cfi_restore b8
; CHECK64-NEXT:    .cfi_restore b9
; CHECK64-NEXT:    .cfi_restore b10
; CHECK64-NEXT:    .cfi_restore b11
; CHECK64-NEXT:    .cfi_restore b12
; CHECK64-NEXT:    .cfi_restore b13
; CHECK64-NEXT:    .cfi_restore b14
; CHECK64-NEXT:    .cfi_restore b15
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    rdsvl x9, #1
; CHECK1024-NEXT:    lsr x9, x9, #3
; CHECK1024-NEXT:    sub sp, sp, #1168
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1168
; CHECK1024-NEXT:    str x9, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    cntd x9
; CHECK1024-NEXT:    str x9, [sp, #8] // 8-byte Folded Spill
; CHECK1024-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK1024-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1104] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x25, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x24, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x23, [sp, #1128] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x22, [sp, #1136] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x21, [sp, #1144] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x20, [sp, #1152] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x19, [sp, #1160] // 8-byte Folded Spill
; CHECK1024-NEXT:    .cfi_offset w19, -8
; CHECK1024-NEXT:    .cfi_offset w20, -16
; CHECK1024-NEXT:    .cfi_offset w21, -24
; CHECK1024-NEXT:    .cfi_offset w22, -32
; CHECK1024-NEXT:    .cfi_offset w23, -40
; CHECK1024-NEXT:    .cfi_offset w24, -48
; CHECK1024-NEXT:    .cfi_offset w25, -56
; CHECK1024-NEXT:    .cfi_offset w29, -64
; CHECK1024-NEXT:    .cfi_offset b8, -1096
; CHECK1024-NEXT:    .cfi_offset b9, -1104
; CHECK1024-NEXT:    .cfi_offset b10, -1112
; CHECK1024-NEXT:    .cfi_offset b11, -1120
; CHECK1024-NEXT:    .cfi_offset b12, -1128
; CHECK1024-NEXT:    .cfi_offset b13, -1136
; CHECK1024-NEXT:    .cfi_offset b14, -1144
; CHECK1024-NEXT:    .cfi_offset b15, -1152
; CHECK1024-NEXT:    .cfi_offset vg, -1160
; CHECK1024-NEXT:    sub sp, sp, #1056
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2224
; CHECK1024-NEXT:    str d0, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT:    smstart sm
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    ldr d0, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT:    str x0, [sp, #8]
; CHECK1024-NEXT:    str d0, [sp, #1048]
; CHECK1024-NEXT:    smstop sm
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    add sp, sp, #1056
; CHECK1024-NEXT:    .cfi_def_cfa_offset 1168
; CHECK1024-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x19, [sp, #1160] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr x20, [sp, #1152] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x21, [sp, #1144] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x22, [sp, #1136] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x23, [sp, #1128] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x24, [sp, #1120] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x25, [sp, #1112] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr x29, [sp, #1104] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1168
; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
; CHECK1024-NEXT:    .cfi_restore w19
; CHECK1024-NEXT:    .cfi_restore w20
; CHECK1024-NEXT:    .cfi_restore w21
; CHECK1024-NEXT:    .cfi_restore w22
; CHECK1024-NEXT:    .cfi_restore w23
; CHECK1024-NEXT:    .cfi_restore w24
; CHECK1024-NEXT:    .cfi_restore w25
; CHECK1024-NEXT:    .cfi_restore w29
; CHECK1024-NEXT:    .cfi_restore b8
; CHECK1024-NEXT:    .cfi_restore b9
; CHECK1024-NEXT:    .cfi_restore b10
; CHECK1024-NEXT:    .cfi_restore b11
; CHECK1024-NEXT:    .cfi_restore b12
; CHECK1024-NEXT:    .cfi_restore b13
; CHECK1024-NEXT:    .cfi_restore b14
; CHECK1024-NEXT:    .cfi_restore b15
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  %b = alloca double
  tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"()
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"()
  store i64 %d, ptr %a
  store double %e, ptr %b
  ret i32 0
}

; We don't currently handle fpr stack arguments very well (they are hopefully relatively rare).
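; FPR stack arguments live in the caller's frame, so the local hazard padding
; cannot separate them from the caller's GPR slots; they are simply loaded
; from beyond the padding (e.g. ldr s0, [sp, #2064] in the 1024-byte run).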
define float @nocsr_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: nocsr_stackargs:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [sp]
; CHECK-NEXT:    ret
entry:
  ret float %i
}

define float @csr_x20_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: csr_x20_stackargs:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x20, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    ldr s0, [sp, #16]
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    ldr x20, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{x20}"() #1
  ret float %i
}

define float @csr_d8_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: csr_d8_stackargs:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    .cfi_offset b8, -16
; CHECK0-NEXT:    ldr s0, [sp, #16]
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: csr_d8_stackargs:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #144
; CHECK64-NEXT:    str d8, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    .cfi_def_cfa_offset 144
; CHECK64-NEXT:    .cfi_offset b8, -80
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    ldr s0, [sp, #144]
; CHECK64-NEXT:    ldr d8, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #144
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: csr_d8_stackargs:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_def_cfa_offset 2064
; CHECK1024-NEXT:    .cfi_offset w29, -8
; CHECK1024-NEXT:    .cfi_offset b8, -1040
; CHECK1024-NEXT:    ldr s0, [sp, #2064]
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8}"() #1
  ret float %i
}

; SVE calling conventions
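; Z callee-saves are spilled as full scalable registers into addvl-allocated
; space; in the padded runs the hazard gap is still inserted between that
; FPR/SVE save area and the GPR-sized locals below it.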
define i32 @svecc_basic(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: svecc_basic:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
entry:
  ret i32 0
}

define i32 @svecc_csr_x20(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: svecc_csr_x20:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x20, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    ldr x20, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{x20}"() #1
  ret i32 0
}

define i32 @svecc_csr_d8(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 8 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 8 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8}"() #1
  ret i32 0
}

define i32 @svecc_csr_d8d9(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8d9:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-2
; CHECK0-NEXT:    str z9, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldr z9, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #2
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8d9:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-2
; CHECK64-NEXT:    str z9, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 80 - 16 * VG
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    ldr z9, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #2
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8d9:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-2
; CHECK1024-NEXT:    str z9, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 16 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1040 - 16 * VG
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    ldr z9, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #2
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  tail call void asm sideeffect "", "~{d8},~{d9}"() #1
  ret i32 0
}

define i32 @svecc_csr_d8_allocd(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8_allocd:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    addvl x8, sp, #1
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    str d0, [x8, #8]
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8_allocd:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 160 + 8 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    str d0, [sp, #72]
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8_allocd:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2080 + 8 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str d0, [sp, #1032]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca double
  tail call void asm sideeffect "", "~{d8}"() #1
  store double %d, ptr %a
  ret i32 0
}

define i32 @svecc_csr_d8_alloci64(i64 %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8_alloci64:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    mov x8, x0
; CHECK0-NEXT:    addvl x9, sp, #1
; CHECK0-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    str x8, [x9, #8]
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8_alloci64:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 160 + 8 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    mov x8, x0
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    str x8, [sp, #8]
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8_alloci64:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2080 + 8 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    mov x8, x0
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    str x8, [sp, #8]
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca i64
  tail call void asm sideeffect "", "~{d8}"() #1
  store i64 %d, ptr %a
  ret i32 0
}

define i32 @svecc_csr_d8_allocnxv4i32(i64 %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_d8_allocnxv4i32:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-1
; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK0-NEXT:    .cfi_offset w29, -16
; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK0-NEXT:    mov z0.s, #0 // =0x0
; CHECK0-NEXT:    ptrue p0.s
; CHECK0-NEXT:    mov w0, wzr
; CHECK0-NEXT:    //APP
; CHECK0-NEXT:    //NO_APP
; CHECK0-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK0-NEXT:    addvl sp, sp, #1
; CHECK0-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT:    ret
;
; CHECK64-LABEL: svecc_csr_d8_allocnxv4i32:
; CHECK64:       // %bb.0: // %entry
; CHECK64-NEXT:    sub sp, sp, #80
; CHECK64-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK64-NEXT:    sub sp, sp, #64
; CHECK64-NEXT:    addvl sp, sp, #-1
; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
; CHECK64-NEXT:    .cfi_offset w29, -16
; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG
; CHECK64-NEXT:    mov z0.s, #0 // =0x0
; CHECK64-NEXT:    ptrue p0.s
; CHECK64-NEXT:    add x8, sp, #64
; CHECK64-NEXT:    mov w0, wzr
; CHECK64-NEXT:    //APP
; CHECK64-NEXT:    //NO_APP
; CHECK64-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK64-NEXT:    add sp, sp, #64
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK64-NEXT:    addvl sp, sp, #1
; CHECK64-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT:    add sp, sp, #80
; CHECK64-NEXT:    ret
;
; CHECK1024-LABEL: svecc_csr_d8_allocnxv4i32:
; CHECK1024:       // %bb.0: // %entry
; CHECK1024-NEXT:    sub sp, sp, #1040
; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    str z8, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT:    sub sp, sp, #1024
; CHECK1024-NEXT:    addvl sp, sp, #-1
; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 16 * VG
; CHECK1024-NEXT:    .cfi_offset w29, -16
; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG
; CHECK1024-NEXT:    mov z0.s, #0 // =0x0
; CHECK1024-NEXT:    ptrue p0.s
; CHECK1024-NEXT:    add x8, sp, #1024
; CHECK1024-NEXT:    mov w0, wzr
; CHECK1024-NEXT:    //APP
; CHECK1024-NEXT:    //NO_APP
; CHECK1024-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK1024-NEXT:    add sp, sp, #1024
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT:    addvl sp, sp, #1
; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT:    add sp, sp, #1040
; CHECK1024-NEXT:    ret
entry:
  %a = alloca <vscale x 4 x i32>
  tail call void asm sideeffect "", "~{d8}"() #1
  store <vscale x 4 x i32> zeroinitializer, ptr %a
  ret i32 0
}

define i32 @svecc_csr_x18_25_d8_15_allocdi64(i64 %d, double %e, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: svecc_csr_x18_25_d8_15_allocdi64:
; CHECK0:       // %bb.0: // %entry
; CHECK0-NEXT:    stp x29, x25, [sp, #-64]! // 16-byte Folded Spill
; CHECK0-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK0-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK0-NEXT:    addvl sp, sp, #-8
; CHECK0-NEXT:    str z15, [sp] // 16-byte Folded Spill
; CHECK0-NEXT:    str z14, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z13, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z12, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z11, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z10, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z9, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    str z8, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK0-NEXT:    sub sp, sp, #16
1320; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 64 * VG
1321; CHECK0-NEXT:    .cfi_offset w19, -8
1322; CHECK0-NEXT:    .cfi_offset w20, -16
1323; CHECK0-NEXT:    .cfi_offset w21, -24
1324; CHECK0-NEXT:    .cfi_offset w22, -32
1325; CHECK0-NEXT:    .cfi_offset w23, -40
1326; CHECK0-NEXT:    .cfi_offset w24, -48
1327; CHECK0-NEXT:    .cfi_offset w25, -56
1328; CHECK0-NEXT:    .cfi_offset w29, -64
1329; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG
1330; CHECK0-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG
1331; CHECK0-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG
1332; CHECK0-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG
1333; CHECK0-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG
1334; CHECK0-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG
1335; CHECK0-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG
1336; CHECK0-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG
1337; CHECK0-NEXT:    mov x8, x0
1338; CHECK0-NEXT:    mov w0, wzr
1339; CHECK0-NEXT:    //APP
1340; CHECK0-NEXT:    //NO_APP
1341; CHECK0-NEXT:    //APP
1342; CHECK0-NEXT:    //NO_APP
1343; CHECK0-NEXT:    str x8, [sp, #8]
1344; CHECK0-NEXT:    str d0, [sp], #16
1345; CHECK0-NEXT:    ldr z15, [sp] // 16-byte Folded Reload
1346; CHECK0-NEXT:    ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload
1347; CHECK0-NEXT:    ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload
1348; CHECK0-NEXT:    ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload
1349; CHECK0-NEXT:    ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload
1350; CHECK0-NEXT:    ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload
1351; CHECK0-NEXT:    ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload
1352; CHECK0-NEXT:    ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload
1353; CHECK0-NEXT:    addvl sp, sp, #8
1354; CHECK0-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
1355; CHECK0-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
1356; CHECK0-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
1357; CHECK0-NEXT:    ldp x29, x25, [sp], #64 // 16-byte Folded Reload
1358; CHECK0-NEXT:    ret
1359;
1360; CHECK64-LABEL: svecc_csr_x18_25_d8_15_allocdi64:
1361; CHECK64:       // %bb.0: // %entry
1362; CHECK64-NEXT:    sub sp, sp, #128
1363; CHECK64-NEXT:    stp x29, x25, [sp, #64] // 16-byte Folded Spill
1364; CHECK64-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
1365; CHECK64-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
1366; CHECK64-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
1367; CHECK64-NEXT:    addvl sp, sp, #-8
1368; CHECK64-NEXT:    str z15, [sp] // 16-byte Folded Spill
1369; CHECK64-NEXT:    str z14, [sp, #1, mul vl] // 16-byte Folded Spill
1370; CHECK64-NEXT:    str z13, [sp, #2, mul vl] // 16-byte Folded Spill
1371; CHECK64-NEXT:    str z12, [sp, #3, mul vl] // 16-byte Folded Spill
1372; CHECK64-NEXT:    str z11, [sp, #4, mul vl] // 16-byte Folded Spill
1373; CHECK64-NEXT:    str z10, [sp, #5, mul vl] // 16-byte Folded Spill
1374; CHECK64-NEXT:    str z9, [sp, #6, mul vl] // 16-byte Folded Spill
1375; CHECK64-NEXT:    str z8, [sp, #7, mul vl] // 16-byte Folded Spill
1376; CHECK64-NEXT:    sub sp, sp, #96
1377; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x01, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 224 + 64 * VG
1378; CHECK64-NEXT:    .cfi_offset w19, -8
1379; CHECK64-NEXT:    .cfi_offset w20, -16
1380; CHECK64-NEXT:    .cfi_offset w21, -24
1381; CHECK64-NEXT:    .cfi_offset w22, -32
1382; CHECK64-NEXT:    .cfi_offset w23, -40
1383; CHECK64-NEXT:    .cfi_offset w24, -48
1384; CHECK64-NEXT:    .cfi_offset w25, -56
1385; CHECK64-NEXT:    .cfi_offset w29, -64
1386; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG
1387; CHECK64-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG
1388; CHECK64-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG
1389; CHECK64-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG
1390; CHECK64-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG
1391; CHECK64-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG
1392; CHECK64-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG
1393; CHECK64-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG
1394; CHECK64-NEXT:    mov x8, x0
1395; CHECK64-NEXT:    mov w0, wzr
1396; CHECK64-NEXT:    //APP
1397; CHECK64-NEXT:    //NO_APP
1398; CHECK64-NEXT:    //APP
1399; CHECK64-NEXT:    //NO_APP
1400; CHECK64-NEXT:    str x8, [sp, #8]
1401; CHECK64-NEXT:    str d0, [sp, #88]
1402; CHECK64-NEXT:    add sp, sp, #96
1403; CHECK64-NEXT:    ldr z15, [sp] // 16-byte Folded Reload
1404; CHECK64-NEXT:    ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload
1405; CHECK64-NEXT:    ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload
1406; CHECK64-NEXT:    ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload
1407; CHECK64-NEXT:    ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload
1408; CHECK64-NEXT:    ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload
1409; CHECK64-NEXT:    ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload
1410; CHECK64-NEXT:    ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload
1411; CHECK64-NEXT:    addvl sp, sp, #8
1412; CHECK64-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
1413; CHECK64-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
1414; CHECK64-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
1415; CHECK64-NEXT:    ldp x29, x25, [sp, #64] // 16-byte Folded Reload
1416; CHECK64-NEXT:    add sp, sp, #128
1417; CHECK64-NEXT:    ret
1418;
1419; CHECK1024-LABEL: svecc_csr_x18_25_d8_15_allocdi64:
1420; CHECK1024:       // %bb.0: // %entry
1421; CHECK1024-NEXT:    sub sp, sp, #1088
1422; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
1423; CHECK1024-NEXT:    str x25, [sp, #1032] // 8-byte Folded Spill
1424; CHECK1024-NEXT:    str x24, [sp, #1040] // 8-byte Folded Spill
1425; CHECK1024-NEXT:    str x23, [sp, #1048] // 8-byte Folded Spill
1426; CHECK1024-NEXT:    str x22, [sp, #1056] // 8-byte Folded Spill
1427; CHECK1024-NEXT:    str x21, [sp, #1064] // 8-byte Folded Spill
1428; CHECK1024-NEXT:    str x20, [sp, #1072] // 8-byte Folded Spill
1429; CHECK1024-NEXT:    str x19, [sp, #1080] // 8-byte Folded Spill
1430; CHECK1024-NEXT:    addvl sp, sp, #-8
1431; CHECK1024-NEXT:    str z15, [sp] // 16-byte Folded Spill
1432; CHECK1024-NEXT:    str z14, [sp, #1, mul vl] // 16-byte Folded Spill
1433; CHECK1024-NEXT:    str z13, [sp, #2, mul vl] // 16-byte Folded Spill
1434; CHECK1024-NEXT:    str z12, [sp, #3, mul vl] // 16-byte Folded Spill
1435; CHECK1024-NEXT:    str z11, [sp, #4, mul vl] // 16-byte Folded Spill
1436; CHECK1024-NEXT:    str z10, [sp, #5, mul vl] // 16-byte Folded Spill
1437; CHECK1024-NEXT:    str z9, [sp, #6, mul vl] // 16-byte Folded Spill
1438; CHECK1024-NEXT:    str z8, [sp, #7, mul vl] // 16-byte Folded Spill
1439; CHECK1024-NEXT:    sub sp, sp, #1056
1440; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2144 + 64 * VG
1441; CHECK1024-NEXT:    .cfi_offset w19, -8
1442; CHECK1024-NEXT:    .cfi_offset w20, -16
1443; CHECK1024-NEXT:    .cfi_offset w21, -24
1444; CHECK1024-NEXT:    .cfi_offset w22, -32
1445; CHECK1024-NEXT:    .cfi_offset w23, -40
1446; CHECK1024-NEXT:    .cfi_offset w24, -48
1447; CHECK1024-NEXT:    .cfi_offset w25, -56
1448; CHECK1024-NEXT:    .cfi_offset w29, -64
1449; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG
1450; CHECK1024-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG
1451; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG
1452; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG
1453; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG
1454; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG
1455; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG
1456; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG
1457; CHECK1024-NEXT:    mov x8, x0
1458; CHECK1024-NEXT:    mov w0, wzr
1459; CHECK1024-NEXT:    //APP
1460; CHECK1024-NEXT:    //NO_APP
1461; CHECK1024-NEXT:    //APP
1462; CHECK1024-NEXT:    //NO_APP
1463; CHECK1024-NEXT:    str x8, [sp, #8]
1464; CHECK1024-NEXT:    str d0, [sp, #1048]
1465; CHECK1024-NEXT:    add sp, sp, #1056
1466; CHECK1024-NEXT:    ldr z15, [sp] // 16-byte Folded Reload
1467; CHECK1024-NEXT:    ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload
1468; CHECK1024-NEXT:    ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload
1469; CHECK1024-NEXT:    ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload
1470; CHECK1024-NEXT:    ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload
1471; CHECK1024-NEXT:    ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload
1472; CHECK1024-NEXT:    ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload
1473; CHECK1024-NEXT:    ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload
1474; CHECK1024-NEXT:    addvl sp, sp, #8
1475; CHECK1024-NEXT:    ldr x19, [sp, #1080] // 8-byte Folded Reload
1476; CHECK1024-NEXT:    ldr x20, [sp, #1072] // 8-byte Folded Reload
1477; CHECK1024-NEXT:    ldr x21, [sp, #1064] // 8-byte Folded Reload
1478; CHECK1024-NEXT:    ldr x22, [sp, #1056] // 8-byte Folded Reload
1479; CHECK1024-NEXT:    ldr x23, [sp, #1048] // 8-byte Folded Reload
1480; CHECK1024-NEXT:    ldr x24, [sp, #1040] // 8-byte Folded Reload
1481; CHECK1024-NEXT:    ldr x25, [sp, #1032] // 8-byte Folded Reload
1482; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
1483; CHECK1024-NEXT:    add sp, sp, #1088
1484; CHECK1024-NEXT:    ret
1485entry:
1486  %a = alloca i64
1487  %b = alloca double
1488  tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"()
1489  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"()
1490  store i64 %d, ptr %a
1491  store double %e, ptr %b
1492  ret i32 0
1493}
1494
1495
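; Arrays of SVE predicates are passed and returned directly in p0-p3, so no stack frame (and no hazard padding) is needed.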
1496define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>] %arg1, [2 x <vscale x 4 x i1>] %arg2) nounwind "aarch64_pstate_sm_compatible" {
1497; CHECK-LABEL: sve_signature_pred_2xv4i1:
1498; CHECK:       // %bb.0:
1499; CHECK-NEXT:    mov p1.b, p3.b
1500; CHECK-NEXT:    mov p0.b, p2.b
1501; CHECK-NEXT:    ret
1502  ret [2 x <vscale x 4 x i1>] %arg2
1503}
1504
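; The caller swaps the two predicate arguments and must spill p4/p5 around the call; with hazard padding the predicate spill slots are kept separate from the GPR saves.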
1505define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1_caller([2 x <vscale x 4 x i1>] %arg1, [2 x <vscale x 4 x i1>] %arg2) nounwind "aarch64_pstate_sm_compatible" {
1506; CHECK0-LABEL: sve_signature_pred_2xv4i1_caller:
1507; CHECK0:       // %bb.0:
1508; CHECK0-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
1509; CHECK0-NEXT:    addvl sp, sp, #-1
1510; CHECK0-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
1511; CHECK0-NEXT:    mov p5.b, p0.b
1512; CHECK0-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
1513; CHECK0-NEXT:    mov p4.b, p1.b
1514; CHECK0-NEXT:    mov p0.b, p2.b
1515; CHECK0-NEXT:    mov p1.b, p3.b
1516; CHECK0-NEXT:    mov p2.b, p5.b
1517; CHECK0-NEXT:    mov p3.b, p4.b
1518; CHECK0-NEXT:    bl sve_signature_pred_2xv4i1
1519; CHECK0-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
1520; CHECK0-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
1521; CHECK0-NEXT:    addvl sp, sp, #1
1522; CHECK0-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
1523; CHECK0-NEXT:    ret
1524;
1525; CHECK64-LABEL: sve_signature_pred_2xv4i1_caller:
1526; CHECK64:       // %bb.0:
1527; CHECK64-NEXT:    sub sp, sp, #80
1528; CHECK64-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
1529; CHECK64-NEXT:    addvl sp, sp, #-1
1530; CHECK64-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
1531; CHECK64-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
1532; CHECK64-NEXT:    sub sp, sp, #64
1533; CHECK64-NEXT:    mov p4.b, p1.b
1534; CHECK64-NEXT:    mov p5.b, p0.b
1535; CHECK64-NEXT:    mov p0.b, p2.b
1536; CHECK64-NEXT:    mov p1.b, p3.b
1537; CHECK64-NEXT:    mov p2.b, p5.b
1538; CHECK64-NEXT:    mov p3.b, p4.b
1539; CHECK64-NEXT:    bl sve_signature_pred_2xv4i1
1540; CHECK64-NEXT:    add sp, sp, #64
1541; CHECK64-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
1542; CHECK64-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
1543; CHECK64-NEXT:    addvl sp, sp, #1
1544; CHECK64-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
1545; CHECK64-NEXT:    add sp, sp, #80
1546; CHECK64-NEXT:    ret
1547;
1548; CHECK1024-LABEL: sve_signature_pred_2xv4i1_caller:
1549; CHECK1024:       // %bb.0:
1550; CHECK1024-NEXT:    sub sp, sp, #1040
1551; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
1552; CHECK1024-NEXT:    str x30, [sp, #1032] // 8-byte Folded Spill
1553; CHECK1024-NEXT:    addvl sp, sp, #-1
1554; CHECK1024-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
1555; CHECK1024-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
1556; CHECK1024-NEXT:    sub sp, sp, #1024
1557; CHECK1024-NEXT:    mov p4.b, p1.b
1558; CHECK1024-NEXT:    mov p5.b, p0.b
1559; CHECK1024-NEXT:    mov p0.b, p2.b
1560; CHECK1024-NEXT:    mov p1.b, p3.b
1561; CHECK1024-NEXT:    mov p2.b, p5.b
1562; CHECK1024-NEXT:    mov p3.b, p4.b
1563; CHECK1024-NEXT:    bl sve_signature_pred_2xv4i1
1564; CHECK1024-NEXT:    add sp, sp, #1024
1565; CHECK1024-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
1566; CHECK1024-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
1567; CHECK1024-NEXT:    addvl sp, sp, #1
1568; CHECK1024-NEXT:    ldr x30, [sp, #1032] // 8-byte Folded Reload
1569; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
1570; CHECK1024-NEXT:    add sp, sp, #1040
1571; CHECK1024-NEXT:    ret
1572  %res = call [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>] %arg2, [2 x <vscale x 4 x i1>] %arg1)
1573  ret [2 x <vscale x 4 x i1>] %res
1574}
1575
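; fp128 compares are lowered to libcalls (__lttf2, __getf2). As the function is streaming-compatible, it queries __arm_sme_state and brackets each libcall with smstop/smstart.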
1576define i32 @f128_libcall(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) "aarch64_pstate_sm_compatible" {
1577; CHECK0-LABEL: f128_libcall:
1578; CHECK0:       // %bb.0:
1579; CHECK0-NEXT:    sub sp, sp, #176
1580; CHECK0-NEXT:    .cfi_def_cfa_offset 176
1581; CHECK0-NEXT:    cntd x9
1582; CHECK0-NEXT:    stp d15, d14, [sp, #64] // 16-byte Folded Spill
1583; CHECK0-NEXT:    stp d13, d12, [sp, #80] // 16-byte Folded Spill
1584; CHECK0-NEXT:    stp d11, d10, [sp, #96] // 16-byte Folded Spill
1585; CHECK0-NEXT:    stp d9, d8, [sp, #112] // 16-byte Folded Spill
1586; CHECK0-NEXT:    stp x30, x9, [sp, #128] // 16-byte Folded Spill
1587; CHECK0-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
1588; CHECK0-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
1589; CHECK0-NEXT:    .cfi_offset w19, -8
1590; CHECK0-NEXT:    .cfi_offset w20, -16
1591; CHECK0-NEXT:    .cfi_offset w21, -24
1592; CHECK0-NEXT:    .cfi_offset w22, -32
1593; CHECK0-NEXT:    .cfi_offset w30, -48
1594; CHECK0-NEXT:    .cfi_offset b8, -56
1595; CHECK0-NEXT:    .cfi_offset b9, -64
1596; CHECK0-NEXT:    .cfi_offset b10, -72
1597; CHECK0-NEXT:    .cfi_offset b11, -80
1598; CHECK0-NEXT:    .cfi_offset b12, -88
1599; CHECK0-NEXT:    .cfi_offset b13, -96
1600; CHECK0-NEXT:    .cfi_offset b14, -104
1601; CHECK0-NEXT:    .cfi_offset b15, -112
1602; CHECK0-NEXT:    mov w19, w1
1603; CHECK0-NEXT:    mov w20, w0
1604; CHECK0-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
1605; CHECK0-NEXT:    stp q2, q3, [sp, #32] // 32-byte Folded Spill
1606; CHECK0-NEXT:    bl __arm_sme_state
1607; CHECK0-NEXT:    and x21, x0, #0x1
1608; CHECK0-NEXT:    .cfi_offset vg, -40
1609; CHECK0-NEXT:    tbz w21, #0, .LBB27_2
1610; CHECK0-NEXT:  // %bb.1:
1611; CHECK0-NEXT:    smstop sm
1612; CHECK0-NEXT:  .LBB27_2:
1613; CHECK0-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
1614; CHECK0-NEXT:    bl __lttf2
1615; CHECK0-NEXT:    tbz w21, #0, .LBB27_4
1616; CHECK0-NEXT:  // %bb.3:
1617; CHECK0-NEXT:    smstart sm
1618; CHECK0-NEXT:  .LBB27_4:
1619; CHECK0-NEXT:    cmp w0, #0
1620; CHECK0-NEXT:    .cfi_restore vg
1621; CHECK0-NEXT:    cset w21, lt
1622; CHECK0-NEXT:    bl __arm_sme_state
1623; CHECK0-NEXT:    and x22, x0, #0x1
1624; CHECK0-NEXT:    .cfi_offset vg, -40
1625; CHECK0-NEXT:    tbz w22, #0, .LBB27_6
1626; CHECK0-NEXT:  // %bb.5:
1627; CHECK0-NEXT:    smstop sm
1628; CHECK0-NEXT:  .LBB27_6:
1629; CHECK0-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
1630; CHECK0-NEXT:    bl __getf2
1631; CHECK0-NEXT:    tbz w22, #0, .LBB27_8
1632; CHECK0-NEXT:  // %bb.7:
1633; CHECK0-NEXT:    smstart sm
1634; CHECK0-NEXT:  .LBB27_8:
1635; CHECK0-NEXT:    cmp w0, #0
1636; CHECK0-NEXT:    cset w8, ge
1637; CHECK0-NEXT:    tst w8, w21
1638; CHECK0-NEXT:    csel w0, w20, w19, ne
1639; CHECK0-NEXT:    .cfi_restore vg
1640; CHECK0-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
1641; CHECK0-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
1642; CHECK0-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
1643; CHECK0-NEXT:    ldp d9, d8, [sp, #112] // 16-byte Folded Reload
1644; CHECK0-NEXT:    ldp d11, d10, [sp, #96] // 16-byte Folded Reload
1645; CHECK0-NEXT:    ldp d13, d12, [sp, #80] // 16-byte Folded Reload
1646; CHECK0-NEXT:    ldp d15, d14, [sp, #64] // 16-byte Folded Reload
1647; CHECK0-NEXT:    add sp, sp, #176
1648; CHECK0-NEXT:    .cfi_def_cfa_offset 0
1649; CHECK0-NEXT:    .cfi_restore w19
1650; CHECK0-NEXT:    .cfi_restore w20
1651; CHECK0-NEXT:    .cfi_restore w21
1652; CHECK0-NEXT:    .cfi_restore w22
1653; CHECK0-NEXT:    .cfi_restore w30
1654; CHECK0-NEXT:    .cfi_restore b8
1655; CHECK0-NEXT:    .cfi_restore b9
1656; CHECK0-NEXT:    .cfi_restore b10
1657; CHECK0-NEXT:    .cfi_restore b11
1658; CHECK0-NEXT:    .cfi_restore b12
1659; CHECK0-NEXT:    .cfi_restore b13
1660; CHECK0-NEXT:    .cfi_restore b14
1661; CHECK0-NEXT:    .cfi_restore b15
1662; CHECK0-NEXT:    ret
1663;
1664; CHECK64-LABEL: f128_libcall:
1665; CHECK64:       // %bb.0:
1666; CHECK64-NEXT:    sub sp, sp, #320
1667; CHECK64-NEXT:    .cfi_def_cfa_offset 320
1668; CHECK64-NEXT:    cntd x9
1669; CHECK64-NEXT:    stp d15, d14, [sp, #128] // 16-byte Folded Spill
1670; CHECK64-NEXT:    stp d13, d12, [sp, #144] // 16-byte Folded Spill
1671; CHECK64-NEXT:    stp d11, d10, [sp, #160] // 16-byte Folded Spill
1672; CHECK64-NEXT:    stp d9, d8, [sp, #176] // 16-byte Folded Spill
1673; CHECK64-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
1674; CHECK64-NEXT:    stp x9, x22, [sp, #272] // 16-byte Folded Spill
1675; CHECK64-NEXT:    stp x21, x20, [sp, #288] // 16-byte Folded Spill
1676; CHECK64-NEXT:    str x19, [sp, #304] // 8-byte Folded Spill
1677; CHECK64-NEXT:    .cfi_offset w19, -16
1678; CHECK64-NEXT:    .cfi_offset w20, -24
1679; CHECK64-NEXT:    .cfi_offset w21, -32
1680; CHECK64-NEXT:    .cfi_offset w22, -40
1681; CHECK64-NEXT:    .cfi_offset w30, -56
1682; CHECK64-NEXT:    .cfi_offset w29, -64
1683; CHECK64-NEXT:    .cfi_offset b8, -136
1684; CHECK64-NEXT:    .cfi_offset b9, -144
1685; CHECK64-NEXT:    .cfi_offset b10, -152
1686; CHECK64-NEXT:    .cfi_offset b11, -160
1687; CHECK64-NEXT:    .cfi_offset b12, -168
1688; CHECK64-NEXT:    .cfi_offset b13, -176
1689; CHECK64-NEXT:    .cfi_offset b14, -184
1690; CHECK64-NEXT:    .cfi_offset b15, -192
1691; CHECK64-NEXT:    mov w19, w1
1692; CHECK64-NEXT:    mov w20, w0
1693; CHECK64-NEXT:    stp q0, q1, [sp, #64] // 32-byte Folded Spill
1694; CHECK64-NEXT:    stp q2, q3, [sp, #96] // 32-byte Folded Spill
1695; CHECK64-NEXT:    bl __arm_sme_state
1696; CHECK64-NEXT:    and x21, x0, #0x1
1697; CHECK64-NEXT:    .cfi_offset vg, -48
1698; CHECK64-NEXT:    tbz w21, #0, .LBB27_2
1699; CHECK64-NEXT:  // %bb.1:
1700; CHECK64-NEXT:    smstop sm
1701; CHECK64-NEXT:  .LBB27_2:
1702; CHECK64-NEXT:    ldp q0, q1, [sp, #64] // 32-byte Folded Reload
1703; CHECK64-NEXT:    bl __lttf2
1704; CHECK64-NEXT:    tbz w21, #0, .LBB27_4
1705; CHECK64-NEXT:  // %bb.3:
1706; CHECK64-NEXT:    smstart sm
1707; CHECK64-NEXT:  .LBB27_4:
1708; CHECK64-NEXT:    cmp w0, #0
1709; CHECK64-NEXT:    .cfi_restore vg
1710; CHECK64-NEXT:    cset w21, lt
1711; CHECK64-NEXT:    bl __arm_sme_state
1712; CHECK64-NEXT:    and x22, x0, #0x1
1713; CHECK64-NEXT:    .cfi_offset vg, -48
1714; CHECK64-NEXT:    tbz w22, #0, .LBB27_6
1715; CHECK64-NEXT:  // %bb.5:
1716; CHECK64-NEXT:    smstop sm
1717; CHECK64-NEXT:  .LBB27_6:
1718; CHECK64-NEXT:    ldp q0, q1, [sp, #96] // 32-byte Folded Reload
1719; CHECK64-NEXT:    bl __getf2
1720; CHECK64-NEXT:    tbz w22, #0, .LBB27_8
1721; CHECK64-NEXT:  // %bb.7:
1722; CHECK64-NEXT:    smstart sm
1723; CHECK64-NEXT:  .LBB27_8:
1724; CHECK64-NEXT:    cmp w0, #0
1725; CHECK64-NEXT:    cset w8, ge
1726; CHECK64-NEXT:    tst w8, w21
1727; CHECK64-NEXT:    csel w0, w20, w19, ne
1728; CHECK64-NEXT:    .cfi_restore vg
1729; CHECK64-NEXT:    ldp x20, x19, [sp, #296] // 16-byte Folded Reload
1730; CHECK64-NEXT:    ldp x22, x21, [sp, #280] // 16-byte Folded Reload
1731; CHECK64-NEXT:    ldp x29, x30, [sp, #256] // 16-byte Folded Reload
1732; CHECK64-NEXT:    ldp d9, d8, [sp, #176] // 16-byte Folded Reload
1733; CHECK64-NEXT:    ldp d11, d10, [sp, #160] // 16-byte Folded Reload
1734; CHECK64-NEXT:    ldp d13, d12, [sp, #144] // 16-byte Folded Reload
1735; CHECK64-NEXT:    ldp d15, d14, [sp, #128] // 16-byte Folded Reload
1736; CHECK64-NEXT:    add sp, sp, #320
1737; CHECK64-NEXT:    .cfi_def_cfa_offset 0
1738; CHECK64-NEXT:    .cfi_restore w19
1739; CHECK64-NEXT:    .cfi_restore w20
1740; CHECK64-NEXT:    .cfi_restore w21
1741; CHECK64-NEXT:    .cfi_restore w22
1742; CHECK64-NEXT:    .cfi_restore w30
1743; CHECK64-NEXT:    .cfi_restore w29
1744; CHECK64-NEXT:    .cfi_restore b8
1745; CHECK64-NEXT:    .cfi_restore b9
1746; CHECK64-NEXT:    .cfi_restore b10
1747; CHECK64-NEXT:    .cfi_restore b11
1748; CHECK64-NEXT:    .cfi_restore b12
1749; CHECK64-NEXT:    .cfi_restore b13
1750; CHECK64-NEXT:    .cfi_restore b14
1751; CHECK64-NEXT:    .cfi_restore b15
1752; CHECK64-NEXT:    ret
1753;
1754; CHECK1024-LABEL: f128_libcall:
1755; CHECK1024:       // %bb.0:
1756; CHECK1024-NEXT:    sub sp, sp, #1152
1757; CHECK1024-NEXT:    .cfi_def_cfa_offset 1152
1758; CHECK1024-NEXT:    cntd x9
1759; CHECK1024-NEXT:    stp d15, d14, [sp] // 16-byte Folded Spill
1760; CHECK1024-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
1761; CHECK1024-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
1762; CHECK1024-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
1763; CHECK1024-NEXT:    str x29, [sp, #1088] // 8-byte Folded Spill
1764; CHECK1024-NEXT:    str x30, [sp, #1096] // 8-byte Folded Spill
1765; CHECK1024-NEXT:    str x9, [sp, #1104] // 8-byte Folded Spill
1766; CHECK1024-NEXT:    str x22, [sp, #1112] // 8-byte Folded Spill
1767; CHECK1024-NEXT:    str x21, [sp, #1120] // 8-byte Folded Spill
1768; CHECK1024-NEXT:    str x20, [sp, #1128] // 8-byte Folded Spill
1769; CHECK1024-NEXT:    str x19, [sp, #1136] // 8-byte Folded Spill
1770; CHECK1024-NEXT:    .cfi_offset w19, -16
1771; CHECK1024-NEXT:    .cfi_offset w20, -24
1772; CHECK1024-NEXT:    .cfi_offset w21, -32
1773; CHECK1024-NEXT:    .cfi_offset w22, -40
1774; CHECK1024-NEXT:    .cfi_offset w30, -56
1775; CHECK1024-NEXT:    .cfi_offset w29, -64
1776; CHECK1024-NEXT:    .cfi_offset b8, -1096
1777; CHECK1024-NEXT:    .cfi_offset b9, -1104
1778; CHECK1024-NEXT:    .cfi_offset b10, -1112
1779; CHECK1024-NEXT:    .cfi_offset b11, -1120
1780; CHECK1024-NEXT:    .cfi_offset b12, -1128
1781; CHECK1024-NEXT:    .cfi_offset b13, -1136
1782; CHECK1024-NEXT:    .cfi_offset b14, -1144
1783; CHECK1024-NEXT:    .cfi_offset b15, -1152
1784; CHECK1024-NEXT:    sub sp, sp, #1088
1785; CHECK1024-NEXT:    .cfi_def_cfa_offset 2240
1786; CHECK1024-NEXT:    mov w19, w1
1787; CHECK1024-NEXT:    mov w20, w0
1788; CHECK1024-NEXT:    str q3, [sp, #1072] // 16-byte Folded Spill
1789; CHECK1024-NEXT:    str q2, [sp, #1056] // 16-byte Folded Spill
1790; CHECK1024-NEXT:    str q1, [sp, #1040] // 16-byte Folded Spill
1791; CHECK1024-NEXT:    str q0, [sp, #1024] // 16-byte Folded Spill
1792; CHECK1024-NEXT:    bl __arm_sme_state
1793; CHECK1024-NEXT:    and x21, x0, #0x1
1794; CHECK1024-NEXT:    .cfi_offset vg, -48
1795; CHECK1024-NEXT:    tbz w21, #0, .LBB27_2
1796; CHECK1024-NEXT:  // %bb.1:
1797; CHECK1024-NEXT:    smstop sm
1798; CHECK1024-NEXT:  .LBB27_2:
1799; CHECK1024-NEXT:    ldr q0, [sp, #1024] // 16-byte Folded Reload
1800; CHECK1024-NEXT:    ldr q1, [sp, #1040] // 16-byte Folded Reload
1801; CHECK1024-NEXT:    bl __lttf2
1802; CHECK1024-NEXT:    tbz w21, #0, .LBB27_4
1803; CHECK1024-NEXT:  // %bb.3:
1804; CHECK1024-NEXT:    smstart sm
1805; CHECK1024-NEXT:  .LBB27_4:
1806; CHECK1024-NEXT:    cmp w0, #0
1807; CHECK1024-NEXT:    .cfi_restore vg
1808; CHECK1024-NEXT:    cset w21, lt
1809; CHECK1024-NEXT:    bl __arm_sme_state
1810; CHECK1024-NEXT:    and x22, x0, #0x1
1811; CHECK1024-NEXT:    .cfi_offset vg, -48
1812; CHECK1024-NEXT:    tbz w22, #0, .LBB27_6
1813; CHECK1024-NEXT:  // %bb.5:
1814; CHECK1024-NEXT:    smstop sm
1815; CHECK1024-NEXT:  .LBB27_6:
1816; CHECK1024-NEXT:    ldr q0, [sp, #1056] // 16-byte Folded Reload
1817; CHECK1024-NEXT:    ldr q1, [sp, #1072] // 16-byte Folded Reload
1818; CHECK1024-NEXT:    bl __getf2
1819; CHECK1024-NEXT:    tbz w22, #0, .LBB27_8
1820; CHECK1024-NEXT:  // %bb.7:
1821; CHECK1024-NEXT:    smstart sm
1822; CHECK1024-NEXT:  .LBB27_8:
1823; CHECK1024-NEXT:    cmp w0, #0
1824; CHECK1024-NEXT:    cset w8, ge
1825; CHECK1024-NEXT:    tst w8, w21
1826; CHECK1024-NEXT:    csel w0, w20, w19, ne
1827; CHECK1024-NEXT:    .cfi_restore vg
1828; CHECK1024-NEXT:    add sp, sp, #1088
1829; CHECK1024-NEXT:    .cfi_def_cfa_offset 1152
1830; CHECK1024-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1831; CHECK1024-NEXT:    ldr x19, [sp, #1136] // 8-byte Folded Reload
1832; CHECK1024-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1833; CHECK1024-NEXT:    ldr x20, [sp, #1128] // 8-byte Folded Reload
1834; CHECK1024-NEXT:    ldr x21, [sp, #1120] // 8-byte Folded Reload
1835; CHECK1024-NEXT:    ldr x22, [sp, #1112] // 8-byte Folded Reload
1836; CHECK1024-NEXT:    ldr x30, [sp, #1096] // 8-byte Folded Reload
1837; CHECK1024-NEXT:    ldr x29, [sp, #1088] // 8-byte Folded Reload
1838; CHECK1024-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1839; CHECK1024-NEXT:    ldp d15, d14, [sp] // 16-byte Folded Reload
1840; CHECK1024-NEXT:    add sp, sp, #1152
1841; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
1842; CHECK1024-NEXT:    .cfi_restore w19
1843; CHECK1024-NEXT:    .cfi_restore w20
1844; CHECK1024-NEXT:    .cfi_restore w21
1845; CHECK1024-NEXT:    .cfi_restore w22
1846; CHECK1024-NEXT:    .cfi_restore w30
1847; CHECK1024-NEXT:    .cfi_restore w29
1848; CHECK1024-NEXT:    .cfi_restore b8
1849; CHECK1024-NEXT:    .cfi_restore b9
1850; CHECK1024-NEXT:    .cfi_restore b10
1851; CHECK1024-NEXT:    .cfi_restore b11
1852; CHECK1024-NEXT:    .cfi_restore b12
1853; CHECK1024-NEXT:    .cfi_restore b13
1854; CHECK1024-NEXT:    .cfi_restore b14
1855; CHECK1024-NEXT:    .cfi_restore b15
1856; CHECK1024-NEXT:    ret
1857  %c0 = fcmp olt fp128 %v0, %v1
1858  %c1 = fcmp oge fp128 %v2, %v3
1859  %cr = and i1 %c1, %c0
1860  %sel = select i1 %cr, i32 %a, i32 %b
1861  ret i32 %sel
1862}
1863
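; A call from a function with SVE callee-saved registers: the Z/P registers are spilled, and the larger hazard sizes insert padding between the GPR saves and the SVE save area.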
1864define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
1865; CHECK0-LABEL: svecc_call:
1866; CHECK0:       // %bb.0: // %entry
1867; CHECK0-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
1868; CHECK0-NEXT:    .cfi_def_cfa_offset 48
1869; CHECK0-NEXT:    cntd x9
1870; CHECK0-NEXT:    stp x9, x28, [sp, #16] // 16-byte Folded Spill
1871; CHECK0-NEXT:    stp x27, x19, [sp, #32] // 16-byte Folded Spill
1872; CHECK0-NEXT:    .cfi_offset w19, -8
1873; CHECK0-NEXT:    .cfi_offset w27, -16
1874; CHECK0-NEXT:    .cfi_offset w28, -24
1875; CHECK0-NEXT:    .cfi_offset w30, -40
1876; CHECK0-NEXT:    .cfi_offset w29, -48
1877; CHECK0-NEXT:    addvl sp, sp, #-18
1878; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG
1879; CHECK0-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
1880; CHECK0-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
1881; CHECK0-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
1882; CHECK0-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
1883; CHECK0-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
1884; CHECK0-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
1885; CHECK0-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
1886; CHECK0-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
1887; CHECK0-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
1888; CHECK0-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
1889; CHECK0-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
1890; CHECK0-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
1891; CHECK0-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
1892; CHECK0-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
1893; CHECK0-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
1894; CHECK0-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
1895; CHECK0-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
1896; CHECK0-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
1897; CHECK0-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
1898; CHECK0-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
1899; CHECK0-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
1900; CHECK0-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
1901; CHECK0-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
1902; CHECK0-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
1903; CHECK0-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
1904; CHECK0-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
1905; CHECK0-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
1906; CHECK0-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
1907; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
1908; CHECK0-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
1909; CHECK0-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
1910; CHECK0-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
1911; CHECK0-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
1912; CHECK0-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
1913; CHECK0-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
1914; CHECK0-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
1915; CHECK0-NEXT:    mov x8, x0
1916; CHECK0-NEXT:    //APP
1917; CHECK0-NEXT:    //NO_APP
1918; CHECK0-NEXT:    bl __arm_sme_state
1919; CHECK0-NEXT:    and x19, x0, #0x1
1920; CHECK0-NEXT:    .cfi_offset vg, -32
1921; CHECK0-NEXT:    tbz w19, #0, .LBB28_2
1922; CHECK0-NEXT:  // %bb.1: // %entry
1923; CHECK0-NEXT:    smstop sm
1924; CHECK0-NEXT:  .LBB28_2: // %entry
1925; CHECK0-NEXT:    mov x0, x8
1926; CHECK0-NEXT:    mov w1, #45 // =0x2d
1927; CHECK0-NEXT:    mov w2, #37 // =0x25
1928; CHECK0-NEXT:    bl memset
1929; CHECK0-NEXT:    tbz w19, #0, .LBB28_4
1930; CHECK0-NEXT:  // %bb.3: // %entry
1931; CHECK0-NEXT:    smstart sm
1932; CHECK0-NEXT:  .LBB28_4: // %entry
1933; CHECK0-NEXT:    mov w0, #22647 // =0x5877
1934; CHECK0-NEXT:    movk w0, #59491, lsl #16
1935; CHECK0-NEXT:    .cfi_restore vg
1936; CHECK0-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
1937; CHECK0-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
1938; CHECK0-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
1939; CHECK0-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
1940; CHECK0-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
1941; CHECK0-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
1942; CHECK0-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
1943; CHECK0-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
1944; CHECK0-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
1945; CHECK0-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
1946; CHECK0-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
1947; CHECK0-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
1948; CHECK0-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
1949; CHECK0-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
1950; CHECK0-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
1951; CHECK0-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
1952; CHECK0-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
1953; CHECK0-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
1954; CHECK0-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
1955; CHECK0-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
1956; CHECK0-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
1957; CHECK0-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
1958; CHECK0-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
1959; CHECK0-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
1960; CHECK0-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
1961; CHECK0-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
1962; CHECK0-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
1963; CHECK0-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
1964; CHECK0-NEXT:    addvl sp, sp, #18
1965; CHECK0-NEXT:    .cfi_def_cfa wsp, 48
1966; CHECK0-NEXT:    .cfi_restore z8
1967; CHECK0-NEXT:    .cfi_restore z9
1968; CHECK0-NEXT:    .cfi_restore z10
1969; CHECK0-NEXT:    .cfi_restore z11
1970; CHECK0-NEXT:    .cfi_restore z12
1971; CHECK0-NEXT:    .cfi_restore z13
1972; CHECK0-NEXT:    .cfi_restore z14
1973; CHECK0-NEXT:    .cfi_restore z15
1974; CHECK0-NEXT:    ldp x27, x19, [sp, #32] // 16-byte Folded Reload
1975; CHECK0-NEXT:    ldr x28, [sp, #24] // 8-byte Folded Reload
1976; CHECK0-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
1977; CHECK0-NEXT:    .cfi_def_cfa_offset 0
1978; CHECK0-NEXT:    .cfi_restore w19
1979; CHECK0-NEXT:    .cfi_restore w27
1980; CHECK0-NEXT:    .cfi_restore w28
1981; CHECK0-NEXT:    .cfi_restore w30
1982; CHECK0-NEXT:    .cfi_restore w29
1983; CHECK0-NEXT:    ret
1984;
1985; CHECK64-LABEL: svecc_call:
1986; CHECK64:       // %bb.0: // %entry
1987; CHECK64-NEXT:    sub sp, sp, #112
1988; CHECK64-NEXT:    .cfi_def_cfa_offset 112
1989; CHECK64-NEXT:    cntd x9
1990; CHECK64-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
1991; CHECK64-NEXT:    stp x9, x28, [sp, #80] // 16-byte Folded Spill
1992; CHECK64-NEXT:    stp x27, x19, [sp, #96] // 16-byte Folded Spill
1993; CHECK64-NEXT:    .cfi_offset w19, -8
1994; CHECK64-NEXT:    .cfi_offset w27, -16
1995; CHECK64-NEXT:    .cfi_offset w28, -24
1996; CHECK64-NEXT:    .cfi_offset w30, -40
1997; CHECK64-NEXT:    .cfi_offset w29, -48
1998; CHECK64-NEXT:    addvl sp, sp, #-18
1999; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG
2000; CHECK64-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
2001; CHECK64-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
2002; CHECK64-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
2003; CHECK64-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
2004; CHECK64-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
2005; CHECK64-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
2006; CHECK64-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
2007; CHECK64-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
2008; CHECK64-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
2009; CHECK64-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
2010; CHECK64-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
2011; CHECK64-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
2012; CHECK64-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
2013; CHECK64-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
2014; CHECK64-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
2015; CHECK64-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
2016; CHECK64-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
2017; CHECK64-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
2018; CHECK64-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
2019; CHECK64-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
2020; CHECK64-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
2021; CHECK64-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
2022; CHECK64-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
2023; CHECK64-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
2024; CHECK64-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
2025; CHECK64-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
2026; CHECK64-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
2027; CHECK64-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
2028; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 112 - 8 * VG
2029; CHECK64-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 112 - 16 * VG
2030; CHECK64-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 112 - 24 * VG
2031; CHECK64-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 112 - 32 * VG
2032; CHECK64-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 112 - 40 * VG
2033; CHECK64-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 112 - 48 * VG
2034; CHECK64-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 112 - 56 * VG
2035; CHECK64-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 112 - 64 * VG
2036; CHECK64-NEXT:    sub sp, sp, #64
2037; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x01, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 176 + 144 * VG
2038; CHECK64-NEXT:    mov x8, x0
2039; CHECK64-NEXT:    //APP
2040; CHECK64-NEXT:    //NO_APP
2041; CHECK64-NEXT:    bl __arm_sme_state
2042; CHECK64-NEXT:    and x19, x0, #0x1
2043; CHECK64-NEXT:    .cfi_offset vg, -32
2044; CHECK64-NEXT:    tbz w19, #0, .LBB28_2
2045; CHECK64-NEXT:  // %bb.1: // %entry
2046; CHECK64-NEXT:    smstop sm
2047; CHECK64-NEXT:  .LBB28_2: // %entry
2048; CHECK64-NEXT:    mov x0, x8
2049; CHECK64-NEXT:    mov w1, #45 // =0x2d
2050; CHECK64-NEXT:    mov w2, #37 // =0x25
2051; CHECK64-NEXT:    bl memset
2052; CHECK64-NEXT:    tbz w19, #0, .LBB28_4
2053; CHECK64-NEXT:  // %bb.3: // %entry
2054; CHECK64-NEXT:    smstart sm
2055; CHECK64-NEXT:  .LBB28_4: // %entry
2056; CHECK64-NEXT:    mov w0, #22647 // =0x5877
2057; CHECK64-NEXT:    movk w0, #59491, lsl #16
2058; CHECK64-NEXT:    .cfi_restore vg
2059; CHECK64-NEXT:    add sp, sp, #64
2060; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG
2061; CHECK64-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
2062; CHECK64-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
2063; CHECK64-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
2064; CHECK64-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
2065; CHECK64-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
2066; CHECK64-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
2067; CHECK64-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
2068; CHECK64-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
2069; CHECK64-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
2070; CHECK64-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
2071; CHECK64-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
2072; CHECK64-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
2073; CHECK64-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
2074; CHECK64-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
2075; CHECK64-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
2076; CHECK64-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
2077; CHECK64-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
2078; CHECK64-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
2079; CHECK64-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
2080; CHECK64-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
2081; CHECK64-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
2082; CHECK64-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
2083; CHECK64-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
2084; CHECK64-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
2085; CHECK64-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
2086; CHECK64-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
2087; CHECK64-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
2088; CHECK64-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
2089; CHECK64-NEXT:    addvl sp, sp, #18
2090; CHECK64-NEXT:    .cfi_def_cfa wsp, 112
2091; CHECK64-NEXT:    .cfi_restore z8
2092; CHECK64-NEXT:    .cfi_restore z9
2093; CHECK64-NEXT:    .cfi_restore z10
2094; CHECK64-NEXT:    .cfi_restore z11
2095; CHECK64-NEXT:    .cfi_restore z12
2096; CHECK64-NEXT:    .cfi_restore z13
2097; CHECK64-NEXT:    .cfi_restore z14
2098; CHECK64-NEXT:    .cfi_restore z15
2099; CHECK64-NEXT:    ldp x27, x19, [sp, #96] // 16-byte Folded Reload
2100; CHECK64-NEXT:    ldr x28, [sp, #88] // 8-byte Folded Reload
2101; CHECK64-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
2102; CHECK64-NEXT:    add sp, sp, #112
2103; CHECK64-NEXT:    .cfi_def_cfa_offset 0
2104; CHECK64-NEXT:    .cfi_restore w19
2105; CHECK64-NEXT:    .cfi_restore w27
2106; CHECK64-NEXT:    .cfi_restore w28
2107; CHECK64-NEXT:    .cfi_restore w30
2108; CHECK64-NEXT:    .cfi_restore w29
2109; CHECK64-NEXT:    ret
2110;
2111; CHECK1024-LABEL: svecc_call:
2112; CHECK1024:       // %bb.0: // %entry
2113; CHECK1024-NEXT:    sub sp, sp, #1072
2114; CHECK1024-NEXT:    .cfi_def_cfa_offset 1072
2115; CHECK1024-NEXT:    cntd x9
2116; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
2117; CHECK1024-NEXT:    str x30, [sp, #1032] // 8-byte Folded Spill
2118; CHECK1024-NEXT:    str x9, [sp, #1040] // 8-byte Folded Spill
2119; CHECK1024-NEXT:    str x28, [sp, #1048] // 8-byte Folded Spill
2120; CHECK1024-NEXT:    str x27, [sp, #1056] // 8-byte Folded Spill
2121; CHECK1024-NEXT:    str x19, [sp, #1064] // 8-byte Folded Spill
2122; CHECK1024-NEXT:    .cfi_offset w19, -8
2123; CHECK1024-NEXT:    .cfi_offset w27, -16
2124; CHECK1024-NEXT:    .cfi_offset w28, -24
2125; CHECK1024-NEXT:    .cfi_offset w30, -40
2126; CHECK1024-NEXT:    .cfi_offset w29, -48
2127; CHECK1024-NEXT:    addvl sp, sp, #-18
2128; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG
2129; CHECK1024-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
2130; CHECK1024-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
2131; CHECK1024-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
2132; CHECK1024-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
2133; CHECK1024-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
2134; CHECK1024-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
2135; CHECK1024-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
2136; CHECK1024-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
2137; CHECK1024-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
2138; CHECK1024-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
2139; CHECK1024-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
2140; CHECK1024-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
2141; CHECK1024-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
2142; CHECK1024-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
2143; CHECK1024-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
2144; CHECK1024-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
2145; CHECK1024-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
2146; CHECK1024-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
2147; CHECK1024-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
2148; CHECK1024-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
2149; CHECK1024-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
2150; CHECK1024-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
2151; CHECK1024-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
2152; CHECK1024-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
2153; CHECK1024-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
2154; CHECK1024-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
2155; CHECK1024-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
2156; CHECK1024-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
2157; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1072 - 8 * VG
2158; CHECK1024-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1072 - 16 * VG
2159; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1072 - 24 * VG
2160; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1072 - 32 * VG
2161; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1072 - 40 * VG
2162; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1072 - 48 * VG
2163; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1072 - 56 * VG
2164; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1072 - 64 * VG
2165; CHECK1024-NEXT:    sub sp, sp, #1024
2166; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2096 + 144 * VG
2167; CHECK1024-NEXT:    mov x8, x0
2168; CHECK1024-NEXT:    //APP
2169; CHECK1024-NEXT:    //NO_APP
2170; CHECK1024-NEXT:    bl __arm_sme_state
2171; CHECK1024-NEXT:    and x19, x0, #0x1
2172; CHECK1024-NEXT:    .cfi_offset vg, -32
2173; CHECK1024-NEXT:    tbz w19, #0, .LBB28_2
2174; CHECK1024-NEXT:  // %bb.1: // %entry
2175; CHECK1024-NEXT:    smstop sm
2176; CHECK1024-NEXT:  .LBB28_2: // %entry
2177; CHECK1024-NEXT:    mov x0, x8
2178; CHECK1024-NEXT:    mov w1, #45 // =0x2d
2179; CHECK1024-NEXT:    mov w2, #37 // =0x25
2180; CHECK1024-NEXT:    bl memset
2181; CHECK1024-NEXT:    tbz w19, #0, .LBB28_4
2182; CHECK1024-NEXT:  // %bb.3: // %entry
2183; CHECK1024-NEXT:    smstart sm
2184; CHECK1024-NEXT:  .LBB28_4: // %entry
2185; CHECK1024-NEXT:    mov w0, #22647 // =0x5877
2186; CHECK1024-NEXT:    movk w0, #59491, lsl #16
2187; CHECK1024-NEXT:    .cfi_restore vg
2188; CHECK1024-NEXT:    add sp, sp, #1024
2189; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG
2190; CHECK1024-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
2191; CHECK1024-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
2192; CHECK1024-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
2193; CHECK1024-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
2194; CHECK1024-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
2195; CHECK1024-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
2196; CHECK1024-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
2197; CHECK1024-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
2198; CHECK1024-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
2199; CHECK1024-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
2200; CHECK1024-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
2201; CHECK1024-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
2202; CHECK1024-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
2203; CHECK1024-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
2204; CHECK1024-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
2205; CHECK1024-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
2206; CHECK1024-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
2207; CHECK1024-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
2208; CHECK1024-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
2209; CHECK1024-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
2210; CHECK1024-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
2211; CHECK1024-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
2212; CHECK1024-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
2213; CHECK1024-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
2214; CHECK1024-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
2215; CHECK1024-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
2216; CHECK1024-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
2217; CHECK1024-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
2218; CHECK1024-NEXT:    addvl sp, sp, #18
2219; CHECK1024-NEXT:    .cfi_def_cfa wsp, 1072
2220; CHECK1024-NEXT:    .cfi_restore z8
2221; CHECK1024-NEXT:    .cfi_restore z9
2222; CHECK1024-NEXT:    .cfi_restore z10
2223; CHECK1024-NEXT:    .cfi_restore z11
2224; CHECK1024-NEXT:    .cfi_restore z12
2225; CHECK1024-NEXT:    .cfi_restore z13
2226; CHECK1024-NEXT:    .cfi_restore z14
2227; CHECK1024-NEXT:    .cfi_restore z15
2228; CHECK1024-NEXT:    ldr x19, [sp, #1064] // 8-byte Folded Reload
2229; CHECK1024-NEXT:    ldr x27, [sp, #1056] // 8-byte Folded Reload
2230; CHECK1024-NEXT:    ldr x28, [sp, #1048] // 8-byte Folded Reload
2231; CHECK1024-NEXT:    ldr x30, [sp, #1032] // 8-byte Folded Reload
2232; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
2233; CHECK1024-NEXT:    add sp, sp, #1072
2234; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
2235; CHECK1024-NEXT:    .cfi_restore w19
2236; CHECK1024-NEXT:    .cfi_restore w27
2237; CHECK1024-NEXT:    .cfi_restore w28
2238; CHECK1024-NEXT:    .cfi_restore w30
2239; CHECK1024-NEXT:    .cfi_restore w29
2240; CHECK1024-NEXT:    ret
2241entry:
2242  tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
2243  %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
2244  ret i32 -396142473
2245}
2246
2247define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
2248; CHECK0-LABEL: svecc_alloca_call:
2249; CHECK0:       // %bb.0: // %entry
2250; CHECK0-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
2251; CHECK0-NEXT:    .cfi_def_cfa_offset 48
2252; CHECK0-NEXT:    cntd x9
2253; CHECK0-NEXT:    stp x9, x28, [sp, #16] // 16-byte Folded Spill
2254; CHECK0-NEXT:    stp x27, x19, [sp, #32] // 16-byte Folded Spill
2255; CHECK0-NEXT:    .cfi_offset w19, -8
2256; CHECK0-NEXT:    .cfi_offset w27, -16
2257; CHECK0-NEXT:    .cfi_offset w28, -24
2258; CHECK0-NEXT:    .cfi_offset w30, -40
2259; CHECK0-NEXT:    .cfi_offset w29, -48
2260; CHECK0-NEXT:    addvl sp, sp, #-18
2261; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG
2262; CHECK0-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
2263; CHECK0-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
2264; CHECK0-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
2265; CHECK0-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
2266; CHECK0-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
2267; CHECK0-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
2268; CHECK0-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
2269; CHECK0-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
2270; CHECK0-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
2271; CHECK0-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
2272; CHECK0-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
2273; CHECK0-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
2274; CHECK0-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
2275; CHECK0-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
2276; CHECK0-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
2277; CHECK0-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
2278; CHECK0-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
2279; CHECK0-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
2280; CHECK0-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
2281; CHECK0-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
2282; CHECK0-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
2283; CHECK0-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
2284; CHECK0-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
2285; CHECK0-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
2286; CHECK0-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
2287; CHECK0-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
2288; CHECK0-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
2289; CHECK0-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
2290; CHECK0-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
2291; CHECK0-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
2292; CHECK0-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
2293; CHECK0-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
2294; CHECK0-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
2295; CHECK0-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
2296; CHECK0-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
2297; CHECK0-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
2298; CHECK0-NEXT:    sub sp, sp, #48
2299; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 96 + 144 * VG
2300; CHECK0-NEXT:    //APP
2301; CHECK0-NEXT:    //NO_APP
2302; CHECK0-NEXT:    bl __arm_sme_state
2303; CHECK0-NEXT:    and x19, x0, #0x1
2304; CHECK0-NEXT:    .cfi_offset vg, -32
2305; CHECK0-NEXT:    tbz w19, #0, .LBB29_2
2306; CHECK0-NEXT:  // %bb.1: // %entry
2307; CHECK0-NEXT:    smstop sm
2308; CHECK0-NEXT:  .LBB29_2: // %entry
2309; CHECK0-NEXT:    mov x0, sp
2310; CHECK0-NEXT:    mov w1, #45 // =0x2d
2311; CHECK0-NEXT:    mov w2, #37 // =0x25
2312; CHECK0-NEXT:    bl memset
2313; CHECK0-NEXT:    tbz w19, #0, .LBB29_4
2314; CHECK0-NEXT:  // %bb.3: // %entry
2315; CHECK0-NEXT:    smstart sm
2316; CHECK0-NEXT:  .LBB29_4: // %entry
2317; CHECK0-NEXT:    mov w0, #22647 // =0x5877
2318; CHECK0-NEXT:    movk w0, #59491, lsl #16
2319; CHECK0-NEXT:    .cfi_restore vg
2320; CHECK0-NEXT:    add sp, sp, #48
2321; CHECK0-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG
2322; CHECK0-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
2323; CHECK0-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
2324; CHECK0-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
2325; CHECK0-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
2326; CHECK0-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
2327; CHECK0-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
2328; CHECK0-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
2329; CHECK0-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
2330; CHECK0-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
2331; CHECK0-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
2332; CHECK0-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
2333; CHECK0-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
2334; CHECK0-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
2335; CHECK0-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
2336; CHECK0-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
2337; CHECK0-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
2338; CHECK0-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
2339; CHECK0-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
2340; CHECK0-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
2341; CHECK0-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
2342; CHECK0-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
2343; CHECK0-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
2344; CHECK0-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
2345; CHECK0-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
2346; CHECK0-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
2347; CHECK0-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
2348; CHECK0-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
2349; CHECK0-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
2350; CHECK0-NEXT:    addvl sp, sp, #18
2351; CHECK0-NEXT:    .cfi_def_cfa wsp, 48
2352; CHECK0-NEXT:    .cfi_restore z8
2353; CHECK0-NEXT:    .cfi_restore z9
2354; CHECK0-NEXT:    .cfi_restore z10
2355; CHECK0-NEXT:    .cfi_restore z11
2356; CHECK0-NEXT:    .cfi_restore z12
2357; CHECK0-NEXT:    .cfi_restore z13
2358; CHECK0-NEXT:    .cfi_restore z14
2359; CHECK0-NEXT:    .cfi_restore z15
2360; CHECK0-NEXT:    ldp x27, x19, [sp, #32] // 16-byte Folded Reload
2361; CHECK0-NEXT:    ldr x28, [sp, #24] // 8-byte Folded Reload
2362; CHECK0-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
2363; CHECK0-NEXT:    .cfi_def_cfa_offset 0
2364; CHECK0-NEXT:    .cfi_restore w19
2365; CHECK0-NEXT:    .cfi_restore w27
2366; CHECK0-NEXT:    .cfi_restore w28
2367; CHECK0-NEXT:    .cfi_restore w30
2368; CHECK0-NEXT:    .cfi_restore w29
2369; CHECK0-NEXT:    ret
2370;
2371; CHECK64-LABEL: svecc_alloca_call:
2372; CHECK64:       // %bb.0: // %entry
2373; CHECK64-NEXT:    sub sp, sp, #112
2374; CHECK64-NEXT:    .cfi_def_cfa_offset 112
2375; CHECK64-NEXT:    cntd x9
2376; CHECK64-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
2377; CHECK64-NEXT:    stp x9, x28, [sp, #80] // 16-byte Folded Spill
2378; CHECK64-NEXT:    stp x27, x19, [sp, #96] // 16-byte Folded Spill
2379; CHECK64-NEXT:    .cfi_offset w19, -8
2380; CHECK64-NEXT:    .cfi_offset w27, -16
2381; CHECK64-NEXT:    .cfi_offset w28, -24
2382; CHECK64-NEXT:    .cfi_offset w30, -40
2383; CHECK64-NEXT:    .cfi_offset w29, -48
2384; CHECK64-NEXT:    addvl sp, sp, #-18
2385; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG
2386; CHECK64-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
2387; CHECK64-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
2388; CHECK64-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
2389; CHECK64-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
2390; CHECK64-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
2391; CHECK64-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
2392; CHECK64-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
2393; CHECK64-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
2394; CHECK64-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
2395; CHECK64-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
2396; CHECK64-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
2397; CHECK64-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
2398; CHECK64-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
2399; CHECK64-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
2400; CHECK64-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
2401; CHECK64-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
2402; CHECK64-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
2403; CHECK64-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
2404; CHECK64-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
2405; CHECK64-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
2406; CHECK64-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
2407; CHECK64-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
2408; CHECK64-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
2409; CHECK64-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
2410; CHECK64-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
2411; CHECK64-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
2412; CHECK64-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
2413; CHECK64-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
2414; CHECK64-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 112 - 8 * VG
2415; CHECK64-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 112 - 16 * VG
2416; CHECK64-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 112 - 24 * VG
2417; CHECK64-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 112 - 32 * VG
2418; CHECK64-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 112 - 40 * VG
2419; CHECK64-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 112 - 48 * VG
2420; CHECK64-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 112 - 56 * VG
2421; CHECK64-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 112 - 64 * VG
2422; CHECK64-NEXT:    sub sp, sp, #112
2423; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x01, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 224 + 144 * VG
2424; CHECK64-NEXT:    //APP
2425; CHECK64-NEXT:    //NO_APP
2426; CHECK64-NEXT:    bl __arm_sme_state
2427; CHECK64-NEXT:    and x19, x0, #0x1
2428; CHECK64-NEXT:    .cfi_offset vg, -32
2429; CHECK64-NEXT:    tbz w19, #0, .LBB29_2
2430; CHECK64-NEXT:  // %bb.1: // %entry
2431; CHECK64-NEXT:    smstop sm
2432; CHECK64-NEXT:  .LBB29_2: // %entry
2433; CHECK64-NEXT:    mov x0, sp
2434; CHECK64-NEXT:    mov w1, #45 // =0x2d
2435; CHECK64-NEXT:    mov w2, #37 // =0x25
2436; CHECK64-NEXT:    bl memset
2437; CHECK64-NEXT:    tbz w19, #0, .LBB29_4
2438; CHECK64-NEXT:  // %bb.3: // %entry
2439; CHECK64-NEXT:    smstart sm
2440; CHECK64-NEXT:  .LBB29_4: // %entry
2441; CHECK64-NEXT:    mov w0, #22647 // =0x5877
2442; CHECK64-NEXT:    movk w0, #59491, lsl #16
2443; CHECK64-NEXT:    .cfi_restore vg
2444; CHECK64-NEXT:    add sp, sp, #112
2445; CHECK64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG
2446; CHECK64-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
2447; CHECK64-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
2448; CHECK64-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
2449; CHECK64-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
2450; CHECK64-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
2451; CHECK64-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
2452; CHECK64-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
2453; CHECK64-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
2454; CHECK64-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
2455; CHECK64-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
2456; CHECK64-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
2457; CHECK64-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
2458; CHECK64-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
2459; CHECK64-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
2460; CHECK64-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
2461; CHECK64-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
2462; CHECK64-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
2463; CHECK64-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
2464; CHECK64-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
2465; CHECK64-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
2466; CHECK64-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
2467; CHECK64-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
2468; CHECK64-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
2469; CHECK64-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
2470; CHECK64-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
2471; CHECK64-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
2472; CHECK64-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
2473; CHECK64-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
2474; CHECK64-NEXT:    addvl sp, sp, #18
2475; CHECK64-NEXT:    .cfi_def_cfa wsp, 112
2476; CHECK64-NEXT:    .cfi_restore z8
2477; CHECK64-NEXT:    .cfi_restore z9
2478; CHECK64-NEXT:    .cfi_restore z10
2479; CHECK64-NEXT:    .cfi_restore z11
2480; CHECK64-NEXT:    .cfi_restore z12
2481; CHECK64-NEXT:    .cfi_restore z13
2482; CHECK64-NEXT:    .cfi_restore z14
2483; CHECK64-NEXT:    .cfi_restore z15
2484; CHECK64-NEXT:    ldp x27, x19, [sp, #96] // 16-byte Folded Reload
2485; CHECK64-NEXT:    ldr x28, [sp, #88] // 8-byte Folded Reload
2486; CHECK64-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
2487; CHECK64-NEXT:    add sp, sp, #112
2488; CHECK64-NEXT:    .cfi_def_cfa_offset 0
2489; CHECK64-NEXT:    .cfi_restore w19
2490; CHECK64-NEXT:    .cfi_restore w27
2491; CHECK64-NEXT:    .cfi_restore w28
2492; CHECK64-NEXT:    .cfi_restore w30
2493; CHECK64-NEXT:    .cfi_restore w29
2494; CHECK64-NEXT:    ret
2495;
2496; CHECK1024-LABEL: svecc_alloca_call:
2497; CHECK1024:       // %bb.0: // %entry
2498; CHECK1024-NEXT:    sub sp, sp, #1072
2499; CHECK1024-NEXT:    .cfi_def_cfa_offset 1072
2500; CHECK1024-NEXT:    cntd x9
2501; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
2502; CHECK1024-NEXT:    str x30, [sp, #1032] // 8-byte Folded Spill
2503; CHECK1024-NEXT:    str x9, [sp, #1040] // 8-byte Folded Spill
2504; CHECK1024-NEXT:    str x28, [sp, #1048] // 8-byte Folded Spill
2505; CHECK1024-NEXT:    str x27, [sp, #1056] // 8-byte Folded Spill
2506; CHECK1024-NEXT:    str x19, [sp, #1064] // 8-byte Folded Spill
2507; CHECK1024-NEXT:    .cfi_offset w19, -8
2508; CHECK1024-NEXT:    .cfi_offset w27, -16
2509; CHECK1024-NEXT:    .cfi_offset w28, -24
2510; CHECK1024-NEXT:    .cfi_offset w30, -40
2511; CHECK1024-NEXT:    .cfi_offset w29, -48
2512; CHECK1024-NEXT:    addvl sp, sp, #-18
2513; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG
2514; CHECK1024-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
2515; CHECK1024-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
2516; CHECK1024-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
2517; CHECK1024-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
2518; CHECK1024-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
2519; CHECK1024-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
2520; CHECK1024-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
2521; CHECK1024-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
2522; CHECK1024-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
2523; CHECK1024-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
2524; CHECK1024-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
2525; CHECK1024-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
2526; CHECK1024-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
2527; CHECK1024-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
2528; CHECK1024-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
2529; CHECK1024-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
2530; CHECK1024-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
2531; CHECK1024-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
2532; CHECK1024-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
2533; CHECK1024-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
2534; CHECK1024-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
2535; CHECK1024-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
2536; CHECK1024-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
2537; CHECK1024-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
2538; CHECK1024-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
2539; CHECK1024-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
2540; CHECK1024-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
2541; CHECK1024-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
2542; CHECK1024-NEXT:    .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1072 - 8 * VG
2543; CHECK1024-NEXT:    .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1072 - 16 * VG
2544; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1072 - 24 * VG
2545; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1072 - 32 * VG
2546; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1072 - 40 * VG
2547; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1072 - 48 * VG
2548; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1072 - 56 * VG
2549; CHECK1024-NEXT:    .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1072 - 64 * VG
2550; CHECK1024-NEXT:    sub sp, sp, #1072
2551; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2144 + 144 * VG
2552; CHECK1024-NEXT:    //APP
2553; CHECK1024-NEXT:    //NO_APP
2554; CHECK1024-NEXT:    bl __arm_sme_state
2555; CHECK1024-NEXT:    and x19, x0, #0x1
2556; CHECK1024-NEXT:    .cfi_offset vg, -32
2557; CHECK1024-NEXT:    tbz w19, #0, .LBB29_2
2558; CHECK1024-NEXT:  // %bb.1: // %entry
2559; CHECK1024-NEXT:    smstop sm
2560; CHECK1024-NEXT:  .LBB29_2: // %entry
2561; CHECK1024-NEXT:    mov x0, sp
2562; CHECK1024-NEXT:    mov w1, #45 // =0x2d
2563; CHECK1024-NEXT:    mov w2, #37 // =0x25
2564; CHECK1024-NEXT:    bl memset
2565; CHECK1024-NEXT:    tbz w19, #0, .LBB29_4
2566; CHECK1024-NEXT:  // %bb.3: // %entry
2567; CHECK1024-NEXT:    smstart sm
2568; CHECK1024-NEXT:  .LBB29_4: // %entry
2569; CHECK1024-NEXT:    mov w0, #22647 // =0x5877
2570; CHECK1024-NEXT:    movk w0, #59491, lsl #16
2571; CHECK1024-NEXT:    .cfi_restore vg
2572; CHECK1024-NEXT:    add sp, sp, #1072
2573; CHECK1024-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG
2574; CHECK1024-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
2575; CHECK1024-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
2576; CHECK1024-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
2577; CHECK1024-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
2578; CHECK1024-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
2579; CHECK1024-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
2580; CHECK1024-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
2581; CHECK1024-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
2582; CHECK1024-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
2583; CHECK1024-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
2584; CHECK1024-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
2585; CHECK1024-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
2586; CHECK1024-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
2587; CHECK1024-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
2588; CHECK1024-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
2589; CHECK1024-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
2590; CHECK1024-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
2591; CHECK1024-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
2592; CHECK1024-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
2593; CHECK1024-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
2594; CHECK1024-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
2595; CHECK1024-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
2596; CHECK1024-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
2597; CHECK1024-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
2598; CHECK1024-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
2599; CHECK1024-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
2600; CHECK1024-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
2601; CHECK1024-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
2602; CHECK1024-NEXT:    addvl sp, sp, #18
2603; CHECK1024-NEXT:    .cfi_def_cfa wsp, 1072
2604; CHECK1024-NEXT:    .cfi_restore z8
2605; CHECK1024-NEXT:    .cfi_restore z9
2606; CHECK1024-NEXT:    .cfi_restore z10
2607; CHECK1024-NEXT:    .cfi_restore z11
2608; CHECK1024-NEXT:    .cfi_restore z12
2609; CHECK1024-NEXT:    .cfi_restore z13
2610; CHECK1024-NEXT:    .cfi_restore z14
2611; CHECK1024-NEXT:    .cfi_restore z15
2612; CHECK1024-NEXT:    ldr x19, [sp, #1064] // 8-byte Folded Reload
2613; CHECK1024-NEXT:    ldr x27, [sp, #1056] // 8-byte Folded Reload
2614; CHECK1024-NEXT:    ldr x28, [sp, #1048] // 8-byte Folded Reload
2615; CHECK1024-NEXT:    ldr x30, [sp, #1032] // 8-byte Folded Reload
2616; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
2617; CHECK1024-NEXT:    add sp, sp, #1072
2618; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
2619; CHECK1024-NEXT:    .cfi_restore w19
2620; CHECK1024-NEXT:    .cfi_restore w27
2621; CHECK1024-NEXT:    .cfi_restore w28
2622; CHECK1024-NEXT:    .cfi_restore w30
2623; CHECK1024-NEXT:    .cfi_restore w29
2624; CHECK1024-NEXT:    ret
2625entry:
2626  tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
2627  %0 = alloca [37 x i8], align 16
2628  %call = call ptr @memset(ptr noundef nonnull %0, i32 noundef 45, i32 noundef 37)
2629  ret i32 -396142473
2630}
2631declare ptr @memset(ptr, i32, i32)
2632
2633define void @call_with_doubles() "aarch64_pstate_sm_compatible" {
2634; CHECK0-LABEL: call_with_doubles:
2635; CHECK0:       // %bb.0: // %entry
2636; CHECK0-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
2637; CHECK0-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
2638; CHECK0-NEXT:    .cfi_def_cfa_offset 16
2639; CHECK0-NEXT:    .cfi_offset w30, -8
2640; CHECK0-NEXT:    .cfi_offset b8, -16
2641; CHECK0-NEXT:    mov x8, #9221120237041090560 // =0x7ff8000000000000
2642; CHECK0-NEXT:    fmov d8, x8
2643; CHECK0-NEXT:    fmov d0, d8
2644; CHECK0-NEXT:    bl calld
2645; CHECK0-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
2646; CHECK0-NEXT:    fmov d0, d8
2647; CHECK0-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
2648; CHECK0-NEXT:    b calld
2649;
2650; CHECK64-LABEL: call_with_doubles:
2651; CHECK64:       // %bb.0: // %entry
2652; CHECK64-NEXT:    sub sp, sp, #144
2653; CHECK64-NEXT:    str d8, [sp, #64] // 8-byte Folded Spill
2654; CHECK64-NEXT:    str x30, [sp, #136] // 8-byte Folded Spill
2655; CHECK64-NEXT:    .cfi_def_cfa_offset 144
2656; CHECK64-NEXT:    .cfi_offset w30, -8
2657; CHECK64-NEXT:    .cfi_offset b8, -80
2658; CHECK64-NEXT:    mov x8, #9221120237041090560 // =0x7ff8000000000000
2659; CHECK64-NEXT:    fmov d8, x8
2660; CHECK64-NEXT:    fmov d0, d8
2661; CHECK64-NEXT:    bl calld
2662; CHECK64-NEXT:    fmov d0, d8
2663; CHECK64-NEXT:    ldr x30, [sp, #136] // 8-byte Folded Reload
2664; CHECK64-NEXT:    ldr d8, [sp, #64] // 8-byte Folded Reload
2665; CHECK64-NEXT:    add sp, sp, #144
2666; CHECK64-NEXT:    b calld
2667;
2668; CHECK1024-LABEL: call_with_doubles:
2669; CHECK1024:       // %bb.0: // %entry
2670; CHECK1024-NEXT:    sub sp, sp, #1056
2671; CHECK1024-NEXT:    str d8, [sp] // 8-byte Folded Spill
2672; CHECK1024-NEXT:    str x29, [sp, #1032] // 8-byte Folded Spill
2673; CHECK1024-NEXT:    str x30, [sp, #1040] // 8-byte Folded Spill
2674; CHECK1024-NEXT:    sub sp, sp, #1024
2675; CHECK1024-NEXT:    .cfi_def_cfa_offset 2080
2676; CHECK1024-NEXT:    .cfi_offset w30, -16
2677; CHECK1024-NEXT:    .cfi_offset w29, -24
2678; CHECK1024-NEXT:    .cfi_offset b8, -1056
2679; CHECK1024-NEXT:    mov x8, #9221120237041090560 // =0x7ff8000000000000
2680; CHECK1024-NEXT:    fmov d8, x8
2681; CHECK1024-NEXT:    fmov d0, d8
2682; CHECK1024-NEXT:    bl calld
2683; CHECK1024-NEXT:    fmov d0, d8
2684; CHECK1024-NEXT:    add sp, sp, #1024
2685; CHECK1024-NEXT:    ldr x30, [sp, #1040] // 8-byte Folded Reload
2686; CHECK1024-NEXT:    ldr x29, [sp, #1032] // 8-byte Folded Reload
2687; CHECK1024-NEXT:    ldr d8, [sp] // 8-byte Folded Reload
2688; CHECK1024-NEXT:    add sp, sp, #1056
2689; CHECK1024-NEXT:    b calld
2690entry:
2691  %call = tail call i32 @calld(double 0x7FF8000000000000)
2692  %call.1 = tail call i32 @calld(double 0x7FF8000000000000)
2693  ret void
2694}
2695declare i32 @calld(double) "aarch64_pstate_sm_compatible"
2696
2697; Check that stack objects are ordered fpr > hazard > gpr
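; As a rough sketch (offsets read from the CHECK64 output below, assuming the
; 64-byte hazard size), the @ordering_test frame is laid out approximately as:
;   [sp+96, sp+128)  FPR locals      (q2 at #96, h1 at #118, d0 at #120)
;   [sp+32, sp+96)   hazard padding between the FPR and GPR areas
;   [sp+0,  sp+32)   GPR locals      (the wzr stores at #12, #16, #28)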
2698define void @ordering_test(double %d, half %h, <4 x i32> %v) "aarch64_pstate_sm_compatible" {
2699; CHECK0-LABEL: ordering_test:
2700; CHECK0:       // %bb.0: // %entry
2701; CHECK0-NEXT:    sub sp, sp, #48
2702; CHECK0-NEXT:    .cfi_def_cfa_offset 48
2703; CHECK0-NEXT:    str wzr, [sp, #32]
2704; CHECK0-NEXT:    str d0, [sp, #24]
2705; CHECK0-NEXT:    str wzr, [sp, #44]
2706; CHECK0-NEXT:    str h1, [sp, #22]
2707; CHECK0-NEXT:    str wzr, [sp, #16]
2708; CHECK0-NEXT:    str q2, [sp], #48
2709; CHECK0-NEXT:    ret
2710;
2711; CHECK64-LABEL: ordering_test:
2712; CHECK64:       // %bb.0: // %entry
2713; CHECK64-NEXT:    sub sp, sp, #128
2714; CHECK64-NEXT:    .cfi_def_cfa_offset 128
2715; CHECK64-NEXT:    stp wzr, wzr, [sp, #12]
2716; CHECK64-NEXT:    str d0, [sp, #120]
2717; CHECK64-NEXT:    str wzr, [sp, #28]
2718; CHECK64-NEXT:    str h1, [sp, #118]
2719; CHECK64-NEXT:    str q2, [sp, #96]
2720; CHECK64-NEXT:    add sp, sp, #128
2721; CHECK64-NEXT:    ret
2722;
2723; CHECK1024-LABEL: ordering_test:
2724; CHECK1024:       // %bb.0: // %entry
2725; CHECK1024-NEXT:    sub sp, sp, #1040
2726; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
2727; CHECK1024-NEXT:    sub sp, sp, #1088
2728; CHECK1024-NEXT:    .cfi_def_cfa_offset 2128
2729; CHECK1024-NEXT:    .cfi_offset w29, -16
2730; CHECK1024-NEXT:    stp wzr, wzr, [sp, #12]
2731; CHECK1024-NEXT:    str d0, [sp, #1080]
2732; CHECK1024-NEXT:    str wzr, [sp, #28]
2733; CHECK1024-NEXT:    str h1, [sp, #1078]
2734; CHECK1024-NEXT:    str q2, [sp, #1056]
2735; CHECK1024-NEXT:    add sp, sp, #1088
2736; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
2737; CHECK1024-NEXT:    add sp, sp, #1040
2738; CHECK1024-NEXT:    ret
2739entry:
2740  %i32 = alloca i32
2741  %i64 = alloca i64
2742  %f64 = alloca double
2743  %f16 = alloca half
2744  %i32b = alloca i32
2745  %v4i32 = alloca <4 x i32>
2746  store i32 0, ptr %i64
2747  store double %d, ptr %f64
2748  store i32 0, ptr %i32
2749  store half %h, ptr %f16
2750  store i32 0, ptr %i32b
2751  store <4 x i32> %v, ptr %v4i32
2752  ret void
2753}
2754
2755
2756define void @ordering_test_array(i64 %o, i64 %p, float %f, i32 %x) "aarch64_pstate_sm_compatible" {
2757; CHECK0-LABEL: ordering_test_array:
2758; CHECK0:       // %bb.0: // %entry
2759; CHECK0-NEXT:    sub sp, sp, #272
2760; CHECK0-NEXT:    str x29, [sp, #256] // 8-byte Folded Spill
2761; CHECK0-NEXT:    .cfi_def_cfa_offset 272
2762; CHECK0-NEXT:    .cfi_offset w29, -16
2763; CHECK0-NEXT:    add x8, sp, #128
2764; CHECK0-NEXT:    str w2, [x8, x0, lsl #2]
2765; CHECK0-NEXT:    mov x8, sp
2766; CHECK0-NEXT:    str s0, [x8, x1, lsl #2]
2767; CHECK0-NEXT:    add sp, sp, #272
2768; CHECK0-NEXT:    ret
2769;
2770; CHECK64-LABEL: ordering_test_array:
2771; CHECK64:       // %bb.0: // %entry
2772; CHECK64-NEXT:    sub sp, sp, #400
2773; CHECK64-NEXT:    str x29, [sp, #384] // 8-byte Folded Spill
2774; CHECK64-NEXT:    .cfi_def_cfa_offset 400
2775; CHECK64-NEXT:    .cfi_offset w29, -16
2776; CHECK64-NEXT:    mov x8, sp
2777; CHECK64-NEXT:    str w2, [x8, x0, lsl #2]
2778; CHECK64-NEXT:    add x8, sp, #192
2779; CHECK64-NEXT:    str s0, [x8, x1, lsl #2]
2780; CHECK64-NEXT:    add sp, sp, #400
2781; CHECK64-NEXT:    ret
2782;
2783; CHECK1024-LABEL: ordering_test_array:
2784; CHECK1024:       // %bb.0: // %entry
2785; CHECK1024-NEXT:    sub sp, sp, #1040
2786; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
2787; CHECK1024-NEXT:    sub sp, sp, #1280
2788; CHECK1024-NEXT:    .cfi_def_cfa_offset 2320
2789; CHECK1024-NEXT:    .cfi_offset w29, -16
2790; CHECK1024-NEXT:    mov x8, sp
2791; CHECK1024-NEXT:    str w2, [x8, x0, lsl #2]
2792; CHECK1024-NEXT:    add x8, sp, #1152
2793; CHECK1024-NEXT:    str s0, [x8, x1, lsl #2]
2794; CHECK1024-NEXT:    add sp, sp, #1280
2795; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
2796; CHECK1024-NEXT:    add sp, sp, #1040
2797; CHECK1024-NEXT:    ret
2798entry:
2799  %i32 = alloca [32 x i32]
2800  %f32 = alloca [32 x float]
2801  %g = getelementptr i32, ptr %i32, i64 %o
2802  store i32 %x, ptr %g
2803  %h = getelementptr float, ptr %f32, i64 %p
2804  store float %f, ptr %h
2805  ret void
2806}
2807
2808; The VA register currently ends up in VLA space. Let's hope that doesn't come up very often.
2809define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "target-features"="+sme" {
2810; CHECK0-LABEL: vastate:
2811; CHECK0:       // %bb.0: // %entry
2812; CHECK0-NEXT:    stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
2813; CHECK0-NEXT:    .cfi_def_cfa_offset 112
2814; CHECK0-NEXT:    cntd x9
2815; CHECK0-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
2816; CHECK0-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
2817; CHECK0-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
2818; CHECK0-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
2819; CHECK0-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
2820; CHECK0-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
2821; CHECK0-NEXT:    add x29, sp, #64
2822; CHECK0-NEXT:    .cfi_def_cfa w29, 48
2823; CHECK0-NEXT:    .cfi_offset w19, -8
2824; CHECK0-NEXT:    .cfi_offset w20, -16
2825; CHECK0-NEXT:    .cfi_offset w30, -40
2826; CHECK0-NEXT:    .cfi_offset w29, -48
2827; CHECK0-NEXT:    .cfi_offset b8, -56
2828; CHECK0-NEXT:    .cfi_offset b9, -64
2829; CHECK0-NEXT:    .cfi_offset b10, -72
2830; CHECK0-NEXT:    .cfi_offset b11, -80
2831; CHECK0-NEXT:    .cfi_offset b12, -88
2832; CHECK0-NEXT:    .cfi_offset b13, -96
2833; CHECK0-NEXT:    .cfi_offset b14, -104
2834; CHECK0-NEXT:    .cfi_offset b15, -112
2835; CHECK0-NEXT:    sub sp, sp, #16
2836; CHECK0-NEXT:    rdsvl x8, #1
2837; CHECK0-NEXT:    mov x9, sp
2838; CHECK0-NEXT:    mov w20, w0
2839; CHECK0-NEXT:    msub x9, x8, x8, x9
2840; CHECK0-NEXT:    mov sp, x9
2841; CHECK0-NEXT:    stur x9, [x29, #-80]
2842; CHECK0-NEXT:    sub x9, x29, #80
2843; CHECK0-NEXT:    sturh wzr, [x29, #-70]
2844; CHECK0-NEXT:    stur wzr, [x29, #-68]
2845; CHECK0-NEXT:    sturh w8, [x29, #-72]
2846; CHECK0-NEXT:    msr TPIDR2_EL0, x9
2847; CHECK0-NEXT:    .cfi_offset vg, -32
2848; CHECK0-NEXT:    smstop sm
2849; CHECK0-NEXT:    bl other
2850; CHECK0-NEXT:    smstart sm
2851; CHECK0-NEXT:    .cfi_restore vg
2852; CHECK0-NEXT:    smstart za
2853; CHECK0-NEXT:    mrs x8, TPIDR2_EL0
2854; CHECK0-NEXT:    sub x0, x29, #80
2855; CHECK0-NEXT:    cbnz x8, .LBB33_2
2856; CHECK0-NEXT:  // %bb.1: // %entry
2857; CHECK0-NEXT:    bl __arm_tpidr2_restore
2858; CHECK0-NEXT:  .LBB33_2: // %entry
2859; CHECK0-NEXT:    mov w0, w20
2860; CHECK0-NEXT:    msr TPIDR2_EL0, xzr
2861; CHECK0-NEXT:    sub sp, x29, #64
2862; CHECK0-NEXT:    .cfi_def_cfa wsp, 112
2863; CHECK0-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
2864; CHECK0-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
2865; CHECK0-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
2866; CHECK0-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
2867; CHECK0-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
2868; CHECK0-NEXT:    ldp d15, d14, [sp], #112 // 16-byte Folded Reload
2869; CHECK0-NEXT:    .cfi_def_cfa_offset 0
2870; CHECK0-NEXT:    .cfi_restore w19
2871; CHECK0-NEXT:    .cfi_restore w20
2872; CHECK0-NEXT:    .cfi_restore w30
2873; CHECK0-NEXT:    .cfi_restore w29
2874; CHECK0-NEXT:    .cfi_restore b8
2875; CHECK0-NEXT:    .cfi_restore b9
2876; CHECK0-NEXT:    .cfi_restore b10
2877; CHECK0-NEXT:    .cfi_restore b11
2878; CHECK0-NEXT:    .cfi_restore b12
2879; CHECK0-NEXT:    .cfi_restore b13
2880; CHECK0-NEXT:    .cfi_restore b14
2881; CHECK0-NEXT:    .cfi_restore b15
2882; CHECK0-NEXT:    ret
2883;
2884; CHECK64-LABEL: vastate:
2885; CHECK64:       // %bb.0: // %entry
2886; CHECK64-NEXT:    stp d15, d14, [sp, #-176]! // 16-byte Folded Spill
2887; CHECK64-NEXT:    .cfi_def_cfa_offset 176
2888; CHECK64-NEXT:    cntd x9
2889; CHECK64-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
2890; CHECK64-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
2891; CHECK64-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
2892; CHECK64-NEXT:    stp x29, x30, [sp, #128] // 16-byte Folded Spill
2893; CHECK64-NEXT:    stp x9, x20, [sp, #144] // 16-byte Folded Spill
2894; CHECK64-NEXT:    str x19, [sp, #160] // 8-byte Folded Spill
2895; CHECK64-NEXT:    add x29, sp, #128
2896; CHECK64-NEXT:    .cfi_def_cfa w29, 48
2897; CHECK64-NEXT:    .cfi_offset w19, -16
2898; CHECK64-NEXT:    .cfi_offset w20, -24
2899; CHECK64-NEXT:    .cfi_offset w30, -40
2900; CHECK64-NEXT:    .cfi_offset w29, -48
2901; CHECK64-NEXT:    .cfi_offset b8, -120
2902; CHECK64-NEXT:    .cfi_offset b9, -128
2903; CHECK64-NEXT:    .cfi_offset b10, -136
2904; CHECK64-NEXT:    .cfi_offset b11, -144
2905; CHECK64-NEXT:    .cfi_offset b12, -152
2906; CHECK64-NEXT:    .cfi_offset b13, -160
2907; CHECK64-NEXT:    .cfi_offset b14, -168
2908; CHECK64-NEXT:    .cfi_offset b15, -176
2909; CHECK64-NEXT:    sub sp, sp, #80
2910; CHECK64-NEXT:    rdsvl x8, #1
2911; CHECK64-NEXT:    mov x9, sp
2912; CHECK64-NEXT:    mov w20, w0
2913; CHECK64-NEXT:    msub x9, x8, x8, x9
2914; CHECK64-NEXT:    mov sp, x9
2915; CHECK64-NEXT:    stur x9, [x29, #-208]
2916; CHECK64-NEXT:    sub x9, x29, #208
2917; CHECK64-NEXT:    sturh wzr, [x29, #-198]
2918; CHECK64-NEXT:    stur wzr, [x29, #-196]
2919; CHECK64-NEXT:    sturh w8, [x29, #-200]
2920; CHECK64-NEXT:    msr TPIDR2_EL0, x9
2921; CHECK64-NEXT:    .cfi_offset vg, -32
2922; CHECK64-NEXT:    smstop sm
2923; CHECK64-NEXT:    bl other
2924; CHECK64-NEXT:    smstart sm
2925; CHECK64-NEXT:    .cfi_restore vg
2926; CHECK64-NEXT:    smstart za
2927; CHECK64-NEXT:    mrs x8, TPIDR2_EL0
2928; CHECK64-NEXT:    sub x0, x29, #208
2929; CHECK64-NEXT:    cbnz x8, .LBB33_2
2930; CHECK64-NEXT:  // %bb.1: // %entry
2931; CHECK64-NEXT:    bl __arm_tpidr2_restore
2932; CHECK64-NEXT:  .LBB33_2: // %entry
2933; CHECK64-NEXT:    mov w0, w20
2934; CHECK64-NEXT:    msr TPIDR2_EL0, xzr
2935; CHECK64-NEXT:    sub sp, x29, #128
2936; CHECK64-NEXT:    .cfi_def_cfa wsp, 176
2937; CHECK64-NEXT:    ldp x20, x19, [sp, #152] // 16-byte Folded Reload
2938; CHECK64-NEXT:    ldr d14, [sp, #8] // 8-byte Folded Reload
2939; CHECK64-NEXT:    ldp x29, x30, [sp, #128] // 16-byte Folded Reload
2940; CHECK64-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
2941; CHECK64-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
2942; CHECK64-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
2943; CHECK64-NEXT:    ldr d15, [sp], #176 // 8-byte Folded Reload
2944; CHECK64-NEXT:    .cfi_def_cfa_offset 0
2945; CHECK64-NEXT:    .cfi_restore w19
2946; CHECK64-NEXT:    .cfi_restore w20
2947; CHECK64-NEXT:    .cfi_restore w30
2948; CHECK64-NEXT:    .cfi_restore w29
2949; CHECK64-NEXT:    .cfi_restore b8
2950; CHECK64-NEXT:    .cfi_restore b9
2951; CHECK64-NEXT:    .cfi_restore b10
2952; CHECK64-NEXT:    .cfi_restore b11
2953; CHECK64-NEXT:    .cfi_restore b12
2954; CHECK64-NEXT:    .cfi_restore b13
2955; CHECK64-NEXT:    .cfi_restore b14
2956; CHECK64-NEXT:    .cfi_restore b15
2957; CHECK64-NEXT:    ret
2958;
2959; CHECK1024-LABEL: vastate:
2960; CHECK1024:       // %bb.0: // %entry
2961; CHECK1024-NEXT:    sub sp, sp, #1136
2962; CHECK1024-NEXT:    .cfi_def_cfa_offset 1136
2963; CHECK1024-NEXT:    cntd x9
2964; CHECK1024-NEXT:    stp d15, d14, [sp] // 16-byte Folded Spill
2965; CHECK1024-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
2966; CHECK1024-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
2967; CHECK1024-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
2968; CHECK1024-NEXT:    str x29, [sp, #1088] // 8-byte Folded Spill
2969; CHECK1024-NEXT:    str x30, [sp, #1096] // 8-byte Folded Spill
2970; CHECK1024-NEXT:    str x9, [sp, #1104] // 8-byte Folded Spill
2971; CHECK1024-NEXT:    str x28, [sp, #1112] // 8-byte Folded Spill
2972; CHECK1024-NEXT:    str x20, [sp, #1120] // 8-byte Folded Spill
2973; CHECK1024-NEXT:    str x19, [sp, #1128] // 8-byte Folded Spill
2974; CHECK1024-NEXT:    add x29, sp, #1088
2975; CHECK1024-NEXT:    .cfi_def_cfa w29, 48
2976; CHECK1024-NEXT:    .cfi_offset w19, -8
2977; CHECK1024-NEXT:    .cfi_offset w20, -16
2978; CHECK1024-NEXT:    .cfi_offset w28, -24
2979; CHECK1024-NEXT:    .cfi_offset w30, -40
2980; CHECK1024-NEXT:    .cfi_offset w29, -48
2981; CHECK1024-NEXT:    .cfi_offset b8, -1080
2982; CHECK1024-NEXT:    .cfi_offset b9, -1088
2983; CHECK1024-NEXT:    .cfi_offset b10, -1096
2984; CHECK1024-NEXT:    .cfi_offset b11, -1104
2985; CHECK1024-NEXT:    .cfi_offset b12, -1112
2986; CHECK1024-NEXT:    .cfi_offset b13, -1120
2987; CHECK1024-NEXT:    .cfi_offset b14, -1128
2988; CHECK1024-NEXT:    .cfi_offset b15, -1136
2989; CHECK1024-NEXT:    sub sp, sp, #1040
2990; CHECK1024-NEXT:    rdsvl x8, #1
2991; CHECK1024-NEXT:    mov x9, sp
2992; CHECK1024-NEXT:    mov w20, w0
2993; CHECK1024-NEXT:    msub x9, x8, x8, x9
2994; CHECK1024-NEXT:    mov sp, x9
2995; CHECK1024-NEXT:    sub x10, x29, #1872
2996; CHECK1024-NEXT:    stur x9, [x10, #-256]
2997; CHECK1024-NEXT:    sub x9, x29, #1862
2998; CHECK1024-NEXT:    sub x10, x29, #1860
2999; CHECK1024-NEXT:    sturh wzr, [x9, #-256]
3000; CHECK1024-NEXT:    sub x9, x29, #2128
3001; CHECK1024-NEXT:    stur wzr, [x10, #-256]
3002; CHECK1024-NEXT:    sub x10, x29, #1864
3003; CHECK1024-NEXT:    sturh w8, [x10, #-256]
3004; CHECK1024-NEXT:    msr TPIDR2_EL0, x9
3005; CHECK1024-NEXT:    .cfi_offset vg, -32
3006; CHECK1024-NEXT:    smstop sm
3007; CHECK1024-NEXT:    bl other
3008; CHECK1024-NEXT:    smstart sm
3009; CHECK1024-NEXT:    .cfi_restore vg
3010; CHECK1024-NEXT:    smstart za
3011; CHECK1024-NEXT:    mrs x8, TPIDR2_EL0
3012; CHECK1024-NEXT:    sub x0, x29, #2128
3013; CHECK1024-NEXT:    cbnz x8, .LBB33_2
3014; CHECK1024-NEXT:  // %bb.1: // %entry
3015; CHECK1024-NEXT:    bl __arm_tpidr2_restore
3016; CHECK1024-NEXT:  .LBB33_2: // %entry
3017; CHECK1024-NEXT:    mov w0, w20
3018; CHECK1024-NEXT:    msr TPIDR2_EL0, xzr
3019; CHECK1024-NEXT:    sub sp, x29, #1088
3020; CHECK1024-NEXT:    .cfi_def_cfa wsp, 1136
3021; CHECK1024-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
3022; CHECK1024-NEXT:    ldr x19, [sp, #1128] // 8-byte Folded Reload
3023; CHECK1024-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
3024; CHECK1024-NEXT:    ldr x20, [sp, #1120] // 8-byte Folded Reload
3025; CHECK1024-NEXT:    ldr x28, [sp, #1112] // 8-byte Folded Reload
3026; CHECK1024-NEXT:    ldr x30, [sp, #1096] // 8-byte Folded Reload
3027; CHECK1024-NEXT:    ldr x29, [sp, #1088] // 8-byte Folded Reload
3028; CHECK1024-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
3029; CHECK1024-NEXT:    ldp d15, d14, [sp] // 16-byte Folded Reload
3030; CHECK1024-NEXT:    add sp, sp, #1136
3031; CHECK1024-NEXT:    .cfi_def_cfa_offset 0
3032; CHECK1024-NEXT:    .cfi_restore w19
3033; CHECK1024-NEXT:    .cfi_restore w20
3034; CHECK1024-NEXT:    .cfi_restore w28
3035; CHECK1024-NEXT:    .cfi_restore w30
3036; CHECK1024-NEXT:    .cfi_restore w29
3037; CHECK1024-NEXT:    .cfi_restore b8
3038; CHECK1024-NEXT:    .cfi_restore b9
3039; CHECK1024-NEXT:    .cfi_restore b10
3040; CHECK1024-NEXT:    .cfi_restore b11
3041; CHECK1024-NEXT:    .cfi_restore b12
3042; CHECK1024-NEXT:    .cfi_restore b13
3043; CHECK1024-NEXT:    .cfi_restore b14
3044; CHECK1024-NEXT:    .cfi_restore b15
3045; CHECK1024-NEXT:    ret
3046entry:
3047  tail call void @other()
3048  ret i32 %x
3049}
3050declare void @other()
3051
3052declare void @bar(ptr noundef) "aarch64_pstate_sm_compatible"
3053
3054define i32 @sve_stack_object_and_vla(double %d, i64 %sz) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
3055; CHECK0-LABEL: sve_stack_object_and_vla:
3056; CHECK0:       // %bb.0: // %entry
3057; CHECK0-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
3058; CHECK0-NEXT:    stp x28, x19, [sp, #16] // 16-byte Folded Spill
3059; CHECK0-NEXT:    mov x29, sp
3060; CHECK0-NEXT:    addvl sp, sp, #-1
3061; CHECK0-NEXT:    mov x19, sp
3062; CHECK0-NEXT:    .cfi_def_cfa w29, 32
3063; CHECK0-NEXT:    .cfi_offset w19, -8
3064; CHECK0-NEXT:    .cfi_offset w28, -16
3065; CHECK0-NEXT:    .cfi_offset w30, -24
3066; CHECK0-NEXT:    .cfi_offset w29, -32
3067; CHECK0-NEXT:    lsl x9, x0, #2
3068; CHECK0-NEXT:    mov x8, sp
3069; CHECK0-NEXT:    add x9, x9, #15
3070; CHECK0-NEXT:    and x9, x9, #0xfffffffffffffff0
3071; CHECK0-NEXT:    sub x0, x8, x9
3072; CHECK0-NEXT:    mov sp, x0
3073; CHECK0-NEXT:    mov z0.s, #0 // =0x0
3074; CHECK0-NEXT:    ptrue p0.s
3075; CHECK0-NEXT:    st1w { z0.s }, p0, [x29, #-1, mul vl]
3076; CHECK0-NEXT:    bl bar
3077; CHECK0-NEXT:    mov w0, wzr
3078; CHECK0-NEXT:    mov sp, x29
3079; CHECK0-NEXT:    ldp x28, x19, [sp, #16] // 16-byte Folded Reload
3080; CHECK0-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
3081; CHECK0-NEXT:    ret
3082;
3083; CHECK64-LABEL: sve_stack_object_and_vla:
3084; CHECK64:       // %bb.0: // %entry
3085; CHECK64-NEXT:    sub sp, sp, #96
3086; CHECK64-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
3087; CHECK64-NEXT:    add x29, sp, #64
3088; CHECK64-NEXT:    stp x28, x19, [sp, #80] // 16-byte Folded Spill
3089; CHECK64-NEXT:    sub sp, sp, #64
3090; CHECK64-NEXT:    addvl sp, sp, #-1
3091; CHECK64-NEXT:    mov x19, sp
3092; CHECK64-NEXT:    .cfi_def_cfa w29, 32
3093; CHECK64-NEXT:    .cfi_offset w19, -8
3094; CHECK64-NEXT:    .cfi_offset w28, -16
3095; CHECK64-NEXT:    .cfi_offset w30, -24
3096; CHECK64-NEXT:    .cfi_offset w29, -32
3097; CHECK64-NEXT:    lsl x9, x0, #2
3098; CHECK64-NEXT:    mov x8, sp
3099; CHECK64-NEXT:    add x9, x9, #15
3100; CHECK64-NEXT:    and x9, x9, #0xfffffffffffffff0
3101; CHECK64-NEXT:    sub x0, x8, x9
3102; CHECK64-NEXT:    mov sp, x0
3103; CHECK64-NEXT:    mov z0.s, #0 // =0x0
3104; CHECK64-NEXT:    ptrue p0.s
3105; CHECK64-NEXT:    sub x8, x29, #64
3106; CHECK64-NEXT:    st1w { z0.s }, p0, [x8, #-1, mul vl]
3107; CHECK64-NEXT:    bl bar
3108; CHECK64-NEXT:    mov w0, wzr
3109; CHECK64-NEXT:    sub sp, x29, #64
3110; CHECK64-NEXT:    ldp x28, x19, [sp, #80] // 16-byte Folded Reload
3111; CHECK64-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
3112; CHECK64-NEXT:    add sp, sp, #96
3113; CHECK64-NEXT:    ret
3114;
3115; CHECK1024-LABEL: sve_stack_object_and_vla:
3116; CHECK1024:       // %bb.0: // %entry
3117; CHECK1024-NEXT:    sub sp, sp, #1056
3118; CHECK1024-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
3119; CHECK1024-NEXT:    add x29, sp, #1024
3120; CHECK1024-NEXT:    str x30, [sp, #1032] // 8-byte Folded Spill
3121; CHECK1024-NEXT:    str x28, [sp, #1040] // 8-byte Folded Spill
3122; CHECK1024-NEXT:    str x19, [sp, #1048] // 8-byte Folded Spill
3123; CHECK1024-NEXT:    sub sp, sp, #1024
3124; CHECK1024-NEXT:    addvl sp, sp, #-1
3125; CHECK1024-NEXT:    mov x19, sp
3126; CHECK1024-NEXT:    .cfi_def_cfa w29, 32
3127; CHECK1024-NEXT:    .cfi_offset w19, -8
3128; CHECK1024-NEXT:    .cfi_offset w28, -16
3129; CHECK1024-NEXT:    .cfi_offset w30, -24
3130; CHECK1024-NEXT:    .cfi_offset w29, -32
3131; CHECK1024-NEXT:    lsl x9, x0, #2
3132; CHECK1024-NEXT:    mov x8, sp
3133; CHECK1024-NEXT:    add x9, x9, #15
3134; CHECK1024-NEXT:    and x9, x9, #0xfffffffffffffff0
3135; CHECK1024-NEXT:    sub x0, x8, x9
3136; CHECK1024-NEXT:    mov sp, x0
3137; CHECK1024-NEXT:    mov z0.s, #0 // =0x0
3138; CHECK1024-NEXT:    ptrue p0.s
3139; CHECK1024-NEXT:    sub x8, x29, #1024
3140; CHECK1024-NEXT:    st1w { z0.s }, p0, [x8, #-1, mul vl]
3141; CHECK1024-NEXT:    bl bar
3142; CHECK1024-NEXT:    mov w0, wzr
3143; CHECK1024-NEXT:    sub sp, x29, #1024
3144; CHECK1024-NEXT:    ldr x19, [sp, #1048] // 8-byte Folded Reload
3145; CHECK1024-NEXT:    ldr x28, [sp, #1040] // 8-byte Folded Reload
3146; CHECK1024-NEXT:    ldr x30, [sp, #1032] // 8-byte Folded Reload
3147; CHECK1024-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
3148; CHECK1024-NEXT:    add sp, sp, #1056
3149; CHECK1024-NEXT:    ret
3150entry:
3151  %a = alloca <vscale x 4 x i32>
3152  %b = alloca i32, i64 %sz, align 4
3153  store <vscale x 4 x i32> zeroinitializer, ptr %a
3154  call void @bar(ptr noundef nonnull %b)
3155  ret i32 0
3156}
3157