; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+sve | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

; Make sure callers set up the arguments correctly - tests AArch64ISelLowering::LowerCALL
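; A <vscale x 8 x double> argument needs four consecutive Z registers, so at
; most one such argument can be passed in z0-z7. Any further one is passed
; indirectly: the caller spills it with the addvl/st1d sequences below and
; passes a pointer to the copy instead, in a GPR (foo1, foo3) or on the stack
; once x0-x7 are exhausted (foo2).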

define float @foo1(ptr %x0, ptr %x1, ptr %x2) nounwind {
; CHECK-LABEL: foo1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    fmov s0, #1.00000000
; CHECK-NEXT:    ld4d { z1.d - z4.d }, p0/z, [x0]
; CHECK-NEXT:    mov x0, sp
; CHECK-NEXT:    ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x2]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    st1d { z19.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1d { z18.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1d { z17.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1d { z16.d }, p0, [sp]
; CHECK-NEXT:    bl callee1
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
  %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0)
  %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1)
  %4 = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %1, ptr %x2)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  1
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  2
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  3
  %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %5, i64 0)
  %10 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 2)
  %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 4)
  %12 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %11, <vscale x 2 x double> %8, i64 6)
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  0
  %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  1
  %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  2
  %16 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  3
  %17 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> undef, <vscale x 2 x double> %13, i64 0)
  %18 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %17, <vscale x 2 x double> %14, i64 2)
  %19 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %18, <vscale x 2 x double> %15, i64 4)
  %20 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %19, <vscale x 2 x double> %16, i64 6)
  %call = call float @callee1(float 1.000000e+00, <vscale x 8 x double> %12, <vscale x 8 x double> %20, <vscale x 2 x double> %4)
  ret float %call
}

define float @foo2(ptr %x0, ptr %x1) nounwind {
; CHECK-LABEL: foo2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    fmov s0, #1.00000000
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    add x9, sp, #16
; CHECK-NEXT:    mov w2, #2 // =0x2
; CHECK-NEXT:    mov w3, #3 // =0x3
; CHECK-NEXT:    ld4d { z1.d - z4.d }, p0/z, [x0]
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    mov w4, #4 // =0x4
; CHECK-NEXT:    mov w5, #5 // =0x5
; CHECK-NEXT:    mov w6, #6 // =0x6
; CHECK-NEXT:    mov w7, #7 // =0x7
; CHECK-NEXT:    ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov w1, #1 // =0x1
; CHECK-NEXT:    st1d { z19.d }, p0, [x8, #3, mul vl]
; CHECK-NEXT:    st1d { z18.d }, p0, [x8, #2, mul vl]
; CHECK-NEXT:    st1d { z17.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT:    st1d { z16.d }, p0, [x9]
; CHECK-NEXT:    str x8, [sp]
; CHECK-NEXT:    bl callee2
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
  %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0)
  %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1)
  %4 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  0
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  1
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  2
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  3
  %8 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %4, i64 0)
  %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %8, <vscale x 2 x double> %5, i64 2)
  %10 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 4)
  %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 6)
  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  0
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  1
  %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  2
  %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  3
  %16 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %12, i64 0)
  %17 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %16, <vscale x 2 x double> %13, i64 2)
  %18 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %17, <vscale x 2 x double> %14, i64 4)
  %19 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %18, <vscale x 2 x double> %15, i64 6)
  %call = call float @callee2(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, float 1.000000e+00, <vscale x 8 x double> %11, <vscale x 8 x double> %19)
  ret float %call
}

define float @foo3(ptr %x0, ptr %x1, ptr %x2) nounwind {
; CHECK-LABEL: foo3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-3
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    fmov s0, #1.00000000
; CHECK-NEXT:    fmov s1, #2.00000000
; CHECK-NEXT:    ld4d { z2.d - z5.d }, p0/z, [x0]
; CHECK-NEXT:    mov x0, sp
; CHECK-NEXT:    ld3d { z16.d - z18.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z6.d }, p0/z, [x2]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    st1d { z18.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1d { z17.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1d { z16.d }, p0, [sp]
; CHECK-NEXT:    bl callee3
; CHECK-NEXT:    addvl sp, sp, #3
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
  %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0)
  %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1)
  %4 = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %1, ptr %x2)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  1
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  2
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2,  3
  %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %5, i64 0)
  %10 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 2)
  %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 4)
  %12 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %11, <vscale x 2 x double> %8, i64 6)
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  0
  %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  1
  %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3,  2
  %16 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> poison, <vscale x 2 x double> %13, i64 0)
  %17 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> %16, <vscale x 2 x double> %14, i64 2)
  %18 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> %17, <vscale x 2 x double> %15, i64 4)
  %call = call float @callee3(float 1.000000e+00, float 2.000000e+00, <vscale x 8 x double> %12, <vscale x 6 x double> %18, <vscale x 2 x double> %4)
  ret float %call
}

; Make sure callees read the arguments correctly - tests AArch64ISelLowering::LowerFormalArguments
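; On this side the indirect argument shows up as a pointer: in foo4 the second
; <vscale x 8 x double> (%x2) arrives in x3 and is loaded with ld1d, while %x1
; arrives directly in z1-z4. In foo5 all of x0-x7 are already taken, so even
; that pointer has to be read from the stack (ldr x8, [sp]).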

define double @foo4(double %x0, ptr %ptr1, ptr %ptr2, ptr %ptr3, <vscale x 8 x double> %x1, <vscale x 8 x double> %x2, <vscale x 2 x double> %x3) nounwind {
; CHECK-LABEL: foo4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z6.d }, p0/z, [x3, #1, mul vl]
; CHECK-NEXT:    ld1d { z7.d }, p0/z, [x3]
; CHECK-NEXT:    ld1d { z24.d }, p0/z, [x3, #3, mul vl]
; CHECK-NEXT:    ld1d { z25.d }, p0/z, [x3, #2, mul vl]
; CHECK-NEXT:    st1d { z4.d }, p0, [x0, #3, mul vl]
; CHECK-NEXT:    st1d { z3.d }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    st1d { z2.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT:    st1d { z1.d }, p0, [x0]
; CHECK-NEXT:    st1d { z25.d }, p0, [x1, #2, mul vl]
; CHECK-NEXT:    st1d { z24.d }, p0, [x1, #3, mul vl]
; CHECK-NEXT:    st1d { z7.d }, p0, [x1]
; CHECK-NEXT:    st1d { z6.d }, p0, [x1, #1, mul vl]
; CHECK-NEXT:    st1d { z5.d }, p0, [x2]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 8 x double> %x1, ptr %ptr1
  store volatile <vscale x 8 x double> %x2, ptr %ptr2
  store volatile <vscale x 2 x double> %x3, ptr %ptr3
  ret double %x0
}

define double @foo5(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, ptr %ptr1, ptr %ptr2, double %x0, <vscale x 8 x double> %x1, <vscale x 8 x double> %x2) nounwind {
; CHECK-LABEL: foo5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [sp]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ld1d { z6.d }, p0/z, [x8]
; CHECK-NEXT:    ld1d { z7.d }, p0/z, [x8, #3, mul vl]
; CHECK-NEXT:    ld1d { z24.d }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT:    st1d { z4.d }, p0, [x6, #3, mul vl]
; CHECK-NEXT:    st1d { z3.d }, p0, [x6, #2, mul vl]
; CHECK-NEXT:    st1d { z2.d }, p0, [x6, #1, mul vl]
; CHECK-NEXT:    st1d { z1.d }, p0, [x6]
; CHECK-NEXT:    st1d { z24.d }, p0, [x7, #2, mul vl]
; CHECK-NEXT:    st1d { z7.d }, p0, [x7, #3, mul vl]
; CHECK-NEXT:    st1d { z6.d }, p0, [x7]
; CHECK-NEXT:    st1d { z5.d }, p0, [x7, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 8 x double> %x1, ptr %ptr1
  store volatile <vscale x 8 x double> %x2, ptr %ptr2
  ret double %x0
}

define double @foo6(double %x0, double %x1, ptr %ptr1, ptr %ptr2, <vscale x 8 x double> %x2, <vscale x 6 x double> %x3) nounwind {
; CHECK-LABEL: foo6:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x2]
; CHECK-NEXT:    ld1d { z6.d }, p0/z, [x2, #2, mul vl]
; CHECK-NEXT:    ld1d { z7.d }, p0/z, [x2, #1, mul vl]
; CHECK-NEXT:    st1d { z5.d }, p0, [x0, #3, mul vl]
; CHECK-NEXT:    st1d { z4.d }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    st1d { z3.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT:    st1d { z2.d }, p0, [x0]
; CHECK-NEXT:    st1d { z7.d }, p0, [x1, #1, mul vl]
; CHECK-NEXT:    st1d { z6.d }, p0, [x1, #2, mul vl]
; CHECK-NEXT:    st1d { z1.d }, p0, [x1]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 8 x double> %x2, ptr %ptr1
  store volatile <vscale x 6 x double> %x3, ptr %ptr2
  ret double %x0
}

; Use AAVPCS, SVE registers in z0 - z7 are used
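; In aavpcs1 the i32 arguments live in W registers, so z0-z7 take the first
; eight vector arguments and only %s15 and %s16 go indirectly (via x7 and a
; stack slot). In aavpcs2 the float arguments occupy s0-s6, which alias z0-z6,
; so only %s7 gets a Z register and the rest are passed indirectly through
; x0-x7 and the stack, hence all the ld1w reloads.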

define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, <vscale x 4 x i32> %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldp x8, x9, [sp]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x7]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x8]
; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 4 x i32> %s7, ptr %ptr
  store volatile <vscale x 4 x i32> %s8, ptr %ptr
  store volatile <vscale x 4 x i32> %s9, ptr %ptr
  store volatile <vscale x 4 x i32> %s11, ptr %ptr
  store volatile <vscale x 4 x i32> %s12, ptr %ptr
  store volatile <vscale x 4 x i32> %s13, ptr %ptr
  store volatile <vscale x 4 x i32> %s14, ptr %ptr
  store volatile <vscale x 4 x i32> %s15, ptr %ptr
  store volatile <vscale x 4 x i32> %s16, ptr %ptr
  ret void
}

; Use AAVPCS, an SVE register in z0 - z7 is used

define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, <vscale x 4 x float> %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldp x8, x9, [sp]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x7]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x6]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x5]
; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x4]
; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x3]
; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 4 x float> %s7, ptr %ptr
  store volatile <vscale x 4 x float> %s8, ptr %ptr
  store volatile <vscale x 4 x float> %s9, ptr %ptr
  store volatile <vscale x 4 x float> %s11, ptr %ptr
  store volatile <vscale x 4 x float> %s12, ptr %ptr
  store volatile <vscale x 4 x float> %s13, ptr %ptr
  store volatile <vscale x 4 x float> %s14, ptr %ptr
  store volatile <vscale x 4 x float> %s15, ptr %ptr
  store volatile <vscale x 4 x float> %s16, ptr %ptr
  ret void
}

; Use AAVPCS, no SVE register in z0 - z7 is used (floats occupy z0 - z7) but the predicate arg is used
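; Here the floats occupy s0-s7, which alias z0-z7, so no Z register is free:
; every vector argument is passed indirectly, with pointers in x0-x7 and the
; rest on the stack (%s16's pointer at [sp], %ptr at [sp, #16]).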

define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, <vscale x 16 x i1> %p0, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [sp]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x7]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x6]
; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x5]
; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x2]
; CHECK-NEXT:    ld1w { z7.s }, p0/z, [x4]
; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x3]
; CHECK-NEXT:    ldr x8, [sp, #16]
; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
; CHECK-NEXT:    st1w { z3.s }, p0, [x8]
; CHECK-NEXT:    st1w { z6.s }, p0, [x8]
; CHECK-NEXT:    st1w { z24.s }, p0, [x8]
; CHECK-NEXT:    st1w { z7.s }, p0, [x8]
; CHECK-NEXT:    st1w { z5.s }, p0, [x8]
; CHECK-NEXT:    st1w { z4.s }, p0, [x8]
; CHECK-NEXT:    st1w { z2.s }, p0, [x8]
; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 4 x float> %s8, ptr %ptr
  store volatile <vscale x 4 x float> %s9, ptr %ptr
  store volatile <vscale x 4 x float> %s10, ptr %ptr
  store volatile <vscale x 4 x float> %s11, ptr %ptr
  store volatile <vscale x 4 x float> %s12, ptr %ptr
  store volatile <vscale x 4 x float> %s13, ptr %ptr
  store volatile <vscale x 4 x float> %s14, ptr %ptr
  store volatile <vscale x 4 x float> %s15, ptr %ptr
  store volatile <vscale x 4 x float> %s16, ptr %ptr
  ret void
}

; Use AAVPCS, SVE registers in z0 - z7 used (i32s don't occupy z0 - z7)
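; The mirror image of aavpcs3: the i32s go in w0-w7, leaving z0-z7 free for
; %s8-%s15. Only %s16 and %s17 overflow to indirect passing, with their
; pointers on the stack and %ptr at [sp, #16].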

define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, <vscale x 4 x i32> %s17, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [sp]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ldr x9, [sp, #16]
; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x8]
; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 4 x i32> %s8, ptr %ptr
  store volatile <vscale x 4 x i32> %s9, ptr %ptr
  store volatile <vscale x 4 x i32> %s10, ptr %ptr
  store volatile <vscale x 4 x i32> %s11, ptr %ptr
  store volatile <vscale x 4 x i32> %s12, ptr %ptr
  store volatile <vscale x 4 x i32> %s13, ptr %ptr
  store volatile <vscale x 4 x i32> %s14, ptr %ptr
  store volatile <vscale x 4 x i32> %s15, ptr %ptr
  store volatile <vscale x 4 x i32> %s16, ptr %ptr
  ret void
}

; Use AAVPCS, SVE register used in return
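; %s8 is both stored and returned; since it was passed indirectly (the float
; arguments alias all of z0-z7), the callee reloads it from the pointer in x0
; and the copy left in z0 doubles as the return value.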

define <vscale x 4 x float> @aavpcs5(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [sp]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x7]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x6]
; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x5]
; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x2]
; CHECK-NEXT:    ld1w { z7.s }, p0/z, [x4]
; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x3]
; CHECK-NEXT:    ldr x8, [sp, #16]
; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK-NEXT:    st1w { z3.s }, p0, [x8]
; CHECK-NEXT:    st1w { z6.s }, p0, [x8]
; CHECK-NEXT:    st1w { z24.s }, p0, [x8]
; CHECK-NEXT:    st1w { z7.s }, p0, [x8]
; CHECK-NEXT:    st1w { z5.s }, p0, [x8]
; CHECK-NEXT:    st1w { z4.s }, p0, [x8]
; CHECK-NEXT:    st1w { z2.s }, p0, [x8]
; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 4 x float> %s8, ptr %ptr
  store volatile <vscale x 4 x float> %s9, ptr %ptr
  store volatile <vscale x 4 x float> %s10, ptr %ptr
  store volatile <vscale x 4 x float> %s11, ptr %ptr
  store volatile <vscale x 4 x float> %s12, ptr %ptr
  store volatile <vscale x 4 x float> %s13, ptr %ptr
  store volatile <vscale x 4 x float> %s14, ptr %ptr
  store volatile <vscale x 4 x float> %s15, ptr %ptr
  store volatile <vscale x 4 x float> %s16, ptr %ptr
  ret <vscale x 4 x float> %s8
}

define void @aapcs1(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, ptr %ptr) nounwind {
; CHECK-LABEL: aapcs1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [sp]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x7]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x6]
; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x5]
; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x2]
; CHECK-NEXT:    ld1w { z7.s }, p0/z, [x4]
; CHECK-NEXT:    ld1w { z16.s }, p0/z, [x3]
; CHECK-NEXT:    ldr x8, [sp, #16]
; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
; CHECK-NEXT:    st1w { z3.s }, p0, [x8]
; CHECK-NEXT:    st1w { z6.s }, p0, [x8]
; CHECK-NEXT:    st1w { z16.s }, p0, [x8]
; CHECK-NEXT:    st1w { z7.s }, p0, [x8]
; CHECK-NEXT:    st1w { z5.s }, p0, [x8]
; CHECK-NEXT:    st1w { z4.s }, p0, [x8]
; CHECK-NEXT:    st1w { z2.s }, p0, [x8]
; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK-NEXT:    ret
entry:
  store volatile <vscale x 4 x float> %s8, ptr %ptr
  store volatile <vscale x 4 x float> %s9, ptr %ptr
  store volatile <vscale x 4 x float> %s10, ptr %ptr
  store volatile <vscale x 4 x float> %s11, ptr %ptr
  store volatile <vscale x 4 x float> %s12, ptr %ptr
  store volatile <vscale x 4 x float> %s13, ptr %ptr
  store volatile <vscale x 4 x float> %s14, ptr %ptr
  store volatile <vscale x 4 x float> %s15, ptr %ptr
  store volatile <vscale x 4 x float> %s16, ptr %ptr
  ret void
}

declare void @non_sve_callee_high_range(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1)
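; z0-z7 are blocked by the eight float arguments, so %v0 and %v1 must be
; passed indirectly even though the callee is not itself SVE. The callers
; below therefore allocate scalable stack space with addvl and hand over
; pointers in x0 and x1.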

define void @non_sve_caller_non_sve_callee_high_range()  {
; CHECK-LABEL: non_sve_caller_non_sve_callee_high_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    fmov s1, #1.00000000
; CHECK-NEXT:    addvl x0, sp, #1
; CHECK-NEXT:    fmov s2, #2.00000000
; CHECK-NEXT:    fmov s3, #3.00000000
; CHECK-NEXT:    mov x1, sp
; CHECK-NEXT:    fmov s4, #4.00000000
; CHECK-NEXT:    fmov s5, #5.00000000
; CHECK-NEXT:    fmov s6, #6.00000000
; CHECK-NEXT:    fmov s7, #7.00000000
; CHECK-NEXT:    bl non_sve_callee_high_range
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
  ret void
}

define void @non_sve_caller_high_range_non_sve_callee_high_range(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1)  {
; CHECK-LABEL: non_sve_caller_high_range_non_sve_callee_high_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    fmov s1, #1.00000000
; CHECK-NEXT:    fmov s2, #2.00000000
; CHECK-NEXT:    fmov s3, #3.00000000
; CHECK-NEXT:    fmov s4, #4.00000000
; CHECK-NEXT:    ld1w { z16.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z17.s }, p0/z, [x1]
; CHECK-NEXT:    addvl x0, sp, #1
; CHECK-NEXT:    fmov s5, #5.00000000
; CHECK-NEXT:    fmov s6, #6.00000000
; CHECK-NEXT:    mov x1, sp
; CHECK-NEXT:    fmov s7, #7.00000000
; CHECK-NEXT:    st1w { z17.s }, p0, [sp]
; CHECK-NEXT:    st1w { z16.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    bl non_sve_callee_high_range
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1)
  ret void
}

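; A function with an SVE argument or return value follows the SVE PCS, so it
; must preserve z8-z23 and p4-p15; the non-SVE callee will not do that for it,
; hence the long spill/reload sequences around the call below. %v0 is spilled
; too so it can be returned afterwards.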
define <vscale x 4 x float> @sve_caller_non_sve_callee_high_range(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1)  {
; CHECK-LABEL: sve_caller_non_sve_callee_high_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-18
; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-3
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa8, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 168 * VG
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
; CHECK-NEXT:    mov z25.d, z0.d
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    fmov s1, #1.00000000
; CHECK-NEXT:    addvl x0, sp, #2
; CHECK-NEXT:    fmov s2, #2.00000000
; CHECK-NEXT:    fmov s3, #3.00000000
; CHECK-NEXT:    addvl x1, sp, #1
; CHECK-NEXT:    fmov s4, #4.00000000
; CHECK-NEXT:    fmov s5, #5.00000000
; CHECK-NEXT:    fmov s6, #6.00000000
; CHECK-NEXT:    fmov s7, #7.00000000
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    st1w { z24.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z25.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    bl non_sve_callee_high_range
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #3
; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #18
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1)
  ret <vscale x 4 x float> %v0
}

define <vscale x 4 x float> @sve_ret_caller_non_sve_callee_high_range()  {
; CHECK-LABEL: sve_ret_caller_non_sve_callee_high_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-18
; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    fmov s1, #1.00000000
; CHECK-NEXT:    addvl x0, sp, #1
; CHECK-NEXT:    fmov s2, #2.00000000
; CHECK-NEXT:    fmov s3, #3.00000000
; CHECK-NEXT:    mov x1, sp
; CHECK-NEXT:    fmov s4, #4.00000000
; CHECK-NEXT:    fmov s5, #5.00000000
; CHECK-NEXT:    fmov s6, #6.00000000
; CHECK-NEXT:    fmov s7, #7.00000000
; CHECK-NEXT:    bl non_sve_callee_high_range
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #18
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
  ret <vscale x 4 x float> undef
}

declare void @func_f8_and_v0_passed_via_memory(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, float %f8, <vscale x 4 x float> %v0)
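; f0-f7 occupy s0-s7, so f8 is passed on the stack at [sp], and %v0 is passed
; via memory: the caller materialises the 9.0 splat, stores it at sp+16, and
; passes that address in x0.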
define void @verify_all_operands_are_initialised() {
; CHECK-LABEL: verify_all_operands_are_initialised:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    fmov z16.s, #9.00000000
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmov s1, #1.00000000
; CHECK-NEXT:    fmov s2, #2.00000000
; CHECK-NEXT:    fmov s3, #3.00000000
; CHECK-NEXT:    add x0, sp, #16
; CHECK-NEXT:    fmov s4, #4.00000000
; CHECK-NEXT:    fmov s5, #5.00000000
; CHECK-NEXT:    st1w { z16.s }, p0, [x8]
; CHECK-NEXT:    mov w8, #1090519040 // =0x41000000
; CHECK-NEXT:    fmov s6, #6.00000000
; CHECK-NEXT:    fmov s7, #7.00000000
; CHECK-NEXT:    str w8, [sp]
; CHECK-NEXT:    bl func_f8_and_v0_passed_via_memory
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @func_f8_and_v0_passed_via_memory(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, <vscale x 4 x float> splat (float 9.000000e+00))
  ret void
}

declare float @callee1(float, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 2 x double>)
declare float @callee2(i32, i32, i32, i32, i32, i32, i32, i32, float, <vscale x 8 x double>, <vscale x 8 x double>)
declare float @callee3(float, float, <vscale x 8 x double>, <vscale x 6 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
declare {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1>, ptr)
declare {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, ptr)
declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
declare <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double>, <vscale x 2 x double>, i64)
declare <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double>, <vscale x 2 x double>, i64)