; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+sve | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

; Make sure callers set up the arguments correctly - tests AArch64ISelLowering::LowerCALL

; callee1 takes (float, nxv8f64, nxv8f64, nxv2f64). The assertions below pin:
;   - the float in s0,
;   - the first nxv8f64 in z1 - z4,
;   - the nxv2f64 in z5,
;   - the second nxv8f64 written to a stack temporary whose address is
;     passed in x0.
; Both nxv8f64 values are assembled with llvm.vector.insert chains that start
; from poison; every sub-vector is overwritten, so no lane stays poison.
define float @foo1(ptr %x0, ptr %x1, ptr %x2) nounwind {
; CHECK-LABEL: foo1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    fmov s0, #1.00000000
; CHECK-NEXT:    ld4d { z1.d - z4.d }, p0/z, [x0]
; CHECK-NEXT:    mov x0, sp
; CHECK-NEXT:    ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x2]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    st1d { z19.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1d { z18.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1d { z17.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1d { z16.d }, p0, [sp]
; CHECK-NEXT:    bl callee1
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
  %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0)
  %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1)
  %4 = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %1, ptr %x2)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 1
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 2
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 3
  %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %5, i64 0)
  %10 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 2)
  %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 4)
  %12 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %11, <vscale x 2 x double> %8, i64 6)
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 0
  %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 1
  %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 2
  %16 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 3
  %17 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %13, i64 0)
  %18 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %17, <vscale x 2 x double> %14, i64 2)
  %19 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %18, <vscale x 2 x double> %15, i64 4)
  %20 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %19, <vscale x 2 x double> %16, i64 6)
  %call = call float @callee1(float 1.000000e+00, <vscale x 8 x double> %12, <vscale x 8 x double> %20, <vscale x 2 x double> %4)
  ret float %call
}
54 55define float @foo2(ptr %x0, ptr %x1) nounwind { 56; CHECK-LABEL: foo2: 57; CHECK: // %bb.0: // %entry 58; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill 59; CHECK-NEXT: sub sp, sp, #16 60; CHECK-NEXT: addvl sp, sp, #-4 61; CHECK-NEXT: ptrue p0.b 62; CHECK-NEXT: fmov s0, #1.00000000 63; CHECK-NEXT: add x8, sp, #16 64; CHECK-NEXT: add x9, sp, #16 65; CHECK-NEXT: mov w2, #2 // =0x2 66; CHECK-NEXT: mov w3, #3 // =0x3 67; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0] 68; CHECK-NEXT: mov w0, wzr 69; CHECK-NEXT: mov w4, #4 // =0x4 70; CHECK-NEXT: mov w5, #5 // =0x5 71; CHECK-NEXT: mov w6, #6 // =0x6 72; CHECK-NEXT: mov w7, #7 // =0x7 73; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1] 74; CHECK-NEXT: ptrue p0.d 75; CHECK-NEXT: mov w1, #1 // =0x1 76; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] 77; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] 78; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] 79; CHECK-NEXT: st1d { z16.d }, p0, [x9] 80; CHECK-NEXT: str x8, [sp] 81; CHECK-NEXT: bl callee2 82; CHECK-NEXT: addvl sp, sp, #4 83; CHECK-NEXT: add sp, sp, #16 84; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload 85; CHECK-NEXT: ret 86entry: 87 %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) 88 %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0) 89 %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0) 90 %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1) 91 %4 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 0 92 %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 1 93 %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 
x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 2 94 %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 3 95 %8 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %4, i64 0) 96 %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %8, <vscale x 2 x double> %5, i64 2) 97 %10 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 4) 98 %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 6) 99 %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 0 100 %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 1 101 %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 2 102 %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 3 103 %16 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %12, i64 0) 104 %17 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %16, <vscale x 2 x double> %13, i64 2) 105 %18 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %17, <vscale x 2 x double> %14, i64 4) 106 %19 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %18, <vscale x 2 x double> %15, i64 6) 107 %call = call float @callee2(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, float 1.000000e+00, <vscale x 8 x double> %11, <vscale x 8 x double> %19) 108 ret float %call 109} 110 111define float @foo3(ptr %x0, ptr %x1, ptr %x2) nounwind { 112; 
CHECK-LABEL: foo3: 113; CHECK: // %bb.0: // %entry 114; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill 115; CHECK-NEXT: addvl sp, sp, #-3 116; CHECK-NEXT: ptrue p0.b 117; CHECK-NEXT: fmov s0, #1.00000000 118; CHECK-NEXT: fmov s1, #2.00000000 119; CHECK-NEXT: ld4d { z2.d - z5.d }, p0/z, [x0] 120; CHECK-NEXT: mov x0, sp 121; CHECK-NEXT: ld3d { z16.d - z18.d }, p0/z, [x1] 122; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2] 123; CHECK-NEXT: ptrue p0.d 124; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl] 125; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] 126; CHECK-NEXT: st1d { z16.d }, p0, [sp] 127; CHECK-NEXT: bl callee3 128; CHECK-NEXT: addvl sp, sp, #3 129; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload 130; CHECK-NEXT: ret 131entry: 132 %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) 133 %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0) 134 %2 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x0) 135 %3 = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> %1, ptr %x1) 136 %4 = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %1, ptr %x2) 137 %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 0 138 %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 1 139 %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 2 140 %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %2, 3 141 %9 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> poison, <vscale x 2 x double> %5, i64 0) 142 %10 = 
call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %9, <vscale x 2 x double> %6, i64 2) 143 %11 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %10, <vscale x 2 x double> %7, i64 4) 144 %12 = call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double> %11, <vscale x 2 x double> %8, i64 6) 145 %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} %3, 0 146 %14 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 1 147 %15 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %3, 2 148 %16 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> poison, <vscale x 2 x double> %13, i64 0) 149 %17 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> %16 , <vscale x 2 x double> %14, i64 2) 150 %18 = call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double> %17 , <vscale x 2 x double> %15, i64 4) 151 %call = call float @callee3(float 1.000000e+00, float 2.000000e+00, <vscale x 8 x double> %12, <vscale x 6 x double> %18, <vscale x 2 x double> %4) 152 ret float %call 153} 154 155; Make sure callees read the arguments correctly - tests AArch64ISelLowering::LowerFormalArguments 156 157define double @foo4(double %x0, ptr %ptr1, ptr %ptr2, ptr %ptr3, <vscale x 8 x double> %x1, <vscale x 8 x double> %x2, <vscale x 2 x double> %x3) nounwind { 158; CHECK-LABEL: foo4: 159; CHECK: // %bb.0: // %entry 160; CHECK-NEXT: ptrue p0.d 161; CHECK-NEXT: ld1d { z6.d }, p0/z, [x3, #1, mul vl] 162; CHECK-NEXT: ld1d { z7.d }, p0/z, [x3] 163; CHECK-NEXT: ld1d { z24.d }, p0/z, [x3, #3, mul vl] 164; CHECK-NEXT: ld1d { z25.d }, p0/z, [x3, #2, mul vl] 165; CHECK-NEXT: st1d { z4.d }, p0, [x0, #3, mul vl] 166; CHECK-NEXT: st1d { z3.d }, p0, [x0, #2, mul vl] 167; CHECK-NEXT: st1d { z2.d }, p0, [x0, #1, mul vl] 168; 
CHECK-NEXT: st1d { z1.d }, p0, [x0] 169; CHECK-NEXT: st1d { z25.d }, p0, [x1, #2, mul vl] 170; CHECK-NEXT: st1d { z24.d }, p0, [x1, #3, mul vl] 171; CHECK-NEXT: st1d { z7.d }, p0, [x1] 172; CHECK-NEXT: st1d { z6.d }, p0, [x1, #1, mul vl] 173; CHECK-NEXT: st1d { z5.d }, p0, [x2] 174; CHECK-NEXT: ret 175entry: 176 store volatile <vscale x 8 x double> %x1, ptr %ptr1 177 store volatile <vscale x 8 x double> %x2, ptr %ptr2 178 store volatile <vscale x 2 x double> %x3, ptr %ptr3 179 ret double %x0 180} 181 182define double @foo5(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, ptr %ptr1, ptr %ptr2, double %x0, <vscale x 8 x double> %x1, <vscale x 8 x double> %x2) nounwind { 183; CHECK-LABEL: foo5: 184; CHECK: // %bb.0: // %entry 185; CHECK-NEXT: ldr x8, [sp] 186; CHECK-NEXT: ptrue p0.d 187; CHECK-NEXT: ld1d { z5.d }, p0/z, [x8, #1, mul vl] 188; CHECK-NEXT: ld1d { z6.d }, p0/z, [x8] 189; CHECK-NEXT: ld1d { z7.d }, p0/z, [x8, #3, mul vl] 190; CHECK-NEXT: ld1d { z24.d }, p0/z, [x8, #2, mul vl] 191; CHECK-NEXT: st1d { z4.d }, p0, [x6, #3, mul vl] 192; CHECK-NEXT: st1d { z3.d }, p0, [x6, #2, mul vl] 193; CHECK-NEXT: st1d { z2.d }, p0, [x6, #1, mul vl] 194; CHECK-NEXT: st1d { z1.d }, p0, [x6] 195; CHECK-NEXT: st1d { z24.d }, p0, [x7, #2, mul vl] 196; CHECK-NEXT: st1d { z7.d }, p0, [x7, #3, mul vl] 197; CHECK-NEXT: st1d { z6.d }, p0, [x7] 198; CHECK-NEXT: st1d { z5.d }, p0, [x7, #1, mul vl] 199; CHECK-NEXT: ret 200entry: 201 store volatile <vscale x 8 x double> %x1, ptr %ptr1 202 store volatile <vscale x 8 x double> %x2, ptr %ptr2 203 ret double %x0 204} 205 206define double @foo6(double %x0, double %x1, ptr %ptr1, ptr %ptr2, <vscale x 8 x double> %x2, <vscale x 6 x double> %x3) nounwind { 207; CHECK-LABEL: foo6: 208; CHECK: // %bb.0: // %entry 209; CHECK-NEXT: ptrue p0.d 210; CHECK-NEXT: ld1d { z1.d }, p0/z, [x2] 211; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2, #2, mul vl] 212; CHECK-NEXT: ld1d { z7.d }, p0/z, [x2, #1, mul vl] 213; CHECK-NEXT: st1d { z5.d }, p0, [x0, #3, mul 
vl] 214; CHECK-NEXT: st1d { z4.d }, p0, [x0, #2, mul vl] 215; CHECK-NEXT: st1d { z3.d }, p0, [x0, #1, mul vl] 216; CHECK-NEXT: st1d { z2.d }, p0, [x0] 217; CHECK-NEXT: st1d { z7.d }, p0, [x1, #1, mul vl] 218; CHECK-NEXT: st1d { z6.d }, p0, [x1, #2, mul vl] 219; CHECK-NEXT: st1d { z1.d }, p0, [x1] 220; CHECK-NEXT: ret 221entry: 222 store volatile <vscale x 8 x double> %x2, ptr %ptr1 223 store volatile <vscale x 6 x double> %x3, ptr %ptr2 224 ret double %x0 225} 226 227; Use AAVPCS, SVE register in z0 - z7 used 228 229define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, <vscale x 4 x i32> %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, ptr %ptr) nounwind { 230; CHECK-LABEL: aavpcs1: 231; CHECK: // %bb.0: // %entry 232; CHECK-NEXT: ldp x8, x9, [sp] 233; CHECK-NEXT: ptrue p0.s 234; CHECK-NEXT: ld1w { z24.s }, p0/z, [x7] 235; CHECK-NEXT: ld1w { z3.s }, p0/z, [x8] 236; CHECK-NEXT: st1w { z0.s }, p0, [x9] 237; CHECK-NEXT: st1w { z1.s }, p0, [x9] 238; CHECK-NEXT: st1w { z2.s }, p0, [x9] 239; CHECK-NEXT: st1w { z4.s }, p0, [x9] 240; CHECK-NEXT: st1w { z5.s }, p0, [x9] 241; CHECK-NEXT: st1w { z6.s }, p0, [x9] 242; CHECK-NEXT: st1w { z7.s }, p0, [x9] 243; CHECK-NEXT: st1w { z24.s }, p0, [x9] 244; CHECK-NEXT: st1w { z3.s }, p0, [x9] 245; CHECK-NEXT: ret 246entry: 247 store volatile <vscale x 4 x i32> %s7, ptr %ptr 248 store volatile <vscale x 4 x i32> %s8, ptr %ptr 249 store volatile <vscale x 4 x i32> %s9, ptr %ptr 250 store volatile <vscale x 4 x i32> %s11, ptr %ptr 251 store volatile <vscale x 4 x i32> %s12, ptr %ptr 252 store volatile <vscale x 4 x i32> %s13, ptr %ptr 253 store volatile <vscale x 4 x i32> %s14, ptr %ptr 254 store volatile <vscale x 4 x i32> %s15, ptr %ptr 255 store volatile <vscale x 4 x i32> %s16, ptr %ptr 256 ret void 257} 258 259; Use AAVPCS, SVE 
register in z0 - z7 used 260 261define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, <vscale x 4 x float> %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12,<vscale x 4 x float> %s13,<vscale x 4 x float> %s14,<vscale x 4 x float> %s15,<vscale x 4 x float> %s16,ptr %ptr) nounwind { 262; CHECK-LABEL: aavpcs2: 263; CHECK: // %bb.0: // %entry 264; CHECK-NEXT: ldp x8, x9, [sp] 265; CHECK-NEXT: ptrue p0.s 266; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7] 267; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0] 268; CHECK-NEXT: ld1w { z3.s }, p0/z, [x6] 269; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] 270; CHECK-NEXT: ld1w { z4.s }, p0/z, [x5] 271; CHECK-NEXT: ld1w { z5.s }, p0/z, [x1] 272; CHECK-NEXT: ld1w { z6.s }, p0/z, [x4] 273; CHECK-NEXT: ld1w { z24.s }, p0/z, [x3] 274; CHECK-NEXT: st1w { z7.s }, p0, [x9] 275; CHECK-NEXT: st1w { z2.s }, p0, [x9] 276; CHECK-NEXT: st1w { z5.s }, p0, [x9] 277; CHECK-NEXT: st1w { z24.s }, p0, [x9] 278; CHECK-NEXT: st1w { z6.s }, p0, [x9] 279; CHECK-NEXT: st1w { z4.s }, p0, [x9] 280; CHECK-NEXT: st1w { z3.s }, p0, [x9] 281; CHECK-NEXT: st1w { z1.s }, p0, [x9] 282; CHECK-NEXT: st1w { z0.s }, p0, [x9] 283; CHECK-NEXT: ret 284entry: 285 store volatile <vscale x 4 x float> %s7, ptr %ptr 286 store volatile <vscale x 4 x float> %s8, ptr %ptr 287 store volatile <vscale x 4 x float> %s9, ptr %ptr 288 store volatile <vscale x 4 x float> %s11, ptr %ptr 289 store volatile <vscale x 4 x float> %s12, ptr %ptr 290 store volatile <vscale x 4 x float> %s13, ptr %ptr 291 store volatile <vscale x 4 x float> %s14, ptr %ptr 292 store volatile <vscale x 4 x float> %s15, ptr %ptr 293 store volatile <vscale x 4 x float> %s16, ptr %ptr 294 ret void 295} 296 297; Use AAVPCS, no SVE register in z0 - z7 used (floats occupy z0 - z7) but predicate arg is used 298 299define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, 
float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, <vscale x 16 x i1> %p0, ptr %ptr) nounwind { 300; CHECK-LABEL: aavpcs3: 301; CHECK: // %bb.0: // %entry 302; CHECK-NEXT: ldr x8, [sp] 303; CHECK-NEXT: ptrue p0.s 304; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] 305; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] 306; CHECK-NEXT: ld1w { z2.s }, p0/z, [x7] 307; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1] 308; CHECK-NEXT: ld1w { z4.s }, p0/z, [x6] 309; CHECK-NEXT: ld1w { z5.s }, p0/z, [x5] 310; CHECK-NEXT: ld1w { z6.s }, p0/z, [x2] 311; CHECK-NEXT: ld1w { z7.s }, p0/z, [x4] 312; CHECK-NEXT: ld1w { z24.s }, p0/z, [x3] 313; CHECK-NEXT: ldr x8, [sp, #16] 314; CHECK-NEXT: st1w { z1.s }, p0, [x8] 315; CHECK-NEXT: st1w { z3.s }, p0, [x8] 316; CHECK-NEXT: st1w { z6.s }, p0, [x8] 317; CHECK-NEXT: st1w { z24.s }, p0, [x8] 318; CHECK-NEXT: st1w { z7.s }, p0, [x8] 319; CHECK-NEXT: st1w { z5.s }, p0, [x8] 320; CHECK-NEXT: st1w { z4.s }, p0, [x8] 321; CHECK-NEXT: st1w { z2.s }, p0, [x8] 322; CHECK-NEXT: st1w { z0.s }, p0, [x8] 323; CHECK-NEXT: ret 324entry: 325 store volatile <vscale x 4 x float> %s8, ptr %ptr 326 store volatile <vscale x 4 x float> %s9, ptr %ptr 327 store volatile <vscale x 4 x float> %s10, ptr %ptr 328 store volatile <vscale x 4 x float> %s11, ptr %ptr 329 store volatile <vscale x 4 x float> %s12, ptr %ptr 330 store volatile <vscale x 4 x float> %s13, ptr %ptr 331 store volatile <vscale x 4 x float> %s14, ptr %ptr 332 store volatile <vscale x 4 x float> %s15, ptr %ptr 333 store volatile <vscale x 4 x float> %s16, ptr %ptr 334 ret void 335} 336 337; use AAVPCS, SVE register in z0 - z7 used (i32s dont occupy z0 - z7) 338 339define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> 
%s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, <vscale x 4 x i32> %s17, ptr %ptr) nounwind { 340; CHECK-LABEL: aavpcs4: 341; CHECK: // %bb.0: // %entry 342; CHECK-NEXT: ldr x8, [sp] 343; CHECK-NEXT: ptrue p0.s 344; CHECK-NEXT: ldr x9, [sp, #16] 345; CHECK-NEXT: ld1w { z24.s }, p0/z, [x8] 346; CHECK-NEXT: st1w { z0.s }, p0, [x9] 347; CHECK-NEXT: st1w { z1.s }, p0, [x9] 348; CHECK-NEXT: st1w { z2.s }, p0, [x9] 349; CHECK-NEXT: st1w { z3.s }, p0, [x9] 350; CHECK-NEXT: st1w { z4.s }, p0, [x9] 351; CHECK-NEXT: st1w { z5.s }, p0, [x9] 352; CHECK-NEXT: st1w { z6.s }, p0, [x9] 353; CHECK-NEXT: st1w { z7.s }, p0, [x9] 354; CHECK-NEXT: st1w { z24.s }, p0, [x9] 355; CHECK-NEXT: ret 356entry: 357 store volatile <vscale x 4 x i32> %s8, ptr %ptr 358 store volatile <vscale x 4 x i32> %s9, ptr %ptr 359 store volatile <vscale x 4 x i32> %s10, ptr %ptr 360 store volatile <vscale x 4 x i32> %s11, ptr %ptr 361 store volatile <vscale x 4 x i32> %s12, ptr %ptr 362 store volatile <vscale x 4 x i32> %s13, ptr %ptr 363 store volatile <vscale x 4 x i32> %s14, ptr %ptr 364 store volatile <vscale x 4 x i32> %s15, ptr %ptr 365 store volatile <vscale x 4 x i32> %s16, ptr %ptr 366 ret void 367} 368 369; Use AAVPCS, SVE register used in return 370 371define <vscale x 4 x float> @aavpcs5(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, ptr %ptr) nounwind { 372; CHECK-LABEL: aavpcs5: 373; CHECK: // %bb.0: // %entry 374; CHECK-NEXT: ldr x8, [sp] 375; CHECK-NEXT: ptrue p0.s 376; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8] 377; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 378; 
CHECK-NEXT: ld1w { z2.s }, p0/z, [x7] 379; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1] 380; CHECK-NEXT: ld1w { z4.s }, p0/z, [x6] 381; CHECK-NEXT: ld1w { z5.s }, p0/z, [x5] 382; CHECK-NEXT: ld1w { z6.s }, p0/z, [x2] 383; CHECK-NEXT: ld1w { z7.s }, p0/z, [x4] 384; CHECK-NEXT: ld1w { z24.s }, p0/z, [x3] 385; CHECK-NEXT: ldr x8, [sp, #16] 386; CHECK-NEXT: st1w { z0.s }, p0, [x8] 387; CHECK-NEXT: st1w { z3.s }, p0, [x8] 388; CHECK-NEXT: st1w { z6.s }, p0, [x8] 389; CHECK-NEXT: st1w { z24.s }, p0, [x8] 390; CHECK-NEXT: st1w { z7.s }, p0, [x8] 391; CHECK-NEXT: st1w { z5.s }, p0, [x8] 392; CHECK-NEXT: st1w { z4.s }, p0, [x8] 393; CHECK-NEXT: st1w { z2.s }, p0, [x8] 394; CHECK-NEXT: st1w { z1.s }, p0, [x8] 395; CHECK-NEXT: ret 396entry: 397 store volatile <vscale x 4 x float> %s8, ptr %ptr 398 store volatile <vscale x 4 x float> %s9, ptr %ptr 399 store volatile <vscale x 4 x float> %s10, ptr %ptr 400 store volatile <vscale x 4 x float> %s11, ptr %ptr 401 store volatile <vscale x 4 x float> %s12, ptr %ptr 402 store volatile <vscale x 4 x float> %s13, ptr %ptr 403 store volatile <vscale x 4 x float> %s14, ptr %ptr 404 store volatile <vscale x 4 x float> %s15, ptr %ptr 405 store volatile <vscale x 4 x float> %s16, ptr %ptr 406 ret <vscale x 4 x float> %s8 407} 408 409define void @aapcs1(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, ptr %ptr) nounwind { 410; CHECK-LABEL: aapcs1: 411; CHECK: // %bb.0: // %entry 412; CHECK-NEXT: ldr x8, [sp] 413; CHECK-NEXT: ptrue p0.s 414; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] 415; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] 416; CHECK-NEXT: ld1w { z2.s }, p0/z, [x7] 417; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1] 418; CHECK-NEXT: ld1w { z4.s }, p0/z, [x6] 
419; CHECK-NEXT: ld1w { z5.s }, p0/z, [x5] 420; CHECK-NEXT: ld1w { z6.s }, p0/z, [x2] 421; CHECK-NEXT: ld1w { z7.s }, p0/z, [x4] 422; CHECK-NEXT: ld1w { z16.s }, p0/z, [x3] 423; CHECK-NEXT: ldr x8, [sp, #16] 424; CHECK-NEXT: st1w { z1.s }, p0, [x8] 425; CHECK-NEXT: st1w { z3.s }, p0, [x8] 426; CHECK-NEXT: st1w { z6.s }, p0, [x8] 427; CHECK-NEXT: st1w { z16.s }, p0, [x8] 428; CHECK-NEXT: st1w { z7.s }, p0, [x8] 429; CHECK-NEXT: st1w { z5.s }, p0, [x8] 430; CHECK-NEXT: st1w { z4.s }, p0, [x8] 431; CHECK-NEXT: st1w { z2.s }, p0, [x8] 432; CHECK-NEXT: st1w { z0.s }, p0, [x8] 433; CHECK-NEXT: ret 434entry: 435 store volatile <vscale x 4 x float> %s8, ptr %ptr 436 store volatile <vscale x 4 x float> %s9, ptr %ptr 437 store volatile <vscale x 4 x float> %s10, ptr %ptr 438 store volatile <vscale x 4 x float> %s11, ptr %ptr 439 store volatile <vscale x 4 x float> %s12, ptr %ptr 440 store volatile <vscale x 4 x float> %s13, ptr %ptr 441 store volatile <vscale x 4 x float> %s14, ptr %ptr 442 store volatile <vscale x 4 x float> %s15, ptr %ptr 443 store volatile <vscale x 4 x float> %s16, ptr %ptr 444 ret void 445} 446 447declare void @non_sve_callee_high_range(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1) 448 449define void @non_sve_caller_non_sve_callee_high_range() { 450; CHECK-LABEL: non_sve_caller_non_sve_callee_high_range: 451; CHECK: // %bb.0: 452; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill 453; CHECK-NEXT: addvl sp, sp, #-2 454; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG 455; CHECK-NEXT: .cfi_offset w30, -8 456; CHECK-NEXT: .cfi_offset w29, -16 457; CHECK-NEXT: movi d0, #0000000000000000 458; CHECK-NEXT: fmov s1, #1.00000000 459; CHECK-NEXT: addvl x0, sp, #1 460; CHECK-NEXT: fmov s2, #2.00000000 461; CHECK-NEXT: fmov s3, #3.00000000 462; CHECK-NEXT: mov x1, sp 463; CHECK-NEXT: fmov s4, #4.00000000 464; CHECK-NEXT: fmov s5, #5.00000000 465; CHECK-NEXT: fmov s6, #6.00000000 466; CHECK-NEXT: fmov s7, #7.00000000 467; CHECK-NEXT: bl non_sve_callee_high_range 468; CHECK-NEXT: addvl sp, sp, #2 469; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload 470; CHECK-NEXT: ret 471 call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> undef, <vscale x 4 x float> undef) 472 ret void 473} 474 475define void @non_sve_caller_high_range_non_sve_callee_high_range(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1) { 476; CHECK-LABEL: non_sve_caller_high_range_non_sve_callee_high_range: 477; CHECK: // %bb.0: 478; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill 479; CHECK-NEXT: addvl sp, sp, #-2 480; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG 481; CHECK-NEXT: .cfi_offset w30, -8 482; CHECK-NEXT: .cfi_offset w29, -16 483; CHECK-NEXT: ptrue p0.s 484; CHECK-NEXT: movi d0, #0000000000000000 485; CHECK-NEXT: fmov s1, #1.00000000 486; CHECK-NEXT: fmov s2, #2.00000000 487; CHECK-NEXT: fmov s3, #3.00000000 488; CHECK-NEXT: fmov s4, #4.00000000 489; CHECK-NEXT: ld1w { z16.s }, p0/z, [x0] 490; CHECK-NEXT: ld1w { z17.s }, p0/z, [x1] 491; CHECK-NEXT: addvl x0, sp, #1 492; CHECK-NEXT: fmov s5, #5.00000000 493; CHECK-NEXT: fmov s6, #6.00000000 494; CHECK-NEXT: mov x1, sp 495; CHECK-NEXT: fmov s7, #7.00000000 496; CHECK-NEXT: st1w { z17.s }, p0, [sp] 497; CHECK-NEXT: st1w { z16.s }, p0, [sp, #1, mul vl] 498; CHECK-NEXT: bl non_sve_callee_high_range 499; CHECK-NEXT: addvl sp, sp, #2 500; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload 501; CHECK-NEXT: ret 502 call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1) 503 ret void 504} 505 506define <vscale x 4 x float> @sve_caller_non_sve_callee_high_range(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1) { 507; CHECK-LABEL: sve_caller_non_sve_callee_high_range: 508; CHECK: // %bb.0: 509; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill 510; CHECK-NEXT: addvl sp, sp, #-18 511; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill 512; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill 513; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill 514; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill 515; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill 516; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill 517; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill 518; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill 519; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill 520; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill 521; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill 522; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill 523; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill 524; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill 525; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill 526; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill 527; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill 528; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill 529; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill 530; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill 531; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill 532; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill 533; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill 534; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill 535; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill 536; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill 537; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill 538; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill 539; CHECK-NEXT: addvl sp, sp, #-3 540; CHECK-NEXT: .cfi_escape 0x0f, 
0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa8, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 168 * VG 541; CHECK-NEXT: .cfi_offset w30, -8 542; CHECK-NEXT: .cfi_offset w29, -16 543; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG 544; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG 545; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG 546; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG 547; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG 548; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG 549; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG 550; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG 551; CHECK-NEXT: mov z25.d, z0.d 552; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill 553; CHECK-NEXT: movi d0, #0000000000000000 554; CHECK-NEXT: mov z24.d, z1.d 555; CHECK-NEXT: fmov s1, #1.00000000 556; CHECK-NEXT: addvl x0, sp, #2 557; CHECK-NEXT: fmov s2, #2.00000000 558; CHECK-NEXT: fmov s3, #3.00000000 559; CHECK-NEXT: addvl x1, sp, #1 560; CHECK-NEXT: fmov s4, #4.00000000 561; CHECK-NEXT: fmov s5, #5.00000000 562; CHECK-NEXT: fmov s6, #6.00000000 563; CHECK-NEXT: fmov s7, #7.00000000 564; CHECK-NEXT: ptrue p0.s 565; CHECK-NEXT: st1w { z24.s }, p0, [sp, #1, mul vl] 566; CHECK-NEXT: st1w { z25.s }, p0, [sp, #2, mul vl] 567; CHECK-NEXT: bl non_sve_callee_high_range 568; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload 569; 
CHECK-NEXT: addvl sp, sp, #3 570; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload 571; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload 572; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload 573; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload 574; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload 575; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload 576; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload 577; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload 578; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload 579; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload 580; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload 581; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload 582; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload 583; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload 584; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload 585; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload 586; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload 587; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload 588; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload 589; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload 590; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload 591; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload 592; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload 593; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload 594; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload 595; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload 596; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload 597; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload 598; CHECK-NEXT: addvl sp, sp, #18 599; CHECK-NEXT: ldp x29, x30, [sp], 
#16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> %v0, <vscale x 4 x float> %v1)
  ret <vscale x 4 x float> %v0
}

; A caller that returns a scalable vector calls @non_sve_callee_high_range with
; eight float constants (0.0 through 7.0) followed by two undef
; <vscale x 4 x float> operands.
; The autogenerated assertions below expect p4-p15 and z8-z23 to be spilled
; before the call and reloaded after it, the floats to be set up in s0-s7
; (0.0 via a movi of d0), and x0/x1 to receive addresses inside the two-vector
; area reserved by "addvl sp, sp, #-2".
; Presumably x0/x1 are the indirect slots for the two vector operands; nothing
; is stored to them in the expected code, presumably because both operands are
; undef - confirm against the lowering code.
define <vscale x 4 x float> @sve_ret_caller_non_sve_callee_high_range()  {
; CHECK-LABEL: sve_ret_caller_non_sve_callee_high_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-18
; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    fmov s1, #1.00000000
; CHECK-NEXT:    addvl x0, sp, #1
; CHECK-NEXT:    fmov s2, #2.00000000
; CHECK-NEXT:    fmov s3, #3.00000000
; CHECK-NEXT:    mov x1, sp
; CHECK-NEXT:    fmov s4, #4.00000000
; CHECK-NEXT:    fmov s5, #5.00000000
; CHECK-NEXT:    fmov s6, #6.00000000
; CHECK-NEXT:    fmov s7, #7.00000000
; CHECK-NEXT:    bl non_sve_callee_high_range
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #18
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @non_sve_callee_high_range(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
  ret <vscale x 4 x float> undef
}

; Calls @func_f8_and_v0_passed_via_memory with nine float constants (0.0
; through 8.0) and a scalable vector splat of 9.0.
; The autogenerated assertions below expect 0.0-7.0 to be set up in s0-s7, the
; bit pattern 0x41000000 (8.0 as an IEEE-754 single) to be stored at [sp], the
; splat to be stored with st1w at sp + 16, and x0 to receive that same sp + 16
; address. This matches the callee's name: %f8 and %v0 go through memory.
declare void @func_f8_and_v0_passed_via_memory(float %f0, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, float %f8, <vscale x 4 x float> %v0)
define void @verify_all_operands_are_initialised() {
; CHECK-LABEL: verify_all_operands_are_initialised:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    fmov z16.s, #9.00000000
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmov s1, #1.00000000
; CHECK-NEXT:    fmov s2, #2.00000000
; CHECK-NEXT:    fmov s3, #3.00000000
; CHECK-NEXT:    add x0, sp, #16
; CHECK-NEXT:    fmov s4, #4.00000000
; CHECK-NEXT:    fmov s5, #5.00000000
; CHECK-NEXT:    st1w { z16.s }, p0, [x8]
; CHECK-NEXT:    mov w8, #1090519040 // =0x41000000
; CHECK-NEXT:    fmov s6, #6.00000000
; CHECK-NEXT:    fmov s7, #7.00000000
; CHECK-NEXT:    str w8, [sp]
; CHECK-NEXT:    bl func_f8_and_v0_passed_via_memory
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @func_f8_and_v0_passed_via_memory(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, <vscale x 4 x float> splat (float 9.000000e+00))
  ret void
}

; External callees taking a mix of scalar and scalable-vector parameters.
; @callee1 is called from @foo1 at the top of this file; @callee2 and @callee3
; are presumably called by other tests in this file.
declare float @callee1(float, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 2 x double>)
declare float @callee2(i32, i32, i32, i32, i32, i32, i32, i32, float, <vscale x 8 x double>, <vscale x 8 x double>)
declare float @callee3(float, float, <vscale x 8 x double>, <vscale x 6 x double>, <vscale x 2 x double>)

; Declarations of the llvm.aarch64.sve.* and llvm.vector.insert.* intrinsics.
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
declare {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1>, ptr)
declare {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, ptr)
declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
declare <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nx2f64(<vscale x 8 x double>, <vscale x 2 x double>, i64)
declare <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nx2f64(<vscale x 6 x double>, <vscale x 2 x double>, i64)