; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=NO-SVE-CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK

; The functions below check the prologue, epilogue and CFI directives emitted
; when VG is saved to the stack around a streaming-mode change, both without a
; frame pointer and with -frame-pointer=non-leaf, and that the machine outliner
; does not create outlined functions from them.

declare void @callee();
declare void @fixed_callee(<4 x i32>);
declare void @scalable_callee(<vscale x 2 x i64>);

declare void @streaming_callee() #0;
declare void @streaming_callee_with_arg(i32) #0;

; Simple example of a function with one call requiring a streaming mode change
;
define void @vg_unwind_simple() #0 {
; CHECK-LABEL: vg_unwind_simple:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    .cfi_offset b8, -24
; CHECK-NEXT:    .cfi_offset b9, -32
; CHECK-NEXT:    .cfi_offset b10, -40
; CHECK-NEXT:    .cfi_offset b11, -48
; CHECK-NEXT:    .cfi_offset b12, -56
; CHECK-NEXT:    .cfi_offset b13, -64
; CHECK-NEXT:    .cfi_offset b14, -72
; CHECK-NEXT:    .cfi_offset b15, -80
; CHECK-NEXT:    .cfi_offset vg, -8
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: vg_unwind_simple:
; FP-CHECK:       // %bb.0:
; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; FP-CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; FP-CHECK-NEXT:    add x29, sp, #64
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
; FP-CHECK-NEXT:    .cfi_offset w30, -24
; FP-CHECK-NEXT:    .cfi_offset w29, -32
; FP-CHECK-NEXT:    .cfi_offset b8, -40
; FP-CHECK-NEXT:    .cfi_offset b9, -48
; FP-CHECK-NEXT:    .cfi_offset b10, -56
; FP-CHECK-NEXT:    .cfi_offset b11, -64
; FP-CHECK-NEXT:    .cfi_offset b12, -72
; FP-CHECK-NEXT:    .cfi_offset b13, -80
; FP-CHECK-NEXT:    .cfi_offset b14, -88
; FP-CHECK-NEXT:    .cfi_offset b15, -96
; FP-CHECK-NEXT:    .cfi_offset vg, -16
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:    bl callee
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    .cfi_restore b8
; FP-CHECK-NEXT:    .cfi_restore b9
; FP-CHECK-NEXT:    .cfi_restore b10
; FP-CHECK-NEXT:    .cfi_restore b11
; FP-CHECK-NEXT:    .cfi_restore b12
; FP-CHECK-NEXT:    .cfi_restore b13
; FP-CHECK-NEXT:    .cfi_restore b14
; FP-CHECK-NEXT:    .cfi_restore b15
; FP-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: vg_unwind_simple:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void @callee();
  ret void;
}

; As above, with an extra register clobbered by the inline asm call which
; changes NeedsGapToAlignStack to false
;
define void @vg_unwind_needs_gap() #0 {
; CHECK-LABEL: vg_unwind_needs_gap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x20, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w30, -32
; CHECK-NEXT:    .cfi_offset b8, -40
; CHECK-NEXT:    .cfi_offset b9, -48
; CHECK-NEXT:    .cfi_offset b10, -56
; CHECK-NEXT:    .cfi_offset b11, -64
; CHECK-NEXT:    .cfi_offset b12, -72
; CHECK-NEXT:    .cfi_offset b13, -80
; CHECK-NEXT:    .cfi_offset b14, -88
; CHECK-NEXT:    .cfi_offset b15, -96
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    .cfi_offset vg, -24
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x20, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w20
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: vg_unwind_needs_gap:
; FP-CHECK:       // %bb.0:
; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x9, x20, [sp, #80] // 16-byte Folded Spill
; FP-CHECK-NEXT:    add x29, sp, #64
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
; FP-CHECK-NEXT:    .cfi_offset w20, -8
; FP-CHECK-NEXT:    .cfi_offset w30, -24
; FP-CHECK-NEXT:    .cfi_offset w29, -32
; FP-CHECK-NEXT:    .cfi_offset b8, -40
; FP-CHECK-NEXT:    .cfi_offset b9, -48
; FP-CHECK-NEXT:    .cfi_offset b10, -56
; FP-CHECK-NEXT:    .cfi_offset b11, -64
; FP-CHECK-NEXT:    .cfi_offset b12, -72
; FP-CHECK-NEXT:    .cfi_offset b13, -80
; FP-CHECK-NEXT:    .cfi_offset b14, -88
; FP-CHECK-NEXT:    .cfi_offset b15, -96
; FP-CHECK-NEXT:    //APP
; FP-CHECK-NEXT:    //NO_APP
; FP-CHECK-NEXT:    .cfi_offset vg, -16
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:    bl callee
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldr x20, [sp, #88] // 8-byte Folded Reload
; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w20
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    .cfi_restore b8
; FP-CHECK-NEXT:    .cfi_restore b9
; FP-CHECK-NEXT:    .cfi_restore b10
; FP-CHECK-NEXT:    .cfi_restore b11
; FP-CHECK-NEXT:    .cfi_restore b12
; FP-CHECK-NEXT:    .cfi_restore b13
; FP-CHECK-NEXT:    .cfi_restore b14
; FP-CHECK-NEXT:    .cfi_restore b15
; FP-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: vg_unwind_needs_gap:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void asm sideeffect "", "~{x20}"()
  call void @callee();
  ret void;
}

; The callee takes a fixed-width vector argument: q0 is spilled before the
; streaming-mode change and reloaded after smstop, ahead of the call.
;
define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 {
; CHECK-LABEL: vg_unwind_with_fixed_args:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #96
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    .cfi_offset b8, -24
; CHECK-NEXT:    .cfi_offset b9, -32
; CHECK-NEXT:    .cfi_offset b10, -40
; CHECK-NEXT:    .cfi_offset b11, -48
; CHECK-NEXT:    .cfi_offset b12, -56
; CHECK-NEXT:    .cfi_offset b13, -64
; CHECK-NEXT:    .cfi_offset b14, -72
; CHECK-NEXT:    .cfi_offset b15, -80
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_offset vg, -8
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl fixed_callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #96
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: vg_unwind_with_fixed_args:
; FP-CHECK:       // %bb.0:
; FP-CHECK-NEXT:    sub sp, sp, #112
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 112
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
; FP-CHECK-NEXT:    str x9, [sp, #96] // 8-byte Folded Spill
; FP-CHECK-NEXT:    add x29, sp, #80
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
; FP-CHECK-NEXT:    .cfi_offset w30, -24
; FP-CHECK-NEXT:    .cfi_offset w29, -32
; FP-CHECK-NEXT:    .cfi_offset b8, -40
; FP-CHECK-NEXT:    .cfi_offset b9, -48
; FP-CHECK-NEXT:    .cfi_offset b10, -56
; FP-CHECK-NEXT:    .cfi_offset b11, -64
; FP-CHECK-NEXT:    .cfi_offset b12, -72
; FP-CHECK-NEXT:    .cfi_offset b13, -80
; FP-CHECK-NEXT:    .cfi_offset b14, -88
; FP-CHECK-NEXT:    .cfi_offset b15, -96
; FP-CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_offset vg, -16
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; FP-CHECK-NEXT:    bl fixed_callee
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 112
; FP-CHECK-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; FP-CHECK-NEXT:    add sp, sp, #112
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    .cfi_restore b8
; FP-CHECK-NEXT:    .cfi_restore b9
; FP-CHECK-NEXT:    .cfi_restore b10
; FP-CHECK-NEXT:    .cfi_restore b11
; FP-CHECK-NEXT:    .cfi_restore b12
; FP-CHECK-NEXT:    .cfi_restore b13
; FP-CHECK-NEXT:    .cfi_restore b14
; FP-CHECK-NEXT:    .cfi_restore b15
; FP-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: vg_unwind_with_fixed_args:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void @fixed_callee(<4 x i32> %x);
  ret void;
}

; The callee takes a scalable vector argument and x28 is clobbered by inline
; asm: the Z/P callee-saves are spilled to addvl-allocated stack and z0 is
; spilled and reloaded around the streaming-mode change.
;
define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
; CHECK-LABEL: vg_unwind_with_sve_args:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp x9, x28, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w28, -8
; CHECK-NEXT:    .cfi_offset w30, -24
; CHECK-NEXT:    .cfi_offset w29, -32
; CHECK-NEXT:    addvl sp, sp, #-18
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    ptrue pn8.b
; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
; CHECK-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
; CHECK-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
; CHECK-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 32 - 40 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    //APP
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    .cfi_offset vg, -16
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl scalable_callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
; CHECK-NEXT:    ptrue pn8.b
; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
; CHECK-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
; CHECK-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
; CHECK-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
; CHECK-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
; CHECK-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
; CHECK-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #18
; CHECK-NEXT:    .cfi_def_cfa wsp, 32
; CHECK-NEXT:    .cfi_restore z8
; CHECK-NEXT:    .cfi_restore z9
; CHECK-NEXT:    .cfi_restore z10
; CHECK-NEXT:    .cfi_restore z11
; CHECK-NEXT:    .cfi_restore z12
; CHECK-NEXT:    .cfi_restore z13
; CHECK-NEXT:    .cfi_restore z14
; CHECK-NEXT:    .cfi_restore z15
; CHECK-NEXT:    ldr x28, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w28
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: vg_unwind_with_sve_args:
; FP-CHECK:       // %bb.0:
; FP-CHECK-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 48
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp x28, x27, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
; FP-CHECK-NEXT:    mov x29, sp
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 48
; FP-CHECK-NEXT:    .cfi_offset w27, -8
; FP-CHECK-NEXT:    .cfi_offset w28, -16
; FP-CHECK-NEXT:    .cfi_offset w30, -40
; FP-CHECK-NEXT:    .cfi_offset w29, -48
; FP-CHECK-NEXT:    addvl sp, sp, #-18
; FP-CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    ptrue pn8.b
; FP-CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
; FP-CHECK-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
; FP-CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
; FP-CHECK-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
; FP-CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
; FP-CHECK-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
; FP-CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
; FP-CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; FP-CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; FP-CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
; FP-CHECK-NEXT:    addvl sp, sp, #-1
; FP-CHECK-NEXT:    str z0, [x29, #-19, mul vl] // 16-byte Folded Spill
; FP-CHECK-NEXT:    //APP
; FP-CHECK-NEXT:    //NO_APP
; FP-CHECK-NEXT:    .cfi_offset vg, -32
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:    ldr z0, [x29, #-19, mul vl] // 16-byte Folded Reload
; FP-CHECK-NEXT:    bl scalable_callee
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    addvl sp, sp, #1
; FP-CHECK-NEXT:    ptrue pn8.b
; FP-CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
; FP-CHECK-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
; FP-CHECK-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
; FP-CHECK-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
; FP-CHECK-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
; FP-CHECK-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
; FP-CHECK-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
; FP-CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; FP-CHECK-NEXT:    addvl sp, sp, #18
; FP-CHECK-NEXT:    .cfi_restore z8
; FP-CHECK-NEXT:    .cfi_restore z9
; FP-CHECK-NEXT:    .cfi_restore z10
; FP-CHECK-NEXT:    .cfi_restore z11
; FP-CHECK-NEXT:    .cfi_restore z12
; FP-CHECK-NEXT:    .cfi_restore z13
; FP-CHECK-NEXT:    .cfi_restore z14
; FP-CHECK-NEXT:    .cfi_restore z15
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 48
; FP-CHECK-NEXT:    ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w27
; FP-CHECK-NEXT:    .cfi_restore w28
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: vg_unwind_with_sve_args:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void asm sideeffect "", "~{x28}"()
  call void @scalable_callee(<vscale x 2 x i64> %x);
  ret void;
}

; This test was based on stack-probing-64k.ll and tries to test multiple uses of
; findScratchNonCalleeSaveRegister.
;
define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 {
; CHECK-LABEL: vg_unwind_multiple_scratch_regs:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w30, -24
; CHECK-NEXT:    .cfi_offset w29, -32
; CHECK-NEXT:    .cfi_offset b8, -40
; CHECK-NEXT:    .cfi_offset b9, -48
; CHECK-NEXT:    .cfi_offset b10, -56
; CHECK-NEXT:    .cfi_offset b11, -64
; CHECK-NEXT:    .cfi_offset b12, -72
; CHECK-NEXT:    .cfi_offset b13, -80
; CHECK-NEXT:    .cfi_offset b14, -88
; CHECK-NEXT:    .cfi_offset b15, -96
; CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT:    .cfi_def_cfa w9, 327776
; CHECK-NEXT:  .LBB4_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    b.ne .LBB4_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    .cfi_def_cfa_register wsp
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    .cfi_offset vg, -16
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: vg_unwind_multiple_scratch_regs:
; FP-CHECK:       // %bb.0: // %entry
; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x9, x28, [sp, #80] // 16-byte Folded Spill
; FP-CHECK-NEXT:    add x29, sp, #64
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
; FP-CHECK-NEXT:    .cfi_offset w28, -8
; FP-CHECK-NEXT:    .cfi_offset w30, -24
; FP-CHECK-NEXT:    .cfi_offset w29, -32
; FP-CHECK-NEXT:    .cfi_offset b8, -40
; FP-CHECK-NEXT:    .cfi_offset b9, -48
; FP-CHECK-NEXT:    .cfi_offset b10, -56
; FP-CHECK-NEXT:    .cfi_offset b11, -64
; FP-CHECK-NEXT:    .cfi_offset b12, -72
; FP-CHECK-NEXT:    .cfi_offset b13, -80
; FP-CHECK-NEXT:    .cfi_offset b14, -88
; FP-CHECK-NEXT:    .cfi_offset b15, -96
; FP-CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
; FP-CHECK-NEXT:  .LBB4_1: // %entry
; FP-CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; FP-CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
; FP-CHECK-NEXT:    cmp sp, x9
; FP-CHECK-NEXT:    str xzr, [sp]
; FP-CHECK-NEXT:    b.ne .LBB4_1
; FP-CHECK-NEXT:  // %bb.2: // %entry
; FP-CHECK-NEXT:    mov x8, sp
; FP-CHECK-NEXT:    str x8, [x0]
; FP-CHECK-NEXT:    .cfi_offset vg, -16
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:    bl callee
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldr x28, [sp, #88] // 8-byte Folded Reload
; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w28
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    .cfi_restore b8
; FP-CHECK-NEXT:    .cfi_restore b9
; FP-CHECK-NEXT:    .cfi_restore b10
; FP-CHECK-NEXT:    .cfi_restore b11
; FP-CHECK-NEXT:    .cfi_restore b12
; FP-CHECK-NEXT:    .cfi_restore b13
; FP-CHECK-NEXT:    .cfi_restore b14
; FP-CHECK-NEXT:    .cfi_restore b15
; FP-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: vg_unwind_multiple_scratch_regs:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
entry:
  %v = alloca i8, i64 327680, align 1
  store ptr %v, ptr %out, align 8
  call void @callee()
  ret void
}

; Locally streaming functions require storing both the streaming and
; non-streaming values of VG.
;
; The checks below expect two values to be spilled in the prologue: one
; computed with `rdsvl` + `lsr #3` and one read with `cntd`. They also expect
; `.cfi_offset vg` / `.cfi_restore vg` directives around the mode changes.
; The OUTLINER-CHECK lines (machine-outliner RUN line) require that no
; OUTLINED_FUNCTION_ reference appears for this function.
define void @vg_locally_streaming_fn() #3 {
; CHECK-LABEL: vg_locally_streaming_fn:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    rdsvl x9, #1
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    lsr x9, x9, #3
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset vg, -16
; CHECK-NEXT:    .cfi_offset w30, -32
; CHECK-NEXT:    .cfi_offset b8, -40
; CHECK-NEXT:    .cfi_offset b9, -48
; CHECK-NEXT:    .cfi_offset b10, -56
; CHECK-NEXT:    .cfi_offset b11, -64
; CHECK-NEXT:    .cfi_offset b12, -72
; CHECK-NEXT:    .cfi_offset b13, -80
; CHECK-NEXT:    .cfi_offset b14, -88
; CHECK-NEXT:    .cfi_offset b15, -96
; CHECK-NEXT:    bl callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    .cfi_offset vg, -24
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl callee
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: vg_locally_streaming_fn:
; FP-CHECK:       // %bb.0:
; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
; FP-CHECK-NEXT:    rdsvl x9, #1
; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; FP-CHECK-NEXT:    lsr x9, x9, #3
; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; FP-CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; FP-CHECK-NEXT:    str x9, [sp, #88] // 8-byte Folded Spill
; FP-CHECK-NEXT:    add x29, sp, #64
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
; FP-CHECK-NEXT:    .cfi_offset vg, -8
; FP-CHECK-NEXT:    .cfi_offset w30, -24
; FP-CHECK-NEXT:    .cfi_offset w29, -32
; FP-CHECK-NEXT:    .cfi_offset b8, -40
; FP-CHECK-NEXT:    .cfi_offset b9, -48
; FP-CHECK-NEXT:    .cfi_offset b10, -56
; FP-CHECK-NEXT:    .cfi_offset b11, -64
; FP-CHECK-NEXT:    .cfi_offset b12, -72
; FP-CHECK-NEXT:    .cfi_offset b13, -80
; FP-CHECK-NEXT:    .cfi_offset b14, -88
; FP-CHECK-NEXT:    .cfi_offset b15, -96
; FP-CHECK-NEXT:    bl callee
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    bl streaming_callee
; FP-CHECK-NEXT:    .cfi_offset vg, -16
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:    bl callee
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    .cfi_restore b8
; FP-CHECK-NEXT:    .cfi_restore b9
; FP-CHECK-NEXT:    .cfi_restore b10
; FP-CHECK-NEXT:    .cfi_restore b11
; FP-CHECK-NEXT:    .cfi_restore b12
; FP-CHECK-NEXT:    .cfi_restore b13
; FP-CHECK-NEXT:    .cfi_restore b14
; FP-CHECK-NEXT:    .cfi_restore b15
; FP-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: vg_locally_streaming_fn:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void @callee()
  call void @streaming_callee()
  call void @callee()
  ret void
}

; A streaming-compatible function (#4) calling a streaming callee.
; The expected code calls __arm_sme_state, keeps bit 0 of the result in x19
; and uses `tbnz` on that bit to branch around both the `smstart sm` before
; the call and the `smstop sm` after it. `.cfi_offset vg` is expected right
; before the conditional smstart and `.cfi_restore vg` right after the
; conditional smstop.
define void @streaming_compatible_to_streaming() #4 {
; CHECK-LABEL: streaming_compatible_to_streaming:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w19, -16
; CHECK-NEXT:    .cfi_offset w30, -32
; CHECK-NEXT:    .cfi_offset b8, -40
; CHECK-NEXT:    .cfi_offset b9, -48
; CHECK-NEXT:    .cfi_offset b10, -56
; CHECK-NEXT:    .cfi_offset b11, -64
; CHECK-NEXT:    .cfi_offset b12, -72
; CHECK-NEXT:    .cfi_offset b13, -80
; CHECK-NEXT:    .cfi_offset b14, -88
; CHECK-NEXT:    .cfi_offset b15, -96
; CHECK-NEXT:    bl __arm_sme_state
; CHECK-NEXT:    and x19, x0, #0x1
; CHECK-NEXT:    .cfi_offset vg, -24
; CHECK-NEXT:    tbnz w19, #0, .LBB6_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:  .LBB6_2:
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    tbnz w19, #0, .LBB6_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:  .LBB6_4:
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w19
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: streaming_compatible_to_streaming:
; FP-CHECK:       // %bb.0:
; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; FP-CHECK-NEXT:    add x29, sp, #64
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
; FP-CHECK-NEXT:    .cfi_offset w19, -8
; FP-CHECK-NEXT:    .cfi_offset w30, -24
; FP-CHECK-NEXT:    .cfi_offset w29, -32
; FP-CHECK-NEXT:    .cfi_offset b8, -40
; FP-CHECK-NEXT:    .cfi_offset b9, -48
; FP-CHECK-NEXT:    .cfi_offset b10, -56
; FP-CHECK-NEXT:    .cfi_offset b11, -64
; FP-CHECK-NEXT:    .cfi_offset b12, -72
; FP-CHECK-NEXT:    .cfi_offset b13, -80
; FP-CHECK-NEXT:    .cfi_offset b14, -88
; FP-CHECK-NEXT:    .cfi_offset b15, -96
; FP-CHECK-NEXT:    bl __arm_sme_state
; FP-CHECK-NEXT:    and x19, x0, #0x1
; FP-CHECK-NEXT:    .cfi_offset vg, -16
; FP-CHECK-NEXT:    tbnz w19, #0, .LBB6_2
; FP-CHECK-NEXT:  // %bb.1:
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:  .LBB6_2:
; FP-CHECK-NEXT:    bl streaming_callee
; FP-CHECK-NEXT:    tbnz w19, #0, .LBB6_4
; FP-CHECK-NEXT:  // %bb.3:
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:  .LBB6_4:
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w19
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    .cfi_restore b8
; FP-CHECK-NEXT:    .cfi_restore b9
; FP-CHECK-NEXT:    .cfi_restore b10
; FP-CHECK-NEXT:    .cfi_restore b11
; FP-CHECK-NEXT:    .cfi_restore b12
; FP-CHECK-NEXT:    .cfi_restore b13
; FP-CHECK-NEXT:    .cfi_restore b14
; FP-CHECK-NEXT:    .cfi_restore b15
; FP-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: streaming_compatible_to_streaming:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void @streaming_callee()
  ret void
}

; A streaming-compatible function (#4) calling a non-streaming callee.
; Mirror image of the streaming-callee case: the expected code uses `tbz` on
; bit 0 of the __arm_sme_state result to branch around the `smstop sm` before
; the call and the `smstart sm` after it, with `.cfi_offset vg` before the
; conditional smstop and `.cfi_restore vg` after the conditional smstart.
define void @streaming_compatible_to_non_streaming() #4 {
; CHECK-LABEL: streaming_compatible_to_non_streaming:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w19, -16
; CHECK-NEXT:    .cfi_offset w30, -32
; CHECK-NEXT:    .cfi_offset b8, -40
; CHECK-NEXT:    .cfi_offset b9, -48
; CHECK-NEXT:    .cfi_offset b10, -56
; CHECK-NEXT:    .cfi_offset b11, -64
; CHECK-NEXT:    .cfi_offset b12, -72
; CHECK-NEXT:    .cfi_offset b13, -80
; CHECK-NEXT:    .cfi_offset b14, -88
; CHECK-NEXT:    .cfi_offset b15, -96
; CHECK-NEXT:    bl __arm_sme_state
; CHECK-NEXT:    and x19, x0, #0x1
; CHECK-NEXT:    .cfi_offset vg, -24
; CHECK-NEXT:    tbz w19, #0, .LBB7_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:  .LBB7_2:
; CHECK-NEXT:    bl callee
; CHECK-NEXT:    tbz w19, #0, .LBB7_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:  .LBB7_4:
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w19
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: streaming_compatible_to_non_streaming:
; FP-CHECK:       // %bb.0:
; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; FP-CHECK-NEXT:    add x29, sp, #64
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
; FP-CHECK-NEXT:    .cfi_offset w19, -8
; FP-CHECK-NEXT:    .cfi_offset w30, -24
; FP-CHECK-NEXT:    .cfi_offset w29, -32
; FP-CHECK-NEXT:    .cfi_offset b8, -40
; FP-CHECK-NEXT:    .cfi_offset b9, -48
; FP-CHECK-NEXT:    .cfi_offset b10, -56
; FP-CHECK-NEXT:    .cfi_offset b11, -64
; FP-CHECK-NEXT:    .cfi_offset b12, -72
; FP-CHECK-NEXT:    .cfi_offset b13, -80
; FP-CHECK-NEXT:    .cfi_offset b14, -88
; FP-CHECK-NEXT:    .cfi_offset b15, -96
; FP-CHECK-NEXT:    bl __arm_sme_state
; FP-CHECK-NEXT:    and x19, x0, #0x1
; FP-CHECK-NEXT:    .cfi_offset vg, -16
; FP-CHECK-NEXT:    tbz w19, #0, .LBB7_2
; FP-CHECK-NEXT:  // %bb.1:
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:  .LBB7_2:
; FP-CHECK-NEXT:    bl callee
; FP-CHECK-NEXT:    tbz w19, #0, .LBB7_4
; FP-CHECK-NEXT:  // %bb.3:
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:  .LBB7_4:
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w19
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    .cfi_restore b8
; FP-CHECK-NEXT:    .cfi_restore b9
; FP-CHECK-NEXT:    .cfi_restore b10
; FP-CHECK-NEXT:    .cfi_restore b11
; FP-CHECK-NEXT:    .cfi_restore b12
; FP-CHECK-NEXT:    .cfi_restore b13
; FP-CHECK-NEXT:    .cfi_restore b14
; FP-CHECK-NEXT:    .cfi_restore b15
; FP-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: streaming_compatible_to_non_streaming:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void @callee()
  ret void
}

; If the target does not have SVE, do not emit cntd in the prologue and
; instead spill the result returned by __arm_get_current_vg.
; This requires preserving the argument %x as the vg value is returned
; in X0.
; In the expected code below the argument is parked in x9 around the
; __arm_get_current_vg call and moved back into x0 afterwards.
;
define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
; NO-SVE-CHECK-LABEL: streaming_compatible_no_sve:
; NO-SVE-CHECK:       // %bb.0:
; NO-SVE-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    .cfi_def_cfa_offset 96
; NO-SVE-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    mov x9, x0
; NO-SVE-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    bl __arm_get_current_vg
; NO-SVE-CHECK-NEXT:    stp x0, x19, [sp, #80] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    mov x0, x9
; NO-SVE-CHECK-NEXT:    add x29, sp, #64
; NO-SVE-CHECK-NEXT:    .cfi_def_cfa w29, 32
; NO-SVE-CHECK-NEXT:    .cfi_offset w19, -8
; NO-SVE-CHECK-NEXT:    .cfi_offset w30, -24
; NO-SVE-CHECK-NEXT:    .cfi_offset w29, -32
; NO-SVE-CHECK-NEXT:    .cfi_offset b8, -40
; NO-SVE-CHECK-NEXT:    .cfi_offset b9, -48
; NO-SVE-CHECK-NEXT:    .cfi_offset b10, -56
; NO-SVE-CHECK-NEXT:    .cfi_offset b11, -64
; NO-SVE-CHECK-NEXT:    .cfi_offset b12, -72
; NO-SVE-CHECK-NEXT:    .cfi_offset b13, -80
; NO-SVE-CHECK-NEXT:    .cfi_offset b14, -88
; NO-SVE-CHECK-NEXT:    .cfi_offset b15, -96
; NO-SVE-CHECK-NEXT:    mov w8, w0
; NO-SVE-CHECK-NEXT:    bl __arm_sme_state
; NO-SVE-CHECK-NEXT:    and x19, x0, #0x1
; NO-SVE-CHECK-NEXT:    .cfi_offset vg, -16
; NO-SVE-CHECK-NEXT:    tbnz w19, #0, .LBB8_2
; NO-SVE-CHECK-NEXT:  // %bb.1:
; NO-SVE-CHECK-NEXT:    smstart sm
; NO-SVE-CHECK-NEXT:  .LBB8_2:
; NO-SVE-CHECK-NEXT:    mov w0, w8
; NO-SVE-CHECK-NEXT:    bl streaming_callee_with_arg
; NO-SVE-CHECK-NEXT:    tbnz w19, #0, .LBB8_4
; NO-SVE-CHECK-NEXT:  // %bb.3:
; NO-SVE-CHECK-NEXT:    smstop sm
; NO-SVE-CHECK-NEXT:  .LBB8_4:
; NO-SVE-CHECK-NEXT:    .cfi_restore vg
; NO-SVE-CHECK-NEXT:    .cfi_def_cfa wsp, 96
; NO-SVE-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    .cfi_def_cfa_offset 0
; NO-SVE-CHECK-NEXT:    .cfi_restore w19
; NO-SVE-CHECK-NEXT:    .cfi_restore w30
; NO-SVE-CHECK-NEXT:    .cfi_restore w29
; NO-SVE-CHECK-NEXT:    .cfi_restore b8
; NO-SVE-CHECK-NEXT:    .cfi_restore b9
; NO-SVE-CHECK-NEXT:    .cfi_restore b10
; NO-SVE-CHECK-NEXT:    .cfi_restore b11
; NO-SVE-CHECK-NEXT:    .cfi_restore b12
; NO-SVE-CHECK-NEXT:    .cfi_restore b13
; NO-SVE-CHECK-NEXT:    .cfi_restore b14
; NO-SVE-CHECK-NEXT:    .cfi_restore b15
; NO-SVE-CHECK-NEXT:    ret
;
; OUTLINER-CHECK-LABEL: streaming_compatible_no_sve:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void @streaming_callee_with_arg(i32 %x)
  ret void
}

; The algorithm that fixes up the offsets of the callee-save/restore
; instructions must jump over the instructions that instantiate the current
; 'VG' value. We must make sure that it doesn't consider any RDSVL in
; user-code as if it is part of the frame-setup when doing so.
; Only the NO-SVE-CHECK prefix has assertions for this function; it is
; `nounwind`, and no CFI directives are expected in its output.
define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
; NO-SVE-CHECK:       // %bb.0:
; NO-SVE-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    mov x9, x0
; NO-SVE-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; NO-SVE-CHECK-NEXT:    bl __arm_get_current_vg
; NO-SVE-CHECK-NEXT:    str x0, [sp, #80] // 8-byte Folded Spill
; NO-SVE-CHECK-NEXT:    mov x0, x9
; NO-SVE-CHECK-NEXT:    rdsvl x8, #1
; NO-SVE-CHECK-NEXT:    add x29, sp, #64
; NO-SVE-CHECK-NEXT:    lsr x8, x8, #3
; NO-SVE-CHECK-NEXT:    mov x1, x0
; NO-SVE-CHECK-NEXT:    smstart sm
; NO-SVE-CHECK-NEXT:    mov x0, x8
; NO-SVE-CHECK-NEXT:    bl bar
; NO-SVE-CHECK-NEXT:    smstop sm
; NO-SVE-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; NO-SVE-CHECK-NEXT:    ret
  %some_alloc = alloca i64, align 8
  %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
  call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
  ret void
}

declare void @bar(i64, i64)

; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
; if the function contains a streaming-mode change.
; The attribute group used here (#5) is streaming-enabled but, unlike the
; other groups in this file section, carries no `uwtable` attribute.

define void @vg_unwind_noasync() #5 {
; CHECK-LABEL: vg_unwind_noasync:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    .cfi_offset b8, -24
; CHECK-NEXT:    .cfi_offset b9, -32
; CHECK-NEXT:    .cfi_offset b10, -40
; CHECK-NEXT:    .cfi_offset b11, -48
; CHECK-NEXT:    .cfi_offset b12, -56
; CHECK-NEXT:    .cfi_offset b13, -64
; CHECK-NEXT:    .cfi_offset b14, -72
; CHECK-NEXT:    .cfi_offset b15, -80
; CHECK-NEXT:    .cfi_offset vg, -8
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    .cfi_restore vg
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
;
; FP-CHECK-LABEL: vg_unwind_noasync:
; FP-CHECK:       // %bb.0:
; FP-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 96
; FP-CHECK-NEXT:    cntd x9
; FP-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; FP-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; FP-CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; FP-CHECK-NEXT:    add x29, sp, #64
; FP-CHECK-NEXT:    .cfi_def_cfa w29, 32
; FP-CHECK-NEXT:    .cfi_offset w30, -24
; FP-CHECK-NEXT:    .cfi_offset w29, -32
; FP-CHECK-NEXT:    .cfi_offset b8, -40
; FP-CHECK-NEXT:    .cfi_offset b9, -48
; FP-CHECK-NEXT:    .cfi_offset b10, -56
; FP-CHECK-NEXT:    .cfi_offset b11, -64
; FP-CHECK-NEXT:    .cfi_offset b12, -72
; FP-CHECK-NEXT:    .cfi_offset b13, -80
; FP-CHECK-NEXT:    .cfi_offset b14, -88
; FP-CHECK-NEXT:    .cfi_offset b15, -96
; FP-CHECK-NEXT:    .cfi_offset vg, -16
; FP-CHECK-NEXT:    smstop sm
; FP-CHECK-NEXT:    bl callee
; FP-CHECK-NEXT:    smstart sm
; FP-CHECK-NEXT:    .cfi_restore vg
; FP-CHECK-NEXT:    .cfi_def_cfa wsp, 96
; FP-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; FP-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; FP-CHECK-NEXT:    .cfi_def_cfa_offset 0
; FP-CHECK-NEXT:    .cfi_restore w30
; FP-CHECK-NEXT:    .cfi_restore w29
; FP-CHECK-NEXT:    .cfi_restore b8
; FP-CHECK-NEXT:    .cfi_restore b9
; FP-CHECK-NEXT:    .cfi_restore b10
; FP-CHECK-NEXT:    .cfi_restore b11
; FP-CHECK-NEXT:    .cfi_restore b12
; FP-CHECK-NEXT:    .cfi_restore b13
; FP-CHECK-NEXT:    .cfi_restore b14
; FP-CHECK-NEXT:    .cfi_restore b15
; FP-CHECK-NEXT:    ret
; OUTLINER-CHECK-LABEL: vg_unwind_noasync:
; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
;
  call void @callee();
  ret void;
}

; Attribute groups referenced by the tests:
;   #0 - streaming function ("aarch64_pstate_sm_enabled"), async unwind tables
;   #1 - as #0, plus inline-asm stack probing ("probe-stack"="inline-asm")
;   #3 - locally streaming function ("aarch64_pstate_sm_body"), async unwind tables
;   #4 - streaming-compatible function, async unwind tables
;   #5 - streaming function without any uwtable attribute
attributes #0 = { "aarch64_pstate_sm_enabled" uwtable(async) }
attributes #1 = { "probe-stack"="inline-asm" "aarch64_pstate_sm_enabled" uwtable(async) }
attributes #3 = { "aarch64_pstate_sm_body" uwtable(async) }
attributes #4 = { "aarch64_pstate_sm_compatible" uwtable(async) }
attributes #5 = { "aarch64_pstate_sm_enabled" }