; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s

; Overview
; --------
; Every caller in this file is marked "aarch64_inout_za" and calls something
; that carries no ZA attribute here (@private_za_callee, or a library routine).
; The assertions pin the code llc emits around each such call:
;
;   1. A 16-byte block is reserved directly below the callee-save area
;      (sub sp, sp, #16), then a further SVL*SVL bytes are carved off the stack
;      (rdsvl + msub: new_sp = sp - SVL*SVL) to act as the save buffer.
;   2. The block is filled in:
;        bytes 0..7   buffer address (the new sp)
;        bytes 8..9   the rdsvl result (halfword)
;        bytes 10..15 zero
;      NOTE(review): this looks like the TPIDR2 block layout from the Arm SME
;      ABI - confirm against the ABI document before relying on field names.
;   3. TPIDR2_EL0 is pointed at the block and the call is made.
;   4. After the call: smstart za, then TPIDR2_EL0 is read back. If it reads
;      as zero, __arm_tpidr2_restore is called with x0 = block address;
;      otherwise that call is skipped (cbnz).
;   5. TPIDR2_EL0 is cleared, and the epilogue restores sp from x29.
;
; The assertion bodies are generated output; regenerate them with the script
; named on the first line instead of editing them by hand.

; Callee without any ZA attribute; calls to it take the sequence described above.
declare void @private_za_callee()
; Intrinsic used below; the assertions show it being emitted as a call to cosf.
declare float @llvm.cos.f32(float)

; Test lazy-save mechanism for a single callee.
; Baseline case: one call, block at x29-16, all temporaries in x8/x9.
define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_1_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
  call void @private_za_callee()
  ret void
}

; Test lazy-save mechanism for multiple callees.
; The buffer and the 16-byte block are set up once. The rdsvl result (x20) and
; the block address (x21) live in callee-saved registers across the first call,
; so before the second call only the halfword at x29-8 is rewritten and
; TPIDR2_EL0 is pointed at the block again. Each call has its own
; read-back / conditional-restore / clear sequence.
define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_2_callees:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
; CHECK-NEXT: str x21, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x20, #1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: msub x8, x20, x20, x8
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: sub x21, x29, #16
; CHECK-NEXT: stur x8, [x29, #-16]
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh w20, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x21
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: sturh w20, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x21
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload
; CHECK-NEXT: ret
  call void @private_za_callee()
  call void @private_za_callee()
  ret void
}

; Test a call of an intrinsic that gets expanded to a library call.
; There is no explicit call in the IR: @llvm.cos.f32 shows up as "bl cosf" in
; the expected output, wrapped in the same setup / conditional-restore sequence
; as the explicit call in test_lazy_save_1_callee.
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_expanded_intrinsic:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl cosf
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %res = call float @llvm.cos.f32(float %a)
  ret float %res
}

; Test a combination of streaming-compatible -> normal call with lazy-save.
; The caller is additionally "aarch64_pstate_sm_compatible". Differences from
; the baseline that the expected output shows:
;   - d8-d15 are spilled in the prologue and x29 sits 64 bytes above sp, so the
;     16-byte block is addressed at x29-80 instead of x29-16.
;   - TPIDR2_EL0 is pointed at the block before the __arm_sme_state call.
;   - Bit 0 of the __arm_sme_state result is kept in x20; "smstop sm" before the
;     callee and "smstart sm" after it are each executed only when that bit is
;     set (tbz skips them otherwise).
;   - The "smstart za" / conditional-restore sequence follows the streaming-mode
;     re-entry.
define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: test_lazy_save_and_conditional_smstart:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-80]
; CHECK-NEXT: sub x9, x29, #80
; CHECK-NEXT: sturh wzr, [x29, #-70]
; CHECK-NEXT: stur wzr, [x29, #-68]
; CHECK-NEXT: sturh w8, [x29, #-72]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x20, x0, #0x1
; CHECK-NEXT: tbz w20, #0, .LBB3_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: tbz w20, #0, .LBB3_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB3_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB3_6:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: sub sp, x29, #64
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
; CHECK-NEXT: ret
  call void @private_za_callee()
  ret void
}