; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s

; Verifies the code llc emits around calls to @callee when the calling
; function and/or the call site carry ZA / ZT0 state attributes
; ("aarch64_in_zt0", "aarch64_new_zt0", "aarch64_inout_za", "aarch64_new_za").
; The free-form "Expect ..." comments summarise what the assertions that
; follow them pin down. The assertions are autogenerated (see the first
; line); regenerate them with that script instead of editing them by hand.

; External function called by every test below. Its ZA/ZT0 interface is
; selected per call through call-site attributes.
declare void @callee();

;
; Private-ZA Callee
;

; Caller has ZT0 input state only; the call site has no state attributes.
; Expect spill & fill of ZT0 around call (stack slot addressed through x19)
; Expect smstop/smstart za around call
define void @zt0_in_caller_no_state_callee() "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_in_caller_no_state_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void @callee();
  ret void;
}

; Caller shares both ZA and ZT0; the call site has no state attributes.
; Expect spill & fill of ZT0 around call
; Expect setup and restore lazy-save around call: TPIDR2_EL0 is written
; before the call, and afterwards __arm_tpidr2_restore is called unless
; TPIDR2_EL0 still reads non-zero, then TPIDR2_EL0 is cleared.
; Expect smstart za after call (and no smstop za before it)
define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_no_state_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sub x19, x29, #80
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
  call void @callee();
  ret void;
}

;
; Shared-ZA Callee
;

; Caller and callee have shared ZT0 state, no spill/fill of ZT0 required.
; Only the link register is saved and restored around the call.
define void @zt0_shared_caller_zt0_shared_callee() "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_shared_caller_zt0_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: bl callee
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void @callee() "aarch64_in_zt0";
  ret void;
}

; Caller shares ZA and ZT0, but the call site shares ZA only.
; Expect spill & fill of ZT0 around call
; No smstop/smstart za and no lazy-save setup is expected.
define void @za_zt0_shared_caller_za_shared_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_za_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: bl callee
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void @callee() "aarch64_inout_za";
  ret void;
}

; Caller and callee have shared ZA & ZT0.
; No ZT0 spill/fill and no ZA state changes are expected around the call.
define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_za_zt0_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: bl callee
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
  ret void;
}

;
; New-ZA Callee
;

; Caller has ZT0 input state; the call site is marked "aarch64_new_zt0".
; Expect spill & fill of ZT0 around call
; Expect smstop/smstart za around call
define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_in_caller_zt0_new_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void @callee() "aarch64_new_zt0";
  ret void;
}

;
; New-ZA Caller
;

; Caller creates new ZT0 state; the call site shares ZT0 with the callee.
; Expect commit of lazy-save if ZA is dormant: when TPIDR2_EL0 is non-zero,
; __arm_tpidr2_save is called with ZT0 spilled and refilled around it, and
; TPIDR2_EL0 is then cleared.
; Expect smstart ZA & clear ZT0
; Before return, expect smstop ZA
define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
; CHECK-LABEL: zt0_new_caller:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB6_2
; CHECK-NEXT: // %bb.1: // %save.za
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str zt0, [x8]
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: ldr zt0, [x8]
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB6_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void @callee() "aarch64_in_zt0";
  ret void;
}

; Caller creates both new ZA and new ZT0 state; the call site shares both.
; Expect commit of lazy-save if ZA is dormant (same TPIDR2_EL0 test and
; __arm_tpidr2_save sequence as in zt0_new_caller).
; Expect smstart ZA, clear ZA & clear ZT0
; Before return, expect smstop ZA
define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
; CHECK-LABEL: new_za_zt0_caller:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB7_2
; CHECK-NEXT: // %bb.1: // %save.za
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str zt0, [x8]
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: ldr zt0, [x8]
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero {za}
; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
  ret void;
}

; Caller creates new ZA state but takes ZT0 as input; the call site shares both.
; Expect clear ZA on entry
; No lazy-save commit and no smstart/smstop za is expected.
define void @new_za_shared_zt0_caller() "aarch64_new_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: new_za_shared_zt0_caller:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: zero {za}
; CHECK-NEXT: bl callee
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
  ret void;
}

; Caller shares ZA but creates new ZT0 state; the call site shares both.
; Expect clear ZT0 on entry
; No lazy-save commit and no smstart/smstop za is expected.
define void @shared_za_new_zt0() "aarch64_inout_za" "aarch64_new_zt0" nounwind {
; CHECK-LABEL: shared_za_new_zt0:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: bl callee
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void @callee() "aarch64_inout_za" "aarch64_in_zt0";
  ret void;
}