// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// This file implements the support routines for the SME ABI,
// described here:
//  https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines
//
// Target: AArch64, GNU assembler syntax, preprocessed with the C preprocessor.
// Every routine tests feature bits in the global __aarch64_cpu_features
// before touching SME system registers.

#include "../assembly.h"

// Bit positions inside __aarch64_cpu_features.
.set FEAT_SVE_BIT, 30
.set FEAT_SME_BIT, 42
.set FEAT_SME2_BIT, 57
.set FEAT_SME2_MASK, 1 << 57

// Bit positions inside the SVCR system register.
.set SVCR_PSTATE_SM_BIT, 0
.set SVCR_PSTATE_ZA_BIT, 1

#if !defined(__APPLE__)
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
#else
// MachO requires @page/@pageoff directives because the global is defined
// in a different file. Otherwise this file may fail to build.
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
#endif

.arch armv9-a+sme2

// -----------------------------------------------------------------------------
// do_abort (private)
//
// Utility function which calls a system's abort() routine. Because the function
// is streaming-compatible it should disable streaming-SVE mode before calling
// abort(). Note that there is no need to preserve any state before the call,
// because the function does not return.
//
// Stack frame (32 bytes): [sp] = x29, [sp, #8] = x30, [sp, #16] = VG.
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
  .cfi_startproc
  .variant_pcs SYMBOL_NAME(do_abort)
  BTI_C
  stp     x29, x30, [sp, #-32]!
  cntd    x0
  // Store VG to a stack location that we describe with .cfi_offset
  str     x0, [sp, #16]
  .cfi_def_cfa_offset 32
  .cfi_offset w30, -24
  .cfi_offset w29, -32
  .cfi_offset 46, -16
  bl      SYMBOL_NAME(__arm_sme_state)
  // Bit 0 of the returned x0 is a copy of SVCR bit 0 (PSTATE.SM).
  tbz     x0, #0, 2f
1:
  smstop  sm
2:
  // We can't make this into a tail-call because the unwinder would
  // need to restore the value of VG.
  bl      SYMBOL_NAME(abort)
  .cfi_endproc
END_COMPILERRT_FUNCTION(do_abort)

// -----------------------------------------------------------------------------
// __arm_sme_state
//
// Fills the result registers based on the FEAT_SME bit of
// __aarch64_cpu_features.
//
// Out:   x0 = 0 and x1 = 0 when FEAT_SME is not set. Otherwise x0 has bits 63
//        and 62 set and bits [1:0] copied from SVCR[1:0], and x1 holds
//        TPIDR2_EL0.
// Clobb: x16
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__arm_sme_state)
  .variant_pcs __arm_sme_state
  BTI_C
  mov     x0, xzr
  mov     x1, xzr

  adrp    x16, CPU_FEATS_SYMBOL
  ldr     x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x16, #FEAT_SME_BIT, 1f
0:
  orr     x0, x0, #0xC000000000000000
  mrs     x16, SVCR
  bfxil   x0, x16, #0, #2
  mrs     x1, TPIDR2_EL0
1:
  ret
END_COMPILERRT_FUNCTION(__arm_sme_state)

// -----------------------------------------------------------------------------
// __arm_tpidr2_restore
//
// In:    x0 = BLK, pointer to a TPIDR2 block (za_save_buffer at offset 0,
//        num_za_save_slices at offset 8, reserved bytes at offsets 10-15).
// Does:  Loads num_za_save_slices ZA slices from za_save_buffer; consecutive
//        slices are one streaming vector length (addsvl #1) apart.
//        Branches to do_abort if TPIDR2_EL0 is nonnull or a reserved byte is
//        nonzero.
// Clobb: x14, x15, x16, flags
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_restore)
  .variant_pcs __arm_tpidr2_restore
  BTI_C
  // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific
  // manner.
  mrs     x14, TPIDR2_EL0
  cbnz    x14, 2f

  // If any of the reserved bytes in the first 16 bytes of BLK are nonzero,
  // the subroutine [..] aborts in some platform-defined manner.
  ldrh    w14, [x0, #10]
  cbnz    w14, 2f
  ldr     w14, [x0, #12]
  cbnz    w14, 2f

  // If BLK.za_save_buffer is NULL, the subroutine does nothing.
  ldr     x16, [x0]
  cbz     x16, 1f

  // If BLK.num_za_save_slices is zero, the subroutine does nothing.
  // (ldrh zero-extends, so testing x14 is equivalent to testing w14.)
  ldrh    w14, [x0, #8]
  cbz     x14, 1f

  // x14 = slice count, x15 = current slice index, x16 = current buffer address.
  mov     x15, xzr
0:
  ldr     za[w15,0], [x16]
  addsvl  x16, x16, #1
  add     x15, x15, #1
  cmp     x14, x15
  b.ne    0b
1:
  ret
2:
  b       SYMBOL_NAME(do_abort)
END_COMPILERRT_FUNCTION(__arm_tpidr2_restore)

// -----------------------------------------------------------------------------
// __arm_tpidr2_save
//
// In:    none; the TPIDR2 block is located through TPIDR2_EL0.
// Does:  Stores num_za_save_slices ZA slices to za_save_buffer; consecutive
//        slices are one streaming vector length (addsvl #1) apart.
//        Branches to do_abort if a reserved byte of the block is nonzero.
// Clobb: x14, x15, x16, flags
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_save)
  .variant_pcs __arm_tpidr2_save
  BTI_C
  // If the current thread does not have access to TPIDR2_EL0, the subroutine
  // does nothing.
  adrp    x14, CPU_FEATS_SYMBOL
  ldr     x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x14, #FEAT_SME_BIT, 1f

  // If TPIDR2_EL0 is null, the subroutine does nothing.
  mrs     x16, TPIDR2_EL0
  cbz     x16, 1f

  // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are
  // nonzero, the subroutine [..] aborts in some platform-defined manner.
  ldrh    w14, [x16, #10]
  cbnz    w14, 2f
  ldr     w14, [x16, #12]
  cbnz    w14, 2f

  // If num_za_save_slices is zero, the subroutine does nothing.
  // (ldrh zero-extends, so testing x14 is equivalent to testing w14.)
  ldrh    w14, [x16, #8]
  cbz     x14, 1f

  // If za_save_buffer is NULL, the subroutine does nothing.
  ldr     x16, [x16]
  cbz     x16, 1f

  // x14 = slice count, x15 = current slice index, x16 = current buffer address.
  mov     x15, xzr
0:
  str     za[w15,0], [x16]
  addsvl  x16, x16, #1
  add     x15, x15, #1
  cmp     x14, x15
  b.ne    0b
1:
  ret
2:
  b       SYMBOL_NAME(do_abort)
END_COMPILERRT_FUNCTION(__arm_tpidr2_save)

// -----------------------------------------------------------------------------
// __arm_za_disable
//
// Does:  When FEAT_SME is set: calls __arm_tpidr2_save, writes zero to
//        TPIDR2_EL0 and executes smstop za. Otherwise returns immediately.
// Clobb: x14, plus whatever __arm_tpidr2_save clobbers.
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__arm_za_disable)
  .cfi_startproc
  .variant_pcs __arm_za_disable
  BTI_C
  // If the current thread does not have access to SME, the subroutine does
  // nothing.
  adrp    x14, CPU_FEATS_SYMBOL
  ldr     x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x14, #FEAT_SME_BIT, 0f

  // Otherwise, the subroutine behaves as if it did the following:
  // * Call __arm_tpidr2_save.
  stp     x29, x30, [sp, #-16]!
  .cfi_def_cfa_offset 16
  mov     x29, sp
  .cfi_def_cfa w29, 16
  .cfi_offset w30, -8
  .cfi_offset w29, -16
  bl      SYMBOL_NAME(__arm_tpidr2_save)

  // * Set TPIDR2_EL0 to null.
  msr     TPIDR2_EL0, xzr

  // * Set PSTATE.ZA to 0.
  smstop  za

  .cfi_def_cfa wsp, 16
  ldp     x29, x30, [sp], #16
  .cfi_def_cfa_offset 0
  .cfi_restore w30
  .cfi_restore w29
0:
  ret
  .cfi_endproc
END_COMPILERRT_FUNCTION(__arm_za_disable)

// -----------------------------------------------------------------------------
// __arm_get_current_vg
//
// Out:   x0 = result of cntd when FEAT_SVE is set, or when FEAT_SME is set and
//        SVCR.SM is 1; x0 = 0 otherwise.
// Clobb: x17
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg)
  .variant_pcs __arm_get_current_vg
  BTI_C

  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  // FEAT_SVE_BIT is below 32, so the w-register form can be used here.
  tbnz    w17, #FEAT_SVE_BIT, 1f
  tbz     x17, #FEAT_SME_BIT, 2f
0:
  mrs     x17, SVCR
  tbz     x17, #SVCR_PSTATE_SM_BIT, 2f
1:
  cntd    x0
  ret
2:
  mov     x0, xzr
  ret
END_COMPILERRT_FUNCTION(__arm_get_current_vg)

// The diagram below describes the layout used in the following routines:
// * __arm_sme_state_size
// * __arm_sme_save
// * __arm_sme_restore
//
// +---------------------------------+
// |             ...                 |
// |           ZA buffer             |
// |             ...                 |
// +---------------------------------+ <- @96
// |         ZT0 contents            |
// +---------------------------------+ <- @32
// | byte 15-10: zero (reserved)     |
// | byte   9-8: num_za_save_slices  |           TPIDR2 block
// | byte   7-0: za_save_buffer      |
// +---------------------------------+ <- @16
// | bit 127-1: zero (reserved)      |           Internal state for __arm_sme_save/restore
// | bit     0: VALID                |
// +---------------------------------+ <- @0
//
// When FEAT_SME2 is not set there is no ZT0 area and the ZA buffer starts
// at @32.

// -----------------------------------------------------------------------------
// __arm_sme_state_size
//
// Out:   x0 = 16 when FEAT_SME is not set, SVCR.ZA is 0, or TPIDR2_EL0 is
//        nonzero. Otherwise x0 = (FEAT_SME2 ? 96 : 32) + SVLB * SVLB, where
//        SVLB is the value returned by rdsvl #1.
// Clobb: x16, x17, flags
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__arm_sme_state_size)
  .variant_pcs __arm_sme_state_size
  BTI_C

  // Test if SME is available and ZA state is 'active'.
  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x17, #FEAT_SME_BIT, 0f
  mrs     x16, SVCR
  tbz     x16, #SVCR_PSTATE_ZA_BIT, 0f
  mrs     x16, TPIDR2_EL0
  cbnz    x16, 0f

  // Size = HAS_FEAT_SME2 ? 96 : 32
  // (tst sets Z, i.e. 'eq', when the SME2 bit is clear, which selects 32.)
  tst     x17, #FEAT_SME2_MASK
  mov     w17, #32
  mov     w16, #96
  csel    x16, x17, x16, eq

  // Size = Size + (SVLB * SVLB)
  rdsvl   x17, #1
  madd    x0, x17, x17, x16
  ret

0:
  // Default case, 16 bytes is minimum (to encode VALID bit, multiple of 16 bytes)
  mov     w0, #16
  ret
END_COMPILERRT_FUNCTION(__arm_sme_state_size)

// -----------------------------------------------------------------------------
// __arm_sme_save
//
// In:    x0 = PTR, 16-byte aligned buffer using the layout shown above.
// Does:  Zeroes the 16-byte internal state. If FEAT_SME is set, SVCR.ZA is 1
//        and TPIDR2_EL0 is zero: sets VALID, stores ZT0 at PTR+32 when
//        FEAT_SME2 is set, fills in the TPIDR2 block at PTR+16 and points
//        TPIDR2_EL0 at it. Branches to do_abort if PTR is misaligned.
// Out:   On the path that sets up the TPIDR2 block, x0 is left at PTR+16
//        (pre-index writeback); otherwise x0 is unchanged.
// Clobb: x16, x17, flags
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
  .variant_pcs __arm_sme_save
  BTI_C

  // If PTR is not 16-byte aligned, abort.
  tst     x0, #0xF
  b.ne    3f

  // Clear internal state bits
  stp     xzr, xzr, [x0]

  // If SME is not available, PSTATE.ZA = 0 or TPIDR2_EL0 != 0, return.
  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x17, #FEAT_SME_BIT, 2f
  mrs     x16, SVCR
  tbz     x16, #SVCR_PSTATE_ZA_BIT, 2f
  mrs     x16, TPIDR2_EL0
  cbnz    x16, 2f

  // ZA or ZT0 need saving, we can now set internal VALID bit to 1
  mov     w16, #1
  str     x16, [x0]

  // x16 = address of the ZA save buffer. x16 is used rather than x18 because
  // x18 is the platform register and must not be clobbered on platforms that
  // reserve it.
  add     x16, x0, #32
  tbz     x17, #FEAT_SME2_BIT, 1f

  // Store ZT0
  str     zt0, [x16]
  add     x16, x16, #64

1:
  // Set up lazy-save (x16 = pointer to buffer)
  rdsvl   x17, #1
  str     x16, [x0, #16]!
  strh    w17, [x0, #8]
  strh    wzr, [x0, #10]
  str     wzr, [x0, #12]
  msr     TPIDR2_EL0, x0

2:
  // Do nothing
  ret

3:
  b       SYMBOL_NAME(do_abort)
END_COMPILERRT_FUNCTION(__arm_sme_save)

// -----------------------------------------------------------------------------
// __arm_sme_restore
//
// In:    x0 = PTR, 16-byte aligned buffer previously filled by __arm_sme_save.
// Does:  Returns early when the first 8 bytes of PTR are zero (VALID clear).
//        Otherwise, if TPIDR2_EL0 is zero, reloads ZA through
//        __arm_tpidr2_restore(PTR+16); then enables ZA, zeroes TPIDR2_EL0 and
//        reloads ZT0 from PTR+32 when FEAT_SME2 is set.
//        Branches to do_abort if PTR is misaligned, if FEAT_SME is not set
//        while VALID is set, or if TPIDR2_EL0 is zero while SVCR.ZA is 1.
// Clobb: x16, x17, flags, plus whatever __arm_tpidr2_restore clobbers.
// -----------------------------------------------------------------------------
DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
  .cfi_startproc
  .variant_pcs __arm_sme_restore
  BTI_C

  stp     x29, x30, [sp, #-16]!
  .cfi_def_cfa_offset 16
  mov     x29, sp
  .cfi_def_cfa w29, 16
  .cfi_offset w30, -8
  .cfi_offset w29, -16

  // If PTR is not 16-byte aligned, abort.
  tst     x0, #0xF
  b.ne    3f

  // If the VALID bit is 0, return early.
  ldr     x16, [x0]
  cbz     x16, 2f

  // If SME is not available, abort.
  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x17, #FEAT_SME_BIT, 3f

  // If TPIDR2_EL0 != nullptr, no lazy-save was committed, try to reload zt0.
  mrs     x16, TPIDR2_EL0
  cbnz    x16, 1f

  // If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active'),
  // abort.
  mrs     x16, SVCR
  tbnz    x16, #SVCR_PSTATE_ZA_BIT, 3f

  // Restore za.
  smstart za
  add     x0, x0, #16
  bl      SYMBOL_NAME(__arm_tpidr2_restore)
  sub     x0, x0, #16

  // x16/x17 (IP0/IP1) may be clobbered by a linker veneer on the call above,
  // so reload the feature word instead of relying on x17 surviving it.
  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]

1:
  smstart za
  msr     TPIDR2_EL0, xzr

  // Check if zt0 needs restoring.
  tbz     x17, #FEAT_SME2_BIT, 2f

  // Restore zt0.
  add     x16, x0, #32
  ldr     zt0, [x16]

2:
  // Do nothing
  .cfi_def_cfa wsp, 16
  ldp     x29, x30, [sp], #16
  .cfi_def_cfa_offset 0
  .cfi_restore w30
  .cfi_restore w29
  ret

3:
  b       SYMBOL_NAME(do_abort)
  .cfi_endproc
END_COMPILERRT_FUNCTION(__arm_sme_restore)

NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
GNU_PROPERTY_BTI_PAC