xref: /llvm-project/compiler-rt/lib/builtins/aarch64/sme-abi.S (revision 811f2a652b6232f203e51a17d553e268fae3a29c)
1// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2// See https://llvm.org/LICENSE.txt for license information.
3// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
5// This patch implements the support routines for the SME ABI,
6// described here:
7//  https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines
8
9#include "../assembly.h"
10
// Bit positions tested in the __aarch64_cpu_features word by the routines in
// this file.
.equ FEAT_SVE_BIT, 30
.equ FEAT_SME_BIT, 42
.equ FEAT_SME2_BIT, 57
// Mask with only bit 57 (FEAT_SME2_BIT) set, for use as a logical immediate.
.equ FEAT_SME2_MASK, 1 << 57
// Bit position of PSTATE.SM within SVCR.
.equ SVCR_PSTATE_SM_BIT, 0

// CPU_FEATS_SYMBOL is the adrp operand for the feature word and
// CPU_FEATS_SYMBOL_OFFSET is the matching offset operand for the ldr.
#if defined(__APPLE__)
// MachO requires @page/@pageoff directives because the global is defined
// in a different file. Otherwise this file may fail to build.
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
#else
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
#endif

// Allow the assembler to accept the SME and SME2 instructions used below.
.arch armv9-a+sme2
28
// Internal helper that ends the process by calling abort(). The helper is
// streaming-compatible, so it leaves streaming-SVE mode before abort() is
// called. No caller state is preserved because control never returns.
DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
  .cfi_startproc
  .variant_pcs SYMBOL_NAME(do_abort)
  BTI_C
  // 32-byte frame: x29/x30 at [sp], the current VG at [sp, #16].
  stp     x29, x30, [sp, #-32]!
  cntd    x0
  // Spill VG into the slot that the ".cfi_offset 46" directive below describes.
  str     x0, [sp, #16]
  .cfi_def_cfa_offset 32
  .cfi_offset w30, -24
  .cfi_offset w29, -32
  .cfi_offset 46, -16
  // Bit 0 of the __arm_sme_state result is copied from SVCR bit 0; only issue
  // smstop when that bit is set.
  bl      SYMBOL_NAME(__arm_sme_state)
  tbz     x0, #0, 9f
  smstop  sm
9:
  // This has to stay a real call rather than a tail call, because the
  // unwinder would need to restore the value of VG.
  bl      SYMBOL_NAME(abort)
  .cfi_endproc
END_COMPILERRT_FUNCTION(do_abort)
55
// __arm_sme_state fills the result registers x0 and x1 based on the
// __aarch64_cpu_features word, which is defined in a different file.
//   x0: zero when the SME feature bit is clear. Otherwise bits 63 and 62 are
//       set and bits 1:0 are a copy of SVCR bits 1:0.
//   x1: zero when the SME feature bit is clear. Otherwise the value of
//       TPIDR2_EL0.
// x16 is used as a scratch register.
DEFINE_COMPILERRT_FUNCTION(__arm_sme_state)
  .variant_pcs __arm_sme_state
  BTI_C
  adrp    x16, CPU_FEATS_SYMBOL
  ldr     x16, [x16, CPU_FEATS_SYMBOL_OFFSET]

  // Default result, returned unchanged when SME is not available.
  mov     x0, xzr
  mov     x1, xzr
  tbz     x16, #FEAT_SME_BIT, 9f

  // SME is available: set the two top bits, then insert SVCR[1:0] at the
  // bottom of x0 and report TPIDR2_EL0 in x1.
  mov     x0, #0xC000000000000000
  mrs     x16, SVCR
  bfxil   x0, x16, #0, #2
  mrs     x1, TPIDR2_EL0
9:
  ret
END_COMPILERRT_FUNCTION(__arm_sme_state)
76
// __arm_tpidr2_restore: x0 = BLK, a pointer to a TPIDR2 block
// (bytes 0-7 za_save_buffer, bytes 8-9 num_za_save_slices, bytes 10-15
// reserved). Reloads num_za_save_slices slices of ZA from za_save_buffer.
// Scratch registers: x14, x15, x16. Branches to do_abort on invalid input.
DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_restore)
  .variant_pcs __arm_tpidr2_restore
  BTI_C
  // A nonnull TPIDR2_EL0 makes the subroutine abort.
  mrs     x14, TPIDR2_EL0
  cbnz    x14, 8f

  // Any nonzero reserved byte (bytes 10-15 of BLK) makes the subroutine abort.
  ldrh    w14, [x0, #10]
  cbnz    w14, 8f
  ldr     w14, [x0, #12]
  cbnz    w14, 8f

  // Nothing to do when BLK.za_save_buffer is NULL.
  ldr     x16, [x0]
  cbz     x16, 7f

  // Nothing to do when BLK.num_za_save_slices is zero.
  ldrh    w14, [x0, #8]
  cbz     w14, 7f

  // x14 = slice count, x15 = current slice index, x16 = load address.
  mov     x15, xzr
6:
  ldr     za[w15, 0], [x16]
  addsvl  x16, x16, #1
  add     x15, x15, #1
  cmp     x15, x14
  b.ne    6b
7:
  ret
8:
  b       SYMBOL_NAME(do_abort)
END_COMPILERRT_FUNCTION(__arm_tpidr2_restore)
112
// __arm_tpidr2_save: takes no register arguments. When TPIDR2_EL0 points to a
// TPIDR2 block (bytes 0-7 za_save_buffer, bytes 8-9 num_za_save_slices,
// bytes 10-15 reserved), stores num_za_save_slices slices of ZA to
// za_save_buffer. Scratch registers: x14, x15, x16. Branches to do_abort when
// a reserved byte of the block is nonzero.
DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_save)
  .variant_pcs __arm_tpidr2_save
  BTI_C
  // Without the SME feature bit the thread has no access to TPIDR2_EL0 and
  // the subroutine does nothing.
  adrp    x14, CPU_FEATS_SYMBOL
  ldr     x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x14, #FEAT_SME_BIT, 7f

  // Nothing to do when TPIDR2_EL0 is null.
  mrs     x16, TPIDR2_EL0
  cbz     x16, 7f

  // Any nonzero reserved byte (bytes 10-15 of the block) makes the
  // subroutine abort.
  ldrh    w14, [x16, #10]
  cbnz    w14, 8f
  ldr     w14, [x16, #12]
  cbnz    w14, 8f

  // Nothing to do when num_za_save_slices is zero.
  ldrh    w14, [x16, #8]
  cbz     w14, 7f

  // Nothing to do when za_save_buffer is NULL.
  ldr     x16, [x16]
  cbz     x16, 7f

  // x14 = slice count, x15 = current slice index, x16 = store address.
  mov     x15, xzr
6:
  str     za[w15, 0], [x16]
  addsvl  x16, x16, #1
  add     x15, x15, #1
  cmp     x15, x14
  b.ne    6b
7:
  ret
8:
  b       SYMBOL_NAME(do_abort)
END_COMPILERRT_FUNCTION(__arm_tpidr2_save)
153
// __arm_za_disable: takes no register arguments. When the SME feature bit is
// set it calls __arm_tpidr2_save, clears TPIDR2_EL0 and then issues
// "smstop za". When the bit is clear it returns immediately.
DEFINE_COMPILERRT_FUNCTION(__arm_za_disable)
  .cfi_startproc
  .variant_pcs __arm_za_disable
  BTI_C
  // No access to SME: return without touching anything but x14.
  adrp    x14, CPU_FEATS_SYMBOL
  ldr     x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x14, #FEAT_SME_BIT, 9f

  // Set up a frame record, since x30 is overwritten by the call below.
  stp     x29, x30, [sp, #-16]!
  .cfi_def_cfa_offset 16
  mov     x29, sp
  .cfi_def_cfa w29, 16
  .cfi_offset w30, -8
  .cfi_offset w29, -16

  // Step 1: call __arm_tpidr2_save.
  bl      SYMBOL_NAME(__arm_tpidr2_save)

  // Step 2: set TPIDR2_EL0 to null.
  msr     TPIDR2_EL0, xzr

  // Step 3: set PSTATE.ZA to 0.
  smstop  za

  // Tear the frame record down again.
  .cfi_def_cfa wsp, 16
  ldp     x29, x30, [sp], #16
  .cfi_def_cfa_offset 0
  .cfi_restore w30
  .cfi_restore w29
9:
  ret
  .cfi_endproc
END_COMPILERRT_FUNCTION(__arm_za_disable)
189
// __arm_get_current_vg: takes no register arguments and returns in x0
//   * the result of cntd, when the SVE feature bit is set, or when the SME
//     feature bit is set and SVCR reports PSTATE.SM = 1;
//   * zero otherwise.
// x17 is used as a scratch register.
DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg)
  .variant_pcs __arm_get_current_vg
  BTI_C

  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]

  // With SVE, cntd can be executed regardless of the streaming mode.
  tbnz    w17, #FEAT_SVE_BIT, 8f

  // Without SVE, cntd is only used when SME is present and PSTATE.SM is set.
  tbz     x17, #FEAT_SME_BIT, 9f
  mrs     x17, SVCR
  tbz     x17, #SVCR_PSTATE_SM_BIT, 9f
8:
  cntd    x0
  ret
9:
  mov     x0, xzr
  ret
END_COMPILERRT_FUNCTION(__arm_get_current_vg)
208
// The diagram below describes the layout used in the following routines:
// * __arm_sme_state_size
// * __arm_sme_save
// * __arm_sme_restore
//
// The offsets shown apply when SME2 is available. Without SME2 there is no
// ZT0 area and the ZA buffer starts at @32.
//
// +---------------------------------+
// |             ...                 |
// |           ZA buffer             |
// |             ...                 |
// +---------------------------------+ <- @96
// |         ZT0 contents            |
// +---------------------------------+ <- @32
// | byte 15-10: zero (reserved)     |
// | byte   9-8: num_za_save_slices  |           TPIDR2 block
// | byte   7-0: za_save_buffer      |
// +---------------------------------+ <- @16
// | bit  127-1: zero (reserved)     |           Internal state for __arm_sme_save/restore
// | bit      0: VALID               |
// +---------------------------------+ <- @0
228
// __arm_sme_state_size: takes no register arguments and returns in x0 the
// number of bytes needed for the save area:
//   * 16 when the SME feature bit is clear, PSTATE.ZA is 0, or TPIDR2_EL0 is
//     nonnull;
//   * otherwise (SME2 ? 96 : 32) + SVLB * SVLB, where SVLB is the value read
//     by "rdsvl x17, #1".
// x16 and x17 are used as scratch registers.
DEFINE_COMPILERRT_FUNCTION(__arm_sme_state_size)
  .variant_pcs __arm_sme_state_size
  BTI_C

  // Take the minimal-size path unless SME is available, PSTATE.ZA (SVCR
  // bit 1) is set and TPIDR2_EL0 is null, i.e. ZA state is active.
  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x17, #FEAT_SME_BIT, 9f
  mrs     x16, SVCR
  tbz     x16, #1, 9f
  mrs     x16, TPIDR2_EL0
  cbnz    x16, 9f

  // x16 = fixed part of the size: 32 bytes without SME2, 96 bytes with SME2.
  tst     x17, #FEAT_SME2_MASK
  mov     w16, #32
  mov     w17, #96
  csel    x16, x16, x17, eq

  // x0 = fixed part + SVLB * SVLB for the ZA buffer.
  rdsvl   x17, #1
  madd    x0, x17, x17, x16
  ret

9:
  // Minimal size: 16 bytes, enough to hold the VALID bit and a multiple of
  // 16 bytes.
  mov     w0, #16
  ret
END_COMPILERRT_FUNCTION(__arm_sme_state_size)
258
// __arm_sme_save: x0 = PTR, a 16-byte aligned save area laid out as
//   [PTR +  0, PTR + 16)  internal state (bit 0 = VALID)
//   [PTR + 16, PTR + 32)  TPIDR2 block
//   [PTR + 32, PTR + 96)  ZT0 contents (only when SME2 is available)
//   after that            ZA save buffer
// The internal state is always cleared first. If SME is available,
// PSTATE.ZA is 1 and TPIDR2_EL0 is null, the routine sets VALID, stores ZT0
// (SME2 only) and sets up a lazy save of ZA by pointing TPIDR2_EL0 at the
// TPIDR2 block inside the save area. Otherwise it returns with VALID = 0.
// Branches to do_abort when PTR is not 16-byte aligned.
//
// Registers written: x0 (advanced to PTR + 16 on the lazy-save path), x16
// and x17. x18 is deliberately not used as a temporary: AAPCS64 designates
// it the platform register and some platforms reserve it.
DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
  .variant_pcs __arm_sme_save
  BTI_C

  // If PTR is not 16-byte aligned, abort.
  tst     x0, #0xF
  b.ne    3f

  // Clear internal state bits
  stp     xzr, xzr, [x0]

  // If SME is not available, PSTATE.ZA = 0 or TPIDR2_EL0 != 0, return.
  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x17, #FEAT_SME_BIT, 2f
  mrs     x16, SVCR
  tbz     x16, #1, 2f
  mrs     x16, TPIDR2_EL0
  cbnz    x16, 2f

  // ZA or ZT0 need saving, we can now set internal VALID bit to 1
  mov     w16, #1
  str     x16, [x0]

  // x16 = address following the TPIDR2 block. This becomes the ZA buffer
  // pointer, after skipping the 64-byte ZT0 area when SME2 is available.
  add     x16, x0, #32
  tbz     x17, #FEAT_SME2_BIT, 1f

  // Store ZT0
  str     zt0, [x16]
  add     x16, x16, #64

1:
  // Set up lazy-save (x16 = pointer to buffer). The pre-indexed store moves
  // x0 to the TPIDR2 block at PTR + 16, so the offsets below are relative to
  // the block: za_save_buffer at 0, num_za_save_slices at 8, reserved bytes
  // at 10-15.
  rdsvl   x17, #1
  str     x16, [x0, #16]!
  strh    w17, [x0, #8]
  strh    wzr, [x0, #10]
  str     wzr, [x0, #12]
  msr     TPIDR2_EL0, x0

2:
  // Do nothing
  ret

3:
  b       SYMBOL_NAME(do_abort)
END_COMPILERRT_FUNCTION(__arm_sme_save)
306
// __arm_sme_restore: x0 = PTR, a 16-byte aligned save area that was
// previously filled in by __arm_sme_save:
//   [PTR +  0, PTR + 16)  internal state (bit 0 = VALID)
//   [PTR + 16, PTR + 32)  TPIDR2 block
//   [PTR + 32, PTR + 96)  ZT0 contents (only when SME2 is available)
// Behaviour:
//   * PTR not 16-byte aligned: abort.
//   * First doubleword of the internal state is zero (VALID = 0): return.
//   * SME not available: abort.
//   * TPIDR2_EL0 is null (the lazy save was committed): abort if PSTATE.ZA
//     is 1, otherwise enable ZA and reload it via __arm_tpidr2_restore.
//   * Finally enable ZA, clear TPIDR2_EL0 and, with SME2, reload ZT0.
// x16 and x17 are used as scratch registers, and the call to
// __arm_tpidr2_restore uses further scratch registers of its own.
DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
  .cfi_startproc
  .variant_pcs __arm_sme_restore
  BTI_C

  stp     x29, x30, [sp, #-16]!
  .cfi_def_cfa_offset 16
  mov     x29, sp
  .cfi_def_cfa w29, 16
  .cfi_offset w30, -8
  .cfi_offset w29, -16

  // If PTR is not 16-byte aligned, abort.
  tst     x0, #0xF
  b.ne    3f

  // If the VALID bit is 0, return early.
  ldr     x16, [x0]
  cbz     x16, 2f

  // If SME is not available, abort.
  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x17, #FEAT_SME_BIT, 3f

  // If TPIDR2_EL0 != nullptr, no lazy-save was committed, try to reload zt0.
  mrs     x16, TPIDR2_EL0
  cbnz    x16, 1f

  // If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active'),
  // abort.
  mrs     x16, SVCR
  tbnz    x16, #1, 3f

  // Restore za. x0 is moved to the TPIDR2 block for the call and moved back
  // afterwards. SYMBOL_NAME() keeps the reference consistent with the
  // DEFINE_COMPILERRT_FUNCTION definition on every object format.
  smstart za
  add     x0, x0, #16
  bl      SYMBOL_NAME(__arm_tpidr2_restore)
  sub     x0, x0, #16

1:
  smstart za
  msr     TPIDR2_EL0, xzr

  // Check if zt0 needs restoring. The feature word is reloaded here instead
  // of being kept in x17 across the call above: x16/x17 are the
  // intra-procedure-call scratch registers and are not guaranteed to survive
  // a bl.
  adrp    x17, CPU_FEATS_SYMBOL
  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
  tbz     x17, #FEAT_SME2_BIT, 2f

  // Restore zt0.
  add     x16, x0, #32
  ldr     zt0, [x16]

2:
  // Common return path. The CFI state with the frame still in place is
  // remembered so that it can be reinstated for the abort path below.
  .cfi_remember_state
  .cfi_def_cfa wsp, 16
  ldp     x29, x30, [sp], #16
  .cfi_def_cfa_offset 0
  .cfi_restore w30
  .cfi_restore w29
  ret

3:
  // Abort path. Every branch to this label is taken after the prologue and
  // before the epilogue, so the frame record is still on the stack. The CFI
  // state is switched back accordingly, and do_abort is reached with a real
  // call so that this frame stays visible to the unwinder. do_abort does not
  // return.
  .cfi_restore_state
  bl      SYMBOL_NAME(do_abort)
  .cfi_endproc
END_COMPILERRT_FUNCTION(__arm_sme_restore)
371
// Trailing object-file annotations. Both names are macros that are not
// defined in this file (presumably they come from ../assembly.h).
NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
GNU_PROPERTY_BTI_PAC
376