/*	$NetBSD: cpufunc.c,v 1.36 2024/02/07 04:20:26 msaitoh Exp $	*/

/*
 * Copyright (c) 2017 Ryo Shimizu
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_cpuoptions.h"
#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.36 2024/02/07 04:20:26 msaitoh Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include <arm/cpufunc.h>

u_int cputype;			/* compat arm */
u_int arm_dcache_align;		/* compat arm */
u_int arm_dcache_align_mask;	/* compat arm */
u_int arm_dcache_maxline;

u_int aarch64_cache_vindexsize;
u_int aarch64_cache_prefer_mask;

int aarch64_hafdbs_enabled __read_mostly;
int aarch64_pan_enabled __read_mostly;
int aarch64_pac_enabled __read_mostly;
static void __noasan
extract_cacheunit(int level, bool insn, int cachetype,
    struct aarch64_cache_info *cacheinfo)
{
	struct aarch64_cache_unit *cunit;
	uint64_t ccsidr, mmfr2;

	/* select the level N instruction or data cache via CSSELR */
	reg_csselr_el1_write(__SHIFTIN(level, CSSELR_LEVEL) |
	    __SHIFTIN(insn ? 1 : 0, CSSELR_IND));
	isb();

	ccsidr = reg_ccsidr_el1_read();
	mmfr2 = reg_id_aa64mmfr2_el1_read();

	if (insn)
		cunit = &cacheinfo[level].icache;
	else
		cunit = &cacheinfo[level].dcache;

	cunit->cache_type = cachetype;

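	/*
	 * With FEAT_CCIDX (ARMv8.3), CCSIDR_EL1 uses the 64-bit layout with
	 * wider NumSets/Associativity fields; otherwise the original 32-bit
	 * layout applies.  ID_AA64MMFR2_EL1.CCIDX says which one is in use.
	 */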
	switch (__SHIFTOUT(mmfr2, ID_AA64MMFR2_EL1_CCIDX)) {
	case ID_AA64MMFR2_EL1_CCIDX_32BIT:
		cunit->cache_line_size =
		    1 << (__SHIFTOUT(ccsidr, CCSIDR_LINESIZE) + 4);
		cunit->cache_ways = __SHIFTOUT(ccsidr, CCSIDR_ASSOC) + 1;
		cunit->cache_sets = __SHIFTOUT(ccsidr, CCSIDR_NUMSET) + 1;
		break;
	case ID_AA64MMFR2_EL1_CCIDX_64BIT:
		cunit->cache_line_size =
		    1 << (__SHIFTOUT(ccsidr, CCSIDR64_LINESIZE) + 4);
		cunit->cache_ways = __SHIFTOUT(ccsidr, CCSIDR64_ASSOC) + 1;
		cunit->cache_sets = __SHIFTOUT(ccsidr, CCSIDR64_NUMSET) + 1;
		break;
	}

	/* calculate the way size and the total size */
	cunit->cache_way_size = cunit->cache_line_size * cunit->cache_sets;
	cunit->cache_size = cunit->cache_way_size * cunit->cache_ways;
}


/* Must be called on each processor */
void __noasan
aarch64_getcacheinfo(struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	uint32_t clidr, ctr;
	int level, cachetype;

	/*
	 * CTR - Cache Type Register
	 */
	ctr = reg_ctr_el0_read();
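	/*
	 * CTR_EL0.L1IP reports the indexing/tagging policy of the L1
	 * instruction cache; data and unified caches are treated as PIPT
	 * from the software point of view.
	 */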
	switch (__SHIFTOUT(ctr, CTR_EL0_L1IP_MASK)) {
	case CTR_EL0_L1IP_VPIPT:
		cachetype = CACHE_TYPE_VPIPT;
		break;
	case CTR_EL0_L1IP_AIVIVT:
		cachetype = CACHE_TYPE_VIVT;
		break;
	case CTR_EL0_L1IP_VIPT:
		cachetype = CACHE_TYPE_VIPT;
		break;
	case CTR_EL0_L1IP_PIPT:
		cachetype = CACHE_TYPE_PIPT;
		break;
	}

	/*
	 * CLIDR - Cache Level ID Register
	 * CSSELR - Cache Size Selection Register
	 * CCSIDR - Current Cache Size ID Register (selected by CSSELR)
	 */

	/* L1, L2, L3, ..., L8 cache */
	for (level = 0, clidr = reg_clidr_el1_read();
	    level < MAX_CACHE_LEVEL; level++, clidr >>= 3) {

		int cacheable;

		switch (clidr & 7) {
		case CLIDR_TYPE_NOCACHE:
			cacheable = CACHE_CACHEABLE_NONE;
			break;
		case CLIDR_TYPE_ICACHE:
			cacheable = CACHE_CACHEABLE_ICACHE;
			extract_cacheunit(level, true, cachetype, cinfo);
			break;
		case CLIDR_TYPE_DCACHE:
			cacheable = CACHE_CACHEABLE_DCACHE;
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		case CLIDR_TYPE_IDCACHE:
			cacheable = CACHE_CACHEABLE_IDCACHE;
			extract_cacheunit(level, true, cachetype, cinfo);
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		case CLIDR_TYPE_UNIFIEDCACHE:
			cacheable = CACHE_CACHEABLE_UNIFIED;
			extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo);
			break;
		default:
			cacheable = CACHE_CACHEABLE_NONE;
			break;
		}

		cinfo[level].cacheable = cacheable;
		if (cacheable == CACHE_CACHEABLE_NONE) {
			/* no more levels */
			break;
		}

		/*
		 * Only the L1 instruction cache uses the type from
		 * CTR_EL0.L1IP; every other cache is treated as PIPT.
		 */
		cachetype = CACHE_TYPE_PIPT;
	}
}


void
aarch64_parsecacheinfo(struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
	const uint32_t ctr = id->ac_ctr;
	u_int vindexsize;

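	/*
	 * CTR_EL0.DminLine is log2 of the smallest D-cache line size,
	 * expressed in 4-byte words, so the line size in bytes is
	 * sizeof(int) << DminLine.
	 */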
	/* remember maximum alignment */
	if (arm_dcache_maxline < __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE)) {
		arm_dcache_maxline = __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE);
		arm_dcache_align = sizeof(int) << arm_dcache_maxline;
		arm_dcache_align_mask = arm_dcache_align - 1;
	}

#ifdef MULTIPROCESSOR
	if (coherency_unit < arm_dcache_align)
		panic("coherency_unit %ld < %d; increase COHERENCY_UNIT",
		    coherency_unit, arm_dcache_align);
#endif

	/* calculate L1 icache virtual index size */
	if ((cinfo[0].icache.cache_type == CACHE_TYPE_VIVT ||
	     cinfo[0].icache.cache_type == CACHE_TYPE_VIPT) &&
	    (cinfo[0].cacheable == CACHE_CACHEABLE_ICACHE ||
	     cinfo[0].cacheable == CACHE_CACHEABLE_IDCACHE)) {

		vindexsize =
		    cinfo[0].icache.cache_size /
		    cinfo[0].icache.cache_ways;

		KASSERT(vindexsize != 0);
	} else {
		vindexsize = 0;
	}

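	/*
	 * If the virtual index (way size) of a VIVT/VIPT icache exceeds the
	 * page size, virtually aliasing pages are possible; remember the
	 * largest index seen and recolor UVM pages accordingly.
	 */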
	if (vindexsize > aarch64_cache_vindexsize) {
		aarch64_cache_vindexsize = vindexsize;
		aarch64_cache_prefer_mask = vindexsize - 1;

		if (uvm.page_init_done)
			uvm_page_recolor(vindexsize / PAGE_SIZE);
	}
}

static int
prt_cache(device_t self, struct aarch64_cache_info *cinfo, int level)
{
	struct aarch64_cache_unit *cunit;
	int i;
	const char *cacheable, *cachetype;

	if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
		return -1;

	for (i = 0; i < 2; i++) {
		switch (cinfo[level].cacheable) {
		case CACHE_CACHEABLE_ICACHE:
			cunit = &cinfo[level].icache;
			cacheable = "Instruction";
			break;
		case CACHE_CACHEABLE_DCACHE:
			cunit = &cinfo[level].dcache;
			cacheable = "Data";
			break;
		case CACHE_CACHEABLE_IDCACHE:
			if (i == 0) {
				cunit = &cinfo[level].icache;
				cacheable = "Instruction";
			} else {
				cunit = &cinfo[level].dcache;
				cacheable = "Data";
			}
			break;
		case CACHE_CACHEABLE_UNIFIED:
			cunit = &cinfo[level].dcache;
			cacheable = "Unified";
			break;
		default:
			cunit = &cinfo[level].dcache;
			cacheable = "*UNK*";
			break;
		}

		switch (cunit->cache_type) {
		case CACHE_TYPE_VPIPT:
			cachetype = "VPIPT";
			break;
		case CACHE_TYPE_VIVT:
			cachetype = "VIVT";
			break;
		case CACHE_TYPE_VIPT:
			cachetype = "VIPT";
			break;
		case CACHE_TYPE_PIPT:
			cachetype = "PIPT";
			break;
		default:
			cachetype = "*UNK*";
			break;
		}

		aprint_verbose_dev(self,
		    "L%d %uKB/%uB %u-way (%u set) %s %s cache\n",
		    level + 1,
		    cunit->cache_size / 1024,
		    cunit->cache_line_size,
		    cunit->cache_ways,
		    cunit->cache_sets,
		    cachetype, cacheable);

		if (cinfo[level].cacheable != CACHE_CACHEABLE_IDCACHE)
			break;
	}

	return 0;
}

void
aarch64_printcacheinfo(device_t dev, struct cpu_info *ci)
{
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++)
		if (prt_cache(dev, cinfo, level) < 0)
			break;
}


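/*
 * Clean/invalidate a whole cache level by set/way.  The DC {C,CI,I}SW
 * operand encodes the cache level in bits [3:1], the set index starting
 * at log2(line size), and the way index in the uppermost bits.
 */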
static inline void
ln_dcache_wb_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc csw, %0; dsb sy" :: "r"(x));
		}
	}
}

static inline void
ln_dcache_wbinv_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc cisw, %0; dsb sy" :: "r"(x));
		}
	}
}

static inline void
ln_dcache_inv_all(int level, struct aarch64_cache_unit *cunit)
{
	uint64_t x;
	unsigned int set, way, setshift, wayshift;

	setshift = ffs(cunit->cache_line_size) - 1;
	wayshift = 32 - (ffs(cunit->cache_ways) - 1);

	for (way = 0; way < cunit->cache_ways; way++) {
		for (set = 0; set < cunit->cache_sets; set++) {
			x = (way << wayshift) | (set << setshift) |
			    (level << 1);
			__asm __volatile ("dc isw, %0; dsb sy" :: "r"(x));
		}
	}
}

void
aarch64_dcache_wbinv_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_wbinv_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

void
aarch64_dcache_inv_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_inv_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

void
aarch64_dcache_wb_all(void)
{
	KASSERT(kpreempt_disabled());

	struct cpu_info * const ci = curcpu();
	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
	int level;

	for (level = 0; level < MAX_CACHE_LEVEL; level++) {
		if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE)
			break;

		dsb(ish);
		ln_dcache_wb_all(level, &cinfo[level].dcache);
	}
	dsb(ish);
}

int
set_cpufuncs(void)
{
	// This is only called from the boot processor (BP)

	return aarch64_setcpufuncs(&cpu_info_store[0]);
}


int
aarch64_setcpufuncs(struct cpu_info *ci)
{
	const uint64_t ctr = reg_ctr_el0_read();
	const uint64_t clidr = reg_clidr_el1_read();

	/* install default functions */
	ci->ci_cpufuncs.cf_set_ttbr0 = aarch64_set_ttbr0;
	ci->ci_cpufuncs.cf_icache_sync_range = aarch64_icache_sync_range;

	/*
	 * install core/cluster specific functions
	 */

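	/*
	 * CTR_EL0.DIC/IDC, or a CLIDR_EL1 LoC/LoUU/LoUIS of zero, indicate
	 * that part of the usual clean-Dcache-then-invalidate-Icache
	 * sequence is unnecessary, so a lighter icache sync routine can be
	 * installed below.
	 */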
	/* Icache sync op */
	if (__SHIFTOUT(ctr, CTR_EL0_DIC) == 1) {
		/* Icache invalidation to the PoU is not required */
		ci->ci_cpufuncs.cf_icache_sync_range =
		    aarch64_icache_barrier_range;
	} else if (__SHIFTOUT(ctr, CTR_EL0_IDC) == 1 ||
	    __SHIFTOUT(clidr, CLIDR_LOC) == 0 ||
	    (__SHIFTOUT(clidr, CLIDR_LOUIS) == 0 && __SHIFTOUT(clidr, CLIDR_LOUU) == 0)) {
		/* Dcache clean to the PoU is not required for Icache */
		ci->ci_cpufuncs.cf_icache_sync_range =
		    aarch64_icache_inv_range;
	}

#ifdef CPU_THUNDERX
	const uint32_t midr = reg_midr_el1_read();

	/* Cavium erratum 27456 */
	if ((midr == CPU_ID_THUNDERXP1d0) ||
	    (midr == CPU_ID_THUNDERXP1d1) ||
	    (midr == CPU_ID_THUNDERXP2d1) ||
	    (midr == CPU_ID_THUNDERX81XXRX)) {
		ci->ci_cpufuncs.cf_set_ttbr0 = aarch64_set_ttbr0_thunderx;
	}
#endif

	return 0;
}

void
aarch64_hafdbs_init(int primary)
{
#ifdef ARMV81_HAFDBS
	uint64_t tcr;
	int hafdbs;

	hafdbs = __SHIFTOUT(reg_id_aa64mmfr1_el1_read(),
	    ID_AA64MMFR1_EL1_HAFDBS);

	/*
	 * hafdbs
	 *  0:HAFDBS_NONE - no support for any hardware flags
	 *  1:HAFDBS_A    - only the hardware Access flag is supported
	 *  2:HAFDBS_AD   - hardware Access and Dirty (modified) flags are
	 *                  supported
	 */

	if (primary) {
		/* CPU0 does the detection. */
		switch (hafdbs) {
		case ID_AA64MMFR1_EL1_HAFDBS_NONE:
		default:
			aarch64_hafdbs_enabled = 0;
			break;
		case ID_AA64MMFR1_EL1_HAFDBS_A:
		case ID_AA64MMFR1_EL1_HAFDBS_AD:
			aarch64_hafdbs_enabled = hafdbs;
			break;
		}
	} else {
		/*
		 * The HAFDBS support status of this secondary CPU may
		 * differ from what was detected on the primary CPU.
		 *
		 * XXX:
		 * The correct way to handle this is to disable HAFDBS on
		 * all cores, or to call pmap_fault_fixup() only on the
		 * unsupported cores, but for now just panic().
		 */
		if (aarch64_hafdbs_enabled != hafdbs)
			panic("HAFDBS is supported (%d) on the primary cpu, "
			    "but differs (%d) on a secondary cpu",
			    aarch64_hafdbs_enabled, hafdbs);
	}

	/* enable hardware updates of the Access flag and Dirty state */
	tcr = reg_tcr_el1_read();
	switch (hafdbs) {
	case ID_AA64MMFR1_EL1_HAFDBS_NONE:
	default:
		break;
	case ID_AA64MMFR1_EL1_HAFDBS_A:
		/* enable only the Access flag */
		reg_tcr_el1_write(tcr | TCR_HA);
		isb();
		break;
	case ID_AA64MMFR1_EL1_HAFDBS_AD:
		/* enable both Access and Dirty */
		reg_tcr_el1_write(tcr | TCR_HD | TCR_HA);
		isb();
		break;
	}
#endif
}

void
aarch64_pan_init(int primary)
{
#ifdef ARMV81_PAN
	uint64_t reg, sctlr;

	/* CPU0 does the detection. */
	if (primary) {
		reg = reg_id_aa64mmfr1_el1_read();
		if (__SHIFTOUT(reg, ID_AA64MMFR1_EL1_PAN) !=
		    ID_AA64MMFR1_EL1_PAN_NONE)
			aarch64_pan_enabled = 1;
	}

	if (!aarch64_pan_enabled)
		return;

	/*
	 * On an exception to EL1, have the CPU set the PAN bit automatically.
	 * This ensures PAN is enabled each time the kernel is entered.
	 */
	sctlr = reg_sctlr_el1_read();
	sctlr &= ~SCTLR_SPAN;
	reg_sctlr_el1_write(sctlr);

	/* Set the PAN bit right now. */
	reg_pan_write(1);
#endif
}

/*
 * In order to avoid inconsistencies with pointer authentication
 * in this function itself, the caller must enable PAC according
 * to the return value.
 */
int
aarch64_pac_init(int primary)
{
#ifdef ARMV83_PAC
	uint64_t reg;

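	/*
	 * ID_AA64ISAR1_EL1: APA/API report address authentication support
	 * (architected / IMPLEMENTATION DEFINED algorithm), and GPA/GPI
	 * report generic (PACGA) authentication support.
	 */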
	/* CPU0 does the detection. */
	if (primary) {
		reg = reg_id_aa64isar1_el1_read();
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_APA) !=
		    ID_AA64ISAR1_EL1_APA_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_API) !=
		    ID_AA64ISAR1_EL1_API_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_GPA) !=
		    ID_AA64ISAR1_EL1_GPA_NONE)
			aarch64_pac_enabled = 1;
		if (__SHIFTOUT(reg, ID_AA64ISAR1_EL1_GPI) !=
		    ID_AA64ISAR1_EL1_GPI_NONE)
			aarch64_pac_enabled = 1;
	}

	if (!aarch64_pac_enabled)
		return -1;

	/* Set the key. Curlwp here is the CPU's idlelwp. */
	reg_APIAKeyLo_EL1_write(curlwp->l_md.md_ia_kern[0]);
	reg_APIAKeyHi_EL1_write(curlwp->l_md.md_ia_kern[1]);

	return 0;
#else
	return -1;
#endif
}