1 /* $NetBSD: cache.c,v 1.102 2021/01/24 07:36:54 mrg Exp $ */
2
3 /*
4 * Copyright (c) 1996
5 * The President and Fellows of Harvard College. All rights reserved.
6 * Copyright (c) 1992, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This software was developed by the Computer Systems Engineering group
10 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
11 * contributed to Berkeley.
12 *
13 * All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by Harvard University.
16 * This product includes software developed by the University of
17 * California, Lawrence Berkeley Laboratory.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 *
23 * 1. Redistributions of source code must retain the above copyright
24 * notice, this list of conditions and the following disclaimer.
25 * 2. Redistributions in binary form must reproduce the above copyright
26 * notice, this list of conditions and the following disclaimer in the
27 * documentation and/or other materials provided with the distribution.
28 * 3. All advertising materials mentioning features or use of this software
29 * must display the following acknowledgement:
30 * This product includes software developed by Aaron Brown and
31 * Harvard University.
32 * This product includes software developed by the University of
33 * California, Berkeley and its contributors.
34 * 4. Neither the name of the University nor the names of its contributors
35 * may be used to endorse or promote products derived from this software
36 * without specific prior written permission.
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 *
50 * @(#)cache.c 8.2 (Berkeley) 10/30/93
51 *
52 */
53
54 /*
55 * Cache routines.
56 *
57 * TODO:
58 * - rework range flush
59 */
60
61 #include <sys/cdefs.h>
62 __KERNEL_RCSID(0, "$NetBSD: cache.c,v 1.102 2021/01/24 07:36:54 mrg Exp $");
63
64 #include "opt_multiprocessor.h"
65 #include "opt_sparc_arch.h"
66
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/kernel.h>
70
71 #include <uvm/uvm_extern.h>
72
73 #include <machine/ctlreg.h>
74 #include <machine/pte.h>
75 #include <machine/locore.h>
76
77 #include <sparc/sparc/asm.h>
78 #include <sparc/sparc/cache.h>
79 #include <sparc/sparc/cpuvar.h>
80
81 struct evcnt vcache_flush_pg =
82 EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","pg");
83 EVCNT_ATTACH_STATIC(vcache_flush_pg);
84 struct evcnt vcache_flush_seg =
85 EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","seg");
86 EVCNT_ATTACH_STATIC(vcache_flush_seg);
87 struct evcnt vcache_flush_reg =
88 EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","reg");
89 EVCNT_ATTACH_STATIC(vcache_flush_reg);
90 struct evcnt vcache_flush_ctx =
91 EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","ctx");
92 EVCNT_ATTACH_STATIC(vcache_flush_ctx);
93 struct evcnt vcache_flush_range =
94 EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","rng");
95 EVCNT_ATTACH_STATIC(vcache_flush_range);
96
97 int cache_alias_dist; /* Cache anti-aliasing constants */
98 int cache_alias_bits;
99 u_long dvma_cachealign;
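
/*
 * Illustrative sketch, not part of the original file: two virtual
 * addresses can collide in a virtually indexed cache when they differ
 * only in the bits covered by cache_alias_bits.  The anti-aliasing
 * constants above are used by the pmap layer for checks of roughly
 * this shape; the helper name and guard below are ours.
 */
#ifdef CACHE_ALIAS_EXAMPLE
static inline int
example_bad_alias(vaddr_t va1, vaddr_t va2)
{

	/*
	 * Non-zero if mapping one physical page at both va1 and va2
	 * would index different cache locations (an illegal alias).
	 */
	return ((va1 ^ va2) & cache_alias_bits) != 0;
}
#endif /* CACHE_ALIAS_EXAMPLE */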
100
101 /*
102 * Enable the cache.
103 * We need to clear out the valid bits first.
104 */
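
/*
 * Worked example (ours, illustrative figures only): with a 64 KB
 * sun4c cache and 16-byte lines, the tag-clearing loop below performs
 * 64K/16 = 4096 control-space stores before the enable bit is set.
 */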
105 void
106 sun4_cache_enable(void)
107 {
108 u_int i, lim, ls, ts;
109
110 cache_alias_bits = CPU_ISSUN4
111 ? CACHE_ALIAS_BITS_SUN4
112 : CACHE_ALIAS_BITS_SUN4C;
113 cache_alias_dist = CPU_ISSUN4
114 ? CACHE_ALIAS_DIST_SUN4
115 : CACHE_ALIAS_DIST_SUN4C;
116
117 ls = CACHEINFO.c_linesize;
118 ts = CACHEINFO.c_totalsize;
119
120 for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
121 sta(i, ASI_CONTROL, 0);
122
123 stba(AC_SYSENABLE, ASI_CONTROL,
124 lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
125 CACHEINFO.c_enabled = 1;
126
127 #ifdef notyet
128 if (cpuinfo.flags & SUN4_IOCACHE) {
129 stba(AC_SYSENABLE, ASI_CONTROL,
130 lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
131 printf("iocache enabled\n");
132 }
133 #endif
134 }
135
136 /*
137 * XXX Hammer is a bit too big, here; SUN4D systems only have Viking.
138 */
139 #if defined(SUN4M) || defined(SUN4D)
140 void
141 ms1_cache_enable(void)
142 {
143 u_int pcr;
144
145 cache_alias_dist = uimax(
146 CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
147 CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
148 cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
149
150 pcr = lda(SRMMU_PCR, ASI_SRMMU);
151
152 /* We "flash-clear" the I/D caches. */
153 if ((pcr & MS1_PCR_ICE) == 0)
154 sta(0, ASI_ICACHECLR, 0);
155 if ((pcr & MS1_PCR_DCE) == 0)
156 sta(0, ASI_DCACHECLR, 0);
157
158 /* Turn on caches */
159 sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);
160
161 CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
162
163 /*
164 * When zeroing or copying pages, there might still be entries in
165 * the cache, since we don't flush pages from the cache when
166 * unmapping them (`vactype' is VAC_NONE). Fortunately, the
167 * MS1 cache is write-through and not write-allocate, so we can
168 * use cacheable access while not displacing cache lines.
169 */
170 CACHEINFO.c_flags |= CACHE_MANDATORY;
171 }
172
173 void
174 viking_cache_enable(void)
175 {
176 u_int pcr;
177
178 pcr = lda(SRMMU_PCR, ASI_SRMMU);
179
180 if ((pcr & VIKING_PCR_ICE) == 0) {
181 /* I-cache not on; "flash-clear" it now. */
182 sta(0x80000000, ASI_ICACHECLR, 0); /* Unlock */
183 sta(0, ASI_ICACHECLR, 0); /* clear */
184 }
185 if ((pcr & VIKING_PCR_DCE) == 0) {
186 /* D-cache not on: "flash-clear" it. */
187 sta(0x80000000, ASI_DCACHECLR, 0);
188 sta(0, ASI_DCACHECLR, 0);
189 }
190
191 /* Turn on caches via MMU */
192 sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);
193
194 CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
195
196 /* Now turn on MultiCache if it exists */
197 if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
198 /* Set external cache enable bit in MXCC control register */
199 stda(MXCC_CTRLREG, ASI_CONTROL,
200 ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
201 CACHEINFO.c_flags |= CACHE_PAGETABLES; /* Ok to cache PTEs */
202 CACHEINFO.ec_enabled = 1;
203 }
204 }
205
206 void
207 hypersparc_cache_enable(void)
208 {
209 int i, ls, ts;
210 u_int pcr, v;
211 int alias_dist;
212
213 /*
214 * Setup the anti-aliasing constants and DVMA alignment constraint.
215 */
216 alias_dist = CACHEINFO.c_totalsize;
217 if (alias_dist > cache_alias_dist) {
218 cache_alias_dist = alias_dist;
219 cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
220 dvma_cachealign = cache_alias_dist;
221 }
222
223 ls = CACHEINFO.c_linesize;
224 ts = CACHEINFO.c_totalsize;
225 pcr = lda(SRMMU_PCR, ASI_SRMMU);
226
227 /* Now reset cache tag memory if cache not yet enabled */
228 if ((pcr & HYPERSPARC_PCR_CE) == 0)
229 for (i = 0; i < ts; i += ls)
230 sta(i, ASI_DCACHETAG, 0);
231
232 pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
233 hypersparc_cache_flush_all();
234
235 /* Enable write-back cache */
236 pcr |= HYPERSPARC_PCR_CE;
237 if (CACHEINFO.c_vactype == VAC_WRITEBACK)
238 pcr |= HYPERSPARC_PCR_CM;
239
240 sta(SRMMU_PCR, ASI_SRMMU, pcr);
241 CACHEINFO.c_enabled = 1;
242
243 /* XXX: should add support */
244 if (CACHEINFO.c_hwflush)
245 panic("cache_enable: can't handle 4M with hw-flush cache");
246
247 /*
248 * Enable instruction cache and, on single-processor machines,
249 * disable `Unimplemented Flush Traps'.
250 */
251 v = HYPERSPARC_ICCR_ICE | (sparc_ncpus <= 1 ? HYPERSPARC_ICCR_FTD : 0);
252 wrasr(v, HYPERSPARC_ASRNUM_ICCR);
253 }
254
255
256 void
257 swift_cache_enable(void)
258 {
259 int i, ls, ts;
260 u_int pcr;
261
262 cache_alias_dist = uimax(
263 CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
264 CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
265 cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
266
267 pcr = lda(SRMMU_PCR, ASI_SRMMU);
268
269 /* Now reset cache tag memory if cache not yet enabled */
270 ls = CACHEINFO.ic_linesize;
271 ts = CACHEINFO.ic_totalsize;
272 if ((pcr & SWIFT_PCR_ICE) == 0)
273 for (i = 0; i < ts; i += ls)
274 sta(i, ASI_ICACHETAG, 0);
275
276 ls = CACHEINFO.dc_linesize;
277 ts = CACHEINFO.dc_totalsize;
278 if ((pcr & SWIFT_PCR_DCE) == 0)
279 for (i = 0; i < ts; i += ls)
280 sta(i, ASI_DCACHETAG, 0);
281
282 pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
283 sta(SRMMU_PCR, ASI_SRMMU, pcr);
284 CACHEINFO.c_enabled = 1;
285 }
286
287 void
288 cypress_cache_enable(void)
289 {
290 int i, ls, ts;
291 u_int pcr;
292 int alias_dist;
293
294 alias_dist = CACHEINFO.c_totalsize;
295 if (alias_dist > cache_alias_dist) {
296 cache_alias_dist = alias_dist;
297 cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
298 dvma_cachealign = alias_dist;
299 }
300
301 pcr = lda(SRMMU_PCR, ASI_SRMMU);
302 pcr &= ~CYPRESS_PCR_CM;
303
304 /* Now reset cache tag memory if cache not yet enabled */
305 ls = CACHEINFO.c_linesize;
306 ts = CACHEINFO.c_totalsize;
307 if ((pcr & CYPRESS_PCR_CE) == 0)
308 for (i = 0; i < ts; i += ls)
309 sta(i, ASI_DCACHETAG, 0);
310
311 pcr |= CYPRESS_PCR_CE;
312 /* If put in write-back mode, turn it on */
313 if (CACHEINFO.c_vactype == VAC_WRITEBACK)
314 pcr |= CYPRESS_PCR_CM;
315 sta(SRMMU_PCR, ASI_SRMMU, pcr);
316 CACHEINFO.c_enabled = 1;
317 }
318
319 void
320 turbosparc_cache_enable(void)
321 {
322 int i, ls, ts;
323 u_int pcr, pcf;
324 /* External cache sizes in KB; see Turbo sparc manual */
325 static const int ts_ecache_table[8] = {0,256,512,1024,512,1024,1024,0};
326
327 cache_alias_dist = uimax(
328 CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
329 CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
330 cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
331
332 pcr = lda(SRMMU_PCR, ASI_SRMMU);
333
334 /* Now reset cache tag memory if cache not yet enabled */
335 ls = CACHEINFO.ic_linesize;
336 ts = CACHEINFO.ic_totalsize;
337 if ((pcr & TURBOSPARC_PCR_ICE) == 0)
338 for (i = 0; i < ts; i += ls)
339 sta(i, ASI_ICACHETAG, 0);
340
341 ls = CACHEINFO.dc_linesize;
342 ts = CACHEINFO.dc_totalsize;
343 if ((pcr & TURBOSPARC_PCR_DCE) == 0)
344 for (i = 0; i < ts; i += ls)
345 sta(i, ASI_DCACHETAG, 0);
346
347 pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
348 sta(SRMMU_PCR, ASI_SRMMU, pcr);
349
350 pcf = lda(SRMMU_PCFG, ASI_SRMMU);
351 if (pcf & TURBOSPARC_PCFG_SE) {
352 /*
353 * Record external cache info. The Turbosparc's second-
354 * level cache is physically addressed/tagged and is
355 * not exposed by the PROM.
356 */
357 CACHEINFO.ec_totalsize = 1024 *
358 ts_ecache_table[(pcf & TURBOSPARC_PCFG_SCC)];
359 CACHEINFO.ec_linesize = 32;
360 }
361 if (pcf & TURBOSPARC_PCFG_SNP)
362 printf(": DVMA coherent ");
363
364 CACHEINFO.c_enabled = 1;
365 }
366 #endif /* SUN4M || SUN4D */
367
368
369 /*
370 * Note: on the sun4 and sun4c, the cache flush functions ignore the `ctx'
371 * parameter. This can be done since the pmap operations that need
372 * to flush cache lines will already have switched to the proper
373 * context to manipulate the MMU. Hence we can avoid the overhead
374 * of saving and restoring the context here.
375 */
376
377 /*
378 * Flush the current context from the cache.
379 *
380 * This is done by writing to each cache line in the `flush context'
381 * address space (or, for hardware flush, once to each page in the
382 * hardware flush space, for all cache pages).
383 */
384 void
385 sun4_vcache_flush_context(int ctx)
386 {
387 char *p;
388 int i, ls;
389
390 vcache_flush_ctx.ev_count++;
391 p = (char *)0; /* addresses 0..cacheinfo.c_totalsize will do fine */
392 if (CACHEINFO.c_hwflush) {
393 ls = PAGE_SIZE;
394 i = CACHEINFO.c_totalsize >> PGSHIFT;
395 for (; --i >= 0; p += ls)
396 sta(p, ASI_HWFLUSHCTX, 0);
397 } else {
398 ls = CACHEINFO.c_linesize;
399 i = CACHEINFO.c_nlines;
400 for (; --i >= 0; p += ls)
401 sta(p, ASI_FLUSHCTX, 0);
402 }
403 }
404
405 /*
406 * Flush the given virtual region from the cache.
407 *
408 * This is also done by writing to each cache line, except that
409 * now the addresses must include the virtual region number, and
410 * we use the `flush region' space.
411 *
412 * This function is only called on sun4's with 3-level MMUs; there's
413 * no hw-flush space.
414 */
415 void
416 sun4_vcache_flush_region(int vreg, int ctx)
417 {
418 int i, ls;
419 char *p;
420
421 vcache_flush_reg.ev_count++;
422 p = (char *)VRTOVA(vreg); /* reg..reg+sz rather than 0..sz */
423 ls = CACHEINFO.c_linesize;
424 i = CACHEINFO.c_nlines;
425 for (; --i >= 0; p += ls)
426 sta(p, ASI_FLUSHREG, 0);
427 }
428
429 /*
430 * Flush the given virtual segment from the cache.
431 *
432 * This is also done by writing to each cache line, except that
433 * now the addresses must include the virtual segment number, and
434 * we use the `flush segment' space.
435 *
436 * Again, for hardware, we just write each page (in hw-flush space).
437 */
438 void
439 sun4_vcache_flush_segment(int vreg, int vseg, int ctx)
440 {
441 int i, ls;
442 char *p;
443
444 vcache_flush_seg.ev_count++;
445 p = (char *)VSTOVA(vreg, vseg); /* seg..seg+sz rather than 0..sz */
446 if (CACHEINFO.c_hwflush) {
447 ls = PAGE_SIZE;
448 i = CACHEINFO.c_totalsize >> PGSHIFT;
449 for (; --i >= 0; p += ls)
450 sta(p, ASI_HWFLUSHSEG, 0);
451 } else {
452 ls = CACHEINFO.c_linesize;
453 i = CACHEINFO.c_nlines;
454 for (; --i >= 0; p += ls)
455 sta(p, ASI_FLUSHSEG, 0);
456 }
457 }
458
459 /*
460 * Flush the given virtual page from the cache.
461 * (va is the actual address, and must be aligned on a page boundary.)
462 * Again we write to each cache line.
463 */
464 void
465 sun4_vcache_flush_page(int va, int ctx)
466 {
467 int i, ls;
468 char *p;
469
470 #ifdef DEBUG
471 if (va & PGOFSET)
472 panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
473 #endif
474
475 vcache_flush_pg.ev_count++;
476 p = (char *)va;
477 ls = CACHEINFO.c_linesize;
478 i = PAGE_SIZE >> CACHEINFO.c_l2linesize;
479 for (; --i >= 0; p += ls)
480 sta(p, ASI_FLUSHPG, 0);
481 }
482
483 /*
484 * Flush the given virtual page from the cache.
485 * (va is the actual address, and must be aligned on a page boundary.)
486 * This version uses hardware-assisted flush operation and just needs
487 * one write into ASI_HWFLUSHPG space to flush all cache lines.
488 */
489 void
490 sun4_vcache_flush_page_hw(int va, int ctx)
491 {
492 char *p;
493
494 #ifdef DEBUG
495 if (va & PGOFSET)
496 panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
497 #endif
498
499 vcache_flush_pg.ev_count++;
500 p = (char *)va;
501 sta(p, ASI_HWFLUSHPG, 0);
502 }
503
504 /*
505 * Flush a range of virtual addresses (in the current context).
506 * The first byte is at (base&~PGOFSET) and the last one is just
507 * before byte (base+len).
508 *
509 * We choose the best of (context,segment,page) here.
510 */
511
512 #define CACHE_FLUSH_MAGIC (CACHEINFO.c_totalsize / PAGE_SIZE)
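
/*
 * Worked example (ours, typical figures only): with a 64 KB cache and
 * 4 KB pages, CACHE_FLUSH_MAGIC is 16.  A request touching fewer than
 * 16 pages is flushed page by page below; larger requests fall through
 * to the segment, region and context cases.
 */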
513
514 void
515 sun4_cache_flush(void *base, u_int len)
516 {
517 int i, ls, baseoff;
518 char *p;
519
520 if (CACHEINFO.c_vactype == VAC_NONE)
521 return;
522
523 /*
524 * Figure out how much must be flushed.
525 *
526 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
527 * in the same number of loop iterations. We can also do the whole
528 * region. If we need to do between 2 and NSEGRG, do the region.
529 * If we need to do two or more regions, just go ahead and do the
530 * whole context. This might not be ideal (e.g., fsck likes to do
531 * 65536-byte reads, which might not necessarily be aligned).
532 *
533 * We could try to be sneaky here and use the direct mapping
534 * to avoid flushing things `below' the start and `above' the
535 * ending address (rather than rounding to whole pages and
536 * segments), but I did not want to debug that now and it is
537 * not clear it would help much.
538 *
539 * (XXX the magic number 16 is now wrong, must review policy)
540 */
541 baseoff = (int)base & PGOFSET;
542 i = (baseoff + len + PGOFSET) >> PGSHIFT;
543
544 vcache_flush_range.ev_count++;
545
546 if (__predict_true(i < CACHE_FLUSH_MAGIC)) {
547 /* cache_flush_page, for i pages */
548 p = (char *)((int)base & ~baseoff);
549 if (CACHEINFO.c_hwflush) {
550 for (; --i >= 0; p += PAGE_SIZE)
551 sta(p, ASI_HWFLUSHPG, 0);
552 } else {
553 ls = CACHEINFO.c_linesize;
554 i <<= PGSHIFT - CACHEINFO.c_l2linesize;
555 for (; --i >= 0; p += ls)
556 sta(p, ASI_FLUSHPG, 0);
557 }
558 return;
559 }
560
561 baseoff = (u_int)base & SGOFSET;
562 i = (baseoff + len + SGOFSET) >> SGSHIFT;
563 if (__predict_true(i == 1)) {
564 sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), 0);
565 return;
566 }
567
568 if (HASSUN4_MMU3L) {
569 baseoff = (u_int)base & RGOFSET;
570 i = (baseoff + len + RGOFSET) >> RGSHIFT;
571 if (i == 1)
572 sun4_vcache_flush_region(VA_VREG(base), 0);
573 else
574 sun4_vcache_flush_context(0);
575 } else
576 sun4_vcache_flush_context(0);
577 }
578
579
580 #if defined(SUN4M) || defined(SUN4D)
581 #define trapoff() do { setpsr(getpsr() & ~PSR_ET); } while(0)
582 #define trapon() do { setpsr(getpsr() | PSR_ET); } while(0)
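
/*
 * Sketch (ours) of the pattern shared by the SRMMU flush routines
 * below: disable traps, temporarily switch to the target context,
 * issue the per-line flushes, then restore the original context.
 *
 *	octx = getcontext4m();
 *	trapoff();
 *	setcontext4m(ctx);
 *	...one sta(va, ASI_IDCACHELF*, 0) per cache line...
 *	setcontext4m(octx);
 *	trapon();
 *
 * Traps are turned off so nothing can fault or interrupt us while
 * the borrowed context is installed.
 */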
583 /*
584 * Flush the current context from the cache.
585 *
586 * This is done by writing to each cache line in the `flush context'
587 * address space.
588 */
589 void
590 srmmu_vcache_flush_context(int ctx)
591 {
592 int i, ls, octx;
593 char *p;
594
595 vcache_flush_ctx.ev_count++;
596 p = (char *)0; /* addresses 0..cacheinfo.c_totalsize will do fine */
597 ls = CACHEINFO.c_linesize;
598 i = CACHEINFO.c_nlines;
599 octx = getcontext4m();
600 trapoff();
601 setcontext4m(ctx);
602 for (; --i >= 0; p += ls)
603 sta(p, ASI_IDCACHELFC, 0);
604 setcontext4m(octx);
605 trapon();
606 }
607
608 /*
609 * Flush the given virtual region from the cache.
610 *
611 * This is also done by writing to each cache line, except that
612 * now the addresses must include the virtual region number, and
613 * we use the `flush region' space.
614 */
615 void
616 srmmu_vcache_flush_region(int vreg, int ctx)
617 {
618 int i, ls, octx;
619 char *p;
620
621 vcache_flush_reg.ev_count++;
622 p = (char *)VRTOVA(vreg); /* reg..reg+sz rather than 0..sz */
623 ls = CACHEINFO.c_linesize;
624 i = CACHEINFO.c_nlines;
625 octx = getcontext4m();
626 trapoff();
627 setcontext4m(ctx);
628 for (; --i >= 0; p += ls)
629 sta(p, ASI_IDCACHELFR, 0);
630 setcontext4m(octx);
631 trapon();
632 }
633
634 /*
635 * Flush the given virtual segment from the cache.
636 *
637 * This is also done by writing to each cache line, except that
638 * now the addresses must include the virtual segment number, and
639 * we use the `flush segment' space.
640 *
641 * Again, for hardware, we just write each page (in hw-flush space).
642 */
643 void
644 srmmu_vcache_flush_segment(int vreg, int vseg, int ctx)
645 {
646 int i, ls, octx;
647 char *p;
648
649 vcache_flush_seg.ev_count++;
650 p = (char *)VSTOVA(vreg, vseg); /* seg..seg+sz rather than 0..sz */
651 ls = CACHEINFO.c_linesize;
652 i = CACHEINFO.c_nlines;
653 octx = getcontext4m();
654 trapoff();
655 setcontext4m(ctx);
656 for (; --i >= 0; p += ls)
657 sta(p, ASI_IDCACHELFS, 0);
658 setcontext4m(octx);
659 trapon();
660 }
661
662 /*
663 * Flush the given virtual page from the cache.
664 * (va is the actual address, and must be aligned on a page boundary.)
665 * Again we write to each cache line.
666 */
667 void
668 srmmu_vcache_flush_page(int va, int ctx)
669 {
670 int i, ls, octx;
671 char *p;
672
673 #ifdef DEBUG
674 if (va & PGOFSET)
675 panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
676 #endif
677
678 vcache_flush_pg.ev_count++;
679 p = (char *)va;
680
681 /*
682 * XXX - if called early during bootstrap, we don't have the cache
683 * info yet. Make up a cache line size (double-word aligned)
684 */
685 if ((ls = CACHEINFO.c_linesize) == 0)
686 ls = 8;
687 i = PAGE_SIZE;
688 octx = getcontext4m();
689 trapoff();
690 setcontext4m(ctx);
691 for (; i > 0; p += ls, i -= ls)
692 sta(p, ASI_IDCACHELFP, 0);
693 #if defined(MULTIPROCESSOR)
694 /*
695 * The page flush operation will have caused an MMU table walk
696 * on Hypersparc because the cache is physically tagged. Since the pmap
697 * functions will not always cross-flush it in the MP case (because the
698 * context may not be active on this CPU) we flush the TLB entry now.
699 */
700 /*if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) -- more work than it's worth */
701 sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
702
703 #endif
704 setcontext4m(octx);
705 trapon();
706 }
707
708 /*
709 * Flush entire cache.
710 */
711 void
712 srmmu_cache_flush_all(void)
713 {
714
715 srmmu_vcache_flush_context(0);
716 }
717
718 void
719 srmmu_vcache_flush_range(int va, int len, int ctx)
720 {
721 int i, ls, offset;
722 char *p;
723 int octx;
724
725 /*
726 * XXX - if called early during bootstrap, we don't have the cache
727 * info yet. Make up a cache line size (double-word aligned)
728 */
729 if ((ls = CACHEINFO.c_linesize) == 0)
730 ls = 8;
731
732 vcache_flush_range.ev_count++;
733
734 /* Compute # of cache lines covered by this range */
735 offset = va & (ls - 1);
736 i = len + offset;
737 p = (char *)(va & ~(ls - 1));
738
739 octx = getcontext4m();
740 trapoff();
741 setcontext4m(ctx);
742 for (; i > 0; p += ls, i -= ls)
743 sta(p, ASI_IDCACHELFP, 0);
744
745 #if defined(MULTIPROCESSOR)
746 if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) {
747 /*
748 * See hypersparc comment in srmmu_vcache_flush_page().
749 */
750 offset = va & PGOFSET;
751 i = (offset + len + PGOFSET) >> PGSHIFT;
752
753 va = va & ~PGOFSET;
754 for (; --i >= 0; va += PAGE_SIZE)
755 sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
756 }
757 #endif
758 setcontext4m(octx);
759 trapon();
760 return;
761 }
762
763 /*
764 * Flush a range of virtual addresses (in the current context).
765 *
766 * We choose the best of (context,segment,page) here.
767 */
768
769 void
770 srmmu_cache_flush(void *base, u_int len)
771 {
772 int ctx = getcontext4m();
773 int i, baseoff;
774
775
776 /*
777 * Figure out the most efficient way to flush.
778 *
779 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
780 * in the same number of loop iterations. We can also do the whole
781 * region. If we need to do between 2 and NSEGRG, do the region.
782 * If we need to do two or more regions, just go ahead and do the
783 * whole context. This might not be ideal (e.g., fsck likes to do
784 * 65536-byte reads, which might not necessarily be aligned).
785 *
786 * We could try to be sneaky here and use the direct mapping
787 * to avoid flushing things `below' the start and `above' the
788 * ending address (rather than rounding to whole pages and
789 * segments), but I did not want to debug that now and it is
790 * not clear it would help much.
791 *
792 */
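
	/*
	 * Worked example (ours): a request smaller than the cache itself
	 * takes the per-range flush just below; a larger request that
	 * still fits in one 256 KB SRMMU segment is flushed by segment;
	 * anything wider loops over the covered 16 MB regions.
	 */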
793
794 if (__predict_true(len < CACHEINFO.c_totalsize)) {
795 #if defined(MULTIPROCESSOR)
796 FXCALL3(cpuinfo.sp_vcache_flush_range,
797 cpuinfo.ft_vcache_flush_range,
798 (int)base, len, ctx, CPUSET_ALL);
799 #else
800 cpuinfo.sp_vcache_flush_range((int)base, len, ctx);
801 #endif
802 return;
803 }
804
805 baseoff = (u_int)base & SGOFSET;
806 i = (baseoff + len + SGOFSET) >> SGSHIFT;
807 if (__predict_true(i == 1)) {
808 #if defined(MULTIPROCESSOR)
809 FXCALL3(cpuinfo.sp_vcache_flush_segment,
810 cpuinfo.ft_vcache_flush_segment,
811 VA_VREG(base), VA_VSEG(base), ctx, CPUSET_ALL);
812 #else
813 srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), ctx);
814 #endif
815 return;
816 }
817
818 baseoff = (u_int)base & RGOFSET;
819 i = (baseoff + len + RGOFSET) >> RGSHIFT;
820 while (i--) {
821 #if defined(MULTIPROCESSOR)
822 FXCALL2(cpuinfo.sp_vcache_flush_region,
823 cpuinfo.ft_vcache_flush_region,
824 VA_VREG(base), ctx, CPUSET_ALL);
825 #else
826 srmmu_vcache_flush_region(VA_VREG(base), ctx);
827 #endif
828 base = ((char *)base + NBPRG);
829 }
830 }
831
832 int ms1_cacheflush_magic = 0;
833 #define MS1_CACHEFLUSH_MAGIC ms1_cacheflush_magic
834
835 void
836 ms1_cache_flush(void *base, u_int len)
837 {
838
839 /*
840 * Although the data cache is physically tagged, we still need to
841 * flush it after (if we have a write-through cache) or before
842 * (in case of write-back caches) DMA operations.
843 */
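
	/*
	 * Concretely (our gloss on the note above): with a write-through
	 * cache the worry is stale lines left behind after a device has
	 * written memory, so the flush (really an invalidate) follows the
	 * DMA; with a write-back cache, dirty lines must reach memory
	 * before a device reads it, so the flush precedes the DMA.
	 */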
844
845 #if MS1_CACHEFLUSH_MAGIC
846 if (len <= MS1_CACHEFLUSH_MAGIC) {
847 /*
848 * If the range to be flushed is sufficiently small
849 * invalidate the covered cache lines by hand.
850 *
851 * The MicroSPARC I has a direct-mapped virtually addressed
852 * physically tagged data cache which is organised as
853 * 128 lines of 16 bytes. Virtual address bits [4-10]
854 * select the cache line. The cache tags are accessed
855 * through the standard DCACHE control space using the
856 * same address bits as those used to select the cache
857 * line in the virtual address.
858 *
859 * Note: we don't bother to compare the actual tags
860 * since that would require looking up physical addresses.
861 *
862 * The format of the tags we read from ASI_DCACHE control
863 * space is:
864 *
865 *	 31    27 26            11 10         1 0
866 *	+--------+----------------+------------+-+
867 *	|   xxx  |   PA[26-11]    |    xxx     |V|
868 *	+--------+----------------+------------+-+
869 *
870 * PA: bits 11-26 of the physical address
871 * V: line valid bit
872 */
873 int tagaddr = ((u_int)base & 0x7f0);
874
875 len = roundup(len, 16);
876 while (len != 0) {
877 int tag = lda(tagaddr, ASI_DCACHETAG);
878 if ((tag & 1) == 1) {
879 /* Mark this cache line invalid */
880 sta(tagaddr, ASI_DCACHETAG, 0);
881 }
882 len -= 16;
883 tagaddr = (tagaddr + 16) & 0x7f0;
884 }
885 } else
886 #endif
887 /* Flush entire data cache */
888 sta(0, ASI_DCACHECLR, 0);
889 }
890
891
892 /*
893 * Flush entire cache.
894 */
895 void
896 ms1_cache_flush_all(void)
897 {
898
899 /* Flash-clear both caches */
900 sta(0, ASI_ICACHECLR, 0);
901 sta(0, ASI_DCACHECLR, 0);
902 }
903
904 void
905 hypersparc_cache_flush_all(void)
906 {
907
908 srmmu_vcache_flush_context(getcontext4m());
909 /* Flush instruction cache */
910 hypersparc_pure_vcache_flush();
911 }
912
913 void
914 cypress_cache_flush_all(void)
915 {
916 char *p;
917 int i, ls;
918
919 /* Fill the cache with known read-only content */
920 p = (char *)kernel_text;
921 ls = CACHEINFO.c_linesize;
922 i = CACHEINFO.c_nlines;
923 for (; --i >= 0; p += ls)
924 (*(volatile char *)p);
925 }
926
927
928 void
929 viking_cache_flush(void *base, u_int len)
930 {
931 }
932
933 void
934 viking_pcache_flush_page(paddr_t pa, int invalidate_only)
935 {
936 int set, i;
937
938 /*
939 * The viking's on-chip data cache is 4-way set associative,
940 * consisting of 128 sets, each holding 4 lines of 32 bytes.
941 * Note that one 4096 byte page exactly covers all 128 sets
942 * in the cache.
943 */
944 if (invalidate_only) {
945 u_int pa_tag = (pa >> 12);
946 u_int tagaddr;
947 uint64_t tag;
948
949 /*
950 * Loop over all sets and invalidate all entries tagged
951 * with the given physical address by resetting the cache
952 * tag in ASI_DCACHETAG control space.
953 *
954 * The address format for accessing a tag is:
955 *
956 *	 31  30       27  26                   11     5 4  3 2   0
957 *	+------+-----+------+-------//--------+--------+----+-----+
958 *	| type | xxx | line |       xxx       |  set   | xx |  0  |
959 *	+------+-----+------+-------//--------+--------+----+-----+
960 *
961 * set: the cache set tag to be read (0-127)
962 * line: the line within the set (0-3)
963 * type: 1: read set tag; 2: read physical tag
964 *
965 * The (type 2) tag read from this address is a 64-bit word
966 * formatted as follows:
967 *
968 *	          5         4         4
969 *	 63      6         8         0             23             0
970 *	+-------+-+-------+-+-------+-+-----------+----------------+
971 *	|  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |   PA[35-12]    |
972 *	+-------+-+-------+-+-------+-+-----------+----------------+
973 *
974 * PA: bits 12-35 of the physical address
975 * S: line shared bit (not present on SuperSPARC-II)
976 * D: line dirty bit (not present on SuperSPARC-II)
977 * V: line valid bit
978 */
979
980 #define VIKING_DCACHETAG_S 0x0000010000000000ULL /* line shared bit */
981 #define VIKING_DCACHETAG_D 0x0001000000000000ULL /* line dirty bit */
982 #define VIKING_DCACHETAG_V 0x0100000000000000ULL /* line valid bit */
983 #define VIKING_DCACHETAG_PAMASK 0x0000000000ffffffULL /* PA tag field */
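
		/*
		 * Example (ours, for illustration only): reading the
		 * physical tag of line 2 in set 5 would use the tag
		 * address (2 << 30) | (2 << 26) | (5 << 5), i.e. access
		 * type 2 in bits 31-30, the line number in bits 27-26
		 * and the set number in bits 11-5, exactly as the loop
		 * below builds `tagaddr'.
		 */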
984
985 for (set = 0; set < 128; set++) {
986 /* Set set number and access type */
987 tagaddr = (set << 5) | (2 << 30);
988
989 /* Examine the tag for each line in the set */
990 for (i = 0 ; i < 4; i++) {
991 tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
992 /*
993 * If this is a valid tag and the PA field
994 * matches clear the tag.
995 */
996 if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
997 (tag & VIKING_DCACHETAG_V) != 0)
998 stda(tagaddr | (i << 26),
999 ASI_DCACHETAG, 0);
1000 }
1001 }
1002
1003 } else {
1004 /*
1005 * Force the cache to validate its backing memory
1006 * by displacing all cache lines with known read-only
1007 * content from the start of kernel text.
1008 *
1009 * Note that this thrashes the entire cache. However,
1010 * we currently only need to call upon this code
1011 * once at boot time.
1012 */
1013 for (set = 0; set < 128; set++) {
1014 int *v = (int *)(kernel_text + (set << 5));
1015
1016 /*
1017 * We need to read (2*associativity-1) different
1018 * locations to be sure to displace the entire set.
1019 */
1020 i = 2 * 4 - 1;
1021 while (i--) {
1022 (*(volatile int *)v);
1023 v += 4096;
1024 }
1025 }
1026 }
1027 }
1028 #endif /* SUN4M || SUN4D */
1029
1030
1031 #if defined(MULTIPROCESSOR)
1032 /*
1033 * Cache flushing on multi-processor systems involves sending
1034 * inter-processor messages to flush the cache on each module.
1035 *
1036 * The current context of the originating processor is passed in the
1037 * message. This assumes the allocation of CPU contexts is a global
1038 * operation (remember that the actual context tables for the CPUs
1039 * are distinct).
1040 */
1041
1042 void
1043 smp_vcache_flush_page(int va, int ctx)
1044 {
1045
1046 FXCALL2(cpuinfo.sp_vcache_flush_page, cpuinfo.ft_vcache_flush_page,
1047 va, ctx, CPUSET_ALL);
1048 }
1049
1050 void
1051 smp_vcache_flush_segment(int vr, int vs, int ctx)
1052 {
1053
1054 FXCALL3(cpuinfo.sp_vcache_flush_segment, cpuinfo.ft_vcache_flush_segment,
1055 vr, vs, ctx, CPUSET_ALL);
1056 }
1057
1058 void
1059 smp_vcache_flush_region(int vr, int ctx)
1060 {
1061
1062 FXCALL2(cpuinfo.sp_vcache_flush_region, cpuinfo.ft_vcache_flush_region,
1063 vr, ctx, CPUSET_ALL);
1064 }
1065
1066 void
1067 smp_vcache_flush_context(int ctx)
1068 {
1069
1070 FXCALL1(cpuinfo.sp_vcache_flush_context, cpuinfo.ft_vcache_flush_context,
1071 ctx, CPUSET_ALL);
1072 }
1073 #endif /* MULTIPROCESSOR */
1074