1 /*	$NetBSD: cache.c,v 1.102 2021/01/24 07:36:54 mrg Exp $ */
2 
3 /*
4  * Copyright (c) 1996
5  *	The President and Fellows of Harvard College. All rights reserved.
6  * Copyright (c) 1992, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This software was developed by the Computer Systems Engineering group
10  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
11  * contributed to Berkeley.
12  *
13  * All advertising materials mentioning features or use of this software
14  * must display the following acknowledgement:
15  *	This product includes software developed by Harvard University.
16  *	This product includes software developed by the University of
17  *	California, Lawrence Berkeley Laboratory.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  *
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. All advertising materials mentioning features or use of this software
29  *    must display the following acknowledgement:
30  *	This product includes software developed by Aaron Brown and
31  *	Harvard University.
32  *	This product includes software developed by the University of
33  *	California, Berkeley and its contributors.
34  * 4. Neither the name of the University nor the names of its contributors
35  *    may be used to endorse or promote products derived from this software
36  *    without specific prior written permission.
37  *
38  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  *
50  *	@(#)cache.c	8.2 (Berkeley) 10/30/93
51  *
52  */
53 
54 /*
55  * Cache routines.
56  *
57  * TODO:
58  *	- rework range flush
59  */
60 
61 #include <sys/cdefs.h>
62 __KERNEL_RCSID(0, "$NetBSD: cache.c,v 1.102 2021/01/24 07:36:54 mrg Exp $");
63 
64 #include "opt_multiprocessor.h"
65 #include "opt_sparc_arch.h"
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/kernel.h>
70 
71 #include <uvm/uvm_extern.h>
72 
73 #include <machine/ctlreg.h>
74 #include <machine/pte.h>
75 #include <machine/locore.h>
76 
77 #include <sparc/sparc/asm.h>
78 #include <sparc/sparc/cache.h>
79 #include <sparc/sparc/cpuvar.h>
80 
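/* Event counters for the various flavours of virtual-cache flush. */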
81 struct evcnt vcache_flush_pg =
82 	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","pg");
83 EVCNT_ATTACH_STATIC(vcache_flush_pg);
84 struct evcnt vcache_flush_seg =
85 	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","seg");
86 EVCNT_ATTACH_STATIC(vcache_flush_seg);
87 struct evcnt vcache_flush_reg =
88 	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","reg");
89 EVCNT_ATTACH_STATIC(vcache_flush_reg);
90 struct evcnt vcache_flush_ctx =
91 	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","ctx");
92 EVCNT_ATTACH_STATIC(vcache_flush_ctx);
93 struct evcnt vcache_flush_range =
94 	EVCNT_INITIALIZER(EVCNT_TYPE_MISC,0,"vcfl","rng");
95 EVCNT_ATTACH_STATIC(vcache_flush_range);
96 
97 int cache_alias_dist;		/* Cache anti-aliasing constants */
98 int cache_alias_bits;
99 u_long dvma_cachealign;
100 
101 /*
102  * Enable the cache.
103  * We need to clear out the valid bits first.
104  */
105 void
106 sun4_cache_enable(void)
107 {
108 	u_int i, lim, ls, ts;
109 
110 	cache_alias_bits = CPU_ISSUN4
111 				? CACHE_ALIAS_BITS_SUN4
112 				: CACHE_ALIAS_BITS_SUN4C;
113 	cache_alias_dist = CPU_ISSUN4
114 				? CACHE_ALIAS_DIST_SUN4
115 				: CACHE_ALIAS_DIST_SUN4C;
116 
117 	ls = CACHEINFO.c_linesize;
118 	ts = CACHEINFO.c_totalsize;
119 
120 	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
121 		sta(i, ASI_CONTROL, 0);
122 
123 	stba(AC_SYSENABLE, ASI_CONTROL,
124 	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
125 	CACHEINFO.c_enabled = 1;
126 
127 #ifdef notyet
128 	if (cpuinfo.flags & SUN4_IOCACHE) {
129 		stba(AC_SYSENABLE, ASI_CONTROL,
130 		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
131 		printf("iocache enabled\n");
132 	}
133 #endif
134 }
135 
136 /*
137  * XXX Hammer is a bit too big, here; SUN4D systems only have Viking.
138  */
139 #if defined(SUN4M) || defined(SUN4D)
140 void
141 ms1_cache_enable(void)
142 {
143 	u_int pcr;
144 
145 	cache_alias_dist = uimax(
146 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
147 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
148 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
149 
150 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
151 
152 	/* We "flash-clear" the I/D caches. */
153 	if ((pcr & MS1_PCR_ICE) == 0)
154 		sta(0, ASI_ICACHECLR, 0);
155 	if ((pcr & MS1_PCR_DCE) == 0)
156 		sta(0, ASI_DCACHECLR, 0);
157 
158 	/* Turn on caches */
159 	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);
160 
161 	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
162 
163 	/*
164 	 * When zeroing or copying pages, there might still be entries in
165 	 * the cache, since we don't flush pages from the cache when
166 	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
167 	 * MS1 cache is write-through and not write-allocate, so we can
168 	 * use cacheable access while not displacing cache lines.
169 	 */
170 	CACHEINFO.c_flags |= CACHE_MANDATORY;
171 }
172 
173 void
174 viking_cache_enable(void)
175 {
176 	u_int pcr;
177 
178 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
179 
180 	if ((pcr & VIKING_PCR_ICE) == 0) {
181 		/* I-cache not on; "flash-clear" it now. */
182 		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
183 		sta(0, ASI_ICACHECLR, 0);		/* clear */
184 	}
185 	if ((pcr & VIKING_PCR_DCE) == 0) {
186 		/* D-cache not on: "flash-clear" it. */
187 		sta(0x80000000, ASI_DCACHECLR, 0);
188 		sta(0, ASI_DCACHECLR, 0);
189 	}
190 
191 	/* Turn on caches via MMU */
192 	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);
193 
194 	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;
195 
196 	/* Now turn on MultiCache if it exists */
197 	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
198 		/* Set external cache enable bit in MXCC control register */
199 		stda(MXCC_CTRLREG, ASI_CONTROL,
200 		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
201 		CACHEINFO.c_flags |= CACHE_PAGETABLES; /* Ok to cache PTEs */
202 		CACHEINFO.ec_enabled = 1;
203 	}
204 }
205 
206 void
207 hypersparc_cache_enable(void)
208 {
209 	int i, ls, ts;
210 	u_int pcr, v;
211 	int alias_dist;
212 
213 	/*
214 	 * Setup the anti-aliasing constants and DVMA alignment constraint.
215 	 */
216 	alias_dist = CACHEINFO.c_totalsize;
217 	if (alias_dist > cache_alias_dist) {
218 		cache_alias_dist = alias_dist;
219 		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
220 		dvma_cachealign = cache_alias_dist;
221 	}
222 
223 	ls = CACHEINFO.c_linesize;
224 	ts = CACHEINFO.c_totalsize;
225 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
226 
227 	/* Now reset cache tag memory if cache not yet enabled */
228 	if ((pcr & HYPERSPARC_PCR_CE) == 0)
229 		for (i = 0; i < ts; i += ls)
230 			sta(i, ASI_DCACHETAG, 0);
231 
232 	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
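	/* Flush whatever is currently cached before switching to the new mode */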
233 	hypersparc_cache_flush_all();
234 
235 	/* Enable write-back cache */
236 	pcr |= HYPERSPARC_PCR_CE;
237 	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
238 		pcr |= HYPERSPARC_PCR_CM;
239 
240 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
241 	CACHEINFO.c_enabled = 1;
242 
243 	/* XXX: should add support */
244 	if (CACHEINFO.c_hwflush)
245 		panic("cache_enable: can't handle 4M with hw-flush cache");
246 
247 	/*
248 	 * Enable instruction cache and, on single-processor machines,
249 	 * disable `Unimplemented Flush Traps'.
250 	 */
251 	v = HYPERSPARC_ICCR_ICE | (sparc_ncpus <= 1 ? HYPERSPARC_ICCR_FTD : 0);
252 	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
253 }
254 
255 
256 void
257 swift_cache_enable(void)
258 {
259 	int i, ls, ts;
260 	u_int pcr;
261 
262 	cache_alias_dist = uimax(
263 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
264 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
265 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
266 
267 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
268 
269 	/* Now reset cache tag memory if cache not yet enabled */
270 	ls = CACHEINFO.ic_linesize;
271 	ts = CACHEINFO.ic_totalsize;
272 	if ((pcr & SWIFT_PCR_ICE) == 0)
273 		for (i = 0; i < ts; i += ls)
274 			sta(i, ASI_ICACHETAG, 0);
275 
276 	ls = CACHEINFO.dc_linesize;
277 	ts = CACHEINFO.dc_totalsize;
278 	if ((pcr & SWIFT_PCR_DCE) == 0)
279 		for (i = 0; i < ts; i += ls)
280 			sta(i, ASI_DCACHETAG, 0);
281 
282 	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
283 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
284 	CACHEINFO.c_enabled = 1;
285 }
286 
287 void
288 cypress_cache_enable(void)
289 {
290 	int i, ls, ts;
291 	u_int pcr;
292 	int alias_dist;
293 
294 	alias_dist = CACHEINFO.c_totalsize;
295 	if (alias_dist > cache_alias_dist) {
296 		cache_alias_dist = alias_dist;
297 		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
298 		dvma_cachealign = alias_dist;
299 	}
300 
301 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
302 	pcr &= ~CYPRESS_PCR_CM;
303 
304 	/* Now reset cache tag memory if cache not yet enabled */
305 	ls = CACHEINFO.c_linesize;
306 	ts = CACHEINFO.c_totalsize;
307 	if ((pcr & CYPRESS_PCR_CE) == 0)
308 		for (i = 0; i < ts; i += ls)
309 			sta(i, ASI_DCACHETAG, 0);
310 
311 	pcr |= CYPRESS_PCR_CE;
312 	/* If put in write-back mode, turn it on */
313 	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
314 		pcr |= CYPRESS_PCR_CM;
315 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
316 	CACHEINFO.c_enabled = 1;
317 }
318 
319 void
320 turbosparc_cache_enable(void)
321 {
322 	int i, ls, ts;
323 	u_int pcr, pcf;
324 	/* External cache sizes in KB; see Turbo sparc manual */
325 	static const int ts_ecache_table[8] = {0,256,512,1024,512,1024,1024,0};
326 
327 	cache_alias_dist = uimax(
328 		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
329 		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
330 	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;
331 
332 	pcr = lda(SRMMU_PCR, ASI_SRMMU);
333 
334 	/* Now reset cache tag memory if cache not yet enabled */
335 	ls = CACHEINFO.ic_linesize;
336 	ts = CACHEINFO.ic_totalsize;
337 	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
338 		for (i = 0; i < ts; i += ls)
339 			sta(i, ASI_ICACHETAG, 0);
340 
341 	ls = CACHEINFO.dc_linesize;
342 	ts = CACHEINFO.dc_totalsize;
343 	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
344 		for (i = 0; i < ts; i += ls)
345 			sta(i, ASI_DCACHETAG, 0);
346 
347 	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
348 	sta(SRMMU_PCR, ASI_SRMMU, pcr);
349 
350 	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
351 	if (pcf & TURBOSPARC_PCFG_SE) {
352 		/*
353 		 * Record external cache info. The Turbosparc's second-
354 		 * level cache is physically addressed/tagged and is
355 		 * not exposed by the PROM.
356 		 */
357 		CACHEINFO.ec_totalsize = 1024 *
358 			ts_ecache_table[(pcf & TURBOSPARC_PCFG_SCC)];
359 		CACHEINFO.ec_linesize = 32;
360 	}
361 	if (pcf & TURBOSPARC_PCFG_SNP)
362 		printf(": DVMA coherent ");
363 
364 	CACHEINFO.c_enabled = 1;
365 }
366 #endif /* SUN4M || SUN4D */
367 
368 
369 /*
370  * Note: on the sun4 and sun4c, the cache flush functions ignore the `ctx'
371  * parameter. This can be done since the pmap operations that need
372  * to flush cache lines will already have switched to the proper
373  * context to manipulate the MMU. Hence we can avoid the overhead
374  * of saving and restoring the context here.
375  */
376 
377 /*
378  * Flush the current context from the cache.
379  *
380  * This is done by writing to each cache line in the `flush context'
381  * address space (or, for hardware flush, once to each page in the
382  * hardware flush space, for all cache pages).
383  */
384 void
385 sun4_vcache_flush_context(int ctx)
386 {
387 	char *p;
388 	int i, ls;
389 
390 	vcache_flush_ctx.ev_count++;
391 	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
392 	if (CACHEINFO.c_hwflush) {
393 		ls = PAGE_SIZE;
394 		i = CACHEINFO.c_totalsize >> PGSHIFT;
395 		for (; --i >= 0; p += ls)
396 			sta(p, ASI_HWFLUSHCTX, 0);
397 	} else {
398 		ls = CACHEINFO.c_linesize;
399 		i = CACHEINFO.c_nlines;
400 		for (; --i >= 0; p += ls)
401 			sta(p, ASI_FLUSHCTX, 0);
402 	}
403 }
404 
405 /*
406  * Flush the given virtual region from the cache.
407  *
408  * This is also done by writing to each cache line, except that
409  * now the addresses must include the virtual region number, and
410  * we use the `flush region' space.
411  *
412  * This function is only called on sun4's with 3-level MMUs; there's
413  * no hw-flush space.
414  */
415 void
416 sun4_vcache_flush_region(int vreg, int ctx)
417 {
418 	int i, ls;
419 	char *p;
420 
421 	vcache_flush_reg.ev_count++;
422 	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
423 	ls = CACHEINFO.c_linesize;
424 	i = CACHEINFO.c_nlines;
425 	for (; --i >= 0; p += ls)
426 		sta(p, ASI_FLUSHREG, 0);
427 }
428 
429 /*
430  * Flush the given virtual segment from the cache.
431  *
432  * This is also done by writing to each cache line, except that
433  * now the addresses must include the virtual segment number, and
434  * we use the `flush segment' space.
435  *
436  * Again, for hardware, we just write each page (in hw-flush space).
437  */
438 void
439 sun4_vcache_flush_segment(int vreg, int vseg, int ctx)
440 {
441 	int i, ls;
442 	char *p;
443 
444 	vcache_flush_seg.ev_count++;
445 	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
446 	if (CACHEINFO.c_hwflush) {
447 		ls = PAGE_SIZE;
448 		i = CACHEINFO.c_totalsize >> PGSHIFT;
449 		for (; --i >= 0; p += ls)
450 			sta(p, ASI_HWFLUSHSEG, 0);
451 	} else {
452 		ls = CACHEINFO.c_linesize;
453 		i = CACHEINFO.c_nlines;
454 		for (; --i >= 0; p += ls)
455 			sta(p, ASI_FLUSHSEG, 0);
456 	}
457 }
458 
459 /*
460  * Flush the given virtual page from the cache.
461  * (va is the actual address, and must be aligned on a page boundary.)
462  * Again we write to each cache line.
463  */
464 void
465 sun4_vcache_flush_page(int va, int ctx)
466 {
467 	int i, ls;
468 	char *p;
469 
470 #ifdef DEBUG
471 	if (va & PGOFSET)
472 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
473 #endif
474 
475 	vcache_flush_pg.ev_count++;
476 	p = (char *)va;
477 	ls = CACHEINFO.c_linesize;
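	/* one flush store per cache line within the page */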
478 	i = PAGE_SIZE >> CACHEINFO.c_l2linesize;
479 	for (; --i >= 0; p += ls)
480 		sta(p, ASI_FLUSHPG, 0);
481 }
482 
483 /*
484  * Flush the given virtual page from the cache.
485  * (va is the actual address, and must be aligned on a page boundary.)
486  * This version uses hardware-assisted flush operation and just needs
487  * one write into ASI_HWFLUSHPG space to flush all cache lines.
488  */
489 void
490 sun4_vcache_flush_page_hw(int va, int ctx)
491 {
492 	char *p;
493 
494 #ifdef DEBUG
495 	if (va & PGOFSET)
496 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
497 #endif
498 
499 	vcache_flush_pg.ev_count++;
500 	p = (char *)va;
501 	sta(p, ASI_HWFLUSHPG, 0);
502 }
503 
504 /*
505  * Flush a range of virtual addresses (in the current context).
506  * The first byte is at (base&~PGOFSET) and the last one is just
507  * before byte (base+len).
508  *
509  * We choose the best of (context,segment,page) here.
510  */
511 
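/*
 * Number of pages that span the whole cache.  Flushing this many pages
 * one at a time takes at least as many loop iterations as flushing a
 * whole segment, so it is used as the break-even point below.
 */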
512 #define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / PAGE_SIZE)
513 
514 void
515 sun4_cache_flush(void *base, u_int len)
516 {
517 	int i, ls, baseoff;
518 	char *p;
519 
520 	if (CACHEINFO.c_vactype == VAC_NONE)
521 		return;
522 
523 	/*
524 	 * Figure out how much must be flushed.
525 	 *
526 	 * If we need to do CACHE_FLUSH_MAGIC pages,  we can do a segment
527 	 * in the same number of loop iterations.  We can also do the whole
528 	 * region. If we need to do between 2 and NSEGRG, do the region.
529 	 * If we need to do two or more regions, just go ahead and do the
530 	 * whole context. This might not be ideal (e.g., fsck likes to do
531 	 * 65536-byte reads, which might not necessarily be aligned).
532 	 *
533 	 * We could try to be sneaky here and use the direct mapping
534 	 * to avoid flushing things `below' the start and `above' the
535 	 * ending address (rather than rounding to whole pages and
536 	 * segments), but I did not want to debug that now and it is
537 	 * not clear it would help much.
538 	 *
539 	 * (XXX the magic number 16 is now wrong, must review policy)
540 	 */
541 	baseoff = (int)base & PGOFSET;
542 	i = (baseoff + len + PGOFSET) >> PGSHIFT;
543 
544 	vcache_flush_range.ev_count++;
545 
546 	if (__predict_true(i < CACHE_FLUSH_MAGIC)) {
547 		/* cache_flush_page, for i pages */
548 		p = (char *)((int)base & ~baseoff);
549 		if (CACHEINFO.c_hwflush) {
550 			for (; --i >= 0; p += PAGE_SIZE)
551 				sta(p, ASI_HWFLUSHPG, 0);
552 		} else {
553 			ls = CACHEINFO.c_linesize;
554 			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
555 			for (; --i >= 0; p += ls)
556 				sta(p, ASI_FLUSHPG, 0);
557 		}
558 		return;
559 	}
560 
561 	baseoff = (u_int)base & SGOFSET;
562 	i = (baseoff + len + SGOFSET) >> SGSHIFT;
563 	if (__predict_true(i == 1)) {
564 		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), 0);
565 		return;
566 	}
567 
568 	if (HASSUN4_MMU3L) {
569 		baseoff = (u_int)base & RGOFSET;
570 		i = (baseoff + len + RGOFSET) >> RGSHIFT;
571 		if (i == 1)
572 			sun4_vcache_flush_region(VA_VREG(base), 0);
573 		else
574 			sun4_vcache_flush_context(0);
575 	} else
576 		sun4_vcache_flush_context(0);
577 }
578 
579 
580 #if defined(SUN4M) || defined(SUN4D)
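/*
 * The SRMMU flush routines below temporarily install the target MMU
 * context; traps are disabled (PSR_ET cleared) across the switch so
 * that no interrupt or fault is handled while the borrowed context
 * is current.
 */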
581 #define trapoff()	do { setpsr(getpsr() & ~PSR_ET); } while(0)
582 #define trapon()	do { setpsr(getpsr() | PSR_ET); } while(0)
583 /*
584  * Flush the current context from the cache.
585  *
586  * This is done by writing to each cache line in the `flush context'
587  * address space.
588  */
589 void
590 srmmu_vcache_flush_context(int ctx)
591 {
592 	int i, ls, octx;
593 	char *p;
594 
595 	vcache_flush_ctx.ev_count++;
596 	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
597 	ls = CACHEINFO.c_linesize;
598 	i = CACHEINFO.c_nlines;
599 	octx = getcontext4m();
600 	trapoff();
601 	setcontext4m(ctx);
602 	for (; --i >= 0; p += ls)
603 		sta(p, ASI_IDCACHELFC, 0);
604 	setcontext4m(octx);
605 	trapon();
606 }
607 
608 /*
609  * Flush the given virtual region from the cache.
610  *
611  * This is also done by writing to each cache line, except that
612  * now the addresses must include the virtual region number, and
613  * we use the `flush region' space.
614  */
615 void
616 srmmu_vcache_flush_region(int vreg, int ctx)
617 {
618 	int i, ls, octx;
619 	char *p;
620 
621 	vcache_flush_reg.ev_count++;
622 	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
623 	ls = CACHEINFO.c_linesize;
624 	i = CACHEINFO.c_nlines;
625 	octx = getcontext4m();
626 	trapoff();
627 	setcontext4m(ctx);
628 	for (; --i >= 0; p += ls)
629 		sta(p, ASI_IDCACHELFR, 0);
630 	setcontext4m(octx);
631 	trapon();
632 }
633 
634 /*
635  * Flush the given virtual segment from the cache.
636  *
637  * This is also done by writing to each cache line, except that
638  * now the addresses must include the virtual segment number, and
639  * we use the `flush segment' space.
640  *
641  * Again, for hardware, we just write each page (in hw-flush space).
642  */
643 void
644 srmmu_vcache_flush_segment(int vreg, int vseg, int ctx)
645 {
646 	int i, ls, octx;
647 	char *p;
648 
649 	vcache_flush_seg.ev_count++;
650 	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
651 	ls = CACHEINFO.c_linesize;
652 	i = CACHEINFO.c_nlines;
653 	octx = getcontext4m();
654 	trapoff();
655 	setcontext4m(ctx);
656 	for (; --i >= 0; p += ls)
657 		sta(p, ASI_IDCACHELFS, 0);
658 	setcontext4m(octx);
659 	trapon();
660 }
661 
662 /*
663  * Flush the given virtual page from the cache.
664  * (va is the actual address, and must be aligned on a page boundary.)
665  * Again we write to each cache line.
666  */
667 void
668 srmmu_vcache_flush_page(int va, int ctx)
669 {
670 	int i, ls, octx;
671 	char *p;
672 
673 #ifdef DEBUG
674 	if (va & PGOFSET)
675 		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
676 #endif
677 
678 	vcache_flush_pg.ev_count++;
679 	p = (char *)va;
680 
681 	/*
682 	 * XXX - if called early during bootstrap, we don't have the cache
683 	 *	 info yet. Make up a cache line size (double-word aligned)
684 	 */
685 	if ((ls = CACHEINFO.c_linesize) == 0)
686 		ls = 8;
687 	i = PAGE_SIZE;
688 	octx = getcontext4m();
689 	trapoff();
690 	setcontext4m(ctx);
691 	for (; i > 0; p += ls, i -= ls)
692 		sta(p, ASI_IDCACHELFP, 0);
693 #if defined(MULTIPROCESSOR)
694 	/*
695 	 * The page flush operation will have caused an MMU table walk on
696 	 * Hypersparc, because the cache is physically tagged. Since the pmap
697 	 * functions will not always cross-flush it in the MP case (the context
698 	 * may not be active on this CPU), we flush the TLB entry now.
699 	 */
700 	/*if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) -- more work than it's worth */
701 		sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
702 
703 #endif
704 	setcontext4m(octx);
705 	trapon();
706 }
707 
708 /*
709  * Flush entire cache.
710  */
711 void
712 srmmu_cache_flush_all(void)
713 {
714 
715 	srmmu_vcache_flush_context(0);
716 }
717 
718 void
719 srmmu_vcache_flush_range(int va, int len, int ctx)
720 {
721 	int i, ls, offset;
722 	char *p;
723 	int octx;
724 
725 	/*
726 	 * XXX - if called early during bootstrap, we don't have the cache
727 	 *	 info yet. Make up a cache line size (double-word aligned)
728 	 */
729 	if ((ls = CACHEINFO.c_linesize) == 0)
730 		ls = 8;
731 
732 	vcache_flush_range.ev_count++;
733 
734 	/* Compute # of cache lines covered by this range */
735 	offset = va & (ls - 1);
736 	i = len + offset;
737 	p = (char *)(va & ~(ls - 1));
738 
739 	octx = getcontext4m();
740 	trapoff();
741 	setcontext4m(ctx);
742 	for (; i > 0; p += ls, i -= ls)
743 		sta(p, ASI_IDCACHELFP, 0);
744 
745 #if defined(MULTIPROCESSOR)
746 	if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) {
747 		/*
748 		 * See hypersparc comment in srmmu_vcache_flush_page().
749 		 */
750 		offset = va & PGOFSET;
751 		i = (offset + len + PGOFSET) >> PGSHIFT;
752 
753 		va = va & ~PGOFSET;
754 		for (; --i >= 0; va += PAGE_SIZE)
755 			sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
756 	}
757 #endif
758 	setcontext4m(octx);
759 	trapon();
760 	return;
761 }
762 
763 /*
764  * Flush a range of virtual addresses (in the current context).
765  *
766  * We choose the best of (context,segment,page) here.
767  */
768 
769 void
770 srmmu_cache_flush(void *base, u_int len)
771 {
772 	int ctx = getcontext4m();
773 	int i, baseoff;
774 
775 
776 	/*
777 	 * Figure out the most efficient way to flush.
778 	 *
779 	 * If we need to do CACHE_FLUSH_MAGIC pages,  we can do a segment
780 	 * in the same number of loop iterations.  We can also do the whole
781 	 * region. If we need to do between 2 and NSEGRG, do the region.
782 	 * If we need to do two or more regions, just go ahead and do the
783 	 * whole context. This might not be ideal (e.g., fsck likes to do
784 	 * 65536-byte reads, which might not necessarily be aligned).
785 	 *
786 	 * We could try to be sneaky here and use the direct mapping
787 	 * to avoid flushing things `below' the start and `above' the
788 	 * ending address (rather than rounding to whole pages and
789 	 * segments), but I did not want to debug that now and it is
790 	 * not clear it would help much.
791 	 *
792 	 */
793 
794 	if (__predict_true(len < CACHEINFO.c_totalsize)) {
795 #if defined(MULTIPROCESSOR)
796 		FXCALL3(cpuinfo.sp_vcache_flush_range,
797 			cpuinfo.ft_vcache_flush_range,
798 			(int)base, len, ctx, CPUSET_ALL);
799 #else
800 		cpuinfo.sp_vcache_flush_range((int)base, len, ctx);
801 #endif
802 		return;
803 	}
804 
805 	baseoff = (u_int)base & SGOFSET;
806 	i = (baseoff + len + SGOFSET) >> SGSHIFT;
807 	if (__predict_true(i == 1)) {
808 #if defined(MULTIPROCESSOR)
809 		FXCALL3(cpuinfo.sp_vcache_flush_segment,
810 			cpuinfo.ft_vcache_flush_segment,
811 			VA_VREG(base), VA_VSEG(base), ctx, CPUSET_ALL);
812 #else
813 		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), ctx);
814 #endif
815 		return;
816 	}
817 
818 	baseoff = (u_int)base & RGOFSET;
819 	i = (baseoff + len + RGOFSET) >> RGSHIFT;
820 	while (i--) {
821 #if defined(MULTIPROCESSOR)
822 		FXCALL2(cpuinfo.sp_vcache_flush_region,
823 		       cpuinfo.ft_vcache_flush_region,
824 		       VA_VREG(base), ctx, CPUSET_ALL);
825 #else
826 		srmmu_vcache_flush_region(VA_VREG(base), ctx);
827 #endif
828 		base = ((char *)base + NBPRG);
829 	}
830 }
831 
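/*
 * Size threshold (in bytes) below which ms1_cache_flush() would
 * invalidate individual cache lines rather than flash-clear the whole
 * data cache.  Note that the #if below sees only the (non-macro)
 * identifier, which the preprocessor evaluates as 0, so the per-line
 * path is currently compiled out.
 */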
832 int ms1_cacheflush_magic = 0;
833 #define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic
834 
835 void
836 ms1_cache_flush(void *base, u_int len)
837 {
838 
839 	/*
840 	 * Although the cache is physically tagged, we still need to flush
841 	 * the data cache after (for a write-through cache) or before
842 	 * (for a write-back cache) DMA operations.
843 	 */
844 
845 #if MS1_CACHEFLUSH_MAGIC
846 	if (len <= MS1_CACHEFLUSH_MAGIC) {
847 		/*
848 		 * If the range to be flushed is sufficiently small
849 		 * invalidate the covered cache lines by hand.
850 		 *
851 		 * The MicroSPARC I has a direct-mapped virtually addressed
852 		 * physically tagged data cache which is organised as
853 		 * 128 lines of 16 bytes. Virtual address bits [4-10]
854 		 * select the cache line. The cache tags are accessed
855 		 * through the standard DCACHE control space using the
856 		 * same address bits as those used to select the cache
857 		 * line in the virtual address.
858 		 *
859 		 * Note: we don't bother to compare the actual tags
860 		 * since that would require looking up physical addresses.
861 		 *
862 		 * The format of the tags we read from ASI_DCACHE control
863 		 * space is:
864 		 *
865 		 * 31     27 26            11 10         1 0
866 		 * +--------+----------------+------------+-+
867 		 * |  xxx   |    PA[26-11]   |    xxx     |V|
868 		 * +--------+----------------+------------+-+
869 		 *
870 		 * PA: bits 11-26 of the physical address
871 		 * V:  line valid bit
872 		 */
873 		int tagaddr = ((u_int)base & 0x7f0);
874 
875 		len = roundup(len, 16);
876 		while (len != 0) {
877 			int tag = lda(tagaddr, ASI_DCACHETAG);
878 			if ((tag & 1) == 1) {
879 				/* Mark this cache line invalid */
880 				sta(tagaddr, ASI_DCACHETAG, 0);
881 			}
882 			len -= 16;
883 			tagaddr = (tagaddr + 16) & 0x7f0;
884 		}
885 	} else
886 #endif
887 		/* Flush entire data cache */
888 		sta(0, ASI_DCACHECLR, 0);
889 }
890 
891 
892 /*
893  * Flush entire cache.
894  */
895 void
896 ms1_cache_flush_all(void)
897 {
898 
899 	/* Flash-clear both caches */
900 	sta(0, ASI_ICACHECLR, 0);
901 	sta(0, ASI_DCACHECLR, 0);
902 }
903 
904 void
905 hypersparc_cache_flush_all(void)
906 {
907 
908 	srmmu_vcache_flush_context(getcontext4m());
909 	/* Flush instruction cache */
910 	hypersparc_pure_vcache_flush();
911 }
912 
913 void
914 cypress_cache_flush_all(void)
915 {
916 	char *p;
917 	int i, ls;
918 
919 	/* Fill the cache with known read-only content */
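	/* Reading a cache-sized span of read-only text displaces every line;
	   dirty lines in a write-back cache get written back in the process. */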
920 	p = (char *)kernel_text;
921 	ls = CACHEINFO.c_linesize;
922 	i = CACHEINFO.c_nlines;
923 	for (; --i >= 0; p += ls)
924 		(*(volatile char *)p);
925 }
926 
927 
928 void
929 viking_cache_flush(void *base, u_int len)
930 {
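	/*
	 * Nothing to do: the Viking's caches are physically tagged (see
	 * viking_pcache_flush_page() below), so there are no virtual
	 * aliases to flush here.
	 */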
931 }
932 
933 void
934 viking_pcache_flush_page(paddr_t pa, int invalidate_only)
935 {
936 	int set, i;
937 
938 	/*
939 	 * The viking's on-chip data cache is 4-way set associative,
940 	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
941 	 * Note that one 4096 byte page exactly covers all 128 sets
942 	 * in the cache.
943 	 */
944 	if (invalidate_only) {
945 		u_int pa_tag = (pa >> 12);
946 		u_int tagaddr;
947 		uint64_t tag;
948 
949 		/*
950 		 * Loop over all sets and invalidate all entries tagged
951 		 * with the given physical address by resetting the cache
952 		 * tag in ASI_DCACHETAG control space.
953 		 *
954 		 * The address format for accessing a tag is:
955 		 *
956 		 * 31   30      27   26                  11      5 4  3 2    0
957 		 * +------+-----+------+-------//--------+--------+----+-----+
958 		 * | type | xxx | line |       xxx       |  set   | xx | 0   |
959 		 * +------+-----+------+-------//--------+--------+----+-----+
960 		 *
961 		 * set:  the cache set tag to be read (0-127)
962 		 * line: the line within the set (0-3)
963 		 * type: 1: read set tag; 2: read physical tag
964 		 *
965 		 * The (type 2) tag read from this address is a 64-bit word
966 		 * formatted as follows:
967 		 *
968 		 *          5         4         4
969 		 * 63       6         8         0            23               0
970 		 * +-------+-+-------+-+-------+-+-----------+----------------+
971 		 * |  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |    PA[35-12]   |
972 		 * +-------+-+-------+-+-------+-+-----------+----------------+
973 		 *
974 		 * PA: bits 12-35 of the physical address
975 		 * S:  line shared bit (not present on SuperSPARC-II)
976 		 * D:  line dirty bit (not present on SuperSPARC-II)
977 		 * V:  line valid bit
978 		 */
979 
980 #define VIKING_DCACHETAG_S	0x0000010000000000ULL	/* line shared bit */
981 #define VIKING_DCACHETAG_D	0x0001000000000000ULL	/* line dirty bit */
982 #define VIKING_DCACHETAG_V	0x0100000000000000ULL	/* line valid bit */
983 #define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffULL	/* PA tag field */
984 
985 		for (set = 0; set < 128; set++) {
986 			/* Set set number and access type */
987 			tagaddr = (set << 5) | (2 << 30);
988 
989 			/* Examine the tag for each line in the set */
990 			for (i = 0 ; i < 4; i++) {
991 				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
992 				/*
993 				 * If this is a valid tag and the PA field
994 				 * matches clear the tag.
995 				 */
996 				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
997 				    (tag & VIKING_DCACHETAG_V) != 0)
998 					stda(tagaddr | (i << 26),
999 					     ASI_DCACHETAG, 0);
1000 			}
1001 		}
1002 
1003 	} else {
1004 		/*
1005 		 * Force the cache to validate its backing memory
1006 		 * by displacing all cache lines with known read-only
1007 		 * content from the start of kernel text.
1008 		 *
1009 		 * Note that this thrashes the entire cache. However,
1010 		 * we currently only need to call upon this code
1011 		 * once at boot time.
1012 		 */
1013 		for (set = 0; set < 128; set++) {
1014 			int *v = (int *)(kernel_text + (set << 5));
1015 
1016 			/*
1017 			 * We need to read (2*associativity-1) different
1018 			 * locations to be sure to displace the entire set.
1019 			 */
1020 			i = 2 * 4 - 1;
1021 			while (i--) {
1022 				(*(volatile int *)v);
1023 				v += 4096;
1024 			}
1025 		}
1026 	}
1027 }
1028 #endif /* SUN4M || SUN4D */
1029 
1030 
1031 #if defined(MULTIPROCESSOR)
1032 /*
1033  * Cache flushing on multi-processor systems involves sending
1034  * inter-processor messages to flush the cache on each module.
1035  *
1036  * The current context of the originating processor is passed in the
1037  * message. This assumes the allocation of CPU contexts is a global
1038  * operation (remember that the actual context tables for the CPUs
1039  * are distinct).
1040  */
1041 
1042 void
1043 smp_vcache_flush_page(int va, int ctx)
1044 {
1045 
1046 	FXCALL2(cpuinfo.sp_vcache_flush_page, cpuinfo.ft_vcache_flush_page,
1047 		va, ctx, CPUSET_ALL);
1048 }
1049 
1050 void
1051 smp_vcache_flush_segment(int vr, int vs, int ctx)
1052 {
1053 
1054 	FXCALL3(cpuinfo.sp_vcache_flush_segment, cpuinfo.ft_vcache_flush_segment,
1055 		vr, vs, ctx, CPUSET_ALL);
1056 }
1057 
1058 void
1059 smp_vcache_flush_region(int vr, int ctx)
1060 {
1061 
1062 	FXCALL2(cpuinfo.sp_vcache_flush_region, cpuinfo.ft_vcache_flush_region,
1063 		vr, ctx, CPUSET_ALL);
1064 }
1065 
1066 void
1067 smp_vcache_flush_context(int ctx)
1068 {
1069 
1070 	FXCALL1(cpuinfo.sp_vcache_flush_context, cpuinfo.ft_vcache_flush_context,
1071 		ctx, CPUSET_ALL);
1072 }
1073 #endif /* MULTIPROCESSOR */
1074