/*	$NetBSD: cache_r5k.c,v 1.9 2003/07/15 02:43:37 lukem Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_r5k.c,v 1.9 2003/07/15 02:43:37 lukem Exp $");

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/cache_r5k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32 bytes/line),
 * we handle that here, too.  Note that for the R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity. If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect. These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */

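/*
 * Round/truncate addresses to 16-byte and 32-byte cache line boundaries.
 */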
#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

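/*
 * Allow MIPS III instructions (such as CACHE) in the code that follows.
 */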
__asm(".set mips3");

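/*
 * Invalidate the entire primary instruction cache by index, pushing
 * the primary data cache out to memory first.
 */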
void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

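/*
 * Invalidate the given range of the primary instruction cache with
 * Hit ops, writing the corresponding data cache range back first.
 */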
void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

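/*
 * As above, but the range may not be mapped, so Index ops are used
 * on both ways of the primary instruction cache.
 */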
void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva, orig_va;

	orig_va = va;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va   += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(  va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va   += 32;
		w2va += 32;
	}
}

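/*
 * Write back and invalidate the entire primary data cache by index
 * (16-byte and 32-byte line variants follow).
 */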
void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

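/*
 * R4600 v1.x: per erratum 18 above, interrupts are blocked and each
 * CACHE op is preceded by four non-memory (nop) instructions.
 */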
void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

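/*
 * R4600 v2.x: per the v2.0 erratum above, a load from uncached (KSEG1)
 * space is used to drain the data cache refill buffer before the
 * CACHE ops.
 */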
void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

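/*
 * VR4131 rev 1.x: the write-back-invalidate is done as separate
 * Hit_Writeback and Hit_Invalidate passes, presumably to work around
 * a bug in that revision's Hit_Writeback_Invalidate_D operation.
 */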
void
vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va   += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(  va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va   += 16;
		w2va += 16;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va   += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(  va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va   += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Drain the data cache refill buffer with an uncached load
	 * before each burst of CACHE ops (see the v2.0 erratum above).
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Drain the data cache refill buffer with an uncached load
	 * before each burst of CACHE ops (see the v2.0 erratum above).
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 *
 */

__asm(".set mips3");

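/*
 * Page_Invalidate_S (0x17) invalidates an entire 4KB page of the
 * secondary cache (128 32-byte lines) with a single CACHE op.  Since
 * the secondary cache is write-through, invalidation is all that is
 * needed for "wbinv".
 */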
#define R5K_Page_Invalidate_S   0x17

void
r5k_sdcache_wbinv_all(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_sdcache_size;

	while (va < eva) {
		cache_op_r4k_line(va, R5K_Page_Invalidate_S);
		va += (128 * 32);
	}
}

/* XXX: want wbinv_range_index here instead? */
void
r5k_sdcache_wbinv_rangeall(vaddr_t va, vsize_t size)
{
	r5k_sdcache_wbinv_all();
}

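/*
 * Round/truncate addresses to secondary cache page boundaries
 * (128 lines * 32 bytes = 4KB).
 */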
#define	round_page(x)		(((x) + (128 * 32 - 1)) & ~(128 * 32 - 1))
#define	trunc_page(x)		((x) & ~(128 * 32 - 1))

void
r5k_sdcache_wbinv_range(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_page(va + size);
	va = trunc_page(va);

	while (va < eva) {
		cache_op_r4k_line(va, R5K_Page_Invalidate_S);
		va += (128 * 32);
	}
}

void
r5k_sdcache_wb_range(vaddr_t va, vsize_t size)
{
	/* Write-through cache, no need to WB */
}