/*	$NetBSD: cache_r5k.c,v 1.8 2003/03/08 04:43:25 rafal Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/cache_r5k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32-byte lines),
 * we handle that here, too.  Note that for the R4600 we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */

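/*
 * Round/truncate addresses to the 16- and 32-byte cache line sizes
 * handled below.
 */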
#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

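/*
 * The CACHE instruction is a MIPS III op; ".set mips3" lets the
 * assembler accept it in the cacheop macros regardless of the ISA
 * level the rest of the kernel is built for.
 */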
__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

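	/*
	 * Write the dcache back first so the icache refetches
	 * up-to-date instructions from memory.
	 */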
	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva, orig_va;

	orig_va = va;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
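	/* w2va is the same cache index in the second of the two ways. */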
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va   += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(  va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va   += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

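	/*
	 * Per the v1.7 erratum above: precede each Hit op with at
	 * least four non-load/store instructions (the nops), and keep
	 * interrupts off so nothing else touches the dcache meanwhile.
	 */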
	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

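	/*
	 * Per the v2.0 erratum above: a load from an uncached (KSEG1)
	 * address before each burst of Hit ops empties the dcache
	 * refill/response buffer.
	 */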
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

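	/*
	 * On the VR4131 rev 1.x, write each line back and invalidate it
	 * with two separate Hit ops rather than the combined
	 * Hit_Writeback_Invalidate_D, which is presumably what this
	 * variant works around on that revision.
	 */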
	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va   += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(  va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va   += 16;
		w2va += 16;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va   += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(  va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va   += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 *
 */


__asm(".set mips3");

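/*
 * Page_Invalidate_S invalidates a page-sized chunk of the (write-through)
 * secondary cache per CACHE op, which is why the loops below step by
 * 128 lines of 32 bytes (4KB) at a time.
 */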
#define R5K_Page_Invalidate_S   0x17

void
r5k_sdcache_wbinv_all(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_sdcache_size;

	while (va < eva) {
		cache_op_r4k_line(va, R5K_Page_Invalidate_S);
		va += (128 * 32);
	}
}

/* XXX: want wbinv_range_index here instead? */
void
r5k_sdcache_wbinv_rangeall(vaddr_t va, vsize_t size)
{
	r5k_sdcache_wbinv_all();
}

#define	round_page(x)		(((x) + (128 * 32 - 1)) & ~(128 * 32 - 1))
#define	trunc_page(x)		((x) & ~(128 * 32 - 1))

void
r5k_sdcache_wbinv_range(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_page(va + size);
	va = trunc_page(va);

	while (va < eva) {
		cache_op_r4k_line(va, R5K_Page_Invalidate_S);
		va += (128 * 32);
	}
}

void
r5k_sdcache_wb_range(vaddr_t va, vsize_t size)
{
	/* Write-through cache, no need to WB */
}
639