/*	$NetBSD: cache_r5k.c,v 1.4 2002/01/07 07:43:52 shin Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32 bytes/line),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity. If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect. These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
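
/*
 * The two errata above show up below as two small code patterns, sketched
 * here for reference; the snippets simply mirror what the r4600v1_* and
 * r4600v2_* routines in this file do, they add nothing new:
 *
 *	// R4600 v1.x: pad with four non-load/store instructions
 *	__asm __volatile("nop; nop; nop; nop;");
 *	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 *	// R4600 v2.0: drain the refill buffer with an uncached load first
 *	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
 *	cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 */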

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)
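
/*
 * Example (illustrative values only): with the 32-byte-line macros,
 * trunc_line(0xa0000427) == 0xa0000420 and round_line(0xa0000427) ==
 * 0xa0000440, so a [va, va + size) range is widened to whole cache lines.
 */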

__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */
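	/*
	 * (Sketch of the assumption behind this: for the 2-way primary
	 * caches handled here, an Index op selects the way from the
	 * address bits just above the way size, so walking va linearly
	 * over the full mips_picache_size covers every line in both
	 * ways -- the same split the *_range_index routines below make
	 * explicit with w2va = va + way_size.)
	 */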

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;
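	/*
	 * (Illustrative example, not a fixed configuration: with a 32 KB,
	 * 2-way icache, the way size is 16 KB, so if the index bits select
	 * offset 0x1000 then va == 0x80001000 and w2va == 0x80005000 --
	 * the same line index in way 0 and way 1 respectively.)
	 */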

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}
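
	/*
	 * (Illustrative only: with an 8 KB primary dcache, any request of
	 * 8 KB or more takes the full-cache path above instead of the
	 * slow per-line loop below.)
	 */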

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
		w2va += 16;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line