/*	$NetBSD: cache_r5k.c,v 1.2 2001/11/14 18:26:23 thorpej Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/locore.h>
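
/*
 * <mips/cache_r4k.h> provides the CACHE-op primitives used below:
 * cache_op_r4k_line() for a single line, plus the unrolled
 * cache_r4k_op_32lines_32() and cache_r4k_op_16lines_32_2way() batches.
 */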

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32 bytes/line),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity. If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect. These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
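
/*
 * Both workarounds reduce to simple instruction patterns; as an
 * illustrative sketch only (the real loops are in the r4600v1_*()
 * and r4600v2_*() routines below):
 *
 *	// v1.x: pad with four non-load/store instructions.
 *	__asm __volatile("nop; nop; nop; nop;");
 *	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 *	// v2.x: drain the refill buffer with an uncached load first.
 *	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
 *	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 */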

#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)
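
/*
 * Both caches use 32-byte lines; e.g. round_line(0x1021) == 0x1040
 * and trunc_line(0x1021) == 0x1020.
 */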

__asm(".set mips3");	/* allow MIPS III instructions (e.g. CACHE) in inline asm */

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

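	/* Each pass below covers 32 lines of 32 bytes (1KB). */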
	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
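	/* The second way starts one way-size above the first. */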
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

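	/*
	 * Mask interrupts: the v1.7 erratum requires that nothing else
	 * touch the dcache around these CACHE ops.
	 */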
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
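		/* Four non-load/store instructions, per erratum 18 above. */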
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

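	/*
	 * The uncached loads below drain the dcache refill buffer,
	 * per the v2.0 erratum above.
	 */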
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * The uncached load before each blast of big cache chunks
	 * drains the dcache refill buffer, per the v2.0 erratum above.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * The uncached load before each blast of big cache chunks
	 * drains the dcache refill buffer, per the v2.0 erratum above.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}
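
/*
 * A sketch of how these routines are typically hooked up; the
 * mips_cache_ops field names below are from <mips/cache.h> and the
 * assignment site (mips_config_cache() in cache.c) is recalled from
 * memory, shown for illustration only:
 *
 *	mips_cache_ops.mco_icache_sync_all = r5k_icache_sync_all_32;
 *	mips_cache_ops.mco_pdcache_wbinv_range =
 *	    r4600v2_pdcache_wbinv_range_32;	(on a v2.x R4600)
 */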

#undef round_line
#undef trunc_line