/*	$NetBSD: cache_r5k.c,v 1.21 2020/06/14 15:12:56 tsutsui Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_r5k.c,v 1.21 2020/06/14 15:12:56 tsutsui Exp $");

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/cache_r5k.h>
#include <mips/locore.h>
/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32b/l),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
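
/*
 * The r4600v1_* routines below implement the v1.7 workaround by issuing
 * four non-load/store instructions (nops) before each CACHE op, and the
 * r4600v2_* routines implement the v2.0 workaround with a dummy uncached
 * (KSEG1) load before each run of CACHE ops.
 */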

#define	round_line16(x)		round_line(x, 16)
#define	trunc_line16(x)		trunc_line(x, 16)
#define	round_line32(x)		round_line(x, 32)
#define	trunc_line32(x)		trunc_line(x, 32)
#define	round_line(x,n)		(((x) + (register_t)(n) - 1) & -(register_t)(n))
#define	trunc_line(x,n)		((x) & -(register_t)(n))
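
/*
 * E.g. with 32-byte lines, round_line32(0x80001005) == 0x80001020 and
 * trunc_line32(0x80001005) == 0x80001000, so a [va, va + size) range is
 * widened to whole cache lines.
 */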

__asm(".set mips3");

void
r5k_picache_sync_all(void)
{
	struct mips_cache_info * const mci = &mips_cache_info;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */
	mips_intern_dcache_sync_all();
	__asm volatile("sync");
	mips_intern_icache_sync_range_index(MIPS_KSEG0_START,
	    mci->mci_picache_size);
}

void
r5k_picache_sync_range(register_t va, vsize_t size)
{

	mips_intern_dcache_sync_range(va, size);
	mips_intern_icache_sync_range(va, size);
}

void
r5k_picache_sync_range_index(vaddr_t va, vsize_t size)
{
	struct mips_cache_info * const mci = &mips_cache_info;
	const size_t ways = mci->mci_picache_ways;
	const size_t line_size = mci->mci_picache_line_size;
	const size_t way_size = mci->mci_picache_way_size;
	const size_t way_mask = way_size - 1;
	vaddr_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & way_mask);
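	/*
	 * For example (cache sizes are illustrative): with a 2-way, 32KB
	 * icache, way_size is 16KB and way_mask is 0x3fff, so a va of
	 * 0x00654321 keeps only its index bits (0x00654321 & 0x3fff ==
	 * 0x0321) and the Index ops are issued on KSEG0 address 0x80000321.
	 */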

	eva = round_line(va + size, line_size);
	va = trunc_line(va, line_size);
	size = eva - va;

	/*
	 * If we are going to flush at least a full way's worth (i.e. the
	 * stride between ways), we are flushing everything.
	 */
	if (size >= way_size) {
		r5k_picache_sync_all();
		return;
	}

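	/*
	 * Hit the same index range in each way: stepping the KSEG0 address
	 * by way_size moves the Index ops to the corresponding lines of the
	 * next way.
	 */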
	for (size_t way = 0; way < ways; way++) {
		mips_intern_dcache_sync_range_index(va, size);
		mips_intern_icache_sync_range_index(va, size);
		va += way_size;
		eva += way_size;
	}
}

void
r5k_pdcache_wbinv_all(void)
{
	struct mips_cache_info * const mci = &mips_cache_info;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */
	mips_intern_pdcache_wbinv_range_index(MIPS_KSEG0_START,
	    mci->mci_pdcache_size);
}

void
r5k_pdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{
	struct mips_cache_info * const mci = &mips_cache_info;
	const size_t ways = mci->mci_pdcache_ways;
	const size_t line_size = mci->mci_pdcache_line_size;
	const vaddr_t way_size = mci->mci_pdcache_way_size;
	const vaddr_t way_mask = way_size - 1;
	vaddr_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & way_mask);
	eva = round_line(va + size, line_size);
	va = trunc_line(va, line_size);
	size = eva - va;

	/*
	 * If we are going to flush at least a full way's worth, we are
	 * flushing everything.
	 */
	if (size >= way_size) {
		mips_intern_pdcache_wbinv_range_index(MIPS_KSEG0_START,
		    mci->mci_pdcache_size);
		return;
	}

	/*
	 * Invalidate each way.  If the address range wraps past the end
	 * of a way, we will touch lines in two adjacent ways, but that
	 * works out since the last way wraps back around into the first.
	 */
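	/*
	 * (Illustrative example: with 2 ways, a range starting one line
	 * before the end of the way-0 index window touches the last line of
	 * way 0 and the first line of way 1 on the first pass; on the second
	 * pass the address past the last way aliases back to the first, so
	 * every targeted index in both ways still gets hit.)
	 */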
	for (size_t way = 0; way < ways; way++) {
		mips_intern_pdcache_wbinv_range_index(va, size);
		va += way_size;
		eva += way_size;
	}
}

void
r4600v1_pdcache_wbinv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if (eva - va >= mips_cache_info.mci_pdcache_size) {
		r5k_pdcache_wbinv_all();
		return;
	}

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

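	/*
	 * Per the v1.7 erratum quoted above, each Hit op must be preceded
	 * by at least four instructions that are not loads or stores; the
	 * four nops below satisfy that, and interrupts are disabled so
	 * nothing else touches the dcache in between.
	 */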
	while (va < eva) {
		__asm volatile("nop; nop; nop; nop");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

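	/*
	 * Per the v2.0 erratum quoted above, the CACHE ops must be
	 * separated from any potential dcache miss by an uncached load;
	 * the dummy KSEG1 read before each 1KB burst (32 lines of 32
	 * bytes) empties the refill/response buffer.
	 */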
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
vr4131v1_pdcache_wbinv_range_16(register_t va, vsize_t size)
{
	register_t eva = round_line16(va + size);

	va = trunc_line16(va);

	for (; (eva - va) >= (32 * 16); va += (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	for (; va < eva; va += 16) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}
}

void
r4600v1_pdcache_inv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; va < eva; va += 32) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v1_pdcache_wb_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; va < eva; va += 32) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	mips_cp0_status_write(ostatus);
}

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 *
 */
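
/*
 * Because the secondary cache is write-through, there is never dirty data
 * to write back; the "wbinv" operations below therefore only need to
 * invalidate.  Invalidation is done with the R5000 Page_Invalidate_S CACHE
 * op, which operates on R5K_SC_PAGESIZE bytes of the secondary cache at a
 * time.
 */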

__asm(".set mips3");

void
r5k_sdcache_wbinv_all(void)
{

	r5k_sdcache_wbinv_range(MIPS_PHYS_TO_KSEG0(0), mips_cache_info.mci_sdcache_size);
}

void
r5k_sdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (mips_cache_info.mci_sdcache_size - 1));
	r5k_sdcache_wbinv_range((intptr_t)va, size);
}

void
r5k_sdcache_wbinv_range(register_t va, vsize_t size)
{
	uint32_t ostatus, taglo;
	register_t eva = mips_r5k_round_page(va + size);

	va = mips_r5k_trunc_page(va);

	ostatus = mips_cp0_status_read();
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	__asm volatile("mfc0 %0, $28" : "=r"(taglo));
	__asm volatile("mtc0 $0, $28");
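	/*
	 * Note: $28 is the CP0 TagLo register.  Its previous value is saved
	 * and it is cleared here, presumably so the Page_Invalidate_S ops
	 * below write zero (invalid) tags; it is restored once the loop is
	 * done.
	 */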

	for (; va < eva; va += R5K_SC_PAGESIZE) {
		cache_op_r4k_line(va, CACHEOP_R5K_Page_Invalidate_S);
	}

	mips_cp0_status_write(ostatus);
	__asm volatile("mtc0 %0, $28; nop" :: "r"(taglo));
}