/*	$NetBSD: cache_sh4.c,v 1.25 2020/07/25 23:38:48 uwe Exp $	*/

/*-
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by UCHIYAMA Yasushi.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_sh4.c,v 1.25 2020/07/25 23:38:48 uwe Exp $");

#include "opt_cache.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sh3/cache.h>
#include <sh3/cache_sh4.h>
#include <sh3/vmparam.h>

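/*
 * round_line()/trunc_line() align an address range to the 32-byte
 * cache line size, e.g. round_line(0x1005) == 0x1020 and
 * trunc_line(0x1005) == 0x1000.
 */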
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

void sh4_icache_sync_all(void);
void sh4_icache_sync_range(vaddr_t, vsize_t);
void sh4_icache_sync_range_index(vaddr_t, vsize_t);
void sh4_dcache_wbinv_all(void);
void sh4_dcache_wbinv_range(vaddr_t, vsize_t);
void sh4_dcache_wbinv_range_index(vaddr_t, vsize_t);
void sh4_dcache_inv_range(vaddr_t, vsize_t);
void sh4_dcache_wb_range(vaddr_t, vsize_t);

/* EMODE */
void sh4_emode_icache_sync_all(void);
void sh4_emode_icache_sync_range_index(vaddr_t, vsize_t);
void sh4_emode_dcache_wbinv_all(void);
void sh4_emode_dcache_wbinv_range_index(vaddr_t, vsize_t);

/* Must be inlined because we "call" them while running on P2 */
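/*
 * (P1 is the cached 1:1 kernel segment, P2 the uncached alias of the
 * same physical memory.  A real call made while executing on P2 would
 * branch back to the helpers' P1 addresses and fetch through the very
 * cache being operated on, so they are forced inline instead.)
 */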
static inline void cache_sh4_op_line_32(vaddr_t, vaddr_t, uint32_t,
    uint32_t) __attribute__((always_inline));
static inline void cache_sh4_op_8lines_32(vaddr_t, vaddr_t, uint32_t,
    uint32_t) __attribute__((always_inline));
static inline void cache_sh4_emode_op_line_32(vaddr_t, vaddr_t,
    uint32_t, uint32_t, uint32_t) __attribute__((always_inline));
static inline void cache_sh4_emode_op_8lines_32(vaddr_t, vaddr_t,
    uint32_t, uint32_t, uint32_t) __attribute__((always_inline));


void
sh4_cache_config(void)
{
	int icache_size;
	int dcache_size;
	int ways;
	uint32_t r;

	/* Determine cache size */
	switch (cpu_product) {
	default:
		/* FALLTHROUGH */
	case CPU_PRODUCT_7750:
	case CPU_PRODUCT_7750S:
	case CPU_PRODUCT_7751:
#if defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
#endif
		icache_size = SH4_ICACHE_SIZE;
		dcache_size = SH4_DCACHE_SIZE;
		ways = 1;
		r = SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;

#if !defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
		icache_size = SH4_EMODE_ICACHE_SIZE;
		dcache_size = SH4_EMODE_DCACHE_SIZE;
		ways = 2;
		r = SH4_CCR_EMODE|SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;
#endif

	/*
	 * The ST40 cache sizes can be customized for each product.
	 * Refer to product-specific documentation for the cache sizes.
	 */
	case CPU_PRODUCT_STX7105:
		icache_size = 32 * 1024;
		dcache_size = 32 * 1024;
		ways = 2;
		r = SH4_CCR_EMODE|SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;
	}
#if defined(SH4_CACHE_DISABLE_ICACHE)
	r &= ~SH4_CCR_ICE;
#endif
#if defined(SH4_CACHE_DISABLE_DCACHE)
	r &= ~SH4_CCR_OCE;
#endif
#if defined(SH4_CACHE_WB_U0_P0_P3)
	r &= ~SH4_CCR_WT;
#endif
#if defined(SH4_CACHE_WB_P1)
	r |= SH4_CCR_CB;
#endif

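	/*
	 * Per the SH7750-series manuals, CCR should only be modified by
	 * code running in the uncached P2 area: flush the current cache
	 * contents, write ICI|OCI to invalidate both arrays, then set
	 * the new configuration before dropping back to P1.
	 */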
	RUN_P2;
	if (r & SH4_CCR_EMODE)
		SH4_EMODE_CACHE_FLUSH();
	else
		SH4_CACHE_FLUSH();
	_reg_write_4(SH4_CCR, SH4_CCR_ICI|SH4_CCR_OCI);
	_reg_write_4(SH4_CCR, r);
	RUN_P1;

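	/* Re-read CCR so the flags below describe what was actually set. */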
	r = _reg_read_4(SH4_CCR);

	sh_cache_unified = 0;
	sh_cache_enable_icache = (r & SH4_CCR_ICE);
	sh_cache_enable_dcache = (r & SH4_CCR_OCE);
	sh_cache_ways = ways;
	sh_cache_line_size = SH4_CACHE_LINESZ;
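	/*
	 * The operand cache is virtually indexed, so virtual addresses
	 * that differ within one way's worth of index bits above the
	 * page offset can alias; sh_cache_alias_mask records those bits
	 * and sh_cache_prefer_mask is used when choosing preferred
	 * virtual addresses to avoid such aliases.
	 */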
	sh_cache_alias_mask = (dcache_size / ways - 1) & ~PAGE_MASK;
	sh_cache_prefer_mask = (dcache_size / ways - 1);
	sh_cache_write_through_p0_u0_p3 = (r & SH4_CCR_WT);
	sh_cache_write_through_p1 = !(r & SH4_CCR_CB);
	sh_cache_write_through = sh_cache_write_through_p0_u0_p3 &&
	    sh_cache_write_through_p1;
	sh_cache_ram_mode = (r & SH4_CCR_ORA);
	sh_cache_index_mode_icache = (r & SH4_CCR_IIX);
	sh_cache_index_mode_dcache = (r & SH4_CCR_OIX);

	sh_cache_size_dcache = dcache_size;
	if (sh_cache_ram_mode)
		sh_cache_size_dcache /= 2;
	sh_cache_size_icache = icache_size;

	sh_cache_ops._icache_sync_all = sh4_icache_sync_all;
	sh_cache_ops._icache_sync_range = sh4_icache_sync_range;
	sh_cache_ops._icache_sync_range_index = sh4_icache_sync_range_index;

	sh_cache_ops._dcache_wbinv_all = sh4_dcache_wbinv_all;
	sh_cache_ops._dcache_wbinv_range = sh4_dcache_wbinv_range;
	sh_cache_ops._dcache_wbinv_range_index = sh4_dcache_wbinv_range_index;
	sh_cache_ops._dcache_inv_range = sh4_dcache_inv_range;
	sh_cache_ops._dcache_wb_range = sh4_dcache_wb_range;

	switch (cpu_product) {
	case CPU_PRODUCT_7750: /* FALLTHROUGH */
	case CPU_PRODUCT_7750S:
		/* memory mapped d$ can only be accessed from p2 */
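		/*
		 * SH3_P1SEG_TO_P2SEG() remaps the handlers' cached P1
		 * addresses into the uncached P2 segment, so on these
		 * CPUs the whole index operation executes uncached.
		 */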
		sh_cache_ops._dcache_wbinv_all
		    = (void *)SH3_P1SEG_TO_P2SEG(sh4_dcache_wbinv_all);
		sh_cache_ops._dcache_wbinv_range_index
		    = (void *)SH3_P1SEG_TO_P2SEG(sh4_dcache_wbinv_range_index);
		break;

#if !defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
		if (!(r & SH4_CCR_EMODE)) {
			break;
		}
		sh_cache_ops._icache_sync_all = sh4_emode_icache_sync_all;
		sh_cache_ops._icache_sync_range_index = sh4_emode_icache_sync_range_index;
		sh_cache_ops._dcache_wbinv_all = sh4_emode_dcache_wbinv_all;
		sh_cache_ops._dcache_wbinv_range_index = sh4_emode_dcache_wbinv_range_index;
		break;
#endif
	}
}

/*
 * cache_sh4_op_line_32: (index-operation)
 *
 *	Clear the specified bits on a single 32-byte cache line.
 */
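/*
 * base | (va & mask) selects the line's entry in the memory-mapped
 * cache address array; _reg_bclr_4() then does a read-modify-write of
 * that entry, clearing the requested flag bits (V, plus U for the
 * dcache) while preserving the tag, which per the SH7750-series
 * manuals invalidates the line and writes it back first if it was
 * dirty.
 */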
static inline void
cache_sh4_op_line_32(vaddr_t va, vaddr_t base, uint32_t mask, uint32_t bits)
{
	vaddr_t cca;

	cca = base | (va & mask);
	_reg_bclr_4(cca, bits);
}

/*
 * cache_sh4_op_8lines_32: (index-operation)
 *
 *	Clear the specified bits on 8 32-byte cache lines.
 */
static inline void
cache_sh4_op_8lines_32(vaddr_t va, vaddr_t base, uint32_t mask, uint32_t bits)
{
	volatile uint32_t *cca = (volatile uint32_t *)
	    (base | (va & mask));

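	/*
	 * Each uint32_t index step of 8 is 32 bytes, i.e. one cache
	 * line, so the eight stores below cover 8 consecutive lines
	 * (256 bytes), matching the "va += 32 * 8" in the callers.
	 */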
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;
}

void
sh4_icache_sync_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_ICACHE_SIZE;

	/* d$ index ops must be called via P2 on 7750 and 7750S */
	(*sh_cache_ops._dcache_wbinv_all)();

	RUN_P2;
	while (va < eva) {
		cache_sh4_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_icache_sync_range(vaddr_t va, vsize_t sz)
{
	vaddr_t ccia;
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	sh4_dcache_wbinv_range(va, (eva - va));

	RUN_P2;
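	/*
	 * This is a write to the IC address array with the associative
	 * bit (CCIA_A) set in the access address: the tag is compared
	 * and only a matching line is invalidated, giving a true
	 * by-address invalidate rather than an index operation.
	 */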
	while (va < eva) {
		/* CCR.IIX has no effect on this entry specification */
		ccia = SH4_CCIA | CCIA_A | (va & CCIA_ENTRY_MASK);
		_reg_write_4(ccia, va & CCIA_TAGADDR_MASK); /* V = 0 */
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_icache_sync_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	/* d$ index ops must be called via P2 on 7750 and 7750S */
	(*sh_cache_ops._dcache_wbinv_range_index)(va, eva - va);

	RUN_P2;
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_op_line_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_dcache_wbinv_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_DCACHE_SIZE;

	/* RUN_P2; */ /* called via P2 address if necessary */
	while (va < eva) {
		cache_sh4_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_dcache_wbinv_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

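	/*
	 * "ocbp" (operand cache block purge) writes the line containing
	 * the effective address back to memory if it is dirty and then
	 * invalidates it, so this loop works by address rather than by
	 * cache index.
	 */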
	while (va < eva) {
		__asm volatile("ocbp @%0" : : "r"(va));
		va += 32;
	}
}

void
sh4_dcache_wbinv_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	/* RUN_P2; */ /* called via P2 address if necessary */
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_op_line_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_dcache_inv_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

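	/*
	 * "ocbi" (operand cache block invalidate) drops the line without
	 * writing it back, so any dirty data in the range is discarded;
	 * this is intended for cases where memory will be overwritten
	 * anyway (e.g. by incoming DMA).
	 */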
	while (va < eva) {
		__asm volatile("ocbi @%0" : : "r"(va));
		va += 32;
	}
}

void
sh4_dcache_wb_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

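	/*
	 * "ocbwb" (operand cache block write-back) flushes a dirty line
	 * to memory but leaves it valid in the cache.
	 */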
	while (va < eva) {
		__asm volatile("ocbwb @%0" : : "r"(va));
		va += 32;
	}
}

/*
 * EMODE operation
 */
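/*
 * On the SH7750R/SH7751R, CCR.EMODE doubles the caches (16 KB icache,
 * 32 KB dcache) and organizes them as two ways.  Index operations must
 * therefore touch the entry in both ways; way_shift is the bit that
 * selects the way in the address-array address, presumably 13 for the
 * icache (8 KB per way) and 14 for the dcache (16 KB per way), which
 * matches the constants passed by the callers below.
 */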
/*
 * cache_sh4_emode_op_line_32: (index-operation)
 *
 *	Clear the specified bits on a single 32-byte cache line, in both ways.
 */
static inline void
cache_sh4_emode_op_line_32(vaddr_t va, vaddr_t base, uint32_t mask,
    uint32_t bits, uint32_t way_shift)
{
	vaddr_t cca;

	/* extract entry # */
	va &= mask;

	/* operate for each way */
	cca = base | (0 << way_shift) | va;
	_reg_bclr_4(cca, bits);

	cca = base | (1 << way_shift) | va;
	_reg_bclr_4(cca, bits);
}

/*
 * cache_sh4_emode_op_8lines_32: (index-operation)
 *
 *	Clear the specified bits on 8 32-byte cache lines, in both ways.
 */
static inline void
cache_sh4_emode_op_8lines_32(vaddr_t va, vaddr_t base, uint32_t mask,
    uint32_t bits, uint32_t way_shift)
{
	volatile uint32_t *cca;

	/* extract entry # */
	va &= mask;

	/* operate for each way */
	cca = (volatile uint32_t *)(base | (0 << way_shift) | va);
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;

	cca = (volatile uint32_t *)(base | (1 << way_shift) | va);
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;
}

void
sh4_emode_icache_sync_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_EMODE_ICACHE_SIZE;

	sh4_emode_dcache_wbinv_all();

	RUN_P2;
	while (va < eva) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_emode_icache_sync_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	sh4_emode_dcache_wbinv_range_index(va, eva - va);

	RUN_P2;
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_emode_op_line_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_emode_dcache_wbinv_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_EMODE_DCACHE_SIZE;

	while (va < eva) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32 * 8;
	}
}

void
sh4_emode_dcache_wbinv_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while ((eva - va) >= (8 * 32)) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_emode_op_line_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32;
	}
}