xref: /netbsd-src/sys/arch/sh3/sh3/cache_sh4.c (revision ad207696b82e0875014a0e2f830240df08164b4a)
/*	$NetBSD: cache_sh4.c,v 1.25 2020/07/25 23:38:48 uwe Exp $	*/

/*-
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by UCHIYAMA Yasushi.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_sh4.c,v 1.25 2020/07/25 23:38:48 uwe Exp $");

#include "opt_cache.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <sh3/cache.h>
#include <sh3/cache_sh4.h>
#include <sh3/vmparam.h>

#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)
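/*
 * The cache line size is 32 bytes, so e.g. round_line(0x1005) == 0x1020
 * and trunc_line(0x1005) == 0x1000: a [va, va + sz) range is widened to
 * whole cache lines before it is operated on.
 */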

void sh4_icache_sync_all(void);
void sh4_icache_sync_range(vaddr_t, vsize_t);
void sh4_icache_sync_range_index(vaddr_t, vsize_t);
void sh4_dcache_wbinv_all(void);
void sh4_dcache_wbinv_range(vaddr_t, vsize_t);
void sh4_dcache_wbinv_range_index(vaddr_t, vsize_t);
void sh4_dcache_inv_range(vaddr_t, vsize_t);
void sh4_dcache_wb_range(vaddr_t, vsize_t);

/* EMODE */
void sh4_emode_icache_sync_all(void);
void sh4_emode_icache_sync_range_index(vaddr_t, vsize_t);
void sh4_emode_dcache_wbinv_all(void);
void sh4_emode_dcache_wbinv_range_index(vaddr_t, vsize_t);

/* Must be inlined because we "call" them while running on P2 */
static inline void cache_sh4_op_line_32(vaddr_t, vaddr_t, uint32_t,
    uint32_t) __attribute__((always_inline));
static inline void cache_sh4_op_8lines_32(vaddr_t, vaddr_t, uint32_t,
    uint32_t) __attribute__((always_inline));
static inline void cache_sh4_emode_op_line_32(vaddr_t, vaddr_t,
    uint32_t, uint32_t, uint32_t) __attribute__((always_inline));
static inline void cache_sh4_emode_op_8lines_32(vaddr_t, vaddr_t,
    uint32_t, uint32_t, uint32_t) __attribute__((always_inline));
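
/*
 * Background for the comment above: the memory-mapped cache arrays (and
 * CCR) have to be accessed while running from the uncached P2 segment,
 * so the routines below switch with RUN_P2 and pad before falling back
 * into a cached P1 caller (PAD_P1_SWITCH).  The helpers above must be
 * fully inlined so that no call to a cached P1 address is made while the
 * cache is being manipulated.
 */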


void
sh4_cache_config(void)
{
	int icache_size;
	int dcache_size;
	int ways;
	uint32_t r;

	/* Determine cache size */
	switch (cpu_product) {
	default:
		/* FALLTHROUGH */
	case CPU_PRODUCT_7750:
	case CPU_PRODUCT_7750S:
	case CPU_PRODUCT_7751:
#if defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
#endif
		icache_size = SH4_ICACHE_SIZE;
		dcache_size = SH4_DCACHE_SIZE;
		ways = 1;
		r = SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;

#if !defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
		icache_size = SH4_EMODE_ICACHE_SIZE;
		dcache_size = SH4_EMODE_DCACHE_SIZE;
		ways = 2;
		r = SH4_CCR_EMODE|SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;
#endif

	/*
	 * The ST40 cache sizes can be customized for each product.
	 * Refer to product-specific documentation for the cache sizes.
	 */
	case CPU_PRODUCT_STX7105:
		icache_size = 32 * 1024;
		dcache_size = 32 * 1024;
		ways = 2;
		r = SH4_CCR_EMODE|SH4_CCR_ICE|SH4_CCR_OCE|SH4_CCR_WT;
		break;
	}
#if defined(SH4_CACHE_DISABLE_ICACHE)
	r &= ~SH4_CCR_ICE;
#endif
#if defined(SH4_CACHE_DISABLE_DCACHE)
	r &= ~SH4_CCR_OCE;
#endif
#if defined(SH4_CACHE_WB_U0_P0_P3)
	r &= ~SH4_CCR_WT;
#endif
#if defined(SH4_CACHE_WB_P1)
	r |= SH4_CCR_CB;
#endif

	RUN_P2;
	if (r & SH4_CCR_EMODE)
		SH4_EMODE_CACHE_FLUSH();
	else
		SH4_CACHE_FLUSH();
	_reg_write_4(SH4_CCR, SH4_CCR_ICI|SH4_CCR_OCI);
	_reg_write_4(SH4_CCR, r);
	RUN_P1;

	r = _reg_read_4(SH4_CCR);

	sh_cache_unified = 0;
	sh_cache_enable_icache = (r & SH4_CCR_ICE);
	sh_cache_enable_dcache = (r & SH4_CCR_OCE);
	sh_cache_ways = ways;
	sh_cache_line_size = SH4_CACHE_LINESZ;
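	/*
	 * The two masks below are derived from the per-way cache size.
	 * For example, assuming the usual 16KB direct-mapped dcache and
	 * 4KB pages, dcache_size / ways - 1 == 0x3fff, so the alias mask
	 * is 0x3000 (the VA bits above the page offset that select the
	 * cache index) and the prefer mask is the full index mask 0x3fff.
	 */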
	sh_cache_alias_mask = (dcache_size / ways - 1) & ~PAGE_MASK;
	sh_cache_prefer_mask = (dcache_size / ways - 1);
	sh_cache_write_through_p0_u0_p3 = (r & SH4_CCR_WT);
	sh_cache_write_through_p1 = !(r & SH4_CCR_CB);
	sh_cache_write_through = sh_cache_write_through_p0_u0_p3 &&
	    sh_cache_write_through_p1;
	sh_cache_ram_mode = (r & SH4_CCR_ORA);
	sh_cache_index_mode_icache = (r & SH4_CCR_IIX);
	sh_cache_index_mode_dcache = (r & SH4_CCR_OIX);

	sh_cache_size_dcache = dcache_size;
	if (sh_cache_ram_mode)
		sh_cache_size_dcache /= 2;
	sh_cache_size_icache = icache_size;

	sh_cache_ops._icache_sync_all		= sh4_icache_sync_all;
	sh_cache_ops._icache_sync_range		= sh4_icache_sync_range;
	sh_cache_ops._icache_sync_range_index	= sh4_icache_sync_range_index;

	sh_cache_ops._dcache_wbinv_all		= sh4_dcache_wbinv_all;
	sh_cache_ops._dcache_wbinv_range	= sh4_dcache_wbinv_range;
	sh_cache_ops._dcache_wbinv_range_index	= sh4_dcache_wbinv_range_index;
	sh_cache_ops._dcache_inv_range		= sh4_dcache_inv_range;
	sh_cache_ops._dcache_wb_range		= sh4_dcache_wb_range;

	switch (cpu_product) {
	case CPU_PRODUCT_7750:	/* FALLTHROUGH */
	case CPU_PRODUCT_7750S:
		/* memory mapped d$ can only be accessed from p2 */
		sh_cache_ops._dcache_wbinv_all
			= (void *)SH3_P1SEG_TO_P2SEG(sh4_dcache_wbinv_all);
		sh_cache_ops._dcache_wbinv_range_index
			= (void *)SH3_P1SEG_TO_P2SEG(sh4_dcache_wbinv_range_index);
		break;

#if !defined(SH4_CACHE_DISABLE_EMODE)
	case CPU_PRODUCT_7750R:
	case CPU_PRODUCT_7751R:
		if (!(r & SH4_CCR_EMODE)) {
			break;
		}
		sh_cache_ops._icache_sync_all = sh4_emode_icache_sync_all;
		sh_cache_ops._icache_sync_range_index = sh4_emode_icache_sync_range_index;
		sh_cache_ops._dcache_wbinv_all = sh4_emode_dcache_wbinv_all;
		sh_cache_ops._dcache_wbinv_range_index = sh4_emode_dcache_wbinv_range_index;
		break;
#endif
	}
}

/*
 * cache_sh4_op_line_32: (index-operation)
 *
 *	Clear the specified bits on a single 32-byte cache line.
 */
static inline void
cache_sh4_op_line_32(vaddr_t va, vaddr_t base, uint32_t mask, uint32_t bits)
{
	vaddr_t cca;

	cca = base | (va & mask);
	_reg_bclr_4(cca, bits);
}
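
/*
 * Both index-op helpers work on the memory-mapped cache address arrays
 * (SH4_CCIA/SH4_CCDA): "base | (va & mask)" selects the array slot for
 * that cache index, and clearing V (plus U for the dcache) in the stored
 * tag word invalidates the line.  For a dirty dcache line the hardware
 * performs the write-back as part of the address-array update (see the
 * SH7750-series hardware manual for the exact semantics), which is what
 * makes this usable as a write-back-and-invalidate primitive.
 */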

/*
 * cache_sh4_op_8lines_32: (index-operation)
 *
 *	Clear the specified bits on 8 32-byte cache lines.
 */
static inline void
cache_sh4_op_8lines_32(vaddr_t va, vaddr_t base, uint32_t mask, uint32_t bits)
{
	volatile uint32_t *cca = (volatile uint32_t *)
	    (base | (va & mask));

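	/*
	 * Address-array slots are 32 bytes apart, so with a uint32_t
	 * pointer an index step of 8 (8 * 4 == 32 bytes) advances to the
	 * next cache line; the eight stores below cover eight consecutive
	 * lines starting at the line selected by "va".
	 */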
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;
}

void
sh4_icache_sync_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_ICACHE_SIZE;

	/* d$ index ops must be called via P2 on 7750 and 7750S */
	(*sh_cache_ops._dcache_wbinv_all)();

	RUN_P2;
	while (va < eva) {
		cache_sh4_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_icache_sync_range(vaddr_t va, vsize_t sz)
{
	vaddr_t ccia;
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	sh4_dcache_wbinv_range(va, (eva - va));

	RUN_P2;
	while (va < eva) {
		/* CCR.IIX has no effect on this entry specification */
		ccia = SH4_CCIA | CCIA_A | (va & CCIA_ENTRY_MASK);
		_reg_write_4(ccia, va & CCIA_TAGADDR_MASK); /* V = 0 */
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_icache_sync_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	/* d$ index ops must be called via P2 on 7750 and 7750S */
	(*sh_cache_ops._dcache_wbinv_range_index)(va, eva - va);

	RUN_P2;
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_op_line_32(va, SH4_CCIA, CCIA_ENTRY_MASK, CCIA_V);
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_dcache_wbinv_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_DCACHE_SIZE;

	/* RUN_P2; */ /* called via P2 address if necessary */
	while (va < eva) {
		cache_sh4_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

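/*
 * sh4_dcache_wbinv_range, sh4_dcache_inv_range and sh4_dcache_wb_range
 * below operate by virtual address with the SH-4 cache instructions:
 * "ocbp" writes back and invalidates a block, "ocbi" invalidates without
 * write-back, and "ocbwb" writes back without invalidating.  They do not
 * touch the memory-mapped arrays, so no P2 switch is needed for them.
 */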
void
sh4_dcache_wbinv_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while (va < eva) {
		__asm volatile("ocbp @%0" : : "r"(va));
		va += 32;
	}
}

void
sh4_dcache_wbinv_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	/* RUN_P2; */ /* called via P2 address if necessary */
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_op_line_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V));
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_dcache_inv_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while (va < eva) {
		__asm volatile("ocbi @%0" : : "r"(va));
		va += 32;
	}
}

void
sh4_dcache_wb_range(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while (va < eva) {
		__asm volatile("ocbwb @%0" : : "r"(va));
		va += 32;
	}
}

/*
 * EMODE operation
 */
/*
 * cache_sh4_emode_op_line_32: (index-operation)
 *
 *	Clear the specified bits on a single 32-byte cache line, in both ways.
 */
static inline void
cache_sh4_emode_op_line_32(vaddr_t va, vaddr_t base, uint32_t mask,
    uint32_t bits, uint32_t way_shift)
{
	vaddr_t cca;

	/* extract entry # */
	va &= mask;

	/* operate for each way */
	cca = base | (0 << way_shift) | va;
	_reg_bclr_4(cca, bits);

	cca = base | (1 << way_shift) | va;
	_reg_bclr_4(cca, bits);
}

/*
 * cache_sh4_emode_op_8lines_32: (index-operation)
 *
 *	Clear the specified bits on 8 32-byte cache lines, in both ways.
 */
static inline void
cache_sh4_emode_op_8lines_32(vaddr_t va, vaddr_t base, uint32_t mask,
    uint32_t bits, uint32_t way_shift)
{
	volatile uint32_t *cca;

	/* extract entry # */
	va &= mask;

	/* operate for each way */
	cca = (volatile uint32_t *)(base | (0 << way_shift) | va);
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;

	cca = (volatile uint32_t *)(base | (1 << way_shift) | va);
	cca[ 0] &= ~bits;
	cca[ 8] &= ~bits;
	cca[16] &= ~bits;
	cca[24] &= ~bits;
	cca[32] &= ~bits;
	cca[40] &= ~bits;
	cca[48] &= ~bits;
	cca[56] &= ~bits;
}
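
/*
 * The way_shift argument selects the way bit in the array address: the
 * callers pass 13 for the instruction cache and 14 for the data cache,
 * which matches an EMODE geometry of two ways of 8KB (icache) and 16KB
 * (dcache) per way, assuming the usual 16KB/32KB EMODE cache sizes.
 */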

void
sh4_emode_icache_sync_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_EMODE_ICACHE_SIZE;

	sh4_emode_dcache_wbinv_all();

	RUN_P2;
	while (va < eva) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32 * 8;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_emode_icache_sync_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	sh4_emode_dcache_wbinv_range_index(va, eva - va);

	RUN_P2;
	while ((eva - va) >= (8 * 32)) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_emode_op_line_32(va, SH4_CCIA, CCIA_ENTRY_MASK,
		    CCIA_V, 13);
		va += 32;
	}
	/* assume we are returning into a P1 caller */
	PAD_P1_SWITCH;
}

void
sh4_emode_dcache_wbinv_all(void)
{
	vaddr_t va = 0;
	vaddr_t eva = SH4_EMODE_DCACHE_SIZE;

	while (va < eva) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32 * 8;
	}
}

void
sh4_emode_dcache_wbinv_range_index(vaddr_t va, vsize_t sz)
{
	vaddr_t eva = round_line(va + sz);
	va = trunc_line(va);

	while ((eva - va) >= (8 * 32)) {
		cache_sh4_emode_op_8lines_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32 * 8;
	}

	while (va < eva) {
		cache_sh4_emode_op_line_32(va, SH4_CCDA, CCDA_ENTRY_MASK,
		    (CCDA_U | CCDA_V), 14);
		va += 32;
	}
}