/*	$NetBSD: cache_r5k.c,v 1.21 2020/06/14 15:12:56 tsutsui Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_r5k.c,v 1.21 2020/06/14 15:12:56 tsutsui Exp $");

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/cache_r5k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32b/l),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
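/*
 * Illustrative sketch only (disabled): the two errata workaround patterns
 * described above, in the form the R4600 routines later in this file apply
 * them.  The v1.x pattern precedes the CACHE op with non-load/store
 * instructions; the v2.x pattern first loads from an uncached (KSEG1)
 * address so the data cache refill buffer is known to be empty.
 */
#if 0	/* example, not compiled */
static inline void
r4600v1_hit_wbinv_example(register_t va)
{
	/* v1.x: at least four non-load/store instructions first. */
	__asm volatile("nop; nop; nop; nop");
	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
}

static inline void
r4600v2_hit_wbinv_example(register_t va)
{
	/* v2.x: an uncached load empties the refill/response buffer. */
	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
}
#endif	/* example */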

#define	round_line16(x)		round_line(x, 16)
#define	trunc_line16(x)		trunc_line(x, 16)
#define	round_line32(x)		round_line(x, 32)
#define	trunc_line32(x)		trunc_line(x, 32)
#define	round_line(x,n)		(((x) + (register_t)(n) - 1) & -(register_t)(n))
#define	trunc_line(x,n)		((x) & -(register_t)(n))

__asm(".set mips3");

void
r5k_picache_sync_all(void)
{
	struct mips_cache_info * const mci = &mips_cache_info;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */
	mips_intern_dcache_sync_all();
	__asm volatile("sync");
	mips_intern_icache_sync_range_index(MIPS_KSEG0_START,
	    mci->mci_picache_size);
}

void
r5k_picache_sync_range(register_t va, vsize_t size)
{

	mips_intern_dcache_sync_range(va, size);
	mips_intern_icache_sync_range(va, size);
}

void
r5k_picache_sync_range_index(vaddr_t va, vsize_t size)
{
	struct mips_cache_info * const mci = &mips_cache_info;
	const size_t ways = mci->mci_picache_ways;
	const size_t line_size = mci->mci_picache_line_size;
	const size_t way_size = mci->mci_picache_way_size;
	const size_t way_mask = way_size - 1;
	vaddr_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & way_mask);

	eva = round_line(va + size, line_size);
	va = trunc_line(va, line_size);
	size = eva - va;

	/*
	 * If we are going to flush more than is in a way (or the stride
	 * needed for that way), we are flushing everything.
	 */
	if (size >= way_size) {
		r5k_picache_sync_all();
		return;
	}

	for (size_t way = 0; way < ways; way++) {
		mips_intern_dcache_sync_range_index(va, size);
		mips_intern_icache_sync_range_index(va, size);
		va += way_size;
		eva += way_size;
	}
}

void
r5k_pdcache_wbinv_all(void)
{
	struct mips_cache_info * const mci = &mips_cache_info;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */
	mips_intern_pdcache_wbinv_range_index(MIPS_KSEG0_START,
	    mci->mci_pdcache_size);
}

void
r5k_pdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{
	struct mips_cache_info * const mci = &mips_cache_info;
	const size_t ways = mci->mci_pdcache_ways;
	const size_t line_size = mci->mci_pdcache_line_size;
	const vaddr_t way_size = mci->mci_pdcache_way_size;
	const vaddr_t way_mask = way_size - 1;
	vaddr_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & way_mask);
	eva = round_line(va + size, line_size);
	va = trunc_line(va, line_size);
	size = eva - va;

	/*
	 * If we are going to flush more than is in a way, we are flushing
	 * everything.
	 */
	if (size >= way_size) {
		mips_intern_pdcache_wbinv_range_index(MIPS_KSEG0_START,
		    mci->mci_pdcache_size);
		return;
	}

	/*
	 * Invalidate each way.  If the address range wraps past the end of
	 * the way, we will be invalidating in two ways but eventually things
	 * work out since the last way will wrap into the first way.
	 */
	for (size_t way = 0; way < ways; way++) {
		mips_intern_pdcache_wbinv_range_index(va, size);
		va += way_size;
		eva += way_size;
	}
}
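
/*
 * Worked example of the Index-op addressing above (illustrative figures
 * only): assume a 32KB, 2-way pdcache with 32-byte lines, so way_size is
 * 0x4000 and way_mask is 0x3fff.  For va 0x12345678 and size 0x100,
 * va & way_mask is 0x1678, giving the KSEG0 address 0x80001678; after
 * truncating/rounding to 32-byte lines the loop covers 0x80001660-0x80001780
 * for way 0 and, after adding way_size, 0x80005660-0x80005780 for way 1.
 */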

void
r4600v1_pdcache_wbinv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if (eva - va >= mips_cache_info.mci_pdcache_size) {
		r5k_pdcache_wbinv_all();
		return;
	}

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm volatile("nop; nop; nop; nop");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
vr4131v1_pdcache_wbinv_range_16(register_t va, vsize_t size)
{
	register_t eva = round_line16(va + size);

	va = trunc_line16(va);

	for (; (eva - va) >= (32 * 16); va += (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	for (; va < eva; va += 16) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}
}

void
r4600v1_pdcache_inv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; va < eva; va += 32) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	mips_cp0_status_write(ostatus);
}
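
/*
 * Note on the (32 * 32) chunking used by the v2.x routines above and below:
 * each cache_r4k_op_32lines_32() call covers 32 lines of 32 bytes (1KB),
 * and the uncached KSEG1 load is re-issued before every chunk so the refill
 * buffer is empty as the v2.0 errata requires; the trailing per-line loop
 * mops up any remainder shorter than 1KB.
 */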

void
r4600v1_pdcache_wb_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	for (; va < eva; va += 32) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(register_t va, vsize_t size)
{
	const register_t eva = round_line32(va + size);

	va = trunc_line32(va);

	const uint32_t ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	for (; (eva - va) >= (32 * 32); va += (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	for (; va < eva; va += 32) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	mips_cp0_status_write(ostatus);
}

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 *
 */

__asm(".set mips3");

void
r5k_sdcache_wbinv_all(void)
{

	r5k_sdcache_wbinv_range(MIPS_PHYS_TO_KSEG0(0),
	    mips_cache_info.mci_sdcache_size);
}

void
r5k_sdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (mips_cache_info.mci_sdcache_size - 1));
	r5k_sdcache_wbinv_range((intptr_t)va, size);
}

void
r5k_sdcache_wbinv_range(register_t va, vsize_t size)
{
	uint32_t ostatus, taglo;
	register_t eva = mips_r5k_round_page(va + size);

	va = mips_r5k_trunc_page(va);

	ostatus = mips_cp0_status_read();
	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/* Save CP0 TagLo ($28) and clear it for the duration. */
	__asm volatile("mfc0 %0, $28" : "=r"(taglo));
	__asm volatile("mtc0 $0, $28");

	/* Invalidate the range one secondary-cache page at a time. */
	for (; va < eva; va += R5K_SC_PAGESIZE) {
		cache_op_r4k_line(va, CACHEOP_R5K_Page_Invalidate_S);
	}

	mips_cp0_status_write(ostatus);
	__asm volatile("mtc0 %0, $28; nop" :: "r"(taglo));
}
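
/*
 * Illustrative sketch only (disabled, hypothetical caller): picking between
 * the Hit- and Index-based primary-cache sync routines above.  In the kernel
 * these functions are normally reached through the machine-dependent
 * cache-ops switch rather than called directly.
 */
#if 0	/* example, not compiled */
static void
example_icache_sync(register_t va, vsize_t len, bool still_mapped)
{
	if (still_mapped) {
		/* Mapping is live: Hit ops can touch the address directly. */
		r5k_picache_sync_range(va, len);
	} else {
		/*
		 * Mapping is gone: Index ops only use the address's
		 * cache-index bits, via a KSEG0 alias.
		 */
		r5k_picache_sync_range_index((vaddr_t)va, len);
	}
}
#endif	/* example */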