/*	$NetBSD: cache_r5k.c,v 1.4 2002/01/07 07:43:52 shin Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32b/l),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
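
/*
 * Those two workarounds appear as idioms throughout this file.  A minimal
 * editorial sketch of the pattern (illustration only; the real loops are
 * in the r4600v1 and r4600v2 routines below, and every identifier used
 * here is one already used in this file):
 *
 *	// R4600 v1.x: at least four non-load/store instructions
 *	// immediately before each Hit op (errata item 18).
 *	__asm __volatile("nop; nop; nop; nop;");
 *	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 *	// R4600 v2.0: a load from an uncached (KSEG1) address first,
 *	// so the internal dcache refill/response buffer is empty.
 *	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
 *	cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 * In both cases the routines below also clear MIPS_SR_INT_IE for the
 * duration of the loop, so an interrupt handler's loads and stores cannot
 * slip in between the workaround and the CACHE instruction.
 */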

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}
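
/*
 * A worked illustration of the Index-op addressing used above and in the
 * pdcache index routines below.  The numbers are an assumed example
 * configuration (a 32KB, 2-way, 32B/line primary icache, i.e. a 16KB way,
 * with the way mask taken to be way_size - 1):
 *
 *	mips_picache_way_size = 16384 (0x4000)
 *	mips_picache_way_mask = 0x3fff
 *
 * Index ops select a cache line by index rather than by tag match, so the
 * supplied VA need not be mapped at all.  Masking it with the way mask
 * keeps just the index bits, MIPS_PHYS_TO_KSEG0() turns the result into an
 * always-mapped address, and adding the way size (w2va) reaches the same
 * index in the second way.
 */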

void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
		w2va += 16;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * The load from an uncached (KSEG1) address before each blast
	 * of CACHE ops empties the dcache refill/response buffer, as
	 * required by the R4600 v2.0 errata (see the comment at the top
	 * of this file).
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}
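
/*
 * Usage note (a sketch of typical wiring, not something this file does
 * itself): the write-back-invalidate, invalidate-only, and write-back-only
 * flavours above normally sit behind the mips_dcache_*_range() hooks and
 * are chosen by a bus_dma back-end according to DMA direction, roughly:
 *
 *	before DMA to a device (PREWRITE)   -> wb_range (push dirty lines)
 *	before DMA from a device (PREREAD)  -> wbinv_range or inv_range
 *	after DMA from a device (POSTREAD)  -> inv_range (discard stale lines)
 *
 * The exact mapping is machine-dependent; treat this table as an assumption.
 */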

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * The load from an uncached (KSEG1) address before each blast
	 * of CACHE ops empties the dcache refill/response buffer, as
	 * required by the R4600 v2.0 errata (see the comment at the top
	 * of this file).
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line
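
/*
 * Configuration sketch (assumptions: the mips_cache_ops hook table from
 * <mips/cache.h>, and a primary dcache line size discovered elsewhere; the
 * real selection lives in the machine-independent cache-configuration code,
 * not in this file, and is keyed off the CPU PRId/revision as well):
 *
 *	switch (mips_pdcache_line_size) {
 *	case 16:
 *		mips_cache_ops.mco_pdcache_wbinv_all = r5k_pdcache_wbinv_all_16;
 *		mips_cache_ops.mco_pdcache_wbinv_range = r5k_pdcache_wbinv_range_16;
 *		mips_cache_ops.mco_pdcache_inv_range = r5k_pdcache_inv_range_16;
 *		mips_cache_ops.mco_pdcache_wb_range = r5k_pdcache_wb_range_16;
 *		break;
 *	case 32:
 *		mips_cache_ops.mco_pdcache_wbinv_all = r5k_pdcache_wbinv_all_32;
 *		mips_cache_ops.mco_pdcache_wbinv_range = is_r4600_v1 ?
 *		    r4600v1_pdcache_wbinv_range_32 : r5k_pdcache_wbinv_range_32;
 *		mips_cache_ops.mco_pdcache_inv_range = is_r4600_v1 ?
 *		    r4600v1_pdcache_inv_range_32 : r5k_pdcache_inv_range_32;
 *		mips_cache_ops.mco_pdcache_wb_range = is_r4600_v1 ?
 *		    r4600v1_pdcache_wb_range_32 : r5k_pdcache_wb_range_32;
 *		break;
 *	}
 *
 * "is_r4600_v1" is a hypothetical predicate standing in for the real
 * PRId-based revision check; a v2.x R4600 would use the r4600v2 variants
 * instead, and a plain R5000 the r5k ones.
 */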