/*	$NetBSD: cache_r5k.c,v 1.12 2005/12/24 20:07:19 perry Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache_r5k.c,v 1.12 2005/12/24 20:07:19 perry Exp $");

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/cache_r5k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32b/l),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
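
/*
 * A rough sketch of how those errata show up in the r4600v1_* and
 * r4600v2_* routines below (interrupts are disabled around both):
 *
 *	v1.x: pad with non-memory instructions before each Hit op:
 *
 *		__asm volatile("nop; nop; nop; nop;");
 *		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 *	v2.x: drain the refill/response buffer with an uncached load
 *	before each burst of Hit ops:
 *
 *		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
 *		cache_r4k_op_32lines_32(va,
 *		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 */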

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	mips_dcache_wbinv_all();

	__asm volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}
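
/*
 * The *_range_index routines below mask the supplied VA down to the
 * bits that select a cache index, rebuild a KSEG0 address from them,
 * and then run Index ops on both ways at once: "va" covers way 0 and
 * "w2va" (va plus the way size) covers way 1.  For example, given a
 * 2-way 32KB primary I-cache (16KB per way), and assuming the way
 * mask is the way size minus one (0x3fff), both addresses select the
 * same set in their respective ways.
 */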
void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva, orig_va;

	orig_va = va;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while ((eva - va) >= (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}
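
/*
 * The VR4131 v1.x variant below issues a Hit_Writeback followed by a
 * separate Hit_Invalidate instead of a single Hit_Writeback_Invalidate;
 * presumably this works around an erratum in that revision's combined
 * writeback-invalidate operation.
 */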
void
vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
		w2va += 16;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 *
 */

__asm(".set mips3");

#define	R5K_Page_Invalidate_S	0x17

void
r5k_sdcache_wbinv_all(void)
{

	r5k_sdcache_wbinv_range(MIPS_PHYS_TO_KSEG0(0), mips_sdcache_size);
}

void
r5k_sdcache_wbinv_range_index(vaddr_t va, vsize_t size)
{

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (mips_sdcache_size - 1));
	r5k_sdcache_wbinv_range(va, size);
}
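
/*
 * The Page_Invalidate_S CACHE op (0x17) operates on the secondary
 * cache a page at a time, so the range is rounded out to 128 lines
 * of 32 bytes (4KB) and the loop below advances by 4KB per op.
 */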
#define	round_page(x)		(((x) + (128 * 32 - 1)) & ~(128 * 32 - 1))
#define	trunc_page(x)		((x) & ~(128 * 32 - 1))

void
r5k_sdcache_wbinv_range(vaddr_t va, vsize_t size)
{
	uint32_t ostatus, taglo;
	vaddr_t eva = round_page(va + size);

	va = trunc_page(va);

	/* Save the Status register (CP0 $12), then clear it so that
	   interrupts are disabled while we operate on the cache. */
	__asm volatile(
		".set noreorder		\n\t"
		".set noat		\n\t"
		"mfc0 %0, $12		\n\t"
		"mtc0 $0, $12		\n\t"
		".set reorder		\n\t"
		".set at"
		: "=r"(ostatus));

	/* Save TagLo (CP0 $28) and clear it while the invalidates run. */
	__asm volatile("mfc0 %0, $28" : "=r"(taglo));
	__asm volatile("mtc0 $0, $28");

	while (va < eva) {
		cache_op_r4k_line(va, R5K_Page_Invalidate_S);
		va += (128 * 32);
	}

	/* Restore Status and TagLo. */
	__asm volatile("mtc0 %0, $12; nop" :: "r"(ostatus));
	__asm volatile("mtc0 %0, $28; nop" :: "r"(taglo));
}

void
r5k_sdcache_wb_range(vaddr_t va, vsize_t size)
{
	/* Write-through cache, no need to WB */
}