/*	$NetBSD: cache_r5k.c,v 1.8 2003/03/08 04:43:25 rafal Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/cache_r5k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32-byte lines),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
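
/*
 * In the routines below, the v1.x workaround shows up as a run of
 * four nops issued ahead of each Hit op, and the v2.0 workaround as
 * a dummy load from uncached (KSEG1) address zero ahead of each run
 * of Hit ops.
 */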

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva, orig_va;

	orig_va = va;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_16(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}
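
/*
 * R4600 v1.x Hit-op pattern: interrupts are blocked and each CACHE
 * instruction is padded with four nops, so that at least four
 * non-load/store instructions precede it (see errata note 18 above).
 */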
void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * R4600 v2.0: the dummy read from uncached KSEG1 space drains
	 * the data cache refill buffer before the Hit ops are issued
	 * (see the v2.0 errata note above).
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
vr4131v1_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	/*
	 * VR4131 v1.x: the writeback and the invalidate are issued as
	 * two separate Hit ops rather than a single
	 * Hit_Writeback_Invalidate_D.
	 */
	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}
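
/*
 * The Index variants below walk both ways of the 2-way cache in
 * lockstep: w2va tracks the same index in the second way, one
 * way-size above va.
 */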
void
r5k_pdcache_wbinv_range_index_16(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 16)) {
		cache_r4k_op_16lines_16_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 16);
		w2va += (16 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
		w2va += 16;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line( va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_16(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line16(va + size);

	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line16
#undef trunc_line16
#undef round_line
#undef trunc_line

/*
 * Cache operations for R5000-style secondary caches:
 *
 *	- Direct-mapped
 *	- Write-through
 *	- Physically indexed, physically tagged
 */

__asm(".set mips3");
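
/*
 * The Page_Invalidate_S op works on a whole 4096-byte chunk of the
 * secondary cache at a time (128 lines of 32 bytes), which is why
 * the loops below step va by (128 * 32).
 */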
#define	R5K_Page_Invalidate_S	0x17

void
r5k_sdcache_wbinv_all(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_sdcache_size;

	while (va < eva) {
		cache_op_r4k_line(va, R5K_Page_Invalidate_S);
		va += (128 * 32);
	}
}

/* XXX: want wbinv_range_index here instead? */
void
r5k_sdcache_wbinv_rangeall(vaddr_t va, vsize_t size)
{
	r5k_sdcache_wbinv_all();
}

#define	round_page(x)		(((x) + (128 * 32 - 1)) & ~(128 * 32 - 1))
#define	trunc_page(x)		((x) & ~(128 * 32 - 1))

void
r5k_sdcache_wbinv_range(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_page(va + size);
	va = trunc_page(va);

	while (va < eva) {
		cache_op_r4k_line(va, R5K_Page_Invalidate_S);
		va += (128 * 32);
	}
}

void
r5k_sdcache_wb_range(vaddr_t va, vsize_t size)
{
	/* Write-through cache, no need to WB */
}