/*	$NetBSD: cache_r5k.c,v 1.2 2001/11/14 18:26:23 thorpej Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>

#include <mips/cache.h>
#include <mips/cache_r4k.h>
#include <mips/locore.h>

/*
 * Cache operations for R5000-style caches:
 *
 *	- 2-way set-associative
 *	- Write-back
 *	- Virtually indexed, physically tagged
 *
 * Since the R4600 is so similar (2-way set-associative, 32b/l),
 * we handle that here, too.  Note for R4600, we have to work
 * around some chip bugs.  From the v1.7 errata:
 *
 *  18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D,
 *      Hit_Invalidate_D and Create_Dirty_Excl_D should only be
 *      executed if there is no other dcache activity.  If the dcache is
 *      accessed for another instruction immediately preceding when these
 *      cache instructions are executing, it is possible that the dcache
 *      tag match outputs used by these cache instructions will be
 *      incorrect.  These cache instructions should be preceded by at least
 *      four instructions that are not any kind of load or store
 *      instruction.
 *
 * ...and from the v2.0 errata:
 *
 *   The CACHE instructions Hit_Writeback_Inv_D, Hit_Writeback_D,
 *   Hit_Invalidate_D and Create_Dirty_Exclusive_D will only operate
 *   correctly if the internal data cache refill buffer is empty.  These
 *   CACHE instructions should be separated from any potential data cache
 *   miss by a load instruction to an uncached address to empty the response
 *   buffer.
 *
 * XXX Does not handle split secondary caches.
 */
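/*
 * In concrete terms, the errata workarounds used by the r4600v1_ and
 * r4600v2_ routines below boil down to two patterns (shown here only as
 * an illustrative sketch; the routines themselves are authoritative, and
 * they additionally run with interrupts disabled):
 *
 *	// v1.x: at least four non-load/store instructions ahead of
 *	// each dcache Hit op.
 *	__asm __volatile("nop; nop; nop; nop;");
 *	cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 *
 *	// v2.0: an uncached load to drain the refill/response buffer
 *	// before each burst of dcache Hit ops.
 *	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
 *	cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
 */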
#define	round_line(x)		(((x) + 31) & ~31)
#define	trunc_line(x)		((x) & ~31)

__asm(".set mips3");

void
r5k_icache_sync_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	mips_dcache_wbinv_all();

	__asm __volatile("sync");

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}
}

void
r5k_icache_sync_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	mips_dcache_wb_range(va, (eva - va));

	__asm __volatile("sync");

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r5k_icache_sync_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	eva = round_line(va + size);
	va = trunc_line(va);

	mips_dcache_wbinv_range_index(va, (eva - va));

	__asm __volatile("sync");

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_picache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_picache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
		w2va += 32;
	}
}

void
r5k_pdcache_wbinv_all_32(void)
{
	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
	vaddr_t eva = va + mips_pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the 2 different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
r4600v1_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	/*
	 * To make this a little less painful, just hit the entire
	 * cache if we have a range >= the cache size.
	 */
	if ((eva - va) >= mips_pdcache_size) {
		r5k_pdcache_wbinv_all_32();
		return;
	}

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		/* v1.x errata: >= 4 non-load/store insns before the CACHE op. */
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while ((eva - va) >= (32 * 32)) {
		/* v2.0 errata: uncached load drains the refill buffer. */
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wbinv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
r5k_pdcache_wbinv_range_index_32(vaddr_t va, vsize_t size)
{
	vaddr_t w2va, eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);

	eva = round_line(va + size);
	va = trunc_line(va);
	w2va = va + mips_pdcache_way_size;

	while ((eva - va) >= (16 * 32)) {
		cache_r4k_op_16lines_32_2way(va, w2va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (16 * 32);
		w2va += (16 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		cache_op_r4k_line(w2va, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
		w2va += 32;
	}
}

void
r4600v1_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_inv_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
r4600v1_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	/*
	 * This is pathetically slow, but the chip bug is pretty
	 * nasty, and we hope that not too many v1.x R4600s are
	 * around.
	 */

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	while (va < eva) {
		__asm __volatile("nop; nop; nop; nop;");
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r4600v2_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);
	uint32_t ostatus;

	va = trunc_line(va);

	ostatus = mips_cp0_status_read();

	mips_cp0_status_write(ostatus & ~MIPS_SR_INT_IE);

	/*
	 * Between blasts of big cache chunks, give interrupts
	 * a chance to get through.
	 */
	while ((eva - va) >= (32 * 32)) {
		(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	(void) *(__volatile int *)MIPS_PHYS_TO_KSEG1(0);
	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	mips_cp0_status_write(ostatus);
}

void
r5k_pdcache_wb_range_32(vaddr_t va, vsize_t size)
{
	vaddr_t eva = round_line(va + size);

	va = trunc_line(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

#undef round_line
#undef trunc_line
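/*
 * Usage sketch (illustrative only): a port's cache autoconfiguration code
 * would normally install these routines in the machine-independent
 * mips_cache_ops switch declared in <mips/cache.h>, selecting the r4600v1_
 * or r4600v2_ variants by CPU revision.  The field names and the
 * "is_r4600_v1" predicate below are assumptions made for this example,
 * not definitions from this file:
 *
 *	mips_cache_ops.mco_icache_sync_all = r5k_icache_sync_all_32;
 *	mips_cache_ops.mco_icache_sync_range = r5k_icache_sync_range_32;
 *	mips_cache_ops.mco_icache_sync_range_index =
 *	    r5k_icache_sync_range_index_32;
 *	mips_cache_ops.mco_pdcache_wbinv_all = r5k_pdcache_wbinv_all_32;
 *	mips_cache_ops.mco_pdcache_wbinv_range = is_r4600_v1 ?
 *	    r4600v1_pdcache_wbinv_range_32 : r5k_pdcache_wbinv_range_32;
 *	mips_cache_ops.mco_pdcache_wbinv_range_index =
 *	    r5k_pdcache_wbinv_range_index_32;
 *	mips_cache_ops.mco_pdcache_inv_range = r5k_pdcache_inv_range_32;
 *	mips_cache_ops.mco_pdcache_wb_range = r5k_pdcache_wb_range_32;
 */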