/*	$NetBSD: e500_tlb.c,v 1.7 2011/06/30 00:52:58 matt Exp $	*/
/*-
 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Raytheon BBN Technologies Corp and Defense Advanced Research Projects
 * Agency and which was developed by Matt Thomas of 3am Software Foundry.
 *
 * This material is based upon work supported by the Defense Advanced Research
 * Projects Agency and Space and Naval Warfare Systems Center, Pacific, under
 * Contract No. N66001-09-C-2073.
 * Approved for Public Release, Distribution Unlimited
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>

__KERNEL_RCSID(0, "$NetBSD: e500_tlb.c,v 1.7 2011/06/30 00:52:58 matt Exp $");

#include <sys/param.h>

#include <uvm/uvm_extern.h>

#include <powerpc/spr.h>
#include <powerpc/booke/spr.h>
#include <powerpc/booke/cpuvar.h>
#include <powerpc/booke/e500var.h>
#include <powerpc/booke/pmap.h>

struct e500_tlb {
	vaddr_t tlb_va;
	uint32_t tlb_pte;
	uint32_t tlb_asid;
	vsize_t tlb_size;
};

struct e500_hwtlb {
	uint32_t hwtlb_mas0;
	uint32_t hwtlb_mas1;
	uint32_t hwtlb_mas2;
	uint32_t hwtlb_mas3;
};

struct e500_xtlb {
	struct e500_tlb e_tlb;
	struct e500_hwtlb e_hwtlb;
	u_long e_refcnt;
};

static struct e500_tlb1 {
	uint32_t tlb1_maxsize;
	uint32_t tlb1_minsize;
	u_int tlb1_numentries;
	u_int tlb1_numfree;
	u_int tlb1_freelist[32];
	struct e500_xtlb tlb1_entries[32];
} e500_tlb1;

static inline register_t mftlb0cfg(void) __pure;
static inline register_t mftlb1cfg(void) __pure;

static inline register_t
mftlb0cfg(void)
{
	register_t tlb0cfg;
	__asm("mfspr %0, %1" : "=r"(tlb0cfg) : "n"(SPR_TLB0CFG));
	return tlb0cfg;
}

static inline register_t
mftlb1cfg(void)
{
	register_t tlb1cfg;
	__asm("mfspr %0, %1" : "=r"(tlb1cfg) : "n"(SPR_TLB1CFG));
	return tlb1cfg;
}

static struct e500_tlb
hwtlb_to_tlb(const struct e500_hwtlb hwtlb)
{
	struct e500_tlb tlb;
	register_t prot_mask;
	u_int prot_shift;

	tlb.tlb_va = MAS2_EPN & hwtlb.hwtlb_mas2;
	tlb.tlb_size = 1024 << (2 * MASX_TSIZE_GET(hwtlb.hwtlb_mas1));
	tlb.tlb_asid = MASX_TID_GET(hwtlb.hwtlb_mas1);
	tlb.tlb_pte = (hwtlb.hwtlb_mas2 & MAS2_WIMGE)
	    | (hwtlb.hwtlb_mas3 & MAS3_RPN);
	if (hwtlb.hwtlb_mas1 & MAS1_TS) {
		prot_mask = MAS3_UX|MAS3_UW|MAS3_UR;
		prot_shift = PTE_RWX_SHIFT - 1;
	} else {
		prot_mask = MAS3_SX|MAS3_SW|MAS3_SR;
		prot_shift = PTE_RWX_SHIFT;
	}
	tlb.tlb_pte |= (prot_mask & hwtlb.hwtlb_mas3) << prot_shift;
	return tlb;
}

static inline struct e500_hwtlb
hwtlb_read(uint32_t mas0, u_int slot)
{
	struct e500_hwtlb hwtlb;
	register_t tlbcfg;

	if (__predict_true(mas0 == MAS0_TLBSEL_TLB0)) {
		tlbcfg = mftlb0cfg();
	} else if (mas0 == MAS0_TLBSEL_TLB1) {
		tlbcfg = mftlb1cfg();
	} else {
		panic("%s:%d: unexpected MAS0 %#" PRIx32,
		    __func__, __LINE__, mas0);
	}

	/*
	 * ESEL is the way we want to look up.
	 * If tlbassoc is the same as tlbentries (like in TLB1) then the TLB is
	 * fully associative and the entire slot is placed into ESEL.  If
	 * tlbassoc is less than the number of TLB entries, the slot is split
	 * into two fields.  Since the TLB is M rows by N ways, the lower bits
	 * select the row (MAS2[EPN]) and the upper bits the way (MAS0[ESEL]).
	 */
	const u_int tlbassoc = TLBCFG_ASSOC(tlbcfg);
	const u_int tlbentries = TLBCFG_NENTRY(tlbcfg);
	const u_int esel_shift =
	    __builtin_clz(tlbassoc) - __builtin_clz(tlbentries);

	/*
	 * Disable interrupts since we don't want anyone else mucking with
	 * the MMU Assist registers.
	 */
	const register_t msr = wrtee(0);
	const register_t saved_mas0 = mfspr(SPR_MAS0);
	mtspr(SPR_MAS0, mas0 | MAS0_ESEL_MAKE(slot >> esel_shift));

	if (__predict_true(tlbassoc > tlbentries))
		mtspr(SPR_MAS2, slot << PAGE_SHIFT);

	/*
	 * Now select the entry and grab its contents.
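	 * tlbre copies the entry selected by MAS0 (and, for TLB0, MAS2) into
	 * the MAS registers, which we then read back with mfspr.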
	 */
	__asm volatile("tlbre");

	hwtlb.hwtlb_mas0 = mfspr(SPR_MAS0);
	hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
	hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
	hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);

	mtspr(SPR_MAS0, saved_mas0);
	wrtee(msr);			/* restore interrupts */

	return hwtlb;
}

static inline void
hwtlb_write(const struct e500_hwtlb hwtlb, bool needs_sync)
{
	const register_t msr = wrtee(0);
	const uint32_t saved_mas0 = mfspr(SPR_MAS0);

	/*
	 * We always need to write MAS0 and MAS1.
	 */
	mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
	mtspr(SPR_MAS1, hwtlb.hwtlb_mas1);

	/*
	 * Only write the VPN/WIMGE if this is in TLB0 or if it's a valid
	 * mapping.
	 */
	if ((hwtlb.hwtlb_mas0 & MAS0_TLBSEL) == MAS0_TLBSEL_TLB0
	    || (hwtlb.hwtlb_mas1 & MAS1_V)) {
		mtspr(SPR_MAS2, hwtlb.hwtlb_mas2);
	}
	/*
	 * Only need to write the RPN/prot if we are dealing with a valid
	 * mapping.
	 */
	if (hwtlb.hwtlb_mas1 & MAS1_V) {
		mtspr(SPR_MAS3, hwtlb.hwtlb_mas3);
	}

#if 0
	printf("%s->[%x,%x,%x,%x]\n",
	    __func__,
	    hwtlb.hwtlb_mas0, hwtlb.hwtlb_mas1,
	    hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3);
#endif
	__asm volatile("tlbwe");
	if (needs_sync) {
		__asm volatile("tlbsync");
		__asm volatile("isync");
	}

	mtspr(SPR_MAS0, saved_mas0);
	wrtee(msr);
}

static struct e500_hwtlb
tlb_to_hwtlb(const struct e500_tlb tlb)
{
	struct e500_hwtlb hwtlb;

	KASSERT(trunc_page(tlb.tlb_va) == tlb.tlb_va);
	KASSERT(tlb.tlb_size != 0);
	KASSERT((tlb.tlb_size & (tlb.tlb_size - 1)) == 0);
	const uint32_t prot_mask = tlb.tlb_pte & PTE_RWX_MASK;
	if (__predict_true(tlb.tlb_size == PAGE_SIZE)) {
		hwtlb.hwtlb_mas0 = 0;
		hwtlb.hwtlb_mas1 = MAS1_V | MASX_TSIZE_MAKE(1);
		/*
		 * A non-zero ASID means this is a user page so mark it as
		 * being in the user's address space.
		 */
		if (tlb.tlb_asid) {
			hwtlb.hwtlb_mas1 |= MAS1_TS
			    | MASX_TID_MAKE(tlb.tlb_asid);
			hwtlb.hwtlb_mas3 = (prot_mask >> (PTE_RWX_SHIFT - 1))
			    | ((prot_mask & ~PTE_xX) >> PTE_RWX_SHIFT);
			KASSERT(prot_mask & PTE_xR);
			KASSERT(hwtlb.hwtlb_mas3 & MAS3_UR);
			CTASSERT(MAS3_UR == (PTE_xR >> (PTE_RWX_SHIFT - 1)));
			CTASSERT(MAS3_SR == (PTE_xR >> PTE_RWX_SHIFT));
		} else {
			hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT;
		}
		if (tlb.tlb_pte & PTE_UNMODIFIED)
			hwtlb.hwtlb_mas3 &= ~(MAS3_UW|MAS3_SW);
		if (tlb.tlb_pte & PTE_UNSYNCED)
			hwtlb.hwtlb_mas3 &= ~(MAS3_UX|MAS3_SX);
	} else {
		KASSERT(tlb.tlb_asid == 0);
		KASSERT((tlb.tlb_size & 0xaaaaa7ff) == 0);
		u_int cntlz = __builtin_clz(tlb.tlb_size);
		KASSERT(cntlz & 1);
		KASSERT(cntlz <= 19);
		hwtlb.hwtlb_mas0 = MAS0_TLBSEL_TLB1;
		/*
		 * TSIZE is defined as (4^TSIZE) Kbytes, except that a TSIZE
		 * of 0 is not allowed.  So 1K would be 0x00000400, giving 21
		 * leading zero bits.  Subtracting the number of leading zero
		 * bits from 21 and dividing by 2 gives us the value the MMU
		 * wants.
		 */
		hwtlb.hwtlb_mas1 = MASX_TSIZE_MAKE(((31 - 10) - cntlz) / 2)
		    | MAS1_IPROT | MAS1_V;
		hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT;
	}
	/*
	 * We are done with MAS1, on to MAS2 ...
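	 * and MAS3: MAS2 takes the EPN together with the WIMGE attribute
	 * bits, while MAS3 takes the RPN on top of the permission bits
	 * computed above.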
	 */
	hwtlb.hwtlb_mas2 = tlb.tlb_va | (tlb.tlb_pte & PTE_WIMGE_MASK);
	hwtlb.hwtlb_mas3 |= tlb.tlb_pte & PTE_RPN_MASK;

	return hwtlb;
}

void *
e500_tlb1_fetch(size_t slot)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;

	return &tlb1->tlb1_entries[slot].e_hwtlb;
}

void
e500_tlb1_sync(void)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;
	for (u_int slot = 1; slot < tlb1->tlb1_numentries; slot++) {
		const struct e500_hwtlb * const new_hwtlb =
		    &tlb1->tlb1_entries[slot].e_hwtlb;
		const struct e500_hwtlb old_hwtlb =
		    hwtlb_read(MAS0_TLBSEL_TLB1, slot);
#define	CHANGED(n,o,f)	((n)->f != (o).f)
		bool mas1_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas1);
		bool mas2_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas2);
		bool mas3_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas3);
#undef CHANGED
		bool new_valid_p = (new_hwtlb->hwtlb_mas1 & MAS1_V) != 0;
		bool old_valid_p = (old_hwtlb.hwtlb_mas1 & MAS1_V) != 0;
		if ((new_valid_p || old_valid_p)
		    && (mas1_changed_p
			|| (new_valid_p
			    && (mas2_changed_p || mas3_changed_p))))
			hwtlb_write(*new_hwtlb, true);
	}
}

static int
e500_alloc_tlb1_entry(void)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;

	if (tlb1->tlb1_numfree == 0)
		return -1;
	const u_int slot = tlb1->tlb1_freelist[--tlb1->tlb1_numfree];
	KASSERT((tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas1 & MAS1_V) == 0);
	tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas0 =
	    MAS0_TLBSEL_TLB1 | __SHIFTIN(slot, MAS0_ESEL);
	return slot;
}

static void
e500_free_tlb1_entry(struct e500_xtlb *xtlb, u_int slot, bool needs_sync)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;
	KASSERT(slot < tlb1->tlb1_numentries);
	KASSERT(&tlb1->tlb1_entries[slot] == xtlb);

	KASSERT(xtlb->e_hwtlb.hwtlb_mas0 == (MAS0_TLBSEL_TLB1|__SHIFTIN(slot, MAS0_ESEL)));
	xtlb->e_hwtlb.hwtlb_mas1 &= ~(MAS1_V|MAS1_IPROT);
	hwtlb_write(xtlb->e_hwtlb, needs_sync);

	const register_t msr = wrtee(0);
	tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = slot;
	wrtee(msr);
}

static tlb_asid_t
e500_tlb_get_asid(void)
{
	return mfspr(SPR_PID0);
}

static void
e500_tlb_set_asid(tlb_asid_t asid)
{
	mtspr(SPR_PID0, asid);
}

static void
e500_tlb_invalidate_all(void)
{
	/*
	 * This does a flash invalidate of all entries in TLB0.
	 * We don't touch TLB1 since we don't expect those to be volatile.
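	 * TLB1 entries are set up at boot (or through the ioreserve
	 * interface) and are normally protected with IPROT, so flash
	 * invalidating TLB0 is sufficient here.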
	 */
#if 1
	__asm volatile("tlbivax\t0, %0" :: "b"(4));	/* INV_ALL */
#else
	mtspr(SPR_MMUCSR0, MMUCSR0_TLB0_FL);
	while (mfspr(SPR_MMUCSR0) != 0)
		;
#endif
}

static void
e500_tlb_invalidate_globals(void)
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
	const vaddr_t kstack_lo = (uintptr_t)curlwp->l_addr;
	const vaddr_t kstack_hi = kstack_lo + USPACE - 1;
	const vaddr_t epn_kstack_lo = kstack_lo & (max_epn - 1);
	const vaddr_t epn_kstack_hi = kstack_hi & (max_epn - 1);

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			uint32_t mas1 = mfspr(SPR_MAS1);

			/*
			 * Make sure this is a valid kernel entry first.
			 */
			if ((mas1 & (MAS1_V|MAS1_TID|MAS1_TS)) != MAS1_V)
				continue;

			/*
			 * We have a valid kernel TLB entry.  But if it matches
			 * the stack we are currently running on, it would be
			 * unwise to invalidate it.  First see if the epn
			 * overlaps the stack.  If it does, get the VA and see
			 * if it really is part of the stack.
			 */
			if (epn_kstack_lo < epn_kstack_hi
			    ? (epn_kstack_lo <= epn && epn <= epn_kstack_hi)
			    : (epn <= epn_kstack_hi || epn_kstack_lo <= epn)) {
				const uint32_t mas2_epn =
				    mfspr(SPR_MAS2) & MAS2_EPN;
				if (kstack_lo <= mas2_epn
				    && mas2_epn <= kstack_hi)
					continue;
			}
			mtspr(SPR_MAS1, mas1 ^ MAS1_V);
			__asm volatile("tlbwe");
		}
	}
	__asm volatile("isync");
	wrtee(msr);
}

static void
e500_tlb_invalidate_asids(tlb_asid_t asid_lo, tlb_asid_t asid_hi)
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;

	asid_lo = __SHIFTIN(asid_lo, MAS1_TID);
	asid_hi = __SHIFTIN(asid_hi, MAS1_TID);

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			const uint32_t mas1 = mfspr(SPR_MAS1);
			/*
			 * If this is a valid entry for AS space 1 and
			 * its asid matches the constraints of the caller,
			 * clear its valid bit.
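			 * (User mappings are entered with MAS1[TS] set, so
			 * kernel entries, which live in AS 0, are never
			 * touched here.)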
			 */
			if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)
			    && asid_lo <= (mas1 & MAS1_TID)
			    && (mas1 & MAS1_TID) <= asid_hi) {
				mtspr(SPR_MAS1, mas1 ^ MAS1_V);
#if 0
				printf("%s[%zu,%zu]->[%x]\n",
				    __func__, assoc, epn, mas1);
#endif
				__asm volatile("tlbwe");
			}
		}
	}
	__asm volatile("isync");
	wrtee(msr);
}

static u_int
e500_tlb_record_asids(u_long *bitmap)
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
	const size_t nbits = 8 * sizeof(bitmap[0]);
	u_int found = 0;

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			const uint32_t mas1 = mfspr(SPR_MAS1);
			/*
			 * If this is a valid entry for AS space 1, record
			 * its asid in the caller's bitmap.
			 */
			if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)) {
				const uint32_t asid = MASX_TID_GET(mas1);
				const u_int i = asid / nbits;
				const u_long mask = 1UL << (asid & (nbits - 1));
				if ((bitmap[i] & mask) == 0) {
					bitmap[i] |= mask;
					found++;
				}
			}
		}
	}
	wrtee(msr);

	return found;
}

static void
e500_tlb_invalidate_addr(vaddr_t va, tlb_asid_t asid)
{
	KASSERT((va & PAGE_MASK) == 0);
	/*
	 * Bits 60 & 61 have meaning
	 */
	__asm volatile("tlbivax\t0, %0" :: "b"(va));
	__asm volatile("tlbsync");
	__asm volatile("tlbsync");
}

static bool
e500_tlb_update_addr(vaddr_t va, tlb_asid_t asid, pt_entry_t pte, bool insert)
{
	struct e500_hwtlb hwtlb = tlb_to_hwtlb(
	    (struct e500_tlb){ .tlb_va = va, .tlb_asid = asid,
		.tlb_size = PAGE_SIZE, .tlb_pte = pte,});

	register_t msr = wrtee(0);
	mtspr(SPR_MAS6, asid ?
	    __SHIFTIN(asid, MAS6_SPID0) | MAS6_SAS : 0);
	__asm volatile("tlbsx 0, %0" :: "b"(va));
	register_t mas1 = mfspr(SPR_MAS1);
	if ((mas1 & MAS1_V) == 0) {
		if (!insert) {
			wrtee(msr);
#if 0
			printf("%s(%#lx,%#x,%#x,%x)<no update>\n",
			    __func__, va, asid, pte, insert);
#endif
			return false;
		}
		mtspr(SPR_MAS1, hwtlb.hwtlb_mas1);
	}
	mtspr(SPR_MAS2, hwtlb.hwtlb_mas2);
	mtspr(SPR_MAS3, hwtlb.hwtlb_mas3);
	__asm volatile("tlbwe");
	if (asid == 0)
		__asm volatile("isync");
	wrtee(msr);
#if 0
	if (asid)
		printf("%s(%#lx,%#x,%#x,%x)->[%x,%x,%x]\n",
		    __func__, va, asid, pte, insert,
		    hwtlb.hwtlb_mas1, hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3);
#endif
	return (mas1 & MAS1_V) != 0;
}

static void
e500_tlb_write_entry(size_t index, const struct tlbmask *tlb)
{
}

static void
e500_tlb_read_entry(size_t index, struct tlbmask *tlb)
{
}

static void
e500_tlb_dump(void (*pr)(const char *, ...))
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
	const uint32_t saved_mas0 = mfspr(SPR_MAS0);
	size_t valid = 0;

	if (pr == NULL)
		pr = printf;

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		struct e500_hwtlb hwtlb;
		hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0;
		mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
			/*
			 * Only dump the entry if it is valid.
			 */
			if (hwtlb.hwtlb_mas1 & MAS1_V) {
				hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
				hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
				struct e500_tlb tlb = hwtlb_to_tlb(hwtlb);
				(*pr)("[%zu,%zu]->[%x,%x,%x]",
				    assoc, atop(epn),
				    hwtlb.hwtlb_mas1,
				    hwtlb.hwtlb_mas2,
				    hwtlb.hwtlb_mas3);
				(*pr)(": VA=%#lx size=4KB asid=%u pte=%x",
				    tlb.tlb_va, tlb.tlb_asid, tlb.tlb_pte);
				(*pr)(" (RPN=%#x,%s%s%s%s%s,%s%s%s%s%s)\n",
				    tlb.tlb_pte & PTE_RPN_MASK,
				    tlb.tlb_pte & PTE_xR ? "R" : "",
				    tlb.tlb_pte & PTE_xW ? "W" : "",
				    tlb.tlb_pte & PTE_UNMODIFIED ? "*" : "",
				    tlb.tlb_pte & PTE_xX ? "X" : "",
				    tlb.tlb_pte & PTE_UNSYNCED ? "*" : "",
				    tlb.tlb_pte & PTE_W ? "W" : "",
				    tlb.tlb_pte & PTE_I ? "I" : "",
				    tlb.tlb_pte & PTE_M ? "M" : "",
				    tlb.tlb_pte & PTE_G ? "G" : "",
				    tlb.tlb_pte & PTE_E ?
"E" : ""); 598 valid++; 599 } 600 } 601 } 602 mtspr(SPR_MAS0, saved_mas0); 603 wrtee(msr); 604 (*pr)("%s: %zu valid entries\n", __func__, valid); 605 } 606 607 static void 608 e500_tlb_walk(void *ctx, bool (*func)(void *, vaddr_t, uint32_t, uint32_t)) 609 { 610 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg()); 611 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg()); 612 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT; 613 const uint32_t saved_mas0 = mfspr(SPR_MAS0); 614 615 const register_t msr = wrtee(0); 616 for (size_t assoc = 0; assoc < tlbassoc; assoc++) { 617 struct e500_hwtlb hwtlb; 618 hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0; 619 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0); 620 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) { 621 mtspr(SPR_MAS2, epn); 622 __asm volatile("tlbre"); 623 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1); 624 /* 625 * If this is a valid entry for AS space 1 and 626 * its asid matches the constraints of the caller, 627 * clear its valid bit. 628 */ 629 if (hwtlb.hwtlb_mas1 & MAS1_V) { 630 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2); 631 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3); 632 struct e500_tlb tlb = hwtlb_to_tlb(hwtlb); 633 if (!(*func)(ctx, tlb.tlb_va, tlb.tlb_asid, 634 tlb.tlb_pte)) 635 break; 636 } 637 } 638 } 639 mtspr(SPR_MAS0, saved_mas0); 640 wrtee(msr); 641 } 642 643 static struct e500_xtlb * 644 e500_tlb_lookup_xtlb(vaddr_t va, u_int *slotp) 645 { 646 struct e500_tlb1 * const tlb1 = &e500_tlb1; 647 struct e500_xtlb *xtlb = tlb1->tlb1_entries; 648 649 /* 650 * See if we have a TLB entry for the pa. 651 */ 652 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) { 653 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) 654 && xtlb->e_tlb.tlb_va <= va 655 && va < xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size) { 656 if (slotp != NULL) 657 *slotp = i; 658 return xtlb; 659 } 660 } 661 662 return NULL; 663 } 664 665 static struct e500_xtlb * 666 e500_tlb_lookup_xtlb2(vaddr_t va, vsize_t len) 667 { 668 struct e500_tlb1 * const tlb1 = &e500_tlb1; 669 struct e500_xtlb *xtlb = tlb1->tlb1_entries; 670 671 /* 672 * See if we have a TLB entry for the pa. 673 */ 674 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) { 675 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) 676 && xtlb->e_tlb.tlb_va < va + len 677 && va < xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size) { 678 return xtlb; 679 } 680 } 681 682 return NULL; 683 } 684 685 static void * 686 e500_tlb_mapiodev(paddr_t pa, psize_t len, bool prefetchable) 687 { 688 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(pa, NULL); 689 690 /* 691 * See if we have a TLB entry for the pa. If completely falls within 692 * mark the reference and return the pa. But only if the tlb entry 693 * is not cacheable. 
	 */
	if (xtlb
	    && pa + len <= xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size
	    && (prefetchable
		|| (xtlb->e_tlb.tlb_pte & PTE_WIG) == (PTE_I|PTE_G))) {
		xtlb->e_refcnt++;
		return (void *) pa;
	}
	return NULL;
}

static void
e500_tlb_unmapiodev(vaddr_t va, vsize_t len)
{
	if (va < VM_MIN_KERNEL_ADDRESS || VM_MAX_KERNEL_ADDRESS <= va) {
		struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, NULL);
		if (xtlb)
			xtlb->e_refcnt--;
	}
}

static int
e500_tlb_ioreserve(vaddr_t va, vsize_t len, pt_entry_t pte)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;
	struct e500_xtlb *xtlb;

	KASSERT(len & 0x55555000);
	KASSERT((len & ~0x55555000) == 0);
	KASSERT(len >= PAGE_SIZE);
	KASSERT((len & (len - 1)) == 0);
	KASSERT((va & (len - 1)) == 0);
	KASSERT((pte & (len - 1)) == 0);

	if ((xtlb = e500_tlb_lookup_xtlb2(va, len)) != NULL) {
		if (va < xtlb->e_tlb.tlb_va
		    || xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size < va + len
		    || va - xtlb->e_tlb.tlb_va != pte - xtlb->e_tlb.tlb_pte)
			return EBUSY;
		xtlb->e_refcnt++;
		return 0;
	}

	const int slot = e500_alloc_tlb1_entry();
	if (slot < 0)
		return ENOMEM;

	xtlb = &tlb1->tlb1_entries[slot];
	xtlb->e_tlb.tlb_va = va;
	xtlb->e_tlb.tlb_size = len;
	xtlb->e_tlb.tlb_pte = pte;
	xtlb->e_tlb.tlb_asid = KERNEL_PID;

	xtlb->e_hwtlb = tlb_to_hwtlb(xtlb->e_tlb);
	xtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(slot, MAS0_ESEL);
	hwtlb_write(xtlb->e_hwtlb, true);
	return 0;
}

static int
e500_tlb_iorelease(vaddr_t va)
{
	u_int slot;
	struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, &slot);

	if (xtlb == NULL)
		return ENOENT;

	if (xtlb->e_refcnt)
		return EBUSY;

	e500_free_tlb1_entry(xtlb, slot, true);

	return 0;
}

static u_int
e500_tlbmemmap(paddr_t memstart, psize_t memsize, struct e500_tlb1 *tlb1)
{
	u_int slotmask = 0;
	u_int slots = 0, nextslot = 0;
	KASSERT(tlb1->tlb1_numfree > 1);
	KASSERT(((memstart + memsize - 1) & -memsize) == memstart);
	for (paddr_t lastaddr = memstart; 0 < memsize; ) {
		u_int cnt = __builtin_clz(memsize);
		psize_t size = min(1UL << (31 - (cnt | 1)), tlb1->tlb1_maxsize);
		slots += memsize / size;
		if (slots > 4)
			panic("%s: %d: can't map memory (%#lx) into TLB1: %s",
			    __func__, __LINE__, memsize, "too fragmented");
		if (slots > tlb1->tlb1_numfree - 1)
			panic("%s: %d: can't map memory (%#lx) into TLB1: %s",
			    __func__, __LINE__, memsize,
			    "insufficient TLB entries");
		for (; nextslot < slots; nextslot++) {
			const u_int freeslot = e500_alloc_tlb1_entry();
			struct e500_xtlb * const xtlb =
			    &tlb1->tlb1_entries[freeslot];
			xtlb->e_tlb.tlb_asid = KERNEL_PID;
			xtlb->e_tlb.tlb_size = size;
			xtlb->e_tlb.tlb_va = lastaddr;
			xtlb->e_tlb.tlb_pte = lastaddr
			    | PTE_M | PTE_xX | PTE_xW | PTE_xR;
			lastaddr += size;
			memsize -= size;
			slotmask |= 1 << (31 - freeslot);	/* clz friendly */
		}
	}

	return slotmask;
}

static const struct tlb_md_ops e500_tlb_ops = {
	.md_tlb_get_asid = e500_tlb_get_asid,
	.md_tlb_set_asid = e500_tlb_set_asid,
	.md_tlb_invalidate_all = e500_tlb_invalidate_all,
	.md_tlb_invalidate_globals = e500_tlb_invalidate_globals,
	.md_tlb_invalidate_asids = e500_tlb_invalidate_asids,
	.md_tlb_invalidate_addr = e500_tlb_invalidate_addr,
	.md_tlb_update_addr = e500_tlb_update_addr,
	.md_tlb_record_asids = e500_tlb_record_asids,
	.md_tlb_write_entry = e500_tlb_write_entry,
	.md_tlb_read_entry = e500_tlb_read_entry,
	.md_tlb_dump = e500_tlb_dump,
	.md_tlb_walk = e500_tlb_walk,
};

static const struct tlb_md_io_ops e500_tlb_io_ops = {
	.md_tlb_mapiodev = e500_tlb_mapiodev,
	.md_tlb_unmapiodev = e500_tlb_unmapiodev,
	.md_tlb_ioreserve = e500_tlb_ioreserve,
	.md_tlb_iorelease = e500_tlb_iorelease,
};

void
e500_tlb_init(vaddr_t endkernel, psize_t memsize)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;

#if 0
	register_t mmucfg = mfspr(SPR_MMUCFG);
	register_t mas4 = mfspr(SPR_MAS4);
#endif

	const uint32_t tlb1cfg = mftlb1cfg();
	tlb1->tlb1_numentries = TLBCFG_NENTRY(tlb1cfg);
	KASSERT(tlb1->tlb1_numentries <= __arraycount(tlb1->tlb1_entries));
	/*
	 * Limit maxsize to 1G since 4G isn't really useful to us.
	 */
	tlb1->tlb1_minsize = 1024 << (2 * TLBCFG_MINSIZE(tlb1cfg));
	tlb1->tlb1_maxsize = 1024 << (2 * min(10, TLBCFG_MAXSIZE(tlb1cfg)));

#ifdef VERBOSE_INITPPC
	printf(" tlb1cfg=%#x numentries=%u minsize=%#xKB maxsize=%#xKB",
	    tlb1cfg, tlb1->tlb1_numentries, tlb1->tlb1_minsize >> 10,
	    tlb1->tlb1_maxsize >> 10);
#endif

	/*
	 * Let's see what's in TLB1; we need to invalidate any entry that
	 * overlaps the kernel's mapped address space.
	 */
	psize_t memmapped = 0;
	for (u_int i = 0; i < tlb1->tlb1_numentries; i++) {
		struct e500_xtlb * const xtlb = &tlb1->tlb1_entries[i];

		xtlb->e_hwtlb = hwtlb_read(MAS0_TLBSEL_TLB1, i);

		if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) == 0) {
			tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = i;
#ifdef VERBOSE_INITPPC
			printf(" TLB1[%u]=<unused>", i);
#endif
			continue;
		}

		xtlb->e_tlb = hwtlb_to_tlb(xtlb->e_hwtlb);
#ifdef VERBOSE_INITPPC
		printf(" TLB1[%u]=<%#lx,%#lx,%#x,%#x>",
		    i, xtlb->e_tlb.tlb_va, xtlb->e_tlb.tlb_size,
		    xtlb->e_tlb.tlb_asid, xtlb->e_tlb.tlb_pte);
#endif
		if ((VM_MIN_KERNEL_ADDRESS <= xtlb->e_tlb.tlb_va
		     && xtlb->e_tlb.tlb_va < VM_MAX_KERNEL_ADDRESS)
		    || (xtlb->e_tlb.tlb_va < VM_MIN_KERNEL_ADDRESS
			&& VM_MIN_KERNEL_ADDRESS <
			   xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size)) {
#ifdef VERBOSE_INITPPC
			printf("free");
#endif
			e500_free_tlb1_entry(xtlb, i, false);
#ifdef VERBOSE_INITPPC
			printf("d");
#endif
			continue;
		}
		if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_IPROT) == 0) {
			xtlb->e_hwtlb.hwtlb_mas1 |= MAS1_IPROT;
			hwtlb_write(xtlb->e_hwtlb, false);
#ifdef VERBOSE_INITPPC
			printf("+iprot");
#endif
		}
		if (xtlb->e_tlb.tlb_pte & PTE_I)
			continue;

		if (xtlb->e_tlb.tlb_va == 0
		    || xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size <= memsize) {
			memmapped += xtlb->e_tlb.tlb_size;
		}
	}

	cpu_md_ops.md_tlb_ops = &e500_tlb_ops;
	cpu_md_ops.md_tlb_io_ops = &e500_tlb_io_ops;

	if (__predict_false(memmapped < memsize)) {
		/*
		 * Let's see which TLB1 entries are needed to map memory.
		 */
		u_int slotmask = e500_tlbmemmap(0, memsize, tlb1);

		/*
		 * To map main memory into the TLB, we need to flush any
		 * existing entries from the TLB that overlap the virtual
		 * address space needed to map physical memory.  That may
		 * include the entries for the pages currently used by the
		 * stack or that we are executing.
		 * So to avoid problems, we are going to temporarily map the
		 * kernel and stack into AS 1, switch to it, clear out the TLB
		 * entries from AS 0, install the new TLB entries to map
		 * memory, and then switch back to AS 0 and free the temporary
		 * entry used for AS 1.
		 */
		u_int b = __builtin_clz(endkernel);

		/*
		 * If the kernel doesn't end on a clean power of 2, we need
		 * to round the size up (by decrementing the number of leading
		 * zero bits).  If the size isn't a power of 4KB, decrement
		 * again to make it one.
		 */
		if (endkernel & (endkernel - 1))
			b--;
		if ((b & 1) == 0)
			b--;

		/*
		 * Create a TLB1 mapping for the kernel in AS1.
		 */
		const u_int kslot = e500_alloc_tlb1_entry();
		struct e500_xtlb * const kxtlb = &tlb1->tlb1_entries[kslot];
		kxtlb->e_tlb.tlb_va = 0;
		kxtlb->e_tlb.tlb_size = 1UL << (31 - b);
		kxtlb->e_tlb.tlb_pte = PTE_M|PTE_xR|PTE_xW|PTE_xX;
		kxtlb->e_tlb.tlb_asid = KERNEL_PID;

		kxtlb->e_hwtlb = tlb_to_hwtlb(kxtlb->e_tlb);
		kxtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(kslot, MAS0_ESEL);
		kxtlb->e_hwtlb.hwtlb_mas1 |= MAS1_TS;
		hwtlb_write(kxtlb->e_hwtlb, true);

		/*
		 * Now that we have a TLB mapping in AS1 for the kernel and its
		 * stack, we switch to AS1 to clean up the TLB mappings in
		 * TLB0.
		 */
		const register_t saved_msr = mfmsr();
		mtmsr(saved_msr | PSL_DS | PSL_IS);
		__asm volatile("isync");

		/*
		 * Invalidate all the TLB0 entries.
		 */
		e500_tlb_invalidate_all();

		/*
		 * Now let's see if we have any entries in TLB1 that would
		 * overlap the ones we are about to install.  If so, nuke 'em.
		 */
		for (u_int i = 0; i < tlb1->tlb1_numentries; i++) {
			struct e500_xtlb * const xtlb = &tlb1->tlb1_entries[i];
			struct e500_hwtlb * const hwtlb = &xtlb->e_hwtlb;
			if ((hwtlb->hwtlb_mas1 & (MAS1_V|MAS1_TS)) == MAS1_V
			    && (hwtlb->hwtlb_mas2 & MAS2_EPN) < memsize) {
				e500_free_tlb1_entry(xtlb, i, false);
			}
		}

		/*
		 * Now we can add the TLB entries that will map physical
		 * memory.  If bit 0 [MSB] in slotmask is set, then TLB
		 * entry 0 contains a mapping for physical memory...
		 */
		struct e500_xtlb *entries = tlb1->tlb1_entries;
		while (slotmask != 0) {
			const u_int slot = __builtin_clz(slotmask);
			hwtlb_write(entries[slot].e_hwtlb, false);
			entries += slot + 1;
			slotmask <<= slot + 1;
		}

		/*
		 * Synchronize the TLB and the instruction stream.
		 */
		__asm volatile("tlbsync");
		__asm volatile("isync");

		/*
		 * Switch back to AS 0.
		 */
		mtmsr(saved_msr);
		__asm volatile("isync");

		/*
		 * Free the temporary TLB1 entry.
		 */
		e500_free_tlb1_entry(kxtlb, kslot, true);
	}

	/*
	 * Finally set the MAS4 defaults.
	 */
	mtspr(SPR_MAS4, MAS4_TSIZED_4KB | MAS4_MD);

	/*
	 * Invalidate all the TLB0 entries.
	 */
	e500_tlb_invalidate_all();
}