/*	$NetBSD: e500_tlb.c,v 1.2 2011/01/18 01:02:52 matt Exp $	*/
/*-
 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Raytheon BBN Technologies Corp and Defense Advanced Research Projects
 * Agency and which was developed by Matt Thomas of 3am Software Foundry.
 *
 * This material is based upon work supported by the Defense Advanced Research
 * Projects Agency and Space and Naval Warfare Systems Center, Pacific, under
 * Contract No. N66001-09-C-2073.
 * Approved for Public Release, Distribution Unlimited
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>

__KERNEL_RCSID(0, "$NetBSD: e500_tlb.c,v 1.2 2011/01/18 01:02:52 matt Exp $");

#include <sys/param.h>

#include <uvm/uvm_extern.h>

#include <powerpc/spr.h>
#include <powerpc/booke/spr.h>
#include <powerpc/booke/cpuvar.h>
#include <powerpc/booke/e500var.h>
#include <powerpc/booke/pmap.h>

struct e500_tlb {
	vaddr_t tlb_va;
	uint32_t tlb_pte;
	uint32_t tlb_asid;
	vsize_t tlb_size;
};

struct e500_hwtlb {
	uint32_t hwtlb_mas0;
	uint32_t hwtlb_mas1;
	uint32_t hwtlb_mas2;
	uint32_t hwtlb_mas3;
};

struct e500_xtlb {
	struct e500_tlb e_tlb;
	struct e500_hwtlb e_hwtlb;
	u_long e_refcnt;
};

static struct e500_tlb1 {
	uint32_t tlb1_maxsize;
	uint32_t tlb1_minsize;
	u_int tlb1_numentries;
	u_int tlb1_numfree;
	u_int tlb1_freelist[32];
	struct e500_xtlb tlb1_entries[32];
} e500_tlb1;

static inline register_t mftlb0cfg(void) __pure;
static inline register_t mftlb1cfg(void) __pure;

static inline register_t
mftlb0cfg(void)
{
	register_t tlb0cfg;
	__asm("mfspr %0, %1" : "=r"(tlb0cfg) : "n"(SPR_TLB0CFG));
	return tlb0cfg;
}

static inline register_t
mftlb1cfg(void)
{
	register_t tlb1cfg;
	__asm("mfspr %0, %1" : "=r"(tlb1cfg) : "n"(SPR_TLB1CFG));
	return tlb1cfg;
}

static struct e500_tlb
hwtlb_to_tlb(const struct e500_hwtlb hwtlb)
{
	struct e500_tlb tlb;
	register_t prot_mask;
	u_int prot_shift;

	tlb.tlb_va = MAS2_EPN & hwtlb.hwtlb_mas2;
	tlb.tlb_size = 1024 << (2 * MASX_TSIZE_GET(hwtlb.hwtlb_mas1));
	tlb.tlb_asid = MASX_TID_GET(hwtlb.hwtlb_mas1);
	tlb.tlb_pte = (hwtlb.hwtlb_mas2 & MAS2_WIMGE)
	    | (hwtlb.hwtlb_mas3 & MAS3_RPN);
	if (hwtlb.hwtlb_mas1 & MAS1_TS) {
		prot_mask = MAS3_UX|MAS3_UW|MAS3_UR;
		prot_shift = PTE_RWX_SHIFT - 1;
	} else {
		prot_mask = MAS3_SX|MAS3_SW|MAS3_SR;
		prot_shift = PTE_RWX_SHIFT;
	}
	tlb.tlb_pte |= (prot_mask & hwtlb.hwtlb_mas3) << prot_shift;
	return tlb;
}

static inline struct e500_hwtlb
hwtlb_read(uint32_t mas0, u_int slot)
{
	struct e500_hwtlb hwtlb;
	register_t tlbcfg;

	if (__predict_true(mas0 == MAS0_TLBSEL_TLB0)) {
		tlbcfg = mftlb0cfg();
	} else if (mas0 == MAS0_TLBSEL_TLB1) {
		tlbcfg = mftlb1cfg();
	} else {
		panic("%s:%d: unexpected MAS0 %#" PRIx32,
		    __func__, __LINE__, mas0);
	}

	/*
	 * ESEL is the way we want to look up.
	 * If tlbassoc is the same as tlbentries (like in TLB1) then the TLB is
	 * fully associative and the entire slot is placed into ESEL.  If
	 * tlbassoc is less than the number of tlb entries, the slot is split
	 * into two fields.  Since the TLB is M rows by N ways, the lower bits
	 * are for the row (MAS2[EPN]) and the upper bits for the way
	 * (MAS1[ESEL]).
	 */
	const u_int tlbassoc = TLBCFG_ASSOC(tlbcfg);
	const u_int tlbentries = TLBCFG_NENTRY(tlbcfg);
	const u_int esel_shift =
	    __builtin_clz(tlbassoc) - __builtin_clz(tlbentries);

	/*
	 * Disable interrupts since we don't want anyone else mucking with
	 * the MMU Assist registers.
	 */
	const register_t msr = wrtee(0);
	const register_t saved_mas0 = mfspr(SPR_MAS0);
	mtspr(SPR_MAS0, mas0 | MAS0_ESEL_MAKE(slot >> esel_shift));

	if (__predict_true(tlbassoc > tlbentries))
		mtspr(SPR_MAS2, slot << PAGE_SHIFT);

	/*
	 * Now select the entry and grab its contents.
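	 * (As an aside, a worked example of the slot split computed above,
	 * assuming a hypothetical 4-way, 512-entry TLB0: esel_shift would be
	 * clz(4) - clz(512) = 29 - 22 = 7, so slot 0x1ab would name way
	 * 0x1ab >> 7 = 3 via MAS1[ESEL] and set 0x2b via MAS2[EPN].  For the
	 * fully associative TLB1 the shift is 0 and ESEL alone selects the
	 * entry.)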
	 */
	__asm volatile("tlbre");

	hwtlb.hwtlb_mas0 = mfspr(SPR_MAS0);
	hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
	hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
	hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);

	mtspr(SPR_MAS0, saved_mas0);
	wrtee(msr);			/* restore interrupts */

	return hwtlb;
}

static inline void
hwtlb_write(const struct e500_hwtlb hwtlb, bool needs_sync)
{
	const register_t msr = wrtee(0);
	const uint32_t saved_mas0 = mfspr(SPR_MAS0);

	/*
	 * We always need to write MAS0 and MAS1.
	 */
	mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
	mtspr(SPR_MAS1, hwtlb.hwtlb_mas1);

	/*
	 * Only write the VPN/WIMGE if this is in TLB0 or if it's a valid
	 * mapping.
	 */
	if ((hwtlb.hwtlb_mas0 & MAS0_TLBSEL) == MAS0_TLBSEL_TLB0
	    || (hwtlb.hwtlb_mas1 & MAS1_V)) {
		mtspr(SPR_MAS2, hwtlb.hwtlb_mas2);
	}
	/*
	 * Only need to write the RPN/prot if we are dealing with a valid
	 * mapping.
	 */
	if (hwtlb.hwtlb_mas1 & MAS1_V) {
		mtspr(SPR_MAS3, hwtlb.hwtlb_mas3);
	}

#if 0
	printf("%s->[%x,%x,%x,%x]\n",
	    __func__,
	    hwtlb.hwtlb_mas0, hwtlb.hwtlb_mas1,
	    hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3);
#endif
	__asm volatile("tlbwe");
	if (needs_sync) {
		__asm volatile("tlbsync");
		__asm volatile("isync");
	}

	mtspr(SPR_MAS0, saved_mas0);
	wrtee(msr);
}

static struct e500_hwtlb
tlb_to_hwtlb(const struct e500_tlb tlb)
{
	struct e500_hwtlb hwtlb;

	KASSERT(trunc_page(tlb.tlb_va) == tlb.tlb_va);
	KASSERT(tlb.tlb_size != 0);
	KASSERT((tlb.tlb_size & (tlb.tlb_size - 1)) == 0);
	const uint32_t prot_mask = tlb.tlb_pte & PTE_RWX_MASK;
	if (__predict_true(tlb.tlb_size == PAGE_SIZE)) {
		hwtlb.hwtlb_mas0 = 0;
		hwtlb.hwtlb_mas1 = MAS1_V | MASX_TSIZE_MAKE(1);
		/*
		 * A non-zero ASID means this is a user page so mark it as
		 * being in the user's address space.
		 */
		if (tlb.tlb_asid) {
			hwtlb.hwtlb_mas1 |= MAS1_TS
			    | MASX_TID_MAKE(tlb.tlb_asid);
			hwtlb.hwtlb_mas3 = (prot_mask >> (PTE_RWX_SHIFT - 1))
			    | ((prot_mask & ~PTE_xX) >> PTE_RWX_SHIFT);
			KASSERT(prot_mask & PTE_xR);
			KASSERT(hwtlb.hwtlb_mas3 & MAS3_UR);
			CTASSERT(MAS3_UR == (PTE_xR >> (PTE_RWX_SHIFT - 1)));
			CTASSERT(MAS3_SR == (PTE_xR >> PTE_RWX_SHIFT));
		} else {
			hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT;
		}
		if (tlb.tlb_pte & PTE_UNMODIFIED)
			hwtlb.hwtlb_mas3 &= ~(MAS3_UW|MAS3_SW);
		if (tlb.tlb_pte & PTE_UNSYNCED)
			hwtlb.hwtlb_mas3 &= ~(MAS3_UX|MAS3_SX);
	} else {
		KASSERT(tlb.tlb_asid == 0);
		KASSERT((tlb.tlb_size & 0xaaaaa7ff) == 0);
		u_int cntlz = __builtin_clz(tlb.tlb_size);
		KASSERT(cntlz & 1);
		KASSERT(cntlz <= 19);
		hwtlb.hwtlb_mas0 = MAS0_TLBSEL_TLB1;
		/*
		 * TSIZE is defined as 4**TSIZE KBytes, except that a TSIZE
		 * of 0 is not allowed.  So 1KB would be 0x00000400, giving
		 * 21 leading zero bits.  Subtracting the number of leading
		 * zero bits from 21 and dividing by 2 gives us the number
		 * that the MMU wants.
		 */
		hwtlb.hwtlb_mas1 = MASX_TSIZE_MAKE(((31 - 10) - cntlz) / 2)
		    | MAS1_IPROT | MAS1_V;
		hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT;
	}
	/* We are done with MAS1, on to MAS2 ... */
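	/*
	 * (Worked example of the TSIZE arithmetic above: a 16MB TLB1 entry
	 * has tlb_size 0x01000000, so cntlz is 7 and the value handed to
	 * MASX_TSIZE_MAKE is (21 - 7) / 2 = 7, i.e. 4**7 KBytes = 16MB.)
	 */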
	hwtlb.hwtlb_mas2 = tlb.tlb_va | (tlb.tlb_pte & PTE_WIMGE_MASK);
	hwtlb.hwtlb_mas3 |= tlb.tlb_pte & PTE_RPN_MASK;

	return hwtlb;
}

static int
e500_alloc_tlb1_entry(void)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;

	if (tlb1->tlb1_numfree == 0)
		return -1;
	const u_int slot = tlb1->tlb1_freelist[--tlb1->tlb1_numfree];
	KASSERT((tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas1 & MAS1_V) == 0);
	tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas0 =
	    MAS0_TLBSEL_TLB1 | __SHIFTOUT(slot, MAS0_ESEL);
	return slot;
}

static void
e500_free_tlb1_entry(struct e500_xtlb *xtlb, u_int slot, bool needs_sync)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;
	KASSERT(slot < tlb1->tlb1_numentries);
	KASSERT(&tlb1->tlb1_entries[slot] == xtlb);

	KASSERT(xtlb->e_hwtlb.hwtlb_mas0 == (MAS0_TLBSEL_TLB1|__SHIFTIN(slot, MAS0_ESEL)));
	xtlb->e_hwtlb.hwtlb_mas1 &= ~(MAS1_V|MAS1_IPROT);
	hwtlb_write(xtlb->e_hwtlb, needs_sync);

	const register_t msr = wrtee(0);
	tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = slot;
	wrtee(msr);
}

static void
e500_tlb_set_asid(uint32_t asid)
{
	mtspr(SPR_PID0, asid);
}

static void
e500_tlb_invalidate_all(void)
{
	/*
	 * This does a flash invalidate of all entries in TLB0.
	 * We don't touch TLB1 since we don't expect those to be volatile.
	 */
#if 1
	__asm volatile("tlbivax\t0, %0" :: "b"(4));	/* INV_ALL */
#else
	mtspr(SPR_MMUCSR0, MMUCSR0_TLB0_FL);
	while (mfspr(SPR_MMUCSR0) != 0)
		;
#endif
}

static void
e500_tlb_invalidate_globals(void)
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
	const vaddr_t kstack_lo = (uintptr_t)curlwp->l_addr;
	const vaddr_t kstack_hi = kstack_lo + USPACE - 1;
	const vaddr_t epn_kstack_lo = kstack_lo & (max_epn - 1);
	const vaddr_t epn_kstack_hi = kstack_hi & (max_epn - 1);

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			uint32_t mas1 = mfspr(SPR_MAS1);

			/*
			 * Make sure this is a valid kernel entry first.
			 */
			if ((mas1 & (MAS1_V|MAS1_TID|MAS1_TS)) != MAS1_V)
				continue;

			/*
			 * We have a valid kernel TLB entry.  But if it
			 * matches the stack we are currently running on,
			 * it would be unwise to invalidate it.  First see
			 * if the epn overlaps the stack.  If it does, get
			 * the VA and see if it really is part of the stack.
			 */
			if (epn_kstack_lo < epn_kstack_hi
			    ? (epn_kstack_lo <= epn && epn <= epn_kstack_hi)
			    : (epn <= epn_kstack_hi || epn_kstack_lo <= epn)) {
				const uint32_t mas2_epn =
				    mfspr(SPR_MAS2) & MAS2_EPN;
				if (kstack_lo <= mas2_epn
				    && mas2_epn <= kstack_hi)
					continue;
			}
			mtspr(SPR_MAS1, mas1 ^ MAS1_V);
			__asm volatile("tlbwe");
		}
	}
	__asm volatile("isync");
	wrtee(msr);
}

static void
e500_tlb_invalidate_asids(uint32_t asid_lo, uint32_t asid_hi)
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;

	asid_lo = __SHIFTIN(asid_lo, MAS1_TID);
	asid_hi = __SHIFTIN(asid_hi, MAS1_TID);

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			const uint32_t mas1 = mfspr(SPR_MAS1);
			/*
			 * If this is a valid entry for AS space 1 and
			 * its asid matches the constraints of the caller,
			 * clear its valid bit.
			 */
			if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)
			    && asid_lo <= (mas1 & MAS1_TID)
			    && (mas1 & MAS1_TID) < asid_hi) {
				mtspr(SPR_MAS1, mas1 ^ MAS1_V);
#if 0
				printf("%s[%zu,%zu]->[%x]\n",
				    __func__, assoc, epn, mas1);
#endif
				__asm volatile("tlbwe");
			}
		}
	}
	__asm volatile("isync");
	wrtee(msr);
}

static u_int
e500_tlb_record_asids(u_long *bitmap, uint32_t start_slot)
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
	const size_t nbits = 8 * sizeof(bitmap[0]);
	u_int found = 0;

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			const uint32_t mas1 = mfspr(SPR_MAS1);
			/*
			 * If this is a valid entry for AS space 1, record
			 * its ASID in the bitmap if we haven't seen it yet.
			 */
			if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)) {
				const uint32_t asid = MASX_TID_GET(mas1);
				const u_int i = asid / nbits;
				const u_long mask = 1UL << (asid & (nbits - 1));
				if ((bitmap[i] & mask) == 0) {
					bitmap[i] |= mask;
					found++;
				}
			}
		}
	}
	wrtee(msr);

	return found;
}

static void
e500_tlb_invalidate_addr(vaddr_t va, uint32_t asid)
{
	KASSERT((va & PAGE_MASK) == 0);
	/*
	 * Bits 60 & 61 have meaning
	 */
	__asm volatile("tlbivax\t0, %0" :: "b"(va));
	__asm volatile("tlbsync");
	__asm volatile("tlbsync");
}

static bool
e500_tlb_update_addr(vaddr_t va, uint32_t asid, uint32_t pte, bool insert)
{
	struct e500_hwtlb hwtlb = tlb_to_hwtlb(
	    (struct e500_tlb){ .tlb_va = va, .tlb_asid = asid,
		.tlb_size = PAGE_SIZE, .tlb_pte = pte,});

	register_t msr = wrtee(0);
	mtspr(SPR_MAS6, asid
	    ? __SHIFTIN(asid, MAS6_SPID0) | MAS6_SAS : 0);
	__asm volatile("tlbsx 0, %0" :: "b"(va));
	register_t mas1 = mfspr(SPR_MAS1);
	if ((mas1 & MAS1_V) == 0) {
		if (!insert) {
			wrtee(msr);
#if 0
			printf("%s(%#lx,%#x,%#x,%x)<no update>\n",
			    __func__, va, asid, pte, insert);
#endif
			return false;
		}
		mtspr(SPR_MAS1, hwtlb.hwtlb_mas1);
	}
	mtspr(SPR_MAS2, hwtlb.hwtlb_mas2);
	mtspr(SPR_MAS3, hwtlb.hwtlb_mas3);
	__asm volatile("tlbwe");
	if (asid == 0)
		__asm volatile("isync");
	wrtee(msr);
#if 0
	if (asid)
		printf("%s(%#lx,%#x,%#x,%x)->[%x,%x,%x]\n",
		    __func__, va, asid, pte, insert,
		    hwtlb.hwtlb_mas1, hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3);
#endif
	return (mas1 & MAS1_V) != 0;
}

static void
e500_tlb_read_entry(size_t index, struct tlbmask *tlb)
{
}

static void
e500_tlb_dump(void (*pr)(const char *, ...))
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
	const uint32_t saved_mas0 = mfspr(SPR_MAS0);
	size_t valid = 0;

	if (pr == NULL)
		pr = printf;

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		struct e500_hwtlb hwtlb;
		hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0;
		mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
			/*
			 * If this is a valid entry, dump it.
			 */
			if (hwtlb.hwtlb_mas1 & MAS1_V) {
				hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
				hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
				struct e500_tlb tlb = hwtlb_to_tlb(hwtlb);
				(*pr)("[%zu,%zu]->[%x,%x,%x]",
				    assoc, atop(epn),
				    hwtlb.hwtlb_mas1,
				    hwtlb.hwtlb_mas2,
				    hwtlb.hwtlb_mas3);
				(*pr)(": VA=%#lx size=4KB asid=%u pte=%x",
				    tlb.tlb_va, tlb.tlb_asid, tlb.tlb_pte);
				(*pr)(" (RPN=%#x,%s%s%s%s%s,%s%s%s%s%s)\n",
				    tlb.tlb_pte & PTE_RPN_MASK,
				    tlb.tlb_pte & PTE_xR ? "R" : "",
				    tlb.tlb_pte & PTE_xW ? "W" : "",
				    tlb.tlb_pte & PTE_UNMODIFIED ? "*" : "",
				    tlb.tlb_pte & PTE_xX ? "X" : "",
				    tlb.tlb_pte & PTE_UNSYNCED ? "*" : "",
				    tlb.tlb_pte & PTE_W ? "W" : "",
				    tlb.tlb_pte & PTE_I ? "I" : "",
				    tlb.tlb_pte & PTE_M ? "M" : "",
				    tlb.tlb_pte & PTE_G ? "G" : "",
				    tlb.tlb_pte & PTE_E ? "E" : "");
				valid++;
			}
		}
	}
	mtspr(SPR_MAS0, saved_mas0);
	wrtee(msr);
	(*pr)("%s: %zu valid entries\n", __func__, valid);
}

static void
e500_tlb_walk(void *ctx, bool (*func)(void *, vaddr_t, uint32_t, uint32_t))
{
	const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
	const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
	const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
	const uint32_t saved_mas0 = mfspr(SPR_MAS0);

	const register_t msr = wrtee(0);
	for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
		struct e500_hwtlb hwtlb;
		hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0;
		mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
		for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
			mtspr(SPR_MAS2, epn);
			__asm volatile("tlbre");
			hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
			/*
			 * If this is a valid entry, hand it to the callback;
			 * stop the walk if the callback says so.
			 */
			if (hwtlb.hwtlb_mas1 & MAS1_V) {
				hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
				hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
				struct e500_tlb tlb = hwtlb_to_tlb(hwtlb);
				if (!(*func)(ctx, tlb.tlb_va, tlb.tlb_asid,
				    tlb.tlb_pte))
					break;
			}
		}
	}
	mtspr(SPR_MAS0, saved_mas0);
	wrtee(msr);
}

static struct e500_xtlb *
e500_tlb_lookup_xtlb(vaddr_t va, u_int *slotp)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;
	struct e500_xtlb *xtlb = tlb1->tlb1_entries;

	/*
	 * See if we have a TLB1 entry that covers this address.
	 */
	for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) {
		if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V)
		    && xtlb->e_tlb.tlb_va <= va
		    && va < xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size) {
			if (slotp != NULL)
				*slotp = i;
			return xtlb;
		}
	}

	return NULL;
}

static struct e500_xtlb *
e500_tlb_lookup_xtlb2(vaddr_t va, vsize_t len)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;
	struct e500_xtlb *xtlb = tlb1->tlb1_entries;

	/*
	 * See if we have a TLB1 entry that overlaps [va, va + len).
	 */
	for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) {
		if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V)
		    && xtlb->e_tlb.tlb_va < va + len
		    && va < xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size) {
			return xtlb;
		}
	}

	return NULL;
}

static void *
e500_tlb_mapiodev(paddr_t pa, psize_t len)
{
	struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(pa, NULL);

	/*
	 * See if we have a TLB1 entry for the pa.  If the range falls
	 * completely within it, bump the reference count and return the pa.
	 */
	if (xtlb && pa + len <= xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size) {
		xtlb->e_refcnt++;
		return (void *) pa;
	}
	return NULL;
}

static void
e500_tlb_unmapiodev(vaddr_t va, vsize_t len)
{
	if (va < VM_MIN_KERNEL_ADDRESS || VM_MAX_KERNEL_ADDRESS <= va) {
		struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, NULL);
		if (xtlb)
			xtlb->e_refcnt--;
	}
}

static int
e500_tlb_ioreserve(vaddr_t va, vsize_t len, uint32_t pte)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;
	struct e500_xtlb *xtlb;

	KASSERT(len & 0x55555000);
	KASSERT((len & ~0x55555000) == 0);
	KASSERT(len >= PAGE_SIZE);
	KASSERT((len & (len - 1)) == 0);
	KASSERT((va & (len - 1)) == 0);
	KASSERT((pte & (len - 1)) == 0);

	if ((xtlb = e500_tlb_lookup_xtlb2(va, len)) != NULL) {
		if (va < xtlb->e_tlb.tlb_va
		    || xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size < va + len
		    || va - xtlb->e_tlb.tlb_va != pte - xtlb->e_tlb.tlb_pte)
			return EBUSY;
		xtlb->e_refcnt++;
		return 0;
	}

	const int slot = e500_alloc_tlb1_entry();
	if (slot < 0)
		return ENOMEM;

	xtlb = &tlb1->tlb1_entries[slot];
	xtlb->e_tlb.tlb_va = va;
	xtlb->e_tlb.tlb_size = len;
	xtlb->e_tlb.tlb_pte = pte;
	xtlb->e_tlb.tlb_asid = KERNEL_PID;

	xtlb->e_hwtlb = tlb_to_hwtlb(xtlb->e_tlb);
	xtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTOUT(slot, MAS0_ESEL);
	hwtlb_write(xtlb->e_hwtlb, true);
	return 0;
}

static int
e500_tlb_iorelease(vaddr_t va)
{
	u_int slot;
	struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, &slot);

	if (xtlb == NULL)
		return ENOENT;

	if (xtlb->e_refcnt)
		return EBUSY;

	e500_free_tlb1_entry(xtlb, slot, true);

	return 0;
}

static u_int
e500_tlbmemmap(paddr_t memstart, psize_t memsize, struct e500_tlb1 *tlb1)
{
	u_int slotmask = 0;
	u_int slots = 0, nextslot = 0;
	KASSERT(tlb1->tlb1_numfree > 1);
	KASSERT(((memstart + memsize - 1) & -memsize) == memstart);
	for (paddr_t lastaddr = memstart; 0 < memsize; ) {
		u_int cnt = __builtin_clz(memsize);
		psize_t size = min(1UL << (31 - (cnt | 1)), tlb1->tlb1_maxsize);
		slots += memsize / size;
		if (slots > 4)
			panic("%s: %d: can't map memory (%#lx) into TLB1: %s",
			    __func__, __LINE__, memsize, "too fragmented");
		if (slots > tlb1->tlb1_numfree - 1)
			panic("%s: %d: can't map memory (%#lx) into TLB1: %s",
			    __func__, __LINE__, memsize,
			    "insufficient TLB entries");
		for (; nextslot < slots; nextslot++) {
			const u_int freeslot = e500_alloc_tlb1_entry();
			struct e500_xtlb * const xtlb =
			    &tlb1->tlb1_entries[freeslot];
			xtlb->e_tlb.tlb_asid = KERNEL_PID;
			xtlb->e_tlb.tlb_size = size;
			xtlb->e_tlb.tlb_va = lastaddr;
			xtlb->e_tlb.tlb_pte = lastaddr
			    | PTE_M | PTE_xX | PTE_xW | PTE_xR;
			lastaddr += size;
			memsize -= size;
			slotmask |= 1 << (31 - freeslot);	/* clz friendly */
		}
	}

	return nextslot;
}

static const struct tlb_md_ops e500_tlb_ops = {
	.md_tlb_set_asid = e500_tlb_set_asid,
	.md_tlb_invalidate_all = e500_tlb_invalidate_all,
	.md_tlb_invalidate_globals = e500_tlb_invalidate_globals,
	.md_tlb_invalidate_asids = e500_tlb_invalidate_asids,
	.md_tlb_invalidate_addr = e500_tlb_invalidate_addr,
	.md_tlb_update_addr = e500_tlb_update_addr,
	.md_tlb_record_asids = e500_tlb_record_asids,
	.md_tlb_read_entry = e500_tlb_read_entry,
	.md_tlb_mapiodev = e500_tlb_mapiodev,
	.md_tlb_unmapiodev = e500_tlb_unmapiodev,
	.md_tlb_ioreserve = e500_tlb_ioreserve,
	.md_tlb_iorelease = e500_tlb_iorelease,
	.md_tlb_dump = e500_tlb_dump,
	.md_tlb_walk = e500_tlb_walk,
};

void
e500_tlb_init(vaddr_t endkernel, psize_t memsize)
{
	struct e500_tlb1 * const tlb1 = &e500_tlb1;

#if 0
	register_t mmucfg = mfspr(SPR_MMUCFG);
	register_t mas4 = mfspr(SPR_MAS4);
#endif

	const uint32_t tlb1cfg = mftlb1cfg();
	tlb1->tlb1_numentries = TLBCFG_NENTRY(tlb1cfg);
	KASSERT(tlb1->tlb1_numentries <= __arraycount(tlb1->tlb1_entries));
	/*
	 * Limit maxsize to 1G since 4G isn't really useful to us.
	 */
	tlb1->tlb1_minsize = 1024 << (2 * TLBCFG_MINSIZE(tlb1cfg));
	tlb1->tlb1_maxsize = 1024 << (2 * min(10, TLBCFG_MAXSIZE(tlb1cfg)));

#ifdef VERBOSE_INITPPC
	printf(" tlb1cfg=%#x numentries=%u minsize=%#xKB maxsize=%#xKB",
	    tlb1cfg, tlb1->tlb1_numentries, tlb1->tlb1_minsize >> 10,
	    tlb1->tlb1_maxsize >> 10);
#endif

	/*
	 * Let's see what's in TLB1; we need to invalidate any entry that
	 * falls within the kernel's mapped address space.
	 */
	psize_t memmapped = 0;
	for (u_int i = 0; i < tlb1->tlb1_numentries; i++) {
		struct e500_xtlb * const xtlb = &tlb1->tlb1_entries[i];

		xtlb->e_hwtlb = hwtlb_read(MAS0_TLBSEL_TLB1, i);

		if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) == 0) {
			tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = i;
#ifdef VERBOSE_INITPPC
			printf(" TLB1[%u]=<unused>", i);
#endif
			continue;
		}

		xtlb->e_tlb = hwtlb_to_tlb(xtlb->e_hwtlb);
#ifdef VERBOSE_INITPPC
		printf(" TLB1[%u]=<%#lx,%#lx,%#x,%#x>",
		    i, xtlb->e_tlb.tlb_va, xtlb->e_tlb.tlb_size,
		    xtlb->e_tlb.tlb_asid, xtlb->e_tlb.tlb_pte);
#endif
		if ((VM_MIN_KERNEL_ADDRESS <= xtlb->e_tlb.tlb_va
		     && xtlb->e_tlb.tlb_va < VM_MAX_KERNEL_ADDRESS)
		    || (xtlb->e_tlb.tlb_va < VM_MIN_KERNEL_ADDRESS
			&& VM_MIN_KERNEL_ADDRESS <
			   xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size)) {
#ifdef VERBOSE_INITPPC
			printf("free");
#endif
			e500_free_tlb1_entry(xtlb, i, false);
#ifdef VERBOSE_INITPPC
			printf("d");
#endif
			continue;
		}
		if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_IPROT) == 0) {
			xtlb->e_hwtlb.hwtlb_mas1 |= MAS1_IPROT;
			hwtlb_write(xtlb->e_hwtlb, false);
#ifdef VERBOSE_INITPPC
			printf("+iprot");
#endif
		}
		if (xtlb->e_tlb.tlb_pte & PTE_I)
			continue;

		if (xtlb->e_tlb.tlb_va == 0
		    || xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size <= memsize) {
			memmapped += xtlb->e_tlb.tlb_size;
		}
	}

	cpu_md_ops.md_tlb_ops = &e500_tlb_ops;

	if (__predict_false(memmapped < memsize)) {
		/*
		 * Let's see how many TLB entries are needed to map memory.
		 */
		u_int slotmask = e500_tlbmemmap(0, memsize, tlb1);

		/*
		 * To map main memory into the TLB, we need to flush any
		 * existing entries from the TLB that overlap the virtual
		 * address space needed to map physical memory.  That may
		 * include the entries for the pages currently used by the
		 * stack or that we are executing.  So to avoid problems, we
		 * are going to temporarily map the kernel and stack into
		 * AS 1, switch to it, clear out the TLB entries from AS 0,
		 * install the new TLB entries to map memory, and then switch
		 * back to AS 0 and free the temp entry used for AS1.
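		 * (The temporary kernel mapping created below has MAS1[TS]
		 * set, so it is only used for translation while MSR[IS] and
		 * MSR[DS] are set, i.e. while we are running in AS 1.)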
870 */ 871 u_int b = __builtin_clz(endkernel); 872 873 /* 874 * If the kernel doesn't end on a clean power of 2, we need 875 * to round the size up (by decrementing the number of leading 876 * zero bits). If the size isn't a power of 4KB, decrement 877 * again to make it one. 878 */ 879 if (endkernel & (endkernel - 1)) 880 b--; 881 if ((b & 1) == 0) 882 b--; 883 884 /* 885 * Create a TLB1 mapping for the kernel in AS1. 886 */ 887 const u_int kslot = e500_alloc_tlb1_entry(); 888 struct e500_xtlb * const kxtlb = &tlb1->tlb1_entries[kslot]; 889 kxtlb->e_tlb.tlb_va = 0; 890 kxtlb->e_tlb.tlb_size = 1UL << (31 - b); 891 kxtlb->e_tlb.tlb_pte = PTE_M|PTE_xR|PTE_xW|PTE_xX; 892 kxtlb->e_tlb.tlb_asid = KERNEL_PID; 893 894 kxtlb->e_hwtlb = tlb_to_hwtlb(kxtlb->e_tlb); 895 kxtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTOUT(kslot, MAS0_ESEL); 896 kxtlb->e_hwtlb.hwtlb_mas1 |= MAS1_TS; 897 hwtlb_write(kxtlb->e_hwtlb, true); 898 899 /* 900 * Now that we have a TLB mapping in AS1 for the kernel and its 901 * stack, we switch to AS1 to cleanup the TLB mappings for TLB0. 902 */ 903 const register_t saved_msr = mfmsr(); 904 mtmsr(saved_msr | PSL_DS | PSL_IS); 905 __asm volatile("isync"); 906 907 /* 908 *** Invalidate all the TLB0 entries. 909 */ 910 e500_tlb_invalidate_all(); 911 912 /* 913 *** Now let's see if we have any entries in TLB1 that would 914 *** overlap the ones we are about to install. If so, nuke 'em. 915 */ 916 for (u_int i = 0; i < tlb1->tlb1_numentries; i++) { 917 struct e500_xtlb * const xtlb = &tlb1->tlb1_entries[i]; 918 struct e500_hwtlb * const hwtlb = &xtlb->e_hwtlb; 919 if ((hwtlb->hwtlb_mas1 & (MAS1_V|MAS1_TS)) == MAS1_V 920 && (hwtlb->hwtlb_mas2 & MAS2_EPN) < memsize) { 921 e500_free_tlb1_entry(xtlb, i, false); 922 } 923 } 924 925 /* 926 *** Now we can add the TLB entries that will map physical 927 *** memory. If bit 0 [MSB] in slotmask is set, then tlb 928 *** entry 0 contains a mapping for physical memory... 929 */ 930 struct e500_xtlb *entries = tlb1->tlb1_entries; 931 while (slotmask != 0) { 932 const u_int slot = __builtin_clz(slotmask); 933 hwtlb_write(entries[slot].e_hwtlb, false); 934 entries += slot + 1; 935 slotmask <<= slot + 1; 936 } 937 938 /* 939 *** Synchronize the TLB and the instruction stream. 940 */ 941 __asm volatile("tlbsync"); 942 __asm volatile("isync"); 943 944 /* 945 *** Switch back to AS 0. 946 */ 947 mtmsr(saved_msr); 948 __asm volatile("isync"); 949 950 /* 951 * Free the temporary TLB1 entry. 952 */ 953 e500_free_tlb1_entry(kxtlb, kslot, true); 954 } 955 956 /* 957 * Finally set the MAS4 defaults. 958 */ 959 mtspr(SPR_MAS4, MAS4_TSIZED_4KB | MAS4_MD); 960 961 /* 962 * Invalidate all the TLB0 entries. 963 */ 964 e500_tlb_invalidate_all(); 965 } 966