/*	$OpenBSD: pmap.c,v 1.64 2024/11/28 18:54:36 gkoehler Exp $ */

/*
 * Copyright (c) 2015 Martin Pieuchot
 * Copyright (c) 2001, 2002, 2007 Dale Rahn.
 * All rights reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 */

/*
 * Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/user.h>

#include <uvm/uvm_extern.h>

#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/pmap.h>
#include <machine/pte.h>

#include <dev/ofw/fdt.h>

extern char _start[], _etext[], _erodata[], _end[];

#ifdef MULTIPROCESSOR

struct mutex pmap_hash_lock = MUTEX_INITIALIZER(IPL_HIGH);

#define PMAP_HASH_LOCK(s)				\
do {							\
	(void)s;					\
	mtx_enter(&pmap_hash_lock);			\
} while (0)

#define PMAP_HASH_UNLOCK(s)				\
do {							\
	mtx_leave(&pmap_hash_lock);			\
} while (0)

#define PMAP_VP_LOCK_INIT(pm)		mtx_init(&pm->pm_mtx, IPL_VM)

#define PMAP_VP_LOCK(pm)				\
do {							\
	if (pm != pmap_kernel())			\
		mtx_enter(&pm->pm_mtx);			\
} while (0)

#define PMAP_VP_UNLOCK(pm)				\
do {							\
	if (pm != pmap_kernel())			\
		mtx_leave(&pm->pm_mtx);			\
} while (0)

#define PMAP_VP_ASSERT_LOCKED(pm)			\
do {							\
	if (pm != pmap_kernel())			\
		MUTEX_ASSERT_LOCKED(&pm->pm_mtx);	\
} while (0)

#else

#define PMAP_HASH_LOCK(s)		(void)s
#define PMAP_HASH_UNLOCK(s)		/* nothing */

#define PMAP_VP_LOCK_INIT(pm)		/* nothing */
#define PMAP_VP_LOCK(pm)		/* nothing */
#define PMAP_VP_UNLOCK(pm)		/* nothing */
#define PMAP_VP_ASSERT_LOCKED(pm)	/* nothing */

#endif

struct pmap kernel_pmap_store;

struct pte *pmap_ptable;
int	pmap_ptab_cnt;
uint64_t pmap_ptab_mask;

#define HTABMEMSZ	(pmap_ptab_cnt * 8 * sizeof(struct pte))
#define HTABSIZE	(ffs(pmap_ptab_cnt) - 12)

struct pate *pmap_pat;

#define PATMEMSZ	(64 * 1024)
#define PATSIZE		(ffs(PATMEMSZ) - 12)

struct pte_desc {
	/* Linked list of phys -> virt entries */
	LIST_ENTRY(pte_desc) pted_pv_list;
	struct pte pted_pte;
	pmap_t pted_pmap;
	vaddr_t pted_va;
	uint64_t pted_vsid;
};

#define PTED_VA_PTEGIDX_M	0x07
#define PTED_VA_HID_M		0x08
#define PTED_VA_MANAGED_M	0x10
#define PTED_VA_WIRED_M		0x20
#define PTED_VA_EXEC_M		0x40

void	pmap_pted_syncicache(struct pte_desc *);
void	pmap_flush_page(struct vm_page *);

struct slb_desc {
	LIST_ENTRY(slb_desc) slbd_list;
	uint64_t slbd_esid;
	uint64_t slbd_vsid;
	struct pmapvp1 *slbd_vp;
};

/* Preallocated SLB entries for the kernel. */
struct slb_desc	kernel_slb_desc[16 + VM_KERNEL_SPACE_SIZE / SEGMENT_SIZE];

struct slb_desc *pmap_slbd_lookup(pmap_t, vaddr_t);

struct pmapvp1 {
	struct pmapvp2 *vp[VP_IDX1_CNT];
};

struct pmapvp2 {
	struct pte_desc *vp[VP_IDX2_CNT];
};

CTASSERT(sizeof(struct pmapvp1) == sizeof(struct pmapvp2));

static inline int
VP_IDX1(vaddr_t va)
{
	return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
}

static inline int
VP_IDX2(vaddr_t va)
{
	return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
}

void	pmap_vp_destroy(pmap_t);
void	pmap_release(pmap_t);

struct pool pmap_pmap_pool;
struct pool pmap_vp_pool;
struct pool pmap_pted_pool;
struct pool pmap_slbd_pool;

int pmap_initialized = 0;

/*
 * We use only 4K pages and 256MB segments.  That means p = b = 12 and
 * s = 28.
 */
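
/*
 * The hashed page table is an array of PTE groups (PTEGs) of 8 PTEs
 * each.  A mapping lives either in the PTEG selected by the primary
 * hash of its VSID and page index, or in the secondary PTEG obtained
 * by XORing that hash with pmap_ptab_mask, in which case PTE_HID is
 * set.  pmap_ptedinhash(), pte_lookup() and pte_insert() below all
 * follow this convention.
 */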
#define KERNEL_VSID_BIT		0x0000001000000000ULL
#define VSID_HASH_MASK		0x0000007fffffffffULL

static inline int
PTED_HID(struct pte_desc *pted)
{
	return !!(pted->pted_va & PTED_VA_HID_M);
}

static inline int
PTED_PTEGIDX(struct pte_desc *pted)
{
	return (pted->pted_va & PTED_VA_PTEGIDX_M);
}

static inline int
PTED_MANAGED(struct pte_desc *pted)
{
	return !!(pted->pted_va & PTED_VA_MANAGED_M);
}

static inline int
PTED_WIRED(struct pte_desc *pted)
{
	return !!(pted->pted_va & PTED_VA_WIRED_M);
}

static inline int
PTED_VALID(struct pte_desc *pted)
{
	return !!(pted->pted_pte.pte_hi & PTE_VALID);
}

#define TLBIEL_MAX_SETS		4096
#define TLBIEL_SET_SHIFT	12
#define TLBIEL_INVAL_SET	(0x3 << 10)

void
tlbia(void)
{
	int set;

	for (set = 0; set < TLBIEL_MAX_SETS; set++)
		tlbiel((set << TLBIEL_SET_SHIFT) | TLBIEL_INVAL_SET);
}

/*
 * Return AVA for use with TLB invalidate instructions.
 */
static inline uint64_t
pmap_ava(uint64_t vsid, vaddr_t va)
{
	return ((vsid << ADDR_VSID_SHIFT) | (va & ADDR_PIDX));
}

/*
 * Return AVA for a PTE descriptor.
 */
static inline uint64_t
pmap_pted2ava(struct pte_desc *pted)
{
	return pmap_ava(pted->pted_vsid, pted->pted_va);
}

/*
 * Return the top 64 bits of the (80-bit) VPN for a PTE descriptor.
 */
static inline uint64_t
pmap_pted2avpn(struct pte_desc *pted)
{
	return (pted->pted_vsid << (PTE_VSID_SHIFT) |
	    (pted->pted_va & ADDR_PIDX) >>
	    (ADDR_VSID_SHIFT - PTE_VSID_SHIFT));
}

static inline uint64_t
pmap_kernel_vsid(uint64_t esid)
{
	uint64_t vsid;
	vsid = (((esid << 8) | (esid > 28)) * 0x13bb) & (KERNEL_VSID_BIT - 1);
	return vsid | KERNEL_VSID_BIT;
}

static inline uint64_t
pmap_va2vsid(pmap_t pm, vaddr_t va)
{
	uint64_t esid = va >> ADDR_ESID_SHIFT;
	struct slb_desc *slbd;

	if (pm == pmap_kernel())
		return pmap_kernel_vsid(esid);

	slbd = pmap_slbd_lookup(pm, va);
	if (slbd)
		return slbd->slbd_vsid;

	return 0;
}
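
/*
 * Return a pointer to the hash table slot recorded for this PTE
 * descriptor (through the HID and PTEGIDX bits kept in pted_va), or
 * NULL if that slot has since been recycled for another mapping.
 */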
struct pte *
pmap_ptedinhash(struct pte_desc *pted)
{
	struct pte *pte;
	vaddr_t va;
	uint64_t vsid, hash;
	int idx;

	va = pted->pted_va & ~PAGE_MASK;
	vsid = pted->pted_vsid;
	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
	idx = (hash & pmap_ptab_mask);

	idx ^= (PTED_HID(pted) ? pmap_ptab_mask : 0);
	pte = pmap_ptable + (idx * 8);
	pte += PTED_PTEGIDX(pted); /* increment by index into pteg */

	/*
	 * We now have the pointer to where it will be, if it is
	 * currently mapped. If the mapping was thrown away in
	 * exchange for another page mapping, then this page is not
	 * currently in the hash.
	 */
	if ((pted->pted_pte.pte_hi |
	    (PTED_HID(pted) ? PTE_HID : 0)) == pte->pte_hi)
		return pte;

	return NULL;
}

struct slb_desc *
pmap_slbd_lookup(pmap_t pm, vaddr_t va)
{
	uint64_t esid = va >> ADDR_ESID_SHIFT;
	struct slb_desc *slbd;

	PMAP_VP_ASSERT_LOCKED(pm);

	LIST_FOREACH(slbd, &pm->pm_slbd, slbd_list) {
		if (slbd->slbd_esid == esid)
			return slbd;
	}

	return NULL;
}

void
pmap_slbd_cache(pmap_t pm, struct slb_desc *slbd)
{
	struct pcb *pcb = &curproc->p_addr->u_pcb;
	uint64_t slbe, slbv;
	int idx;

	KASSERT(curproc->p_vmspace->vm_map.pmap == pm);

	for (idx = 0; idx < nitems(pcb->pcb_slb); idx++) {
		if (pcb->pcb_slb[idx].slb_slbe == 0)
			break;
	}
	if (idx == nitems(pcb->pcb_slb))
		idx = arc4random_uniform(nitems(pcb->pcb_slb));

	slbe = (slbd->slbd_esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
	slbv = slbd->slbd_vsid << SLBV_VSID_SHIFT;

	pcb->pcb_slb[idx].slb_slbe = slbe;
	pcb->pcb_slb[idx].slb_slbv = slbv;
}

int
pmap_slbd_fault(pmap_t pm, vaddr_t va)
{
	struct slb_desc *slbd;

	PMAP_VP_LOCK(pm);
	slbd = pmap_slbd_lookup(pm, va);
	if (slbd) {
		pmap_slbd_cache(pm, slbd);
		PMAP_VP_UNLOCK(pm);
		return 0;
	}
	PMAP_VP_UNLOCK(pm);

	return EFAULT;
}

#define NUM_VSID	(1 << 20)
uint32_t pmap_vsid[NUM_VSID / 32];

uint64_t
pmap_alloc_vsid(void)
{
	uint32_t bits;
	uint32_t vsid, bit;

	for (;;) {
		do {
			vsid = arc4random() & (NUM_VSID - 1);
			bit = (vsid & (32 - 1));
			bits = pmap_vsid[vsid / 32];
		} while (bits & (1U << bit));

		if (atomic_cas_uint(&pmap_vsid[vsid / 32], bits,
		    bits | (1U << bit)) == bits)
			return vsid;
	}
}

void
pmap_free_vsid(uint64_t vsid)
{
	uint32_t bits;
	int bit;

	KASSERT(vsid < NUM_VSID);

	bit = (vsid & (32 - 1));
	for (;;) {
		bits = pmap_vsid[vsid / 32];
		if (atomic_cas_uint(&pmap_vsid[vsid / 32], bits,
		    bits & ~(1U << bit)) == bits)
			break;
	}
}

struct slb_desc *
pmap_slbd_alloc(pmap_t pm, vaddr_t va)
{
	uint64_t esid = va >> ADDR_ESID_SHIFT;
	struct slb_desc *slbd;

	KASSERT(pm != pmap_kernel());
	PMAP_VP_ASSERT_LOCKED(pm);

	slbd = pool_get(&pmap_slbd_pool, PR_NOWAIT | PR_ZERO);
	if (slbd == NULL)
		return NULL;

	slbd->slbd_esid = esid;
	slbd->slbd_vsid = pmap_alloc_vsid();
	KASSERT((slbd->slbd_vsid & KERNEL_VSID_BIT) == 0);
	LIST_INSERT_HEAD(&pm->pm_slbd, slbd, slbd_list);

	/* We're almost certainly going to use it soon. */
	pmap_slbd_cache(pm, slbd);

	return slbd;
}

int
pmap_slbd_enter(pmap_t pm, vaddr_t va)
{
	struct slb_desc *slbd;

	PMAP_VP_LOCK(pm);
	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL)
		slbd = pmap_slbd_alloc(pm, va);
	PMAP_VP_UNLOCK(pm);

	return slbd ? 0 : EFAULT;
}
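
/*
 * Map one 256MB segment of the given user address space at USER_ADDR
 * using SLB slot 31, so that the kernel can access user memory
 * through that window.  On success *kva is the window address that
 * corresponds to 'va' and *len the number of bytes left in the
 * segment.
 */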
int
pmap_set_user_slb(pmap_t pm, vaddr_t va, vaddr_t *kva, vsize_t *len)
{
	struct cpu_info *ci = curcpu();
	struct slb_desc *slbd;
	uint64_t slbe, slbv;
	uint64_t vsid;

	KASSERT(pm != pmap_kernel());

	PMAP_VP_LOCK(pm);
	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL) {
		slbd = pmap_slbd_alloc(pm, va);
		if (slbd == NULL) {
			PMAP_VP_UNLOCK(pm);
			return EFAULT;
		}
	}
	vsid = slbd->slbd_vsid;
	PMAP_VP_UNLOCK(pm);

	/*
	 * We might get here while another process is sleeping while
	 * handling a page fault.  Kill their SLB entry before
	 * inserting our own.
	 */
	if (ci->ci_kernel_slb[31].slb_slbe != 0) {
		isync();
		slbie(ci->ci_kernel_slb[31].slb_slbe);
		isync();
	}

	slbe = (USER_ESID << SLBE_ESID_SHIFT) | SLBE_VALID | 31;
	slbv = vsid << SLBV_VSID_SHIFT;

	ci->ci_kernel_slb[31].slb_slbe = slbe;
	ci->ci_kernel_slb[31].slb_slbv = slbv;

	isync();
	slbmte(slbv, slbe);
	isync();

	curpcb->pcb_userva = (va & ~SEGMENT_MASK);

	if (kva)
		*kva = USER_ADDR | (va & SEGMENT_MASK);
	if (len)
		*len = SEGMENT_SIZE - (va & SEGMENT_MASK);

	return 0;
}

void
pmap_clear_user_slb(void)
{
	struct cpu_info *ci = curcpu();

	if (ci->ci_kernel_slb[31].slb_slbe != 0) {
		isync();
		slbie(ci->ci_kernel_slb[31].slb_slbe);
		isync();
	}

	ci->ci_kernel_slb[31].slb_slbe = 0;
	ci->ci_kernel_slb[31].slb_slbv = 0;
}

void
pmap_unset_user_slb(void)
{
	curpcb->pcb_userva = 0;
	pmap_clear_user_slb();
}

/*
 * VP routines, virtual to physical translation information.
 * These data structures are based off of the pmap, per process.
 */
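
/*
 * Lookups are keyed first by segment (slb_desc), then by VP_IDX1 and
 * VP_IDX2 within the segment.  pmap_vp_lookup() and pmap_vp_remove()
 * walk this tree; pmap_vp_enter() grows it on demand.
 */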
struct pte_desc *
pmap_vp_lookup(pmap_t pm, vaddr_t va)
{
	struct slb_desc *slbd;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;

	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL)
		return NULL;

	vp1 = slbd->slbd_vp;
	if (vp1 == NULL)
		return NULL;

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL)
		return NULL;

	return vp2->vp[VP_IDX2(va)];
}

/*
 * Remove, and return, pted at specified address, NULL if not present.
 */
struct pte_desc *
pmap_vp_remove(pmap_t pm, vaddr_t va)
{
	struct slb_desc *slbd;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pte_desc *pted;

	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL)
		return NULL;

	vp1 = slbd->slbd_vp;
	if (vp1 == NULL)
		return NULL;

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL)
		return NULL;

	pted = vp2->vp[VP_IDX2(va)];
	vp2->vp[VP_IDX2(va)] = NULL;

	return pted;
}

/*
 * Create a V -> P mapping for the given pmap and virtual address
 * with reference to the pte descriptor that is used to map the page.
 * This code should track vp table allocations so they can be freed
 * efficiently.
 */
int
pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags)
{
	struct slb_desc *slbd;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;

	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL) {
		slbd = pmap_slbd_alloc(pm, va);
		if (slbd == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: unable to allocate slbd", __func__);
			return ENOMEM;
		}
	}

	vp1 = slbd->slbd_vp;
	if (vp1 == NULL) {
		vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
		if (vp1 == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: unable to allocate L1", __func__);
			return ENOMEM;
		}
		slbd->slbd_vp = vp1;
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
		if (vp2 == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: unable to allocate L2", __func__);
			return ENOMEM;
		}
		vp1->vp[VP_IDX1(va)] = vp2;
	}

	vp2->vp[VP_IDX2(va)] = pted;
	return 0;
}

void
pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg)
{
	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
	pted->pted_va |= PTED_VA_MANAGED_M;
	mtx_leave(&pg->mdpage.pv_mtx);
}

void
pmap_remove_pv(struct pte_desc *pted)
{
	struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte.pte_lo & PTE_RPGN);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_REMOVE(pted, pted_pv_list);
	mtx_leave(&pg->mdpage.pv_mtx);
}
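
/*
 * Find the valid PTE for the given VSID and virtual address, checking
 * the primary PTEG first and then the secondary one.  The PTE_WIRED
 * bit (set in pte_hi for kernel mappings) is ignored in the
 * comparison.
 */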
struct pte *
pte_lookup(uint64_t vsid, vaddr_t va)
{
	uint64_t hash, avpn, pte_hi;
	struct pte *pte;
	int idx, i;

	/* Primary hash. */
	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
	idx = (hash & pmap_ptab_mask);
	pte = pmap_ptable + (idx * 8);
	avpn = (vsid << PTE_VSID_SHIFT) |
	    (va & ADDR_PIDX) >> (ADDR_VSID_SHIFT - PTE_VSID_SHIFT);
	pte_hi = (avpn & PTE_AVPN) | PTE_VALID;

	for (i = 0; i < 8; i++) {
		if ((pte[i].pte_hi & ~PTE_WIRED) == pte_hi)
			return &pte[i];
	}

	/* Secondary hash. */
	idx ^= pmap_ptab_mask;
	pte = pmap_ptable + (idx * 8);
	pte_hi |= PTE_HID;

	for (i = 0; i < 8; i++) {
		if ((pte[i].pte_hi & ~PTE_WIRED) == pte_hi)
			return &pte[i];
	}

	return NULL;
}

/*
 * Delete a Page Table Entry, section 5.10.1.3.
 *
 * Note: hash table must be locked.
 */
void
pte_del(struct pte *pte, uint64_t ava)
{
	pte->pte_hi &= ~PTE_VALID;
	ptesync();	/* Ensure update completed. */
	tlbie(ava);	/* Invalidate old translation. */
	eieio();	/* Order tlbie before tlbsync. */
	tlbsync();	/* Ensure tlbie completed on all processors. */
	ptesync();	/* Ensure tlbsync and update completed. */
}

void
pte_zap(struct pte *pte, struct pte_desc *pted)
{
	pte_del(pte, pmap_pted2ava(pted));
}

void
pmap_fill_pte(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
    vm_prot_t prot, int cache)
{
	struct pte *pte = &pted->pted_pte;

	pted->pted_pmap = pm;
	pted->pted_va = va & ~PAGE_MASK;
	pted->pted_vsid = pmap_va2vsid(pm, va);
	KASSERT(pted->pted_vsid != 0);

	pte->pte_hi = (pmap_pted2avpn(pted) & PTE_AVPN) | PTE_VALID;
	pte->pte_lo = (pa & PTE_RPGN);

	if (pm == pmap_kernel())
		pte->pte_hi |= PTE_WIRED;

	if (prot & PROT_WRITE)
		pte->pte_lo |= PTE_RW;
	else
		pte->pte_lo |= PTE_RO;
	if (prot & PROT_EXEC)
		pted->pted_va |= PTED_VA_EXEC_M;
	else
		pte->pte_lo |= PTE_N;

	if (cache == PMAP_CACHE_WB)
		pte->pte_lo |= PTE_M;
	else
		pte->pte_lo |= (PTE_M | PTE_I | PTE_G);

	if ((prot & (PROT_READ | PROT_WRITE)) == 0)
		pte->pte_lo |= PTE_AC;
}
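
/*
 * Insert the PTE for this descriptor into the hash table: first try
 * for a free slot in the primary PTEG, then in the secondary PTEG,
 * and if both are full evict a pseudo-randomly chosen entry that is
 * not wired.
 */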
void
pte_insert(struct pte_desc *pted)
{
	struct pte *pte;
	vaddr_t va;
	uint64_t vsid, hash;
	int off, try, idx, i;
	int s;

	PMAP_HASH_LOCK(s);

	if ((pte = pmap_ptedinhash(pted)) != NULL)
		pte_zap(pte, pted);

	pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);

	va = pted->pted_va & ~PAGE_MASK;
	vsid = pted->pted_vsid;
	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
	idx = (hash & pmap_ptab_mask);

	/*
	 * Instead of starting at the beginning of each pteg,
	 * the code should pick a random location within the primary
	 * and search all of the entries, then if not yet found,
	 * do the same for the secondary.
	 * This would reduce the frontloading of the pteg.
	 */

	/* first just try fill of primary hash */
	pte = pmap_ptable + (idx * 8);
	for (i = 0; i < 8; i++) {
		if (pte[i].pte_hi & PTE_VALID)
			continue;

		pted->pted_va |= i;

		/* Add a Page Table Entry, section 5.10.1.1. */
		pte[i].pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
		pte[i].pte_lo = pted->pted_pte.pte_lo;
		eieio();	/* Order 1st PTE update before 2nd. */
		pte[i].pte_hi |= PTE_VALID;
		ptesync();	/* Ensure updates completed. */

		goto out;
	}

	/* try fill of secondary hash */
	pte = pmap_ptable + (idx ^ pmap_ptab_mask) * 8;
	for (i = 0; i < 8; i++) {
		if (pte[i].pte_hi & PTE_VALID)
			continue;

		pted->pted_va |= (i | PTED_VA_HID_M);

		/* Add a Page Table Entry, section 5.10.1.1. */
		pte[i].pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
		pte[i].pte_lo = pted->pted_pte.pte_lo;
		eieio();	/* Order 1st PTE update before 2nd. */
		pte[i].pte_hi |= (PTE_HID|PTE_VALID);
		ptesync();	/* Ensure updates completed. */

		goto out;
	}

	/* need decent replacement algorithm */
	off = mftb();

	for (try = 0; try < 16; try++) {
		pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);
		pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M);

		pte = pmap_ptable;
		pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8;
		pte += PTED_PTEGIDX(pted); /* increment by index into pteg */

		if ((pte->pte_hi & PTE_WIRED) == 0)
			break;

		off++;
	}
	/*
	 * Since we only wire unmanaged kernel mappings, we should
	 * always find a slot that we can replace.
	 */
	KASSERT(try < 16);

	if (pte->pte_hi & PTE_VALID) {
		uint64_t avpn, vpn;

		avpn = pte->pte_hi & PTE_AVPN;
		vsid = avpn >> PTE_VSID_SHIFT;
		vpn = avpn << (ADDR_VSID_SHIFT - PTE_VSID_SHIFT - PAGE_SHIFT);

		idx ^= (PTED_HID(pted) ? pmap_ptab_mask : 0);
		idx ^= ((pte->pte_hi & PTE_HID) ? pmap_ptab_mask : 0);
		vpn |= ((idx ^ vsid) & (ADDR_PIDX >> ADDR_PIDX_SHIFT));

		pte_del(pte, vpn << PAGE_SHIFT);
	}

	/* Add a Page Table Entry, section 5.10.1.1. */
	pte->pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
	if (PTED_HID(pted))
		pte->pte_hi |= PTE_HID;
	pte->pte_lo = pted->pted_pte.pte_lo;
	eieio();	/* Order 1st PTE update before 2nd. */
	pte->pte_hi |= PTE_VALID;
	ptesync();	/* Ensure updates completed. */

out:
	PMAP_HASH_UNLOCK(s);
}

void
pmap_remove_pted(pmap_t pm, struct pte_desc *pted)
{
	struct pte *pte;
	int s;

	KASSERT(pm == pted->pted_pmap);
	PMAP_VP_ASSERT_LOCKED(pm);

	pm->pm_stats.resident_count--;

	if (PTED_WIRED(pted)) {
		pm->pm_stats.wired_count--;
		pted->pted_va &= ~PTED_VA_WIRED_M;
	}

	PMAP_HASH_LOCK(s);
	if ((pte = pmap_ptedinhash(pted)) != NULL)
		pte_zap(pte, pted);
	PMAP_HASH_UNLOCK(s);

	pted->pted_va &= ~PTED_VA_EXEC_M;
	pted->pted_pte.pte_hi &= ~PTE_VALID;

	if (PTED_MANAGED(pted))
		pmap_remove_pv(pted);

	pmap_vp_remove(pm, pted->pted_va);
	pool_put(&pmap_pted_pool, pted);
}

extern struct fdt_reg memreg[];
extern int nmemreg;

#ifdef DDB
extern struct fdt_reg initrd_reg;
#endif

void memreg_add(const struct fdt_reg *);
void memreg_remove(const struct fdt_reg *);

vaddr_t vmmap;
vaddr_t zero_page;
vaddr_t copy_src_page;
vaddr_t copy_dst_page;
vaddr_t virtual_avail = VM_MIN_KERNEL_ADDRESS;
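
/*
 * Steal physically contiguous memory from the FDT memory regions
 * during bootstrap, before uvm is available.  The stolen range is
 * removed from memreg[] so it never ends up in the free page pool.
 */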
void *
pmap_steal_avail(size_t size, size_t align)
{
	struct fdt_reg reg;
	uint64_t start, end;
	int i;

	for (i = 0; i < nmemreg; i++) {
		if (memreg[i].size > size) {
			start = (memreg[i].addr + (align - 1)) & ~(align - 1);
			end = start + size;
			if (end <= memreg[i].addr + memreg[i].size) {
				reg.addr = start;
				reg.size = end - start;
				memreg_remove(&reg);
				return (void *)start;
			}
		}
	}
	panic("can't allocate");
}

void
pmap_virtual_space(vaddr_t *start, vaddr_t *end)
{
	*start = virtual_avail;
	*end = VM_MAX_KERNEL_ADDRESS;
}

pmap_t
pmap_create(void)
{
	pmap_t pm;

	pm = pool_get(&pmap_pmap_pool, PR_WAITOK | PR_ZERO);
	pm->pm_refs = 1;
	PMAP_VP_LOCK_INIT(pm);
	LIST_INIT(&pm->pm_slbd);
	return pm;
}

/*
 * Add a reference to a given pmap.
 */
void
pmap_reference(pmap_t pm)
{
	atomic_inc_int(&pm->pm_refs);
}

/*
 * Retire the given pmap from service.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_destroy(pmap_t pm)
{
	int refs;

	refs = atomic_dec_int_nv(&pm->pm_refs);
	if (refs > 0)
		return;

	/*
	 * reference count is zero, free pmap resources and free pmap.
	 */
	pmap_release(pm);
	pool_put(&pmap_pmap_pool, pm);
}

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 */
void
pmap_release(pmap_t pm)
{
	pmap_vp_destroy(pm);
}

void
pmap_vp_destroy(pmap_t pm)
{
	struct slb_desc *slbd;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pte_desc *pted;
	int i, j;

	while ((slbd = LIST_FIRST(&pm->pm_slbd))) {
		vp1 = slbd->slbd_vp;
		if (vp1) {
			for (i = 0; i < VP_IDX1_CNT; i++) {
				vp2 = vp1->vp[i];
				if (vp2 == NULL)
					continue;

				for (j = 0; j < VP_IDX2_CNT; j++) {
					pted = vp2->vp[j];
					if (pted == NULL)
						continue;

					pool_put(&pmap_pted_pool, pted);
				}
				pool_put(&pmap_vp_pool, vp2);
			}
			pool_put(&pmap_vp_pool, vp1);
		}

		LIST_REMOVE(slbd, slbd_list);
		pmap_free_vsid(slbd->slbd_vsid);
		pool_put(&pmap_slbd_pool, slbd);
	}
}

void
pmap_init(void)
{
	int i;

	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_VM, 0,
	    "pmap", &pool_allocator_single);
	pool_setlowat(&pmap_pmap_pool, 2);
	pool_init(&pmap_vp_pool, sizeof(struct pmapvp1), 0, IPL_VM, 0,
	    "vp", &pool_allocator_single);
	pool_setlowat(&pmap_vp_pool, 10);
	pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
	    "pted", NULL);
	pool_setlowat(&pmap_pted_pool, 20);
	pool_init(&pmap_slbd_pool, sizeof(struct slb_desc), 0, IPL_VM, 0,
	    "slbd", NULL);
	pool_setlowat(&pmap_slbd_pool, 5);

	LIST_INIT(&pmap_kernel()->pm_slbd);
	for (i = 0; i < nitems(kernel_slb_desc); i++) {
		LIST_INSERT_HEAD(&pmap_kernel()->pm_slbd,
		    &kernel_slb_desc[i], slbd_list);
	}

	pmap_initialized = 1;
}
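
/*
 * Enter a mapping from 'va' to 'pa' into the given pmap.  Any valid
 * mapping already present at 'va' is removed first.  The descriptor
 * is recorded in the VP tree, the PTE is inserted into the hash
 * table, and the instruction cache is synced when a new executable
 * mapping requires it.
 */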
int
pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
{
	struct pte_desc *pted;
	struct vm_page *pg;
	int cache = PMAP_CACHE_WB;
	int need_sync = 0;
	int error = 0;

	if (pa & PMAP_NOCACHE)
		cache = PMAP_CACHE_CI;
	pg = PHYS_TO_VM_PAGE(pa);
	if (!pmap_initialized)
		printf("%s\n", __func__);

	PMAP_VP_LOCK(pm);
	pted = pmap_vp_lookup(pm, va);
	if (pted && PTED_VALID(pted)) {
		pmap_remove_pted(pm, pted);
		pted = NULL;
	}

	pm->pm_stats.resident_count++;

	/* Do not have pted for this, get one and put it in VP */
	if (pted == NULL) {
		pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO);
		if (pted == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: failed to allocate pted", __func__);
			error = ENOMEM;
			goto out;
		}
		if (pmap_vp_enter(pm, va, pted, flags)) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: failed to allocate L2/L3", __func__);
			error = ENOMEM;
			pool_put(&pmap_pted_pool, pted);
			goto out;
		}
	}

	if ((flags & PROT_WRITE) == 0)
		prot &= ~PROT_WRITE;

	pmap_fill_pte(pm, va, pa, pted, prot, cache);
	if (flags & PMAP_WIRED) {
		pted->pted_va |= PTED_VA_WIRED_M;
		pm->pm_stats.wired_count++;
	}

	if (pg != NULL) {
		pmap_enter_pv(pted, pg); /* only managed mem */

		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
		if (flags & PROT_WRITE)
			atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD);

		if ((pg->pg_flags & PG_DEV) == 0 && cache != PMAP_CACHE_WB)
			pmap_flush_page(pg);
	}

	pte_insert(pted);

	if (prot & PROT_EXEC) {
		if (pg != NULL) {
			need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
			if (prot & PROT_WRITE)
				atomic_clearbits_int(&pg->pg_flags,
				    PG_PMAP_EXE);
			else
				atomic_setbits_int(&pg->pg_flags,
				    PG_PMAP_EXE);
		} else
			need_sync = 1;
	} else {
		/*
		 * Should we be paranoid about writeable non-exec
		 * mappings ? if so, clear the exec tag
		 */
		if ((prot & PROT_WRITE) && (pg != NULL))
			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
	}

	if (need_sync)
		pmap_pted_syncicache(pted);

out:
	PMAP_VP_UNLOCK(pm);
	return error;
}

void
pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
{
	struct pte_desc *pted;
	vaddr_t va;

	PMAP_VP_LOCK(pm);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		pted = pmap_vp_lookup(pm, va);
		if (pted && PTED_VALID(pted))
			pmap_remove_pted(pm, pted);
	}
	PMAP_VP_UNLOCK(pm);
}

void
pmap_pted_syncicache(struct pte_desc *pted)
{
	paddr_t pa = pted->pted_pte.pte_lo & PTE_RPGN;
	vaddr_t va = pted->pted_va & ~PAGE_MASK;

	if (pted->pted_pmap != pmap_kernel()) {
		va = zero_page + cpu_number() * PAGE_SIZE;
		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	}

	__syncicache((void *)va, PAGE_SIZE);

	if (pted->pted_pmap != pmap_kernel())
		pmap_kremove(va, PAGE_SIZE);
}
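
/*
 * Downgrade an existing mapping to read-only (and to no-execute if
 * PROT_EXEC is not requested), updating both the cached PTE in the
 * descriptor and, if present, its copy in the hash table.
 */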
void
pmap_pted_ro(struct pte_desc *pted, vm_prot_t prot)
{
	struct vm_page *pg;
	struct pte *pte;
	int s;

	pg = PHYS_TO_VM_PAGE(pted->pted_pte.pte_lo & PTE_RPGN);
	if (pg->pg_flags & PG_PMAP_EXE) {
		if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE)
			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
		else
			pmap_pted_syncicache(pted);
	}

	pted->pted_pte.pte_lo &= ~PTE_PP;
	pted->pted_pte.pte_lo |= PTE_RO;

	if ((prot & PROT_EXEC) == 0)
		pted->pted_pte.pte_lo |= PTE_N;

	if ((prot & (PROT_READ | PROT_WRITE)) == 0)
		pted->pted_pte.pte_lo |= PTE_AC;

	PMAP_HASH_LOCK(s);
	if ((pte = pmap_ptedinhash(pted)) != NULL) {
		pte_del(pte, pmap_pted2ava(pted));

		/* Add a Page Table Entry, section 5.10.1.1. */
		pte->pte_lo = pted->pted_pte.pte_lo;
		eieio();	/* Order 1st PTE update before 2nd. */
		pte->pte_hi |= PTE_VALID;
		ptesync();	/* Ensure updates completed. */
	}
	PMAP_HASH_UNLOCK(s);
}

/*
 * Lower the protection on the specified physical page.
 *
 * There are only two cases, either the protection is going to 0,
 * or it is going to read-only.
 */
void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
	struct pte_desc *pted;
	void *pte;
	pmap_t pm;
	int s;

	if (prot == PROT_NONE) {
		mtx_enter(&pg->mdpage.pv_mtx);
		while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
			pmap_reference(pted->pted_pmap);
			pm = pted->pted_pmap;
			mtx_leave(&pg->mdpage.pv_mtx);

			PMAP_VP_LOCK(pm);

			/*
			 * We dropped the pvlist lock before grabbing
			 * the pmap lock to avoid lock ordering
			 * problems.  This means we have to check the
			 * pvlist again since somebody else might have
			 * modified it.  All we care about is that the
			 * pvlist entry matches the pmap we just
			 * locked.  If it doesn't, unlock the pmap and
			 * try again.
			 */
			mtx_enter(&pg->mdpage.pv_mtx);
			if ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) == NULL ||
			    pted->pted_pmap != pm) {
				mtx_leave(&pg->mdpage.pv_mtx);
				PMAP_VP_UNLOCK(pm);
				pmap_destroy(pm);
				mtx_enter(&pg->mdpage.pv_mtx);
				continue;
			}

			PMAP_HASH_LOCK(s);
			if ((pte = pmap_ptedinhash(pted)) != NULL)
				pte_zap(pte, pted);
			PMAP_HASH_UNLOCK(s);

			pted->pted_va &= ~PTED_VA_MANAGED_M;
			LIST_REMOVE(pted, pted_pv_list);
			mtx_leave(&pg->mdpage.pv_mtx);

			pmap_remove_pted(pm, pted);

			PMAP_VP_UNLOCK(pm);
			pmap_destroy(pm);
			mtx_enter(&pg->mdpage.pv_mtx);
		}
		mtx_leave(&pg->mdpage.pv_mtx);
		/* page is being reclaimed, sync icache next use */
		atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
		return;
	}

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list)
		pmap_pted_ro(pted, prot);
	mtx_leave(&pg->mdpage.pv_mtx);
}

void
pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
	if (prot & (PROT_READ | PROT_EXEC)) {
		struct pte_desc *pted;

		PMAP_VP_LOCK(pm);
		while (sva < eva) {
			pted = pmap_vp_lookup(pm, sva);
			if (pted && PTED_VALID(pted))
				pmap_pted_ro(pted, prot);
			sva += PAGE_SIZE;
		}
		PMAP_VP_UNLOCK(pm);
		return;
	}
	pmap_remove(pm, sva, eva);
}

void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
{
	pmap_t pm = pmap_kernel();
	struct pte_desc pted;
	struct vm_page *pg;
	int cache = (pa & PMAP_NOCACHE) ? PMAP_CACHE_CI : PMAP_CACHE_WB;

	pm->pm_stats.resident_count++;

	if (prot & PROT_WRITE) {
		pg = PHYS_TO_VM_PAGE(pa);
		if (pg != NULL)
			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
	}

	/* Calculate PTE */
	pmap_fill_pte(pm, va, pa, &pted, prot, cache);
	pted.pted_pte.pte_hi |= PTE_WIRED;

	/* Insert into HTAB */
	pte_insert(&pted);
}

void
pmap_kremove(vaddr_t va, vsize_t len)
{
	pmap_t pm = pmap_kernel();
	vaddr_t eva = va + len;
	struct pte *pte;
	uint64_t vsid;
	int s;

	while (va < eva) {
		vsid = pmap_kernel_vsid(va >> ADDR_ESID_SHIFT);

		PMAP_HASH_LOCK(s);
		pte = pte_lookup(vsid, va);
		if (pte)
			pte_del(pte, pmap_ava(vsid, va));
		PMAP_HASH_UNLOCK(s);

		if (pte)
			pm->pm_stats.resident_count--;

		va += PAGE_SIZE;
	}
}

int
pmap_is_referenced(struct vm_page *pg)
{
	return ((pg->pg_flags & PG_PMAP_REF) != 0);
}

int
pmap_is_modified(struct vm_page *pg)
{
	return ((pg->pg_flags & PG_PMAP_MOD) != 0);
}

int
pmap_clear_reference(struct vm_page *pg)
{
	struct pte_desc *pted;
	int s;

	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
		struct pte *pte;

		PMAP_HASH_LOCK(s);
		if ((pte = pmap_ptedinhash(pted)) != NULL)
			pte_zap(pte, pted);
		PMAP_HASH_UNLOCK(s);
	}
	mtx_leave(&pg->mdpage.pv_mtx);

	return 0;
}

int
pmap_clear_modify(struct vm_page *pg)
{
	struct pte_desc *pted;
	int s;

	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
		struct pte *pte;

		pted->pted_pte.pte_lo &= ~PTE_PP;
		pted->pted_pte.pte_lo |= PTE_RO;

		PMAP_HASH_LOCK(s);
		if ((pte = pmap_ptedinhash(pted)) != NULL) {
			pte_zap(pte, pted);

			/* Add a Page Table Entry, section 5.10.1.1. */
			pte->pte_lo = pted->pted_pte.pte_lo;
			eieio();	/* Order 1st PTE update before 2nd. */
			pte->pte_hi |= PTE_VALID;
			ptesync();	/* Ensure updates completed. */
		}
		PMAP_HASH_UNLOCK(s);
	}
	mtx_leave(&pg->mdpage.pv_mtx);

	return 0;
}

int
pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pa)
{
	struct pte *pte;
	uint64_t vsid;
	int s;

	if (pm == pmap_kernel() &&
	    va >= (vaddr_t)_start && va < (vaddr_t)_end) {
		*pa = va;
		return 1;
	}

	PMAP_VP_LOCK(pm);
	vsid = pmap_va2vsid(pm, va);
	PMAP_VP_UNLOCK(pm);
	if (vsid == 0)
		return 0;

	PMAP_HASH_LOCK(s);
	pte = pte_lookup(vsid, va);
	if (pte)
		*pa = (pte->pte_lo & PTE_RPGN) | (va & PAGE_MASK);
	PMAP_HASH_UNLOCK(s);

	return (pte != NULL);
}

void
pmap_activate(struct proc *p)
{
}

void
pmap_deactivate(struct proc *p)
{
}

void
pmap_unwire(pmap_t pm, vaddr_t va)
{
	struct pte_desc *pted;

	PMAP_VP_LOCK(pm);
	pted = pmap_vp_lookup(pm, va);
	if (pted && PTED_WIRED(pted)) {
		pm->pm_stats.wired_count--;
		pted->pted_va &= ~PTED_VA_WIRED_M;
	}
	PMAP_VP_UNLOCK(pm);
}

void
pmap_zero_page(struct vm_page *pg)
{
	paddr_t pa = VM_PAGE_TO_PHYS(pg);
	paddr_t va = zero_page + cpu_number() * PAGE_SIZE;
	int offset;

	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	for (offset = 0; offset < PAGE_SIZE; offset += cacheline_size)
		__asm volatile ("dcbz 0, %0" :: "r"(va + offset));
	pmap_kremove(va, PAGE_SIZE);
}

void
pmap_flush_page(struct vm_page *pg)
{
	paddr_t pa = VM_PAGE_TO_PHYS(pg);
	paddr_t va = zero_page + cpu_number() * PAGE_SIZE;
	int offset;

	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	for (offset = 0; offset < PAGE_SIZE; offset += cacheline_size)
		__asm volatile ("dcbf 0, %0" :: "r"(va + offset));
	pmap_kremove(va, PAGE_SIZE);
}

void
pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
{
	paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
	paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
	vaddr_t srcva = copy_src_page + cpu_number() * PAGE_SIZE;
	vaddr_t dstva = copy_dst_page + cpu_number() * PAGE_SIZE;

	pmap_kenter_pa(srcva, srcpa, PROT_READ);
	pmap_kenter_pa(dstva, dstpa, PROT_READ | PROT_WRITE);
	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
	pmap_kremove(srcva, PAGE_SIZE);
	pmap_kremove(dstva, PAGE_SIZE);
}

void
pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
{
	paddr_t pa;
	vaddr_t cva;
	vsize_t clen;

	while (len > 0) {
		/* add one to always round up to the next page */
		clen = round_page(va + 1) - va;
		if (clen > len)
			clen = len;

		if (pmap_extract(pr->ps_vmspace->vm_map.pmap, va, &pa)) {
			cva = zero_page + cpu_number() * PAGE_SIZE;
			pmap_kenter_pa(cva, pa, PROT_READ | PROT_WRITE);
			__syncicache((void *)cva, clen);
			pmap_kremove(cva, PAGE_SIZE);
		}

		len -= clen;
		va += clen;
	}
}
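
/*
 * Record a kernel ESID/VSID pair in kernel_slb_desc[] during
 * bootstrap so that pmap_bootstrap_cpu() can load it into each
 * CPU's SLB.
 */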
void
pmap_set_kernel_slb(vaddr_t va)
{
	uint64_t esid;
	int idx;

	esid = va >> ADDR_ESID_SHIFT;

	for (idx = 0; idx < nitems(kernel_slb_desc); idx++) {
		if (kernel_slb_desc[idx].slbd_vsid == 0)
			break;
		if (kernel_slb_desc[idx].slbd_esid == esid)
			return;
	}
	KASSERT(idx < nitems(kernel_slb_desc));

	kernel_slb_desc[idx].slbd_esid = esid;
	kernel_slb_desc[idx].slbd_vsid = pmap_kernel_vsid(esid);
}

/*
 * Handle SLB entry spills for the kernel.  This function runs without
 * belt and suspenders in real-mode on a small per-CPU stack.
 */
void
pmap_spill_kernel_slb(vaddr_t va)
{
	struct cpu_info *ci = curcpu();
	uint64_t esid;
	uint64_t slbe, slbv;
	int idx;

	esid = va >> ADDR_ESID_SHIFT;

	for (idx = 0; idx < 31; idx++) {
		if (ci->ci_kernel_slb[idx].slb_slbe == 0)
			break;
		slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
		if (ci->ci_kernel_slb[idx].slb_slbe == slbe)
			return;
	}

	/*
	 * If no free slot was found, randomly replace an entry in
	 * slot 15-30.
	 */
	if (idx == 31)
		idx = 15 + mftb() % 16;

	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
	slbv = pmap_kernel_vsid(esid) << SLBV_VSID_SHIFT;

	ci->ci_kernel_slb[idx].slb_slbe = slbe;
	ci->ci_kernel_slb[idx].slb_slbv = slbv;
}

void
pmap_bootstrap_cpu(void)
{
	struct cpu_info *ci = curcpu();
	uint64_t esid, vsid;
	uint64_t slbe, slbv;
	int idx;

	/* Clear SLB. */
	slbia();
	slbie(slbmfee(0));

	/* Clear TLB. */
	tlbia();

	if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
		/* Set partition table. */
		mtptcr((paddr_t)pmap_pat | PATSIZE);
	} else {
		/* Set page table. */
		mtsdr1((paddr_t)pmap_ptable | HTABSIZE);
	}

	/* Load SLB. */
	for (idx = 0; idx < 31; idx++) {
		if (kernel_slb_desc[idx].slbd_vsid == 0)
			break;

		esid = kernel_slb_desc[idx].slbd_esid;
		vsid = kernel_slb_desc[idx].slbd_vsid;

		slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
		slbv = vsid << SLBV_VSID_SHIFT;
		slbmte(slbv, slbe);

		ci->ci_kernel_slb[idx].slb_slbe = slbe;
		ci->ci_kernel_slb[idx].slb_slbv = slbv;
	}
}
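
/*
 * Bootstrap the pmap: size the hashed page table relative to physical
 * memory (capped so that it fits in 8 SLB segments), map the kernel
 * and the page tables 1:1, record the kernel SLB descriptors, set up
 * the partition table, and reserve VSID 0 and VSID_VRMA.
 */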
void
pmap_bootstrap(void)
{
	paddr_t start, end, pa;
	vm_prot_t prot;
	vaddr_t va;

#define HTABENTS 2048

	pmap_ptab_cnt = HTABENTS;
	while (pmap_ptab_cnt * 2 < physmem)
		pmap_ptab_cnt <<= 1;

	/* Make sure the page tables don't use more than 8 SLB entries. */
	while (HTABMEMSZ > 8 * SEGMENT_SIZE)
		pmap_ptab_cnt >>= 1;

	/*
	 * allocate suitably aligned memory for HTAB
	 */
	pmap_ptable = pmap_steal_avail(HTABMEMSZ, HTABMEMSZ);
	memset(pmap_ptable, 0, HTABMEMSZ);
	pmap_ptab_mask = pmap_ptab_cnt - 1;

	/* Map page tables. */
	start = (paddr_t)pmap_ptable;
	end = start + HTABMEMSZ;
	for (pa = start; pa < end; pa += PAGE_SIZE)
		pmap_kenter_pa(pa, pa, PROT_READ | PROT_WRITE);

	/* Map kernel. */
	start = (paddr_t)_start;
	end = (paddr_t)_end;
	for (pa = start; pa < end; pa += PAGE_SIZE) {
		if (pa < (paddr_t)_etext)
			prot = PROT_READ | PROT_EXEC;
		else if (pa < (paddr_t)_erodata)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;
		pmap_kenter_pa(pa, pa, prot);
	}

#ifdef DDB
	/* Map initrd. */
	start = initrd_reg.addr;
	end = initrd_reg.addr + initrd_reg.size;
	for (pa = start; pa < end; pa += PAGE_SIZE)
		pmap_kenter_pa(pa, pa, PROT_READ | PROT_WRITE);
#endif

	/* Allocate partition table. */
	pmap_pat = pmap_steal_avail(PATMEMSZ, PATMEMSZ);
	memset(pmap_pat, 0, PATMEMSZ);
	pmap_pat[0].pate_htab = (paddr_t)pmap_ptable | HTABSIZE;

	/* SLB entry for the kernel. */
	pmap_set_kernel_slb((vaddr_t)_start);

	/* SLB entries for the page tables. */
	for (va = (vaddr_t)pmap_ptable; va < (vaddr_t)pmap_ptable + HTABMEMSZ;
	    va += SEGMENT_SIZE)
		pmap_set_kernel_slb(va);

	/* SLB entries for kernel VA. */
	for (va = VM_MIN_KERNEL_ADDRESS; va < VM_MAX_KERNEL_ADDRESS;
	    va += SEGMENT_SIZE)
		pmap_set_kernel_slb(va);

	pmap_bootstrap_cpu();

	pmap_vsid[0] |= (1U << 0);
#if VSID_VRMA < NUM_VSID
	pmap_vsid[VSID_VRMA / 32] |= (1U << (VSID_VRMA % 32));
#endif

	vmmap = virtual_avail;
	virtual_avail += PAGE_SIZE;
	zero_page = virtual_avail;
	virtual_avail += MAXCPUS * PAGE_SIZE;
	copy_src_page = virtual_avail;
	virtual_avail += MAXCPUS * PAGE_SIZE;
	copy_dst_page = virtual_avail;
	virtual_avail += MAXCPUS * PAGE_SIZE;
}

#ifdef DDB
/*
 * DDB will edit the PTE to gain temporary write access to a page in
 * the read-only kernel text.
 */
struct pte *
pmap_get_kernel_pte(vaddr_t va)
{
	uint64_t vsid;

	vsid = pmap_kernel_vsid(va >> ADDR_ESID_SHIFT);
	return pte_lookup(vsid, va);
}
#endif