1 /* $OpenBSD: pmap.c,v 1.107 2025/01/25 12:29:35 kettenis Exp $ */ 2 /* 3 * Copyright (c) 2008-2009,2014-2016 Dale Rahn <drahn@dalerahn.com> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/param.h> 19 #include <sys/systm.h> 20 #include <sys/atomic.h> 21 #include <sys/pool.h> 22 #include <sys/proc.h> 23 24 #include <uvm/uvm.h> 25 26 #include <machine/cpufunc.h> 27 #include <machine/pmap.h> 28 29 #include <machine/db_machdep.h> 30 #include <ddb/db_extern.h> 31 #include <ddb/db_output.h> 32 33 void pmap_setttb(struct proc *p); 34 void pmap_allocate_asid(pmap_t); 35 void pmap_free_asid(pmap_t pm); 36 37 /* We run userland code with ASIDs that have the low bit set. 
 */
#define ASID_USER	1

/*
 * Flush the TLB entry for a single page mapping.  The TLBI operand
 * carries the virtual page number in its low 44 bits and the ASID in
 * the top 16 bits.  Kernel mappings are flushed for all ASIDs; user
 * mappings are flushed for both ASIDs of the pmap's ASID pair (the
 * kernel-side ASID and its ASID_USER companion).
 */
static inline void
ttlb_flush(pmap_t pm, vaddr_t va)
{
	vaddr_t resva;

	resva = ((va >> PAGE_SHIFT) & ((1ULL << 44) - 1));
	if (pm == pmap_kernel()) {
		cpu_tlb_flush_all_asid(resva);
	} else {
		resva |= (uint64_t)pm->pm_asid << 48;
		cpu_tlb_flush_asid(resva);
		resva |= (uint64_t)ASID_USER << 48;
		cpu_tlb_flush_asid(resva);
	}
}

struct pmap kernel_pmap_;
struct pmap pmap_tramp;

LIST_HEAD(pted_pv_head, pte_desc);

/*
 * One pte_desc exists per mapped page.  It is stored in the owning
 * pmap's VP (shadow) table and, for managed pages, linked onto the
 * physical page's PV list.
 */
struct pte_desc {
	LIST_ENTRY(pte_desc) pted_pv_list;	/* PV list linkage */
	uint64_t pted_pte;			/* PTE contents */
	pmap_t pted_pmap;			/* owning pmap */
	vaddr_t pted_va;			/* VA plus flag bits */
};

/*
 * Software shadow of the hardware translation-table levels.  Each
 * level pairs the hardware table entries (l0/l1/l2/l3) with an array
 * of pointers to the next-level shadow structures (or, at L3, to the
 * pte descriptors).
 */
struct pmapvp0 {
	uint64_t l0[VP_IDX0_CNT];
	struct pmapvp1 *vp[VP_IDX0_CNT];
};

struct pmapvp1 {
	uint64_t l1[VP_IDX1_CNT];
	struct pmapvp2 *vp[VP_IDX1_CNT];
};

struct pmapvp2 {
	uint64_t l2[VP_IDX2_CNT];
	struct pmapvp3 *vp[VP_IDX2_CNT];
};

struct pmapvp3 {
	uint64_t l3[VP_IDX3_CNT];
	struct pte_desc *vp[VP_IDX3_CNT];
};
/* All levels come from the same pool, so they must be the same size. */
CTASSERT(sizeof(struct pmapvp0) == sizeof(struct pmapvp1));
CTASSERT(sizeof(struct pmapvp0) == sizeof(struct pmapvp2));
CTASSERT(sizeof(struct pmapvp0) == sizeof(struct pmapvp3));

void pmap_vp_destroy(pmap_t pm);

/* Allocator for VP pool. */
void *pmap_vp_page_alloc(struct pool *, int, int *);
void pmap_vp_page_free(struct pool *, void *);

struct pool_allocator pmap_vp_allocator = {
	pmap_vp_page_alloc, pmap_vp_page_free, sizeof(struct pmapvp0)
};

void pmap_remove_pted(pmap_t, struct pte_desc *);
void pmap_kremove_pg(vaddr_t);
void pmap_set_l1(struct pmap *, uint64_t, struct pmapvp1 *);
void pmap_set_l2(struct pmap *, uint64_t, struct pmapvp1 *,
    struct pmapvp2 *);
void pmap_set_l3(struct pmap *, uint64_t, struct pmapvp2 *,
    struct pmapvp3 *);

void pmap_fill_pte(pmap_t, vaddr_t, paddr_t, struct pte_desc *,
    vm_prot_t, int, int);
void pmap_icache_sync_page(struct pmap *, paddr_t);
void pmap_pte_insert(struct pte_desc *);
void pmap_pte_remove(struct pte_desc *, int);
void pmap_pte_update(struct pte_desc *, uint64_t *);
void pmap_release(pmap_t);
paddr_t pmap_steal_avail(size_t, int, void **);
void pmap_remove_avail(paddr_t, paddr_t);
vaddr_t pmap_map_stolen(vaddr_t);

/* Scratch VAs used by pmap_zero_page()/pmap_copy_page() (per-CPU slots). */
vaddr_t vmmap;
vaddr_t zero_page;
vaddr_t copy_src_page;
vaddr_t copy_dst_page;

struct pool pmap_pmap_pool;
struct pool pmap_pted_pool;
struct pool pmap_vp_pool;

int pmap_initialized = 0;

/* Bootstrap-time physical memory accounting. */
struct mem_region {
	vaddr_t start;
	vsize_t size;
};

struct mem_region pmap_avail_regions[10];
struct mem_region pmap_allocated_regions[10];
struct mem_region *pmap_avail = &pmap_avail_regions[0];
struct mem_region *pmap_allocated = &pmap_allocated_regions[0];
int pmap_cnt_avail, pmap_cnt_allocated;
uint64_t pmap_avail_kvo;

/* The kernel pmap is never locked; all other pmaps use pm_mtx. */
static inline void
pmap_lock(struct pmap *pmap)
{
	if (pmap != pmap_kernel())
		mtx_enter(&pmap->pm_mtx);
}

static inline void
pmap_unlock(struct pmap *pmap)
{
	if (pmap != pmap_kernel())
		mtx_leave(&pmap->pm_mtx);
}

#define PMAP_ASSERT_LOCKED(pmap) \
	if ((pmap) != pmap_kernel()) \
		MUTEX_ASSERT_LOCKED(&(pmap)->pm_mtx);

/* virtual to physical helpers */
static inline int
VP_IDX0(vaddr_t va)
{
	return (va >> VP_IDX0_POS) & VP_IDX0_MASK;
}

static inline int
VP_IDX1(vaddr_t va)
{
	return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
}

static inline int
VP_IDX2(vaddr_t va)
{
	return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
}

static inline int
VP_IDX3(vaddr_t va)
{
	return (va >> VP_IDX3_POS) & VP_IDX3_MASK;
}

/* PTE attribute bits (AP/PXN/UXN/AF) per protection, user mappings. */
const uint64_t ap_bits_user[8] = {
	[PROT_NONE]				= 0,
	[PROT_READ]				= ATTR_PXN|ATTR_UXN|ATTR_AF|ATTR_AP(3),
	[PROT_WRITE]				= ATTR_PXN|ATTR_UXN|ATTR_AF|ATTR_AP(1),
	[PROT_WRITE|PROT_READ]			= ATTR_PXN|ATTR_UXN|ATTR_AF|ATTR_AP(1),
	[PROT_EXEC]				= ATTR_PXN|ATTR_AF|ATTR_AP(2),
	[PROT_EXEC|PROT_READ]			= ATTR_PXN|ATTR_AF|ATTR_AP(3),
	[PROT_EXEC|PROT_WRITE]			= ATTR_PXN|ATTR_AF|ATTR_AP(1),
	[PROT_EXEC|PROT_WRITE|PROT_READ]	= ATTR_PXN|ATTR_AF|ATTR_AP(1),
};

/* PTE attribute bits per protection, kernel mappings. */
const uint64_t ap_bits_kern[8] = {
	[PROT_NONE]				= 0,
	[PROT_READ]				= ATTR_PXN|ATTR_UXN|ATTR_AF|ATTR_AP(2),
	[PROT_WRITE]				= ATTR_PXN|ATTR_UXN|ATTR_AF|ATTR_AP(0),
	[PROT_WRITE|PROT_READ]			= ATTR_PXN|ATTR_UXN|ATTR_AF|ATTR_AP(0),
	[PROT_EXEC]				= ATTR_UXN|ATTR_AF|ATTR_AP(2),
	[PROT_EXEC|PROT_READ]			= ATTR_UXN|ATTR_AF|ATTR_AP(2),
	[PROT_EXEC|PROT_WRITE]			= ATTR_UXN|ATTR_AF|ATTR_AP(0),
	[PROT_EXEC|PROT_WRITE|PROT_READ]	= ATTR_UXN|ATTR_AF|ATTR_AP(0),
};

/*
 * We allocate ASIDs in pairs.  The first ASID is used to run the
 * kernel and has both userland and the full kernel mapped.  The
 * second ASID is used for running userland and has only the
 * trampoline page mapped in addition to userland.
 */

#define PMAP_MAX_NASID	(1 << 16)
#define PMAP_ASID_MASK	(PMAP_MAX_NASID - 1)
int pmap_nasid = (1 << 8);

/*
 * In-use bitmap; each pmap consumes a pair of adjacent bits (kernel
 * ASID and its userland companion).  pmap_asid_gen tags pm_asid
 * values with the rollover generation they were allocated in.
 */
uint32_t pmap_asid[PMAP_MAX_NASID / 32];
unsigned long pmap_asid_gen = PMAP_MAX_NASID;
struct mutex pmap_asid_mtx = MUTEX_INITIALIZER(IPL_HIGH);

/*
 * Find a free (even) ASID for the given pmap.  Returns -1 if all
 * ASIDs are in use.  Must be called with pmap_asid_mtx held.
 */
int
pmap_find_asid(pmap_t pm)
{
	uint32_t bits;
	int asid, bit;
	int retry;

	MUTEX_ASSERT_LOCKED(&pmap_asid_mtx);

	/* Attempt to re-use the old ASID. */
	asid = pm->pm_asid & PMAP_ASID_MASK;
	bit = asid & (32 - 1);
	bits = pmap_asid[asid / 32];
	if ((bits & (3U << bit)) == 0)
		return asid;

	/* Attempt to obtain a random ASID. */
	for (retry = 5; retry > 0; retry--) {
		/* Masking with pmap_nasid - 2 keeps the ASID even. */
		asid = arc4random() & (pmap_nasid - 2);
		bit = (asid & (32 - 1));
		bits = pmap_asid[asid / 32];
		if ((bits & (3U << bit)) == 0)
			return asid;
	}

	/* Do a linear search if that fails. */
	for (asid = 0; asid < pmap_nasid; asid += 32) {
		bits = pmap_asid[asid / 32];
		if (bits == ~0)
			continue;
		for (bit = 0; bit < 32; bit += 2) {
			if ((bits & (3U << bit)) == 0)
				return asid + bit;
		}
	}

	return -1;
}

/*
 * All ASIDs were in use: start a new generation, carry over only the
 * ASIDs currently active on some CPU, and flush all TLBs.  Returns
 * this pmap's ASID if it was carried over, otherwise the result of a
 * fresh pmap_find_asid().  Must be called with pmap_asid_mtx held.
 */
int
pmap_rollover_asid(pmap_t pm)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	unsigned long gen;
	int asid, bit;

	MUTEX_ASSERT_LOCKED(&pmap_asid_mtx);

	/* Start a new generation.  Mark ASID 0 as in-use again. */
	gen = atomic_add_long_nv(&pmap_asid_gen, PMAP_MAX_NASID);
	memset(pmap_asid, 0, (pmap_nasid / 32) * sizeof(uint32_t));
	pmap_asid[0] |= (3U << 0);

	/*
	 * Carry over all the ASIDs that are currently active into the
	 * new generation and reserve them.
	 * CPUs in cpu_switchto() will spin in pmap_setttb() waiting for
	 * the mutex.  In that case an old ASID will be carried over but
	 * that is not problematic.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		asid = ci->ci_curpm->pm_asid & PMAP_ASID_MASK;
		ci->ci_curpm->pm_asid = asid | gen;
		bit = (asid & (32 - 1));
		pmap_asid[asid / 32] |= (3U << bit);
	}

	/* Flush the TLBs on all CPUs. */
	cpu_tlb_flush();

	if ((pm->pm_asid & ~PMAP_ASID_MASK) == gen)
		return pm->pm_asid & PMAP_ASID_MASK;

	return pmap_find_asid(pm);
}

/* Allocate an ASID pair for the pmap, rolling the generation if needed. */
void
pmap_allocate_asid(pmap_t pm)
{
	int asid, bit;

	mtx_enter(&pmap_asid_mtx);
	asid = pmap_find_asid(pm);
	if (asid == -1) {
		/*
		 * We have no free ASIDs.  Do a rollover to clear all
		 * inactive ASIDs and pick a fresh one.
		 */
		asid = pmap_rollover_asid(pm);
	}
	KASSERT(asid > 0 && asid < pmap_nasid);
	bit = asid & (32 - 1);
	pmap_asid[asid / 32] |= (3U << bit);
	pm->pm_asid = asid | pmap_asid_gen;
	mtx_leave(&pmap_asid_mtx);
}

/*
 * Release the pmap's ASID pair, first flushing any TLB entries tagged
 * with either ASID.  The bitmap bits are only cleared if the ASID
 * still belongs to the current generation.
 */
void
pmap_free_asid(pmap_t pm)
{
	int asid, bit;

	/* The pmap must not be active on this CPU. */
	KASSERT(pm != curcpu()->ci_curpm);
	cpu_tlb_flush_asid_all((uint64_t)pm->pm_asid << 48);
	cpu_tlb_flush_asid_all((uint64_t)(pm->pm_asid | ASID_USER) << 48);

	mtx_enter(&pmap_asid_mtx);
	if ((pm->pm_asid & ~PMAP_ASID_MASK) == pmap_asid_gen) {
		asid = pm->pm_asid & PMAP_ASID_MASK;
		bit = (asid & (32 - 1));
		pmap_asid[asid / 32] &= ~(3U << bit);
	}
	mtx_leave(&pmap_asid_mtx);
}

/*
 * This is used for pmap_kernel() mappings, they are not to be removed
 * from the vp table because they were statically initialized at the
 * initial pmap initialization.  This is so that memory allocation
 * is not necessary in the pmap_kernel() mappings.
 * Otherwise bad race conditions can appear.
 */
struct pte_desc *
pmap_vp_lookup(pmap_t pm, vaddr_t va, uint64_t **pl3entry)
{
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pmapvp3 *vp3;
	struct pte_desc *pted;

	/* Walk the shadow table; 4-level pmaps start at L0, else at L1. */
	if (pm->have_4_level_pt) {
		if (pm->pm_vp.l0 == NULL) {
			return NULL;
		}
		vp1 = pm->pm_vp.l0->vp[VP_IDX0(va)];
	} else {
		vp1 = pm->pm_vp.l1;
	}
	if (vp1 == NULL) {
		return NULL;
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		return NULL;
	}

	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		return NULL;
	}

	pted = vp3->vp[VP_IDX3(va)];
	/* Optionally hand back the address of the hardware L3 slot. */
	if (pl3entry != NULL)
		*pl3entry = &(vp3->l3[VP_IDX3(va)]);

	return pted;
}

/*
 * Create a V -> P mapping for the given pmap and virtual address
 * with reference to the pte descriptor that is used to map the page.
 * This code should track allocations of vp table allocations
 * so they can be freed efficiently.
 *
 * XXX it may be possible to save some bits of count in the
 * upper address bits of the pa or the pte entry.
 * However that does make populating the other bits more tricky.
 * each level has 512 entries, so that mean 9 bits to store
 * stash 3 bits each in the first 3 entries?
 */
int
pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags)
{
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pmapvp3 *vp3;

	PMAP_ASSERT_LOCKED(pm);

	if (pm->have_4_level_pt) {
		vp1 = pm->pm_vp.l0->vp[VP_IDX0(va)];
		if (vp1 == NULL) {
			vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
			if (vp1 == NULL) {
				if ((flags & PMAP_CANFAIL) == 0)
					panic("%s: unable to allocate L1",
					    __func__);
				return ENOMEM;
			}
			pmap_set_l1(pm, va, vp1);
		}
	} else {
		vp1 = pm->pm_vp.l1;
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
		if (vp2 == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: unable to allocate L2", __func__);
			return ENOMEM;
		}
		pmap_set_l2(pm, va, vp1, vp2);
	}

	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		vp3 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
		if (vp3 == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: unable to allocate L3", __func__);
			return ENOMEM;
		}
		pmap_set_l3(pm, va, vp2, vp3);
	}

	vp3->vp[VP_IDX3(va)] = pted;
	return 0;
}

/*
 * Pre-populate the VP table levels and the pte descriptor for the
 * given address, sleeping for memory as needed.  The pmap lock is
 * dropped around each additional allocation; anything that was not
 * consumed is returned to the pools at the end.
 */
void
pmap_vp_populate(pmap_t pm, vaddr_t va)
{
	struct pte_desc *pted;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pmapvp3 *vp3;
	void *vp;

	pted = pool_get(&pmap_pted_pool, PR_WAITOK | PR_ZERO);
	vp = pool_get(&pmap_vp_pool, PR_WAITOK | PR_ZERO);

	pmap_lock(pm);

	if (pm->have_4_level_pt) {
		vp1 = pm->pm_vp.l0->vp[VP_IDX0(va)];
		if (vp1 == NULL) {
			vp1 = vp; vp = NULL;
			pmap_set_l1(pm, va, vp1);
		}
	} else {
		vp1 = pm->pm_vp.l1;
	}

	/* Refill the spare allocation if it was consumed. */
	if (vp == NULL) {
		pmap_unlock(pm);
		vp = pool_get(&pmap_vp_pool, PR_WAITOK | PR_ZERO);
		pmap_lock(pm);
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		vp2 = vp; vp = NULL;
		pmap_set_l2(pm, va, vp1, vp2);
	}

	if (vp == NULL) {
		pmap_unlock(pm);
		vp = pool_get(&pmap_vp_pool, PR_WAITOK | PR_ZERO);
		pmap_lock(pm);
	}

	vp3 = vp2->vp[VP_IDX2(va)];
	if (vp3 == NULL) {
		vp3 = vp; vp = NULL;
		pmap_set_l3(pm, va, vp2, vp3);
	}

	if (vp3->vp[VP_IDX3(va)] == NULL) {
		vp3->vp[VP_IDX3(va)] = pted;
		pted = NULL;
	}

	pmap_unlock(pm);

	/* Free whatever we did not end up using. */
	if (vp)
		pool_put(&pmap_vp_pool, vp);
	if (pted)
		pool_put(&pmap_pted_pool, pted);
}

/* Backing page allocator for the VP pool. */
void *
pmap_vp_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_trylock = ISSET(flags, PR_NOWAIT);
	kd.kd_slowdown = slowdown;

	return km_alloc(pp->pr_pgsize, &kv_any, &kp_dirty, &kd);
}

void
pmap_vp_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_any, &kp_dirty);
}

/* Flag accessors for pte descriptors. */
static inline u_int32_t
PTED_MANAGED(struct pte_desc *pted)
{
	return (pted->pted_va & PTED_VA_MANAGED_M);
}

static inline u_int32_t
PTED_WIRED(struct pte_desc *pted)
{
	return (pted->pted_va & PTED_VA_WIRED_M);
}

/* A pte_desc is valid iff it holds a non-zero PTE. */
static inline u_int32_t
PTED_VALID(struct pte_desc *pted)
{
	return (pted->pted_pte != 0);
}

/*
 * PV entries -
 * manipulate the physical to virtual translations for the entire system.
 *
 * QUESTION: should all mapped memory be stored in PV tables? Or
 * is it alright to only store "ram" memory. Currently device mappings
 * are not stored.
 * It makes sense to pre-allocate mappings for all of "ram" memory, since
 * it is likely that it will be mapped at some point, but would it also
 * make sense to use a tree/table like is use for pmap to store device
 * mappings?
 * Further notes: It seems that the PV table is only used for pmap_protect
 * and other paging related operations.
 * Given this, it is not necessary
 * to store any pmap_kernel() entries in PV tables and does not make
 * sense to store device mappings in PV either.
 *
 * Note: unlike other powerpc pmap designs, the array is only an array
 * of pointers. Since the same structure is used for holding information
 * in the VP table, the PV table, and for kernel mappings, the wired entries.
 * Allocate one data structure to hold all of the info, instead of replicating
 * it multiple times.
 *
 * One issue of making this a single data structure is that two pointers are
 * wasted for every page which does not map ram (device mappings), this
 * should be a low percentage of mapped pages in the system, so should not
 * have too noticeable unnecessary ram consumption.
 */

/* Link the pte descriptor onto pg's PV list and mark it managed. */
void
pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg)
{
	/*
	 * XXX does this test mean that some pages try to be managed,
	 * but this is called too soon?
	 */
	if (__predict_false(!pmap_initialized))
		return;

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
	pted->pted_va |= PTED_VA_MANAGED_M;
	mtx_leave(&pg->mdpage.pv_mtx);
}

/* Unlink the pte descriptor from its physical page's PV list. */
void
pmap_remove_pv(struct pte_desc *pted)
{
	struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte & PTE_RPGN);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_REMOVE(pted, pted_pv_list);
	mtx_leave(&pg->mdpage.pv_mtx);
}

/*
 * Enter a mapping for va -> pa into pm.  Returns ENOMEM (only when
 * PMAP_CANFAIL is set) if a pte descriptor or VP table level cannot
 * be allocated; panics otherwise on allocation failure.
 */
int
pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
{
	struct pte_desc *pted;
	struct vm_page *pg;
	int error;
	int cache = PMAP_CACHE_WB;

	/* Caller hints in the low bits of pa select the cache mode. */
	if (pa & PMAP_NOCACHE)
		cache = PMAP_CACHE_CI;
	if (pa & PMAP_DEVICE)
		cache = PMAP_CACHE_DEV_NGNRNE;
	pg = PHYS_TO_VM_PAGE(pa);

	pmap_lock(pm);
	pted = pmap_vp_lookup(pm, va, NULL);
	if (pted && PTED_VALID(pted)) {
		pmap_remove_pted(pm, pted);
		/* we lost our pted if it was user */
		if (pm != pmap_kernel())
			pted = pmap_vp_lookup(pm, va, NULL);
	}

	pm->pm_stats.resident_count++;

	/* Do not have pted for this, get one and put it in VP */
	if (pted == NULL) {
		pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO);
		if (pted == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: failed to allocate pted", __func__);
			error = ENOMEM;
			goto out;
		}
		if (pmap_vp_enter(pm, va, pted, flags)) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: failed to allocate L2/L3", __func__);
			error = ENOMEM;
			pool_put(&pmap_pted_pool, pted);
			goto out;
		}
	}

	/*
	 * If it should be enabled _right now_, we can skip doing ref/mod
	 * emulation. Any access includes reference, modified only by write.
	 */
	if (pg != NULL &&
	    ((flags & PROT_MASK) || (pg->pg_flags & PG_PMAP_REF))) {
		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
		if ((prot & PROT_WRITE) && (flags & PROT_WRITE)) {
			atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD);
			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
		}
	}

	pmap_fill_pte(pm, va, pa, pted, prot, flags, cache);

	if (pg != NULL) {
		pmap_enter_pv(pted, pg); /* only managed mem */
	}

	/* Keep the icache coherent for freshly executable pages. */
	if (pg != NULL && (flags & PROT_EXEC)) {
		if ((pg->pg_flags & PG_PMAP_EXE) == 0)
			pmap_icache_sync_page(pm, pa);
		atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
	}

	/*
	 * Insert into table, if this mapping said it needed to be mapped
	 * now.
	 */
	if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC|PMAP_WIRED)) {
		pmap_pte_insert(pted);
		ttlb_flush(pm, va & ~PAGE_MASK);
	}

	error = 0;
out:
	pmap_unlock(pm);
	return error;
}

/* Pre-allocate mapping resources for va (wrapper for uvm). */
void
pmap_populate(pmap_t pm, vaddr_t va)
{
	pmap_vp_populate(pm, va);
}

/*
 * Remove the given range of mapping entries.
 */
void
pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
{
	struct pte_desc *pted;
	vaddr_t va;

	pmap_lock(pm);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		pted = pmap_vp_lookup(pm, va, NULL);

		if (pted == NULL)
			continue;

		/* Drop the wiring before tearing down the mapping. */
		if (PTED_WIRED(pted)) {
			pm->pm_stats.wired_count--;
			pted->pted_va &= ~PTED_VA_WIRED_M;
		}

		if (PTED_VALID(pted))
			pmap_remove_pted(pm, pted);
	}
	pmap_unlock(pm);
}

/*
 * remove a single mapping, notice that this code is O(1)
 */
void
pmap_remove_pted(pmap_t pm, struct pte_desc *pted)
{
	pm->pm_stats.resident_count--;

	if (PTED_WIRED(pted)) {
		pm->pm_stats.wired_count--;
		pted->pted_va &= ~PTED_VA_WIRED_M;
	}

	pmap_pte_remove(pted, pm != pmap_kernel());
	ttlb_flush(pm, pted->pted_va & ~PAGE_MASK);

	if (pted->pted_va & PTED_VA_EXEC_M) {
		pted->pted_va &= ~PTED_VA_EXEC_M;
	}

	if (PTED_MANAGED(pted))
		pmap_remove_pv(pted);

	pted->pted_pte = 0;
	pted->pted_va = 0;

	/* Kernel pteds are static; only user pteds go back to the pool. */
	if (pm != pmap_kernel())
		pool_put(&pmap_pted_pool, pted);
}


/*
 * Enter a kernel mapping for the given page.
 * kernel mappings have a larger set of prerequisites than normal mappings.
 *
 * 1. no memory should be allocated to create a kernel mapping.
 * 2. a vp mapping should already exist, even if invalid. (see 1)
 * 3. all vp tree mappings should already exist (see 1)
 *
 */
void
_pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, int flags, int cache)
{
	pmap_t pm = pmap_kernel();
	struct pte_desc *pted;
	struct vm_page *pg;

	pted = pmap_vp_lookup(pm, va, NULL);
	if (pted == NULL) {
		panic("pted not preallocated in pmap_kernel() va %lx pa %lx",
		    va, pa);
	}

	if (pted && PTED_VALID(pted))
		pmap_kremove_pg(va); /* pted is reused */

	pm->pm_stats.resident_count++;

	flags |= PMAP_WIRED; /* kernel mappings are always wired. */
	/* Calculate PTE */
	pmap_fill_pte(pm, va, pa, pted, prot, flags, cache);

	/* Insert into table */
	pmap_pte_insert(pted);
	ttlb_flush(pm, va & ~PAGE_MASK);

	/* Write back/invalidate caches for uncached mappings of real pages. */
	pg = PHYS_TO_VM_PAGE(pted->pted_pte & PTE_RPGN);
	if (pg && (cache == PMAP_CACHE_CI || cache == PMAP_CACHE_DEV_NGNRNE))
		cpu_idcache_wbinv_range(va & ~PAGE_MASK, PAGE_SIZE);
}

void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
{
	_pmap_kenter_pa(va, pa, prot, prot,
	    (pa & PMAP_NOCACHE) ? PMAP_CACHE_CI : PMAP_CACHE_WB);
}

/* Like pmap_kenter_pa(), but with an explicit cache mode. */
void
pmap_kenter_cache(vaddr_t va, paddr_t pa, vm_prot_t prot, int cacheable)
{
	_pmap_kenter_pa(va, pa, prot, prot, cacheable);
}

/*
 * remove kernel (pmap_kernel()) mapping, one page
 */
void
pmap_kremove_pg(vaddr_t va)
{
	pmap_t pm = pmap_kernel();
	struct pte_desc *pted;
	int s;

	pted = pmap_vp_lookup(pm, va, NULL);
	if (pted == NULL)
		return;

	if (!PTED_VALID(pted))
		return; /* not mapped */

	s = splvm();

	pm->pm_stats.resident_count--;

	pmap_pte_remove(pted, 0);
	ttlb_flush(pm, pted->pted_va & ~PAGE_MASK);

	if (pted->pted_va & PTED_VA_EXEC_M)
		pted->pted_va &= ~PTED_VA_EXEC_M;

	if (PTED_MANAGED(pted))
		pmap_remove_pv(pted);

	if (PTED_WIRED(pted))
		pm->pm_stats.wired_count--;

	/* invalidate pted; */
	pted->pted_pte = 0;
	pted->pted_va = 0;

	splx(s);
}

/*
 * remove kernel (pmap_kernel()) mappings
 */
void
pmap_kremove(vaddr_t va, vsize_t len)
{
	for (len >>= PAGE_SHIFT; len > 0; len--, va += PAGE_SIZE)
		pmap_kremove_pg(va);
}

/*
 * Initialize a pte descriptor from va/pa/prot/flags/cache; validates
 * the cache mode and accounts for wiring.  Does not touch the
 * hardware tables.
 */
void
pmap_fill_pte(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
    vm_prot_t prot, int flags, int cache)
{
	pted->pted_va = va;
	pted->pted_pmap = pm;

	switch (cache) {
	case PMAP_CACHE_WB:
		break;
	case PMAP_CACHE_WT:
		break;
	case PMAP_CACHE_CI:
		break;
	case PMAP_CACHE_DEV_NGNRNE:
		break;
	case PMAP_CACHE_DEV_NGNRE:
		break;
	default:
		panic("%s: invalid cache mode", __func__);
	}
	pted->pted_va |= cache;

	pted->pted_va |= prot & (PROT_READ|PROT_WRITE|PROT_EXEC);

	if (flags & PMAP_WIRED) {
		pted->pted_va |= PTED_VA_WIRED_M;
		pm->pm_stats.wired_count++;
	}

	/* Requested access (flags) is stashed in the PTE's low bits. */
	pted->pted_pte = pa & PTE_RPGN;
	pted->pted_pte |= flags & (PROT_READ|PROT_WRITE|PROT_EXEC);
}

/*
 * Fill the given physical
 * page with zeros.
 */
void
pmap_zero_page(struct vm_page *pg)
{
	paddr_t pa = VM_PAGE_TO_PHYS(pg);
	/* Each CPU has its own scratch slot in the zero_page window. */
	vaddr_t va = zero_page + cpu_number() * PAGE_SIZE;

	KASSERT(curcpu()->ci_idepth == 0);

	pmap_kenter_pa(va, pa, PROT_READ|PROT_WRITE);
	pagezero_cache(va);
	pmap_kremove_pg(va);
}

/*
 * Copy the given physical page.
 */
void
pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
{
	paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
	paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
	vaddr_t srcva = copy_src_page + cpu_number() * PAGE_SIZE;
	vaddr_t dstva = copy_dst_page + cpu_number() * PAGE_SIZE;
	int s;

	/*
	 * XXX The buffer flipper (incorrectly?) uses pmap_copy_page()
	 * (from uvm_pagerealloc_multi()) from interrupt context!
	 */
	s = splbio();
	pmap_kenter_pa(srcva, srcpa, PROT_READ);
	pmap_kenter_pa(dstva, dstpa, PROT_READ|PROT_WRITE);
	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
	pmap_kremove_pg(srcva);
	pmap_kremove_pg(dstva);
	splx(s);
}

/*
 * Allocate the top-level table for a new pmap, record its physical
 * address and take the initial reference.
 */
void
pmap_pinit(pmap_t pm)
{
	vaddr_t l0va;

	/* Allocate a full L0/L1 table. */
	if (pm->have_4_level_pt) {
		while (pm->pm_vp.l0 == NULL) {
			pm->pm_vp.l0 = pool_get(&pmap_vp_pool,
			    PR_WAITOK | PR_ZERO);
		}
		l0va = (vaddr_t)pm->pm_vp.l0->l0; /* top level is l0 */
	} else {
		while (pm->pm_vp.l1 == NULL) {
			pm->pm_vp.l1 = pool_get(&pmap_vp_pool,
			    PR_WAITOK | PR_ZERO);
		}
		l0va = (vaddr_t)pm->pm_vp.l1->l1; /* top level is l1 */
	}

	pmap_extract(pmap_kernel(), l0va, (paddr_t *)&pm->pm_pt0pa);

	pmap_reference(pm);
}

int pmap_vp_poolcache = 0; /* force vp poolcache to allocate late */

/*
 * Create and return a physical map.
 */
pmap_t
pmap_create(void)
{
	pmap_t pmap;

	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK | PR_ZERO);

	mtx_init(&pmap->pm_mtx, IPL_VM);

	pmap_pinit(pmap);
	/* First pmap created: enable the VP pool low-water cache. */
	if (pmap_vp_poolcache == 0) {
		pool_setlowat(&pmap_vp_pool, 20);
		pmap_vp_poolcache = 20;
	}
	return (pmap);
}

/*
 * Add a reference to a given pmap.
 */
void
pmap_reference(pmap_t pm)
{
	atomic_inc_int(&pm->pm_refs);
}

/*
 * Retire the given pmap from service.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_destroy(pmap_t pm)
{
	int refs;

	refs = atomic_dec_int_nv(&pm->pm_refs);
	if (refs > 0)
		return;

	/*
	 * reference count is zero, free pmap resources and free pmap.
	 */
	pmap_release(pm);
	pmap_free_asid(pm);
	pool_put(&pmap_pmap_pool, pm);
}

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 */
void
pmap_release(pmap_t pm)
{
	pmap_vp_destroy(pm);
}

/*
 * Free all L2/L3 shadow tables and pte descriptors reachable from
 * the given L1 table.  The L1 table itself is freed by the caller.
 */
void
pmap_vp_destroy_l2_l3(pmap_t pm, struct pmapvp1 *vp1)
{
	struct pmapvp2 *vp2;
	struct pmapvp3 *vp3;
	struct pte_desc *pted;
	int j, k, l;

	for (j = 0; j < VP_IDX1_CNT; j++) {
		vp2 = vp1->vp[j];
		if (vp2 == NULL)
			continue;
		vp1->vp[j] = NULL;

		for (k = 0; k < VP_IDX2_CNT; k++) {
			vp3 = vp2->vp[k];
			if (vp3 == NULL)
				continue;
			vp2->vp[k] = NULL;

			for (l = 0; l < VP_IDX3_CNT; l++) {
				pted = vp3->vp[l];
				if (pted == NULL)
					continue;
				vp3->vp[l] = NULL;

				pool_put(&pmap_pted_pool, pted);
			}
			pool_put(&pmap_vp_pool, vp3);
		}
		pool_put(&pmap_vp_pool, vp2);
	}
}

/* Tear down the pmap's whole VP table (3- or 4-level). */
void
pmap_vp_destroy(pmap_t pm)
{
	struct pmapvp0 *vp0;
	struct pmapvp1 *vp1;
	int i;

	/*
	 * XXX Is there a better way to share this code between 3 and
	 * 4 level tables?  Split the lower levels into a different
	 * function?
	 */
	if (!pm->have_4_level_pt) {
		pmap_vp_destroy_l2_l3(pm, pm->pm_vp.l1);
		pool_put(&pmap_vp_pool, pm->pm_vp.l1);
		pm->pm_vp.l1 = NULL;
		return;
	}

	vp0 = pm->pm_vp.l0;
	for (i = 0; i < VP_IDX0_CNT; i++) {
		vp1 = vp0->vp[i];
		if (vp1 == NULL)
			continue;
		vp0->vp[i] = NULL;

		pmap_vp_destroy_l2_l3(pm, vp1);
		pool_put(&pmap_vp_pool, vp1);
	}
	pool_put(&pmap_vp_pool, vp0);
	pm->pm_vp.l0 = NULL;
}

vaddr_t virtual_avail;
int pmap_virtual_space_called;

static inline uint64_t
VP_Lx(paddr_t pa)
{
	/*
	 * This function takes the pa address given and manipulates it
	 * into the form that should be inserted into the VM table.
	 */
	return pa | Lx_TYPE_PT;
}

/*
 * In pmap_bootstrap() we allocate the page tables for the first GB
 * of the kernel address space.
 */
vaddr_t pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS + 1024 * 1024 * 1024;

/*
 * Allocator for growing the kernel page tables.  We use a dedicated
 * submap to make sure we have the space to map them as we are called
 * when address space is tight!
 */

struct vm_map *pmap_kvp_map;

const struct kmem_va_mode kv_kvp = {
	.kv_map = &pmap_kvp_map,
	.kv_wait = 0
};

/*
 * Allocate a zeroed VP table for the kernel pmap.  Before uvm is
 * initialized this steals physical pages and maps them at
 * virtual_avail; afterwards it uses the dedicated submap.
 */
void *
pmap_kvp_alloc(void)
{
	void *kvp;

	if (!uvm.page_init_done && !pmap_virtual_space_called) {
		paddr_t pa[2];
		vaddr_t va;

		if (!uvm_page_physget(&pa[0]) || !uvm_page_physget(&pa[1]))
			panic("%s: out of memory", __func__);

		va = virtual_avail;
		virtual_avail += 2 * PAGE_SIZE;
		KASSERT(virtual_avail <= pmap_maxkvaddr);
		kvp = (void *)va;

		pmap_kenter_pa(va, pa[0], PROT_READ|PROT_WRITE);
		pmap_kenter_pa(va + PAGE_SIZE, pa[1], PROT_READ|PROT_WRITE);
		pagezero_cache(va);
		pagezero_cache(va + PAGE_SIZE);
	} else {
		kvp = km_alloc(sizeof(struct pmapvp0), &kv_kvp, &kp_zero,
		    &kd_nowait);
	}

	return kvp;
}

/*
 * Hand out pte descriptors for kernel mappings, carving them out of a
 * page-sized batch that is refilled when exhausted.
 */
struct pte_desc *
pmap_kpted_alloc(void)
{
	static struct pte_desc *pted;
	static int npted;

	if (npted == 0) {
		if (!uvm.page_init_done && !pmap_virtual_space_called) {
			paddr_t pa;
			vaddr_t va;

			if (!uvm_page_physget(&pa))
				panic("%s: out of memory", __func__);

			va = virtual_avail;
			virtual_avail += PAGE_SIZE;
			KASSERT(virtual_avail <= pmap_maxkvaddr);
			pted = (struct pte_desc *)va;

			pmap_kenter_pa(va, pa, PROT_READ|PROT_WRITE);
			pagezero_cache(va);
		} else {
			pted = km_alloc(PAGE_SIZE, &kv_kvp, &kp_zero,
			    &kd_nowait);
			if (pted == NULL)
				return NULL;
		}

		npted = PAGE_SIZE / sizeof(struct pte_desc);
	}

	npted--;
	return pted++;
}

/*
 * Grow the kernel VP tables (and preallocated pte descriptors) to
 * cover maxkvaddr.  Returns the new pmap_maxkvaddr, which may be
 * short of the request if an allocation fails.
 */
vaddr_t
pmap_growkernel(vaddr_t maxkvaddr)
{
	struct pmapvp1 *vp1 = pmap_kernel()->pm_vp.l1;
	struct pmapvp2 *vp2;
	struct pmapvp3 *vp3;
	struct pte_desc *pted;
	paddr_t pa;
	int lb_idx2, ub_idx2;
	int i, j, k;
	int s;

	if (maxkvaddr <= pmap_maxkvaddr)
		return pmap_maxkvaddr;

	/*
	 * Not strictly necessary, but we use an interrupt-safe map
	 * and uvm asserts that we're at IPL_VM.
	 */
	s = splvm();

	for (i = VP_IDX1(pmap_maxkvaddr); i <= VP_IDX1(maxkvaddr - 1); i++) {
		vp2 = vp1->vp[i];
		if (vp2 == NULL) {
			vp2 = pmap_kvp_alloc();
			if (vp2 == NULL)
				goto fail;
			pmap_extract(pmap_kernel(), (vaddr_t)vp2, &pa);
			vp1->vp[i] = vp2;
			vp1->l1[i] = VP_Lx(pa);
		}

		/* Clamp the L2 index range to the portion being grown. */
		if (i == VP_IDX1(pmap_maxkvaddr)) {
			lb_idx2 = VP_IDX2(pmap_maxkvaddr);
		} else {
			lb_idx2 = 0;
		}

		if (i == VP_IDX1(maxkvaddr - 1)) {
			ub_idx2 = VP_IDX2(maxkvaddr - 1);
		} else {
			ub_idx2 = VP_IDX2_CNT - 1;
		}

		for (j = lb_idx2; j <= ub_idx2; j++) {
			vp3 = vp2->vp[j];
			if (vp3 == NULL) {
				vp3 = pmap_kvp_alloc();
				if (vp3 == NULL)
					goto fail;
				pmap_extract(pmap_kernel(), (vaddr_t)vp3, &pa);
				vp2->vp[j] = vp3;
				vp2->l2[j] = VP_Lx(pa);
			}

			for (k = 0; k <= VP_IDX3_CNT - 1; k++) {
				if (vp3->vp[k] == NULL) {
					pted = pmap_kpted_alloc();
					if (pted == NULL)
						goto fail;
					vp3->vp[k] = pted;
					pmap_maxkvaddr += PAGE_SIZE;
				}
			}
		}
	}
	KASSERT(pmap_maxkvaddr >= maxkvaddr);

fail:
	splx(s);

	return pmap_maxkvaddr;
}

void pmap_setup_avail(uint64_t ram_start, uint64_t ram_end, uint64_t kvo);

/*
 * Initialize pmap setup.
 * ALL of the code which deals with avail needs rewritten as an actual
 * memory allocation.
 */
CTASSERT(sizeof(struct pmapvp0) == 2 * PAGE_SIZE);

/* Bootstrap bookkeeping, kept for debugging. */
int mappings_allocated = 0;
int pted_allocated = 0;

extern char __text_start[], _etext[];
extern char __rodata_start[], _erodata[];

/*
 * Bootstrap the kernel pmap: carve the kernel and bootloader out of
 * the avail ranges, steal physical memory for the kernel's L1/L2/L3
 * tables and pte descriptors, enter the stolen mappings and switch to
 * the new tables.  Returns the first free kernel virtual address.
 * kvo is the PA->VA offset (VA = PA - kvo, see pmap_setup_avail()).
 */
vaddr_t
pmap_bootstrap(long kvo, paddr_t lpt1, long kernelstart, long kernelend,
    long ram_start, long ram_end)
{
	void *va;
	paddr_t pa, pt1pa;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pmapvp3 *vp3;
	struct pte_desc *pted;
	vaddr_t vstart;
	uint64_t id_aa64mmfr0;
	int i, j, k;
	int lb_idx2, ub_idx2;

	pmap_setup_avail(ram_start, ram_end, kvo);

	/*
	 * in theory we could start with just the memory in the
	 * kernel, however this could 'allocate' the bootloader and
	 * bootstrap vm table, which we may need to preserve until
	 * later.
	 */
	printf("removing %lx-%lx\n", ram_start, kernelstart+kvo);
	pmap_remove_avail(ram_start, kernelstart+kvo);
	printf("removing %lx-%lx\n", kernelstart+kvo, kernelend+kvo);
	pmap_remove_avail(kernelstart+kvo, kernelend+kvo);

	/*
	 * KERNEL IS ASSUMED TO BE 39 bits (or less), start from L1,
	 * not L0 ALSO kernel mappings may not cover enough ram to
	 * bootstrap so all accesses initializing tables must be done
	 * via physical pointers
	 */

	/* One L1 table each for the kernel pmap and the trampoline pmap. */
	pt1pa = pmap_steal_avail(2 * sizeof(struct pmapvp1), Lx_TABLE_ALIGN,
	    &va);
	vp1 = (struct pmapvp1 *)pt1pa;
	pmap_kernel()->pm_vp.l1 = (struct pmapvp1 *)va;
	pmap_kernel()->pm_privileged = 1;
	pmap_kernel()->pm_guarded = ATTR_GP;
	pmap_kernel()->pm_asid = 0;

	mtx_init(&pmap_tramp.pm_mtx, IPL_VM);
	pmap_tramp.pm_vp.l1 = (struct pmapvp1 *)va + 1;
	pmap_tramp.pm_privileged = 1;
	pmap_tramp.pm_guarded = ATTR_GP;
	pmap_tramp.pm_asid = 0;

	/* Mark ASID 0 as in-use.
 */
	/* Both bits: kernel ASID 0 and its user twin (ASID_USER). */
	pmap_asid[0] |= (3U << 0);

	/* allocate Lx entries */
	for (i = VP_IDX1(VM_MIN_KERNEL_ADDRESS);
	    i <= VP_IDX1(pmap_maxkvaddr - 1);
	    i++) {
		mappings_allocated++;
		pa = pmap_steal_avail(sizeof(struct pmapvp2), Lx_TABLE_ALIGN,
		    &va);
		vp2 = (struct pmapvp2 *)pa; /* indexed physically */
		vp1->vp[i] = va;
		vp1->l1[i] = VP_Lx(pa);

		if (i == VP_IDX1(VM_MIN_KERNEL_ADDRESS)) {
			lb_idx2 = VP_IDX2(VM_MIN_KERNEL_ADDRESS);
		} else {
			lb_idx2 = 0;
		}
		if (i == VP_IDX1(pmap_maxkvaddr - 1)) {
			ub_idx2 = VP_IDX2(pmap_maxkvaddr - 1);
		} else {
			ub_idx2 = VP_IDX2_CNT - 1;
		}
		for (j = lb_idx2; j <= ub_idx2; j++) {
			mappings_allocated++;
			pa = pmap_steal_avail(sizeof(struct pmapvp3),
			    Lx_TABLE_ALIGN, &va);
			vp3 = (struct pmapvp3 *)pa; /* indexed physically */
			vp2->vp[j] = va;
			vp2->l2[j] = VP_Lx(pa);

		}
	}
	/* allocate Lx entries */
	for (i = VP_IDX1(VM_MIN_KERNEL_ADDRESS);
	    i <= VP_IDX1(pmap_maxkvaddr - 1);
	    i++) {
		/* access must be performed physical */
		vp2 = (void *)((long)vp1->vp[i] + kvo);

		if (i == VP_IDX1(VM_MIN_KERNEL_ADDRESS)) {
			lb_idx2 = VP_IDX2(VM_MIN_KERNEL_ADDRESS);
		} else {
			lb_idx2 = 0;
		}
		if (i == VP_IDX1(pmap_maxkvaddr - 1)) {
			ub_idx2 = VP_IDX2(pmap_maxkvaddr - 1);
		} else {
			ub_idx2 = VP_IDX2_CNT - 1;
		}
		for (j = lb_idx2; j <= ub_idx2; j++) {
			/* access must be performed physical */
			vp3 = (void *)((long)vp2->vp[j] + kvo);

			for (k = 0; k <= VP_IDX3_CNT - 1; k++) {
				pted_allocated++;
				pa = pmap_steal_avail(sizeof(struct pte_desc),
				    4, &va);
				pted = va;
				vp3->vp[k] = pted;
			}
		}
	}

	/* Empty TTBR0 table: userland is unmapped until a switch. */
	pa = pmap_steal_avail(Lx_TABLE_ALIGN, Lx_TABLE_ALIGN, &va);
	memset((void *)pa, 0, Lx_TABLE_ALIGN);
	pmap_kernel()->pm_pt0pa = pa;

	pmap_avail_fixup();

	/*
	 * At this point we are still
running on the bootstrap page 1385 * tables however all memory for the final page tables is 1386 * 'allocated' and should now be mapped. This means we are 1387 * able to use the virtual addressing to enter the final 1388 * mappings into the new mapping tables. 1389 */ 1390 vstart = pmap_map_stolen(kernelstart); 1391 1392 void (switch_mmu_kernel)(long); 1393 void (*switch_mmu_kernel_table)(long) = 1394 (void *)((long)&switch_mmu_kernel + kvo); 1395 switch_mmu_kernel_table(pt1pa); 1396 1397 printf("all mapped\n"); 1398 1399 curcpu()->ci_curpm = pmap_kernel(); 1400 1401 id_aa64mmfr0 = READ_SPECIALREG(id_aa64mmfr0_el1); 1402 if (ID_AA64MMFR0_ASID_BITS(id_aa64mmfr0) == ID_AA64MMFR0_ASID_BITS_16) 1403 pmap_nasid = (1 << 16); 1404 1405 vmmap = vstart; 1406 vstart += PAGE_SIZE; 1407 1408 return vstart; 1409 } 1410 1411 void 1412 pmap_set_l1(struct pmap *pm, uint64_t va, struct pmapvp1 *l1_va) 1413 { 1414 uint64_t pg_entry; 1415 paddr_t l1_pa; 1416 int idx0; 1417 1418 if (pmap_extract(pmap_kernel(), (vaddr_t)l1_va, &l1_pa) == 0) 1419 panic("unable to find vp pa mapping %p", l1_va); 1420 1421 if (l1_pa & (Lx_TABLE_ALIGN-1)) 1422 panic("misaligned L2 table"); 1423 1424 pg_entry = VP_Lx(l1_pa); 1425 1426 idx0 = VP_IDX0(va); 1427 pm->pm_vp.l0->vp[idx0] = l1_va; 1428 pm->pm_vp.l0->l0[idx0] = pg_entry; 1429 } 1430 1431 void 1432 pmap_set_l2(struct pmap *pm, uint64_t va, struct pmapvp1 *vp1, 1433 struct pmapvp2 *l2_va) 1434 { 1435 uint64_t pg_entry; 1436 paddr_t l2_pa; 1437 int idx1; 1438 1439 if (pmap_extract(pmap_kernel(), (vaddr_t)l2_va, &l2_pa) == 0) 1440 panic("unable to find vp pa mapping %p", l2_va); 1441 1442 if (l2_pa & (Lx_TABLE_ALIGN-1)) 1443 panic("misaligned L2 table"); 1444 1445 pg_entry = VP_Lx(l2_pa); 1446 1447 idx1 = VP_IDX1(va); 1448 vp1->vp[idx1] = l2_va; 1449 vp1->l1[idx1] = pg_entry; 1450 } 1451 1452 void 1453 pmap_set_l3(struct pmap *pm, uint64_t va, struct pmapvp2 *vp2, 1454 struct pmapvp3 *l3_va) 1455 { 1456 uint64_t pg_entry; 1457 paddr_t l3_pa; 1458 int 
idx2; 1459 1460 if (pmap_extract(pmap_kernel(), (vaddr_t)l3_va, &l3_pa) == 0) 1461 panic("unable to find vp pa mapping %p", l3_va); 1462 1463 if (l3_pa & (Lx_TABLE_ALIGN-1)) 1464 panic("misaligned L2 table"); 1465 1466 pg_entry = VP_Lx(l3_pa); 1467 1468 idx2 = VP_IDX2(va); 1469 vp2->vp[idx2] = l3_va; 1470 vp2->l2[idx2] = pg_entry; 1471 } 1472 1473 /* 1474 * activate a pmap entry 1475 */ 1476 void 1477 pmap_activate(struct proc *p) 1478 { 1479 pmap_t pm = p->p_vmspace->vm_map.pmap; 1480 1481 if (p == curproc && pm != curcpu()->ci_curpm) 1482 pmap_setttb(p); 1483 } 1484 1485 /* 1486 * deactivate a pmap entry 1487 */ 1488 void 1489 pmap_deactivate(struct proc *p) 1490 { 1491 } 1492 1493 /* 1494 * Get the physical page address for the given pmap/virtual address. 1495 */ 1496 int 1497 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) 1498 { 1499 struct pte_desc *pted; 1500 1501 pmap_lock(pm); 1502 pted = pmap_vp_lookup(pm, va, NULL); 1503 if (!pted || !PTED_VALID(pted)) { 1504 pmap_unlock(pm); 1505 return 0; 1506 } 1507 if (pap != NULL) 1508 *pap = (pted->pted_pte & PTE_RPGN) | (va & PAGE_MASK); 1509 pmap_unlock(pm); 1510 1511 return 1; 1512 } 1513 1514 void 1515 pmap_page_ro(pmap_t pm, vaddr_t va, vm_prot_t prot) 1516 { 1517 struct pte_desc *pted; 1518 uint64_t *pl3; 1519 1520 /* Every VA needs a pted, even unmanaged ones. 
 */
	pted = pmap_vp_lookup(pm, va, &pl3);
	if (!pted || !PTED_VALID(pted)) {
		return;
	}

	/* Write permission always goes; read/exec only if prot lacks them. */
	pted->pted_va &= ~PROT_WRITE;
	pted->pted_pte &= ~PROT_WRITE;
	if ((prot & PROT_READ) == 0) {
		pted->pted_va &= ~PROT_READ;
		pted->pted_pte &= ~PROT_READ;
	}
	if ((prot & PROT_EXEC) == 0) {
		pted->pted_va &= ~PROT_EXEC;
		pted->pted_pte &= ~PROT_EXEC;
	}
	pmap_pte_update(pted, pl3);
	ttlb_flush(pm, pted->pted_va & ~PAGE_MASK);
}

#ifdef DDB
/*
 * Re-enable write access on the mapping at va (ddb breakpoint support).
 */
void
pmap_page_rw(pmap_t pm, vaddr_t va)
{
	struct pte_desc *pted;
	uint64_t *pl3;

	/* Every VA needs a pted, even unmanaged ones. */
	pted = pmap_vp_lookup(pm, va, &pl3);
	if (!pted || !PTED_VALID(pted)) {
		return;
	}

	pted->pted_va |= PROT_WRITE;
	pted->pted_pte |= PROT_WRITE;
	pmap_pte_update(pted, pl3);
	ttlb_flush(pm, pted->pted_va & ~PAGE_MASK);
}
#endif /* DDB */

/*
 * Lower the protection on the specified physical page.
 */
void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
	struct pte_desc *pted;
	struct pmap *pm;

	/* Not PROT_NONE: just downgrade every mapping of the page. */
	if (prot != PROT_NONE) {
		mtx_enter(&pg->mdpage.pv_mtx);
		LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
			pmap_page_ro(pted->pted_pmap, pted->pted_va, prot);
		}
		mtx_leave(&pg->mdpage.pv_mtx);
		return;
	}

	/* PROT_NONE: remove every mapping of the page. */
	mtx_enter(&pg->mdpage.pv_mtx);
	while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
		/* Hold a reference so the pmap can't go away unlocked. */
		pmap_reference(pted->pted_pmap);
		pm = pted->pted_pmap;
		mtx_leave(&pg->mdpage.pv_mtx);

		pmap_lock(pm);

		/*
		 * We dropped the pvlist lock before grabbing the pmap
		 * lock to avoid lock ordering problems. This means
		 * we have to check the pvlist again since somebody
		 * else might have modified it. All we care about is
		 * that the pvlist entry matches the pmap we just
		 * locked.
 If it doesn't, unlock the pmap and try
		 * again.
		 */
		mtx_enter(&pg->mdpage.pv_mtx);
		pted = LIST_FIRST(&(pg->mdpage.pv_list));
		if (pted == NULL || pted->pted_pmap != pm) {
			mtx_leave(&pg->mdpage.pv_mtx);
			pmap_unlock(pm);
			pmap_destroy(pm);	/* drop our reference */
			mtx_enter(&pg->mdpage.pv_mtx);
			continue;
		}
		mtx_leave(&pg->mdpage.pv_mtx);

		pmap_remove_pted(pm, pted);
		pmap_unlock(pm);
		pmap_destroy(pm);	/* drop our reference */

		mtx_enter(&pg->mdpage.pv_mtx);
	}
	/* page is being reclaimed, sync icache next use */
	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
	mtx_leave(&pg->mdpage.pv_mtx);
}

/*
 * Reduce the protection of the range [sva, eva) to prot; removing all
 * access removes the mappings entirely.
 */
void
pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
	if (prot & (PROT_READ | PROT_EXEC)) {
		pmap_lock(pm);
		while (sva < eva) {
			pmap_page_ro(pm, sva, prot);
			sva += PAGE_SIZE;
		}
		pmap_unlock(pm);
		return;
	}
	pmap_remove(pm, sva, eva);
}

/*
 * Finish pmap initialization once the kernel runs on its final tables:
 * reprogram TTBR0/TCR and set up the allocation pools.
 */
void
pmap_init(void)
{
	uint64_t tcr;

	/*
	 * Now that we are in virtual address space we don't need
	 * the identity mapping in TTBR0 and can set the TCR to a
	 * more useful value.
 */
	WRITE_SPECIALREG(ttbr0_el1, pmap_kernel()->pm_pt0pa);
	__asm volatile("isb");
	tcr = READ_SPECIALREG(tcr_el1);
	tcr &= ~TCR_T0SZ(0x3f);
	tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
	tcr |= TCR_A1;	/* ASID is taken from TTBR1 */
	WRITE_SPECIALREG(tcr_el1, tcr);
	cpu_tlb_flush();

	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
	    "pmap", NULL);
	pool_setlowat(&pmap_pmap_pool, 2);
	pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
	    "pted", NULL);
	pool_setlowat(&pmap_pted_pool, 20);
	pool_init(&pmap_vp_pool, sizeof(struct pmapvp0), PAGE_SIZE, IPL_VM, 0,
	    "vp", &pmap_vp_allocator);
	/* pool_setlowat(&pmap_vp_pool, 20); */

	pmap_initialized = 1;
}

/*
 * Make the instruction cache coherent with recently written code in
 * the range [va, va+len) of process pr.
 */
void
pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
{
	struct pmap *pm = vm_map_pmap(&pr->ps_vmspace->vm_map);
	/* Per-CPU scratch VA; see also pmap_icache_sync_page(). */
	vaddr_t kva = zero_page + cpu_number() * PAGE_SIZE;
	paddr_t pa;
	vsize_t clen;
	vsize_t off;

	/*
	 * If we're called for the current process, we can simply
	 * flush the data cache to the point of unification and
	 * invalidate the instruction cache.
	 */
	if (pr == curproc->p_p) {
		cpu_icache_sync_range(va, len);
		return;
	}

	/*
	 * Flush and invalidate through an aliased mapping. This
	 * assumes the instruction cache is PIPT. That is only true
	 * for some of the hardware we run on.
1687 */ 1688 while (len > 0) { 1689 /* add one to always round up to the next page */ 1690 clen = round_page(va + 1) - va; 1691 if (clen > len) 1692 clen = len; 1693 1694 off = va - trunc_page(va); 1695 if (pmap_extract(pm, trunc_page(va), &pa)) { 1696 pmap_kenter_pa(kva, pa, PROT_READ|PROT_WRITE); 1697 cpu_icache_sync_range(kva + off, clen); 1698 pmap_kremove_pg(kva); 1699 } 1700 1701 len -= clen; 1702 va += clen; 1703 } 1704 } 1705 1706 void 1707 pmap_icache_sync_page(struct pmap *pm, paddr_t pa) 1708 { 1709 vaddr_t kva = zero_page + cpu_number() * PAGE_SIZE; 1710 1711 pmap_kenter_pa(kva, pa, PROT_READ|PROT_WRITE); 1712 cpu_icache_sync_range(kva, PAGE_SIZE); 1713 pmap_kremove_pg(kva); 1714 } 1715 1716 void 1717 pmap_pte_insert(struct pte_desc *pted) 1718 { 1719 pmap_t pm = pted->pted_pmap; 1720 uint64_t *pl3; 1721 1722 if (pmap_vp_lookup(pm, pted->pted_va, &pl3) == NULL) { 1723 panic("%s: have a pted, but missing a vp" 1724 " for %lx va pmap %p", __func__, pted->pted_va, pm); 1725 } 1726 1727 pmap_pte_update(pted, pl3); 1728 } 1729 1730 void 1731 pmap_pte_update(struct pte_desc *pted, uint64_t *pl3) 1732 { 1733 uint64_t pte, access_bits; 1734 pmap_t pm = pted->pted_pmap; 1735 uint64_t attr = ATTR_nG; 1736 1737 /* see mair in locore.S */ 1738 switch (pted->pted_va & PMAP_CACHE_BITS) { 1739 case PMAP_CACHE_WB: 1740 /* inner and outer writeback */ 1741 attr |= ATTR_IDX(PTE_ATTR_WB); 1742 attr |= ATTR_SH(SH_INNER); 1743 break; 1744 case PMAP_CACHE_WT: 1745 /* inner and outer writethrough */ 1746 attr |= ATTR_IDX(PTE_ATTR_WT); 1747 attr |= ATTR_SH(SH_INNER); 1748 break; 1749 case PMAP_CACHE_CI: 1750 attr |= ATTR_IDX(PTE_ATTR_CI); 1751 attr |= ATTR_SH(SH_INNER); 1752 break; 1753 case PMAP_CACHE_DEV_NGNRNE: 1754 attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRNE); 1755 attr |= ATTR_SH(SH_INNER); 1756 break; 1757 case PMAP_CACHE_DEV_NGNRE: 1758 attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRE); 1759 attr |= ATTR_SH(SH_INNER); 1760 break; 1761 default: 1762 panic("%s: invalid cache mode", 
		    __func__);
	}

	if (pm->pm_privileged)
		access_bits = ap_bits_kern[pted->pted_pte & PROT_MASK];
	else
		access_bits = ap_bits_user[pted->pted_pte & PROT_MASK];

#ifndef SMALL_KERNEL
	access_bits |= pm->pm_guarded;
#endif

	pte = (pted->pted_pte & PTE_RPGN) | attr | access_bits | L3_P;
	*pl3 = access_bits ? pte : 0;
}

/*
 * Clear the hardware L3 PTE for pted; if remove_pted is set also drop
 * the descriptor pointer from the L3 shadow table.  Does not flush
 * the TLB; callers do that.
 */
void
pmap_pte_remove(struct pte_desc *pted, int remove_pted)
{
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pmapvp3 *vp3;
	pmap_t pm = pted->pted_pmap;

	if (pm->have_4_level_pt)
		vp1 = pm->pm_vp.l0->vp[VP_IDX0(pted->pted_va)];
	else
		vp1 = pm->pm_vp.l1;
	if (vp1 == NULL) {
		panic("have a pted, but missing the l1 for %lx va pmap %p",
		    pted->pted_va, pm);
	}
	vp2 = vp1->vp[VP_IDX1(pted->pted_va)];
	if (vp2 == NULL) {
		panic("have a pted, but missing the l2 for %lx va pmap %p",
		    pted->pted_va, pm);
	}
	vp3 = vp2->vp[VP_IDX2(pted->pted_va)];
	if (vp3 == NULL) {
		panic("have a pted, but missing the l3 for %lx va pmap %p",
		    pted->pted_va, pm);
	}
	vp3->l3[VP_IDX3(pted->pted_va)] = 0;
	if (remove_pted)
		vp3->vp[VP_IDX3(pted->pted_va)] = NULL;
}

/*
 * This function exists to do software referenced/modified emulation.
 * Its purpose is to tell the caller that a fault was generated either
 * for this emulation, or to tell the caller that it's a legit fault.
 */
int
pmap_fault_fixup(pmap_t pm, vaddr_t va, vm_prot_t ftype)
{
	struct pte_desc *pted;
	struct vm_page *pg;
	paddr_t pa;
	uint64_t *pl3 = NULL;
	int retcode = 0;

	pmap_lock(pm);

	/* Every VA needs a pted, even unmanaged ones. */
	pted = pmap_vp_lookup(pm, va, &pl3);
	if (!pted || !PTED_VALID(pted))
		goto done;

	/* There has to be a PA for the VA, get it.
 */
	pa = (pted->pted_pte & PTE_RPGN);

	/* If it's unmanaged, it must not fault. */
	pg = PHYS_TO_VM_PAGE(pa);
	if (pg == NULL)
		goto done;

	/*
	 * Check the fault types to find out if we were doing
	 * any mod/ref emulation and fixup the PTE if we were.
	 * pted_va holds the permissions UVM granted; pted_pte holds
	 * the permissions currently enabled in the hardware PTE.
	 */
	if ((ftype & PROT_WRITE) && /* fault caused by a write */
	    !(pted->pted_pte & PROT_WRITE) && /* and write is disabled now */
	    (pted->pted_va & PROT_WRITE)) { /* but is supposedly allowed */

		/*
		 * Page modified emulation. A write always includes
		 * a reference. This means that we can enable read and
		 * exec as well, akin to the page reference emulation.
		 */
		atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD|PG_PMAP_REF);
		atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);

		/* Thus, enable read, write and exec. */
		pted->pted_pte |=
		    (pted->pted_va & (PROT_READ|PROT_WRITE|PROT_EXEC));
	} else if ((ftype & PROT_EXEC) && /* fault caused by an exec */
	    !(pted->pted_pte & PROT_EXEC) && /* and exec is disabled now */
	    (pted->pted_va & PROT_EXEC)) { /* but is supposedly allowed */

		/*
		 * Exec always includes a reference. Since we now know
		 * the page has been accessed, we can enable read as well
		 * if UVM allows it.
		 */
		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);

		/* Thus, enable read and exec. */
		pted->pted_pte |= (pted->pted_va & (PROT_READ|PROT_EXEC));
	} else if ((ftype & PROT_READ) && /* fault caused by a read */
	    !(pted->pted_pte & PROT_READ) && /* and read is disabled now */
	    (pted->pted_va & PROT_READ)) { /* but is supposedly allowed */

		/*
		 * Page referenced emulation. Since we now know the page
		 * has been accessed, we can enable exec as well if UVM
		 * allows it.
		 */
		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);

		/* Thus, enable read and exec.
 */
		pted->pted_pte |= (pted->pted_va & (PROT_READ|PROT_EXEC));
	} else {
		/* didn't catch it, so probably broken */
		goto done;
	}

	/*
	 * If this is a page that can be executed, make sure to invalidate
	 * the instruction cache if the page has been modified or not used
	 * yet.
	 */
	if (pted->pted_va & PROT_EXEC) {
		if ((pg->pg_flags & PG_PMAP_EXE) == 0)
			pmap_icache_sync_page(pm, pa);
		atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
	}

	/* We actually made a change, so flush it and sync. */
	pmap_pte_update(pted, pl3);
	ttlb_flush(pm, va & ~PAGE_MASK);

	retcode = 1;
done:
	pmap_unlock(pm);
	return retcode;
}

/*
 * Late pmap setup: populate the trampoline pmap with the trampoline
 * vectors and reserve KVA for growing the kernel page tables.
 */
void
pmap_postinit(void)
{
	extern char trampoline_vectors[];
	extern char trampoline_vectors_end[];
	paddr_t pa;
	vaddr_t minaddr, maxaddr;
	u_long npteds, npages;

	memset(pmap_tramp.pm_vp.l1, 0, sizeof(struct pmapvp1));
	pmap_extract(pmap_kernel(), (vaddr_t)trampoline_vectors, &pa);
	minaddr = (vaddr_t)trampoline_vectors;
	maxaddr = (vaddr_t)trampoline_vectors_end;
	while (minaddr < maxaddr) {
		pmap_enter(&pmap_tramp, minaddr, pa,
		    PROT_READ | PROT_EXEC, PROT_READ | PROT_EXEC | PMAP_WIRED);
		minaddr += PAGE_SIZE;
		pa += PAGE_SIZE;
	}

	/*
	 * Reserve enough virtual address space to grow the kernel
	 * page tables. We need a descriptor for each page as well as
	 * an extra page for level 1/2/3 page tables for management.
	 * To simplify the code, we always allocate full tables at
	 * level 3, so take that into account.
 */
	npteds = (VM_MAX_KERNEL_ADDRESS - pmap_maxkvaddr + 1) / PAGE_SIZE;
	npteds = roundup(npteds, VP_IDX3_CNT);
	npages = howmany(npteds, PAGE_SIZE / (sizeof(struct pte_desc)));
	/* pmapvp* tables are two pages each, hence the factor 2. */
	npages += 2 * howmany(npteds, VP_IDX3_CNT);
	npages += 2 * howmany(npteds, VP_IDX3_CNT * VP_IDX2_CNT);
	npages += 2 * howmany(npteds, VP_IDX3_CNT * VP_IDX2_CNT * VP_IDX1_CNT);

	/*
	 * Use an interrupt safe map such that we don't recurse into
	 * uvm_map() to allocate map entries.
	 */
	minaddr = vm_map_min(kernel_map);
	pmap_kvp_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
	    npages * PAGE_SIZE, VM_MAP_INTRSAFE, FALSE, NULL);
}

/* Set up the per-CPU caches for the pmap pools. */
void
pmap_init_percpu(void)
{
	pool_cache_init(&pmap_pted_pool);
	pool_cache_init(&pmap_vp_pool);
}

void
pmap_update(pmap_t pm)
{
}

/* Report whether the page has been referenced (ref emulation flag). */
int
pmap_is_referenced(struct vm_page *pg)
{
	return ((pg->pg_flags & PG_PMAP_REF) != 0);
}

/* Report whether the page has been modified (mod emulation flag). */
int
pmap_is_modified(struct vm_page *pg)
{
	return ((pg->pg_flags & PG_PMAP_MOD) != 0);
}

/*
 * Clear the modified flag and revoke write access on all mappings of
 * the page so future writes fault and re-set the flag.
 */
int
pmap_clear_modify(struct vm_page *pg)
{
	struct pte_desc *pted;

	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
		pted->pted_pte &= ~PROT_WRITE;
		pmap_pte_insert(pted);
		ttlb_flush(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
	}
	mtx_leave(&pg->mdpage.pv_mtx);

	return 0;
}

/*
 * When this turns off read permissions it also disables write permissions
 * so that mod is correctly tracked after clear_ref; FAULT_READ; FAULT_WRITE;
 */
int
pmap_clear_reference(struct vm_page *pg)
{
	struct pte_desc *pted;

	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
		/* Drop all access so any touch faults and re-sets ref. */
		pted->pted_pte &= ~PROT_MASK;
		pmap_pte_insert(pted);
		ttlb_flush(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
	}
	mtx_leave(&pg->mdpage.pv_mtx);

	return 0;
}

/* Drop the wired status of the mapping at va, if it was wired. */
void
pmap_unwire(pmap_t pm, vaddr_t va)
{
	struct pte_desc *pted;

	pmap_lock(pm);
	pted = pmap_vp_lookup(pm, va, NULL);
	if (pted != NULL && PTED_WIRED(pted)) {
		pm->pm_stats.wired_count--;
		pted->pted_va &= ~PTED_VA_WIRED_M;
	}
	pmap_unlock(pm);
}

void
pmap_remove_holes(struct vmspace *vm)
{
	/* NOOP */
}

/* Report the kernel virtual address range available to UVM. */
void
pmap_virtual_space(vaddr_t *start, vaddr_t *end)
{
	*start = virtual_avail;
	*end = VM_MAX_KERNEL_ADDRESS;

	/* Prevent further KVA stealing. */
	pmap_virtual_space_called = 1;
}

/* Record the RAM range and the PA<->VA offset for the boot allocator. */
void
pmap_setup_avail(uint64_t ram_start, uint64_t ram_end, uint64_t kvo)
{
	/* This makes several assumptions
	 * 1) kernel will be located 'low' in memory
	 * 2) memory will not start at VM_MIN_KERNEL_ADDRESS
	 * 3) several MB of memory starting just after the kernel will
	 *    be premapped at the kernel address in the bootstrap mappings
	 * 4) kvo will be the 64 bit number to add to the ram address to
	 *    obtain the kernel virtual mapping of the ram. KVO == PA -> VA
	 * 5) it is generally assumed that these translations will occur with
	 *    large granularity, at minimum the translation will be page
	 *    aligned, if not 'section' or greater.
2059 */ 2060 2061 pmap_avail_kvo = kvo; 2062 pmap_avail[0].start = ram_start; 2063 pmap_avail[0].size = ram_end-ram_start; 2064 2065 /* XXX - multiple sections */ 2066 physmem = atop(pmap_avail[0].size); 2067 2068 pmap_cnt_avail = 1; 2069 2070 pmap_avail_fixup(); 2071 } 2072 2073 void 2074 pmap_avail_fixup(void) 2075 { 2076 struct mem_region *mp; 2077 vaddr_t align; 2078 vaddr_t end; 2079 2080 mp = pmap_avail; 2081 while(mp->size !=0) { 2082 align = round_page(mp->start); 2083 if (mp->start != align) { 2084 pmap_remove_avail(mp->start, align); 2085 mp = pmap_avail; 2086 continue; 2087 } 2088 end = mp->start+mp->size; 2089 align = trunc_page(end); 2090 if (end != align) { 2091 pmap_remove_avail(align, end); 2092 mp = pmap_avail; 2093 continue; 2094 } 2095 mp++; 2096 } 2097 } 2098 2099 /* remove a given region from avail memory */ 2100 void 2101 pmap_remove_avail(paddr_t base, paddr_t end) 2102 { 2103 struct mem_region *mp; 2104 int i; 2105 long mpend; 2106 2107 /* remove given region from available */ 2108 for (mp = pmap_avail; mp->size; mp++) { 2109 /* 2110 * Check if this region holds all of the region 2111 */ 2112 mpend = mp->start + mp->size; 2113 if (base > mpend) { 2114 continue; 2115 } 2116 if (base <= mp->start) { 2117 if (end <= mp->start) 2118 break; /* region not present -??? 
*/ 2119 2120 if (end >= mpend) { 2121 /* covers whole region */ 2122 /* shorten */ 2123 for (i = mp - pmap_avail; 2124 i < pmap_cnt_avail; 2125 i++) { 2126 pmap_avail[i] = pmap_avail[i+1]; 2127 } 2128 pmap_cnt_avail--; 2129 pmap_avail[pmap_cnt_avail].size = 0; 2130 } else { 2131 mp->start = end; 2132 mp->size = mpend - end; 2133 } 2134 } else { 2135 /* start after the beginning */ 2136 if (end >= mpend) { 2137 /* just truncate */ 2138 mp->size = base - mp->start; 2139 } else { 2140 /* split */ 2141 for (i = pmap_cnt_avail; 2142 i > (mp - pmap_avail); 2143 i--) { 2144 pmap_avail[i] = pmap_avail[i - 1]; 2145 } 2146 pmap_cnt_avail++; 2147 mp->size = base - mp->start; 2148 mp++; 2149 mp->start = end; 2150 mp->size = mpend - end; 2151 } 2152 } 2153 } 2154 for (mp = pmap_allocated; mp->size != 0; mp++) { 2155 if (base < mp->start) { 2156 if (end == mp->start) { 2157 mp->start = base; 2158 mp->size += end - base; 2159 break; 2160 } 2161 /* lengthen */ 2162 for (i = pmap_cnt_allocated; i > (mp - pmap_allocated); 2163 i--) { 2164 pmap_allocated[i] = pmap_allocated[i - 1]; 2165 } 2166 pmap_cnt_allocated++; 2167 mp->start = base; 2168 mp->size = end - base; 2169 return; 2170 } 2171 if (base == (mp->start + mp->size)) { 2172 mp->size += end - base; 2173 return; 2174 } 2175 } 2176 if (mp->size == 0) { 2177 mp->start = base; 2178 mp->size = end - base; 2179 pmap_cnt_allocated++; 2180 } 2181 } 2182 2183 /* XXX - this zeros pages via their physical address */ 2184 paddr_t 2185 pmap_steal_avail(size_t size, int align, void **kva) 2186 { 2187 struct mem_region *mp; 2188 long start; 2189 long remsize; 2190 2191 for (mp = pmap_avail; mp->size; mp++) { 2192 if (mp->size > size) { 2193 start = (mp->start + (align -1)) & ~(align -1); 2194 remsize = mp->size - (start - mp->start); 2195 if (remsize >= 0) { 2196 pmap_remove_avail(start, start+size); 2197 if (kva != NULL){ 2198 *kva = (void *)(start - pmap_avail_kvo); 2199 } 2200 bzero((void*)(start), size); 2201 return start; 2202 } 2203 } 
	}
	panic ("unable to allocate region with size %lx align %x",
	    size, align);
}

/*
 * Enter kernel mappings for all memory stolen during bootstrap,
 * applying text/rodata protections.  Returns one page past the last
 * VA entered.
 */
vaddr_t
pmap_map_stolen(vaddr_t kernel_start)
{
	struct mem_region *mp;
	paddr_t pa;
	vaddr_t va;
	uint64_t e;

	for (mp = pmap_allocated; mp->size; mp++) {
		for (e = 0; e < mp->size; e += PAGE_SIZE) {
			int prot = PROT_READ | PROT_WRITE;

			pa = mp->start + e;
			va = pa - pmap_avail_kvo;

			if (va < VM_MIN_KERNEL_ADDRESS ||
			    va >= VM_MAX_KERNEL_ADDRESS)
				continue;

			if (va >= (vaddr_t)__text_start &&
			    va < (vaddr_t)_etext)
				prot = PROT_READ | PROT_EXEC;
			else if (va >= (vaddr_t)__rodata_start &&
			    va < (vaddr_t)_erodata)
				prot = PROT_READ;

			pmap_kenter_cache(va, pa, prot, PMAP_CACHE_WB);
		}
	}

	/*
	 * NOTE(review): 'va' is the last VA visited by the loops above;
	 * this assumes at least one allocated region exists and regions
	 * are ordered so the last one mapped is the highest — confirm.
	 */
	return va + PAGE_SIZE;
}

/* Hand the remaining avail regions to UVM, page-aligned. */
void
pmap_physload_avail(void)
{
	struct mem_region *mp;
	uint64_t start, end;

	for (mp = pmap_avail; mp->size; mp++) {
		if (mp->size < PAGE_SIZE) {
			printf(" skipped - too small\n");
			continue;
		}
		start = mp->start;
		if (start & PAGE_MASK) {
			/* round the start up to the next page */
			start = PAGE_SIZE + (start & PMAP_PA_MASK);
		}
		end = mp->start + mp->size;
		if (end & PAGE_MASK) {
			/* round the end down to a page boundary */
			end = (end & PMAP_PA_MASK);
		}
		uvm_page_physload(atop(start), atop(end),
		    atop(start), atop(end), 0);

	}
}

/* ddb helper: dump the page-table walk for va. */
void
pmap_show_mapping(uint64_t va)
{
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pmapvp3 *vp3;
	struct pte_desc *pted;
	struct pmap *pm;
	uint64_t ttbr0, tcr;

	printf("showing mapping of %llx\n", va);

	/* High addresses belong to the kernel pmap. */
	if (va & 1ULL << 63)
		pm = pmap_kernel();
	else
		pm = curproc->p_vmspace->vm_map.pmap;

	if (pm->have_4_level_pt) {
		printf(" vp0 = %p off %x\n", pm->pm_vp.l0, VP_IDX0(va)*8);
		vp1 = pm->pm_vp.l0->vp[VP_IDX0(va)];
		if (vp1 == NULL)
			return;
	} else {
		vp1 =
		    pm->pm_vp.l1;
	}

	__asm volatile ("mrs %x0, ttbr0_el1" : "=r"(ttbr0));
	__asm volatile ("mrs %x0, tcr_el1" : "=r"(tcr));
	printf(" ttbr0 %llx %llx tcr %llx\n", ttbr0, pm->pm_pt0pa, tcr);
	printf(" vp1 = %p\n", vp1);

	vp2 = vp1->vp[VP_IDX1(va)];
	printf(" vp2 = %p lp2 = %llx idx1 off %x\n",
	    vp2, vp1->l1[VP_IDX1(va)], VP_IDX1(va)*8);
	if (vp2 == NULL)
		return;

	vp3 = vp2->vp[VP_IDX2(va)];
	printf(" vp3 = %p lp3 = %llx idx2 off %x\n",
	    vp3, vp2->l2[VP_IDX2(va)], VP_IDX2(va)*8);
	if (vp3 == NULL)
		return;

	pted = vp3->vp[VP_IDX3(va)];
	printf(" pted = %p lp3 = %llx idx3 off %x\n",
	    pted, vp3->l3[VP_IDX3(va)], VP_IDX3(va)*8);
}

/*
 * Load pm's pointer-authentication keys into the key registers, when
 * the CPU implements address/generic PAC.
 */
__attribute__((target("+pauth")))
void
pmap_setpauthkeys(struct pmap *pm)
{
	if (ID_AA64ISAR1_APA(cpu_id_aa64isar1) >= ID_AA64ISAR1_APA_PAC ||
	    ID_AA64ISAR1_API(cpu_id_aa64isar1) >= ID_AA64ISAR1_API_PAC) {
		__asm volatile ("msr apiakeylo_el1, %0"
		    :: "r"(pm->pm_apiakey[0]));
		__asm volatile ("msr apiakeyhi_el1, %0"
		    :: "r"(pm->pm_apiakey[1]));
		__asm volatile ("msr apdakeylo_el1, %0"
		    :: "r"(pm->pm_apdakey[0]));
		__asm volatile ("msr apdakeyhi_el1, %0"
		    :: "r"(pm->pm_apdakey[1]));
		__asm volatile ("msr apibkeylo_el1, %0"
		    :: "r"(pm->pm_apibkey[0]));
		__asm volatile ("msr apibkeyhi_el1, %0"
		    :: "r"(pm->pm_apibkey[1]));
		__asm volatile ("msr apdbkeylo_el1, %0"
		    :: "r"(pm->pm_apdbkey[0]));
		__asm volatile ("msr apdbkeyhi_el1, %0"
		    :: "r"(pm->pm_apdbkey[1]));
	}

	if (ID_AA64ISAR1_GPA(cpu_id_aa64isar1) >= ID_AA64ISAR1_GPA_IMPL ||
	    ID_AA64ISAR1_GPI(cpu_id_aa64isar1) >= ID_AA64ISAR1_GPI_IMPL) {
		__asm volatile ("msr apgakeylo_el1, %0"
		    :: "r"(pm->pm_apgakey[0]));
		__asm volatile ("msr apgakeyhi_el1, %0"
		    :: "r"(pm->pm_apgakey[1]));
	}
}

/*
 * Switch this CPU's translation tables to p's pmap: refresh the ASID
 * if its generation is stale, load the PAC keys and install the new
 * TTBR.
 */
void
pmap_setttb(struct proc *p)
{
	struct cpu_info *ci =
	    curcpu();
	pmap_t pm = p->p_vmspace->vm_map.pmap;

	/*
	 * If the generation of the ASID for the new pmap doesn't
	 * match the current generation, allocate a new ASID.
	 */
	if (pm != pmap_kernel() &&
	    (pm->pm_asid & ~PMAP_ASID_MASK) != READ_ONCE(pmap_asid_gen))
		pmap_allocate_asid(pm);

	if (pm != pmap_kernel())
		pmap_setpauthkeys(pm);

	/* Park TTBR0 on the empty table while switching. */
	WRITE_SPECIALREG(ttbr0_el1, pmap_kernel()->pm_pt0pa);
	__asm volatile("isb");
	cpu_setttb(pm->pm_asid, pm->pm_pt0pa);
	ci->ci_curpm = pm;
	/* Flush branch predictor state on the address-space switch. */
	ci->ci_flush_bp();
}