1 /* $OpenBSD: pmap.c,v 1.92 2024/08/23 15:14:45 miod Exp $ */ 2 /* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */ 3 4 /*- 5 * Copyright (c) 1998, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center and by Chris G. Demetriou. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1991, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * the Systems Programming Group of the University of Utah Computer 40 * Science Department. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)pmap.c 8.6 (Berkeley) 5/27/94 67 */ 68 69 /* 70 * DEC Alpha physical map management code. 71 * 72 * History: 73 * 74 * This pmap started life as a Motorola 68851/68030 pmap, 75 * written by Mike Hibler at the University of Utah. 76 * 77 * It was modified for the DEC Alpha by Chris Demetriou 78 * at Carnegie Mellon University. 79 * 80 * Support for non-contiguous physical memory was added by 81 * Jason R. Thorpe of the Numerical Aerospace Simulation 82 * Facility, NASA Ames Research Center and Chris Demetriou. 83 * 84 * Page table management and a major cleanup were undertaken 85 * by Jason R. Thorpe, with lots of help from Ross Harvey of 86 * Avalon Computer Systems and from Chris Demetriou. 87 * 88 * Support for the new UVM pmap interface was written by 89 * Jason R. Thorpe. 90 * 91 * Support for ASNs was written by Jason R. Thorpe, again 92 * with help from Chris Demetriou and Ross Harvey. 93 * 94 * The locking protocol was written by Jason R. Thorpe, 95 * using Chuck Cranor's i386 pmap for UVM as a model. 96 * 97 * TLB shootdown code was written by Jason R. Thorpe. 98 * 99 * Notes: 100 * 101 * All page table access is done via K0SEG. The one exception 102 * to this is for kernel mappings. Since all kernel page 103 * tables are pre-allocated, we can use the Virtual Page Table 104 * to access PTEs that map K1SEG addresses. 105 * 106 * Kernel page table pages are statically allocated in 107 * pmap_bootstrap(), and are never freed. In the future, 108 * support for dynamically adding additional kernel page 109 * table pages may be added. User page table pages are 110 * dynamically allocated and freed. 111 * 112 * Bugs/misfeatures: 113 * 114 * - Some things could be optimized. 115 */ 116 117 /* 118 * Manages physical address maps. 119 * 120 * Since the information managed by this module is 121 * also stored by the logical address mapping module, 122 * this module may throw away valid virtual-to-physical 123 * mappings at almost any time. However, invalidations 124 * of virtual-to-physical mappings must be done as 125 * requested. 126 * 127 * In order to cope with hardware architectures which 128 * make virtual-to-physical map invalidates expensive, 129 * this module may delay invalidate or reduced protection 130 * operations until such time as they are actually 131 * necessary. This module is given full information as 132 * to which processors are currently using which maps, 133 * and to when physical maps must be made correct. 
134 */ 135 136 #include <sys/param.h> 137 #include <sys/systm.h> 138 #include <sys/proc.h> 139 #include <sys/malloc.h> 140 #include <sys/pool.h> 141 #include <sys/user.h> 142 #include <sys/buf.h> 143 #include <sys/atomic.h> 144 #ifdef SYSVSHM 145 #include <sys/shm.h> 146 #endif 147 148 #include <uvm/uvm.h> 149 150 #include <machine/atomic.h> 151 #include <machine/cpu.h> 152 #if defined(MULTIPROCESSOR) 153 #include <machine/rpb.h> 154 #endif 155 156 #ifdef DEBUG 157 #define PDB_FOLLOW 0x0001 158 #define PDB_INIT 0x0002 159 #define PDB_ENTER 0x0004 160 #define PDB_REMOVE 0x0008 161 #define PDB_CREATE 0x0010 162 #define PDB_PTPAGE 0x0020 163 #define PDB_ASN 0x0040 164 #define PDB_BITS 0x0080 165 #define PDB_COLLECT 0x0100 166 #define PDB_PROTECT 0x0200 167 #define PDB_BOOTSTRAP 0x1000 168 #define PDB_PARANOIA 0x2000 169 #define PDB_WIRING 0x4000 170 #define PDB_PVDUMP 0x8000 171 172 int debugmap = 0; 173 int pmapdebug = PDB_PARANOIA|PDB_FOLLOW|PDB_ENTER; 174 #endif 175 176 /* 177 * Given a map and a machine independent protection code, 178 * convert to an alpha protection code. 179 */ 180 #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p]) 181 int protection_codes[2][8]; 182 183 /* 184 * kernel_lev1map: 185 * 186 * Kernel level 1 page table. This maps all kernel level 2 187 * page table pages, and is used as a template for all user 188 * pmap level 1 page tables. When a new user level 1 page 189 * table is allocated, all kernel_lev1map PTEs for kernel 190 * addresses are copied to the new map. 191 * 192 * The kernel also has an initial set of kernel level 2 page 193 * table pages. These map the kernel level 3 page table pages. 194 * As kernel level 3 page table pages are added, more level 2 195 * page table pages may be added to map them. These pages are 196 * never freed. 197 * 198 * Finally, the kernel also has an initial set of kernel level 199 * 3 page table pages. These map pages in K1SEG. More level 200 * 3 page table pages may be added at run-time if additional 201 * K1SEG address space is required. These pages are never freed. 202 * 203 * NOTE: When mappings are inserted into the kernel pmap, all 204 * level 2 and level 3 page table pages must already be allocated 205 * and mapped into the parent page table. 206 */ 207 pt_entry_t *kernel_lev1map; 208 209 /* 210 * Virtual Page Table. 211 */ 212 pt_entry_t *VPT; 213 214 struct pmap kernel_pmap_store 215 [(PMAP_SIZEOF(ALPHA_MAXPROCS) + sizeof(struct pmap) - 1) 216 / sizeof(struct pmap)]; 217 218 paddr_t avail_start; /* PA of first available physical page */ 219 paddr_t avail_end; /* PA of last available physical page */ 220 vaddr_t pmap_maxkvaddr; /* VA of last avail page (pmap_growkernel) */ 221 222 boolean_t pmap_initialized; /* Has pmap_init completed? */ 223 224 u_long pmap_pages_stolen; /* instrumentation */ 225 226 /* 227 * This variable contains the number of CPU IDs we need to allocate 228 * space for when allocating the pmap structure. It is used to 229 * size a per-CPU array of ASN and ASN Generation number. 230 */ 231 u_long pmap_ncpuids; 232 233 #ifndef PMAP_PV_LOWAT 234 #define PMAP_PV_LOWAT 16 235 #endif 236 int pmap_pv_lowat = PMAP_PV_LOWAT; 237 238 /* 239 * List of all pmaps, used to update them when e.g. additional kernel 240 * page tables are allocated. This list is kept LRU-ordered by 241 * pmap_activate(). 242 */ 243 TAILQ_HEAD(, pmap) pmap_all_pmaps; 244 245 /* 246 * The pools from which pmap structures and sub-structures are allocated. 
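 *
 * As an illustrative sketch only (the real calls are in pmap_bootstrap()
 * and pmap_create() below), the pools are initialized once and then used
 * like any other pool:
 *
 *	pool_init(&pmap_pmap_pool, PMAP_SIZEOF(pmap_ncpuids), 0, IPL_NONE,
 *	    0, "pmappl", &pool_allocator_single);
 *
 *	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO);
 *	...
 *	pool_put(&pmap_pmap_pool, pmap);
 *
 * pv entries, in contrast, are allocated with PR_NOWAIT (see the
 * pmap_pv_alloc() macro below), so a failed allocation can be reported
 * back through pmap_enter()'s PMAP_CANFAIL path.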
 */
struct pool pmap_pmap_pool;
struct pool pmap_l1pt_pool;
struct pool pmap_pv_pool;

/*
 * Address Space Numbers.
 *
 * On many implementations of the Alpha architecture, the TLB entries and
 * I-cache blocks are tagged with a unique number within an implementation-
 * specified range.  When a process context becomes active, the ASN is used
 * to match TLB entries; if a TLB entry for a particular VA does not match
 * the current ASN, it is ignored (one could think of the processor as
 * having a collection of <max ASN> separate TLBs).  This allows operating
 * system software to skip the TLB flush that would otherwise be necessary
 * at context switch time.
 *
 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
 * causes TLB entries to match any ASN.  The PALcode also provides
 * a TBI (Translation Buffer Invalidate) operation that flushes all
 * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
 * mappings, so that invalidation of all user mappings does not invalidate
 * kernel mappings (which are consistent across all processes).
 *
 * pma_asn always indicates the next ASN to use.  When
 * pma_asn exceeds pmap_max_asn, we start a new ASN generation.
 *
 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
 * TLB entries and the I-cache are flushed, the generation number is bumped,
 * and pma_asn is changed to indicate the first non-reserved ASN.
 *
 * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
 * prevents the following scenario:
 *
 *	* New ASN generation starts, and process A is given ASN #0.
 *
 *	* A new process B (and thus new pmap) is created.  The ASN,
 *	  for lack of a better value, is initialized to 0.
 *
 *	* Process B runs.  It is now using the TLB entries tagged
 *	  by process A.  *poof*
 *
 * In the scenario above, in addition to the processor using incorrect
 * TLB entries, the PALcode might use incorrect information to service a
 * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
 * to locate the PTE for a faulting address, and tagged TLB entries exist
 * for the Virtual Page Table addresses in order to speed up this procedure,
 * as well.)
 *
 * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
 * new pmaps will initially run with no TLB entries for user addresses
 * or VPT mappings that map user page tables.  Since kernel_lev1map only
 * contains mappings for kernel addresses, and since those mappings
 * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
 * safe (since PG_ASM mappings match any ASN).
 *
 * On processors that do not support ASNs, the PALcode invalidates
 * the TLB and I-cache automatically on swpctx.  We still go
 * through the motions of assigning an ASN (really, just refreshing
 * the ASN generation in this particular case) to keep the logic sane
 * in other parts of the code.
 */
u_int	pmap_max_asn;		/* max ASN supported by the system */
				/* next ASN and current ASN generation */
struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];

/*
 * Locking:
 *
 *	* pm_mtx (per-pmap) - This lock protects all of the members
 *	  of the pmap structure itself.
 *
 *	* pvh_mtx (per-page) - This lock protects the list of mappings
 *	  of a (managed) physical page.
321 * 322 * * pmap_all_pmaps_mtx - This lock protects the global list of 323 * all pmaps. Note that a pm_slock must never be held while this 324 * lock is held. 325 * 326 * * pmap_growkernel_mtx - This lock protects pmap_growkernel() 327 * and the pmap_maxkvaddr variable. 328 * 329 * There is a lock ordering constraint for pmap_growkernel_mtx. 330 * pmap_growkernel() acquires the locks in the following order: 331 * 332 * pmap_growkernel_mtx -> pmap_all_pmaps_mtx -> 333 * pmap->pm_mtx 334 * 335 * Address space number management (global ASN counters and per-pmap 336 * ASN state) are not locked; they use arrays of values indexed 337 * per-processor. 338 * 339 * All internal functions which operate on a pmap are called 340 * with the pmap already locked by the caller (which will be 341 * an interface function). 342 */ 343 struct mutex pmap_all_pmaps_mtx; 344 struct mutex pmap_growkernel_mtx; 345 346 #define PMAP_LOCK(pmap) mtx_enter(&pmap->pm_mtx) 347 #define PMAP_UNLOCK(pmap) mtx_leave(&pmap->pm_mtx) 348 349 #if defined(MULTIPROCESSOR) 350 /* 351 * TLB Shootdown: 352 * 353 * When a mapping is changed in a pmap, the TLB entry corresponding to 354 * the virtual address must be invalidated on all processors. In order 355 * to accomplish this on systems with multiple processors, messages are 356 * sent from the processor which performs the mapping change to all 357 * processors on which the pmap is active. For other processors, the 358 * ASN generation numbers for that processor is invalidated, so that 359 * the next time the pmap is activated on that processor, a new ASN 360 * will be allocated (which implicitly invalidates all TLB entries). 361 * 362 * Note, we can use the pool allocator to allocate job entries 363 * since pool pages are mapped with K0SEG, not with the TLB. 364 */ 365 struct pmap_tlb_shootdown_job { 366 TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; 367 vaddr_t pj_va; /* virtual address */ 368 pmap_t pj_pmap; /* the pmap which maps the address */ 369 pt_entry_t pj_pte; /* the PTE bits */ 370 }; 371 372 /* If we have more pending jobs than this, we just nail the whole TLB. */ 373 #define PMAP_TLB_SHOOTDOWN_MAXJOBS 6 374 375 struct pmap_tlb_shootdown_q { 376 TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; 377 TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_free; 378 int pq_pte; /* aggregate low PTE bits */ 379 int pq_tbia; /* pending global flush */ 380 struct mutex pq_mtx; /* queue lock */ 381 struct pmap_tlb_shootdown_job pq_jobs[PMAP_TLB_SHOOTDOWN_MAXJOBS]; 382 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS]; 383 384 #define PSJQ_LOCK(pq, s) mtx_enter(&(pq)->pq_mtx) 385 #define PSJQ_UNLOCK(pq, s) mtx_leave(&(pq)->pq_mtx) 386 387 void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *); 388 struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get 389 (struct pmap_tlb_shootdown_q *); 390 void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *, 391 struct pmap_tlb_shootdown_job *); 392 #endif /* MULTIPROCESSOR */ 393 394 #define PAGE_IS_MANAGED(pa) (vm_physseg_find(atop(pa), NULL) != -1) 395 396 /* 397 * Internal routines 398 */ 399 void alpha_protection_init(void); 400 void pmap_do_remove(pmap_t, vaddr_t, vaddr_t, boolean_t); 401 boolean_t pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, 402 boolean_t, cpuid_t); 403 void pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, cpuid_t); 404 405 /* 406 * PT page management functions. 
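 *
 * For reference, a minimal sketch of the three-level lookup these
 * functions maintain (pmap_l1pte(), pmap_l2pte() and pmap_l3pte() are
 * the real helpers; see pmap_extract() below for an actual walk):
 *
 *	l1pte = pmap_l1pte(pmap, va);
 *	if (pmap_pte_v(l1pte)) {
 *		l2pte = pmap_l2pte(pmap, va, l1pte);
 *		if (pmap_pte_v(l2pte)) {
 *			l3pte = pmap_l3pte(pmap, va, l2pte);
 *			if (pmap_pte_v(l3pte))
 *				pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
 *		}
 *	}
 *
 * Each level holds a reference count on the page table page below it;
 * the pmap_l*pt_delref() functions below drop those references and free
 * a page table page once its last reference goes away.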
407 */ 408 int pmap_lev1map_create(pmap_t, cpuid_t); 409 void pmap_lev1map_destroy(pmap_t); 410 int pmap_ptpage_alloc(pmap_t, pt_entry_t *, int); 411 void pmap_ptpage_free(pmap_t, pt_entry_t *); 412 void pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, cpuid_t); 413 void pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *); 414 void pmap_l1pt_delref(pmap_t, pt_entry_t *); 415 416 void *pmap_l1pt_alloc(struct pool *, int, int *); 417 void pmap_l1pt_free(struct pool *, void *); 418 419 struct pool_allocator pmap_l1pt_allocator = { 420 pmap_l1pt_alloc, pmap_l1pt_free, 0, 421 }; 422 423 void pmap_l1pt_ctor(pt_entry_t *); 424 425 /* 426 * PV table management functions. 427 */ 428 int pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *, 429 boolean_t); 430 void pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, boolean_t); 431 void *pmap_pv_page_alloc(struct pool *, int, int *); 432 void pmap_pv_page_free(struct pool *, void *); 433 434 struct pool_allocator pmap_pv_page_allocator = { 435 pmap_pv_page_alloc, pmap_pv_page_free, 0, 436 }; 437 438 #ifdef DEBUG 439 void pmap_pv_dump(paddr_t); 440 #endif 441 442 #define pmap_pv_alloc() pool_get(&pmap_pv_pool, PR_NOWAIT) 443 #define pmap_pv_free(pv) pool_put(&pmap_pv_pool, (pv)) 444 445 /* 446 * ASN management functions. 447 */ 448 void pmap_asn_alloc(pmap_t, cpuid_t); 449 450 /* 451 * Misc. functions. 452 */ 453 boolean_t pmap_physpage_alloc(int, paddr_t *); 454 void pmap_physpage_free(paddr_t); 455 int pmap_physpage_addref(void *); 456 int pmap_physpage_delref(void *); 457 458 /* pmap_physpage_alloc() page usage */ 459 #define PGU_NORMAL 0 /* free or normal use */ 460 #define PGU_PVENT 1 /* PV entries */ 461 #define PGU_L1PT 2 /* level 1 page table */ 462 #define PGU_L2PT 3 /* level 2 page table */ 463 #define PGU_L3PT 4 /* level 3 page table */ 464 465 /* 466 * PMAP_ISACTIVE{,_TEST}: 467 * 468 * Check to see if a pmap is active on the current processor. 469 */ 470 #define PMAP_ISACTIVE_TEST(pm, cpu_id) \ 471 (((pm)->pm_cpus & (1UL << (cpu_id))) != 0) 472 473 #if defined(DEBUG) && !defined(MULTIPROCESSOR) 474 #define PMAP_ISACTIVE(pm, cpu_id) \ 475 ({ \ 476 /* \ 477 * XXX This test is not MP-safe. \ 478 */ \ 479 int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id); \ 480 \ 481 if (curproc != NULL && curproc->p_vmspace != NULL && \ 482 (pm) != pmap_kernel() && \ 483 (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap))) \ 484 panic("PMAP_ISACTIVE, isa: %d pm: %p curpm:%p", \ 485 isactive_, (pm), curproc->p_vmspace->vm_map.pmap); \ 486 (isactive_); \ 487 }) 488 #else 489 #define PMAP_ISACTIVE(pm, cpu_id) PMAP_ISACTIVE_TEST(pm, cpu_id) 490 #endif /* DEBUG && !MULTIPROCESSOR */ 491 492 /* 493 * PMAP_ACTIVATE_ASN_SANITY: 494 * 495 * DEBUG sanity checks for ASNs within PMAP_ACTIVATE. 496 */ 497 #ifdef DEBUG 498 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) \ 499 do { \ 500 struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)]; \ 501 struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)]; \ 502 \ 503 if ((pmap)->pm_lev1map == kernel_lev1map) { \ 504 /* \ 505 * This pmap implementation also ensures that pmaps \ 506 * referencing kernel_lev1map use a reserved ASN \ 507 * ASN to prevent the PALcode from servicing a TLB \ 508 * miss with the wrong PTE. 
\ 509 */ \ 510 if (__pma->pma_asn != PMAP_ASN_RESERVED) { \ 511 printf("kernel_lev1map with non-reserved ASN " \ 512 "(line %d)\n", __LINE__); \ 513 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 514 } \ 515 } else { \ 516 if (__pma->pma_asngen != __cpma->pma_asngen) { \ 517 /* \ 518 * ASN generation number isn't valid! \ 519 */ \ 520 printf("pmap asngen %lu, current %lu " \ 521 "(line %d)\n", \ 522 __pma->pma_asngen, \ 523 __cpma->pma_asngen, \ 524 __LINE__); \ 525 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 526 } \ 527 if (__pma->pma_asn == PMAP_ASN_RESERVED) { \ 528 /* \ 529 * DANGER WILL ROBINSON! We're going to \ 530 * pollute the VPT TLB entries! \ 531 */ \ 532 printf("Using reserved ASN! (line %d)\n", \ 533 __LINE__); \ 534 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 535 } \ 536 } \ 537 } while (0) 538 #else 539 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) /* nothing */ 540 #endif 541 542 /* 543 * PMAP_ACTIVATE: 544 * 545 * This is essentially the guts of pmap_activate(), without 546 * ASN allocation. This is used by pmap_activate(), 547 * pmap_lev1map_create(), and pmap_lev1map_destroy(). 548 * 549 * This is called only when it is known that a pmap is "active" 550 * on the current processor; the ASN must already be valid. 551 */ 552 #define PMAP_ACTIVATE(pmap, p, cpu_id) \ 553 do { \ 554 PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id); \ 555 \ 556 (p)->p_addr->u_pcb.pcb_hw.apcb_ptbr = \ 557 ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \ 558 (p)->p_addr->u_pcb.pcb_hw.apcb_asn = \ 559 (pmap)->pm_asni[(cpu_id)].pma_asn; \ 560 \ 561 if ((p) == curproc) { \ 562 /* \ 563 * Page table base register has changed; switch to \ 564 * our own context again so that it will take effect. \ 565 */ \ 566 (void) alpha_pal_swpctx((u_long)p->p_md.md_pcbpaddr); \ 567 } \ 568 } while (0) 569 570 /* 571 * PMAP_SET_NEEDISYNC: 572 * 573 * Mark that a user pmap needs an I-stream synch on its 574 * way back out to userspace. 575 */ 576 #define PMAP_SET_NEEDISYNC(pmap) (pmap)->pm_needisync = ~0UL 577 578 /* 579 * PMAP_SYNC_ISTREAM: 580 * 581 * Synchronize the I-stream for the specified pmap. For user 582 * pmaps, this is deferred until a process using the pmap returns 583 * to userspace. 584 */ 585 #if defined(MULTIPROCESSOR) 586 #define PMAP_SYNC_ISTREAM_KERNEL() \ 587 do { \ 588 alpha_pal_imb(); \ 589 alpha_broadcast_ipi(ALPHA_IPI_IMB); \ 590 } while (0) 591 592 #define PMAP_SYNC_ISTREAM_USER(pmap) \ 593 do { \ 594 alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST); \ 595 /* for curcpu, do it before userret() */ \ 596 } while (0) 597 #else 598 #define PMAP_SYNC_ISTREAM_KERNEL() alpha_pal_imb() 599 #define PMAP_SYNC_ISTREAM_USER(pmap) /* done before userret() */ 600 #endif /* MULTIPROCESSOR */ 601 602 #define PMAP_SYNC_ISTREAM(pmap) \ 603 do { \ 604 if ((pmap) == pmap_kernel()) \ 605 PMAP_SYNC_ISTREAM_KERNEL(); \ 606 else \ 607 PMAP_SYNC_ISTREAM_USER(pmap); \ 608 } while (0) 609 610 /* 611 * PMAP_INVALIDATE_ASN: 612 * 613 * Invalidate the specified pmap's ASN, so as to force allocation 614 * of a new one the next time pmap_asn_alloc() is called. 615 * 616 * NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING 617 * CONDITIONS ARE TRUE: 618 * 619 * (1) The pmap references the global kernel_lev1map. 620 * 621 * (2) The pmap is not active on the current processor. 622 */ 623 #define PMAP_INVALIDATE_ASN(pmap, cpu_id) \ 624 do { \ 625 (pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED; \ 626 } while (0) 627 628 /* 629 * PMAP_INVALIDATE_TLB: 630 * 631 * Invalidate the TLB entry for the pmap/va pair. 
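 *
 * For illustration, the usual call sequence elsewhere in this file
 * (see pmap_protect() and pmap_enter()) is:
 *
 *	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
 *	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
 *	PMAP_TLB_SHOOTNOW();
 *
 * i.e. the local TLB entry is invalidated first, and the change is then
 * propagated to any other processors on which the pmap is active.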
632 */ 633 #define PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id) \ 634 do { \ 635 if ((hadasm) || (isactive)) { \ 636 /* \ 637 * Simply invalidating the TLB entry and I-cache \ 638 * works in this case. \ 639 */ \ 640 ALPHA_TBIS((va)); \ 641 } else if ((pmap)->pm_asni[(cpu_id)].pma_asngen == \ 642 pmap_asn_info[(cpu_id)].pma_asngen) { \ 643 /* \ 644 * We can't directly invalidate the TLB entry \ 645 * in this case, so we have to force allocation \ 646 * of a new ASN the next time this pmap becomes \ 647 * active. \ 648 */ \ 649 PMAP_INVALIDATE_ASN((pmap), (cpu_id)); \ 650 } \ 651 /* \ 652 * Nothing to do in this case; the next time the \ 653 * pmap becomes active on this processor, a new \ 654 * ASN will be allocated anyway. \ 655 */ \ 656 } while (0) 657 658 /* 659 * PMAP_KERNEL_PTE: 660 * 661 * Get a kernel PTE. 662 * 663 * If debugging, do a table walk. If not debugging, just use 664 * the Virtual Page Table, since all kernel page tables are 665 * pre-allocated and mapped in. 666 */ 667 #ifdef DEBUG 668 #define PMAP_KERNEL_PTE(va) \ 669 ({ \ 670 pt_entry_t *l1pte_, *l2pte_; \ 671 \ 672 l1pte_ = pmap_l1pte(pmap_kernel(), va); \ 673 if (pmap_pte_v(l1pte_) == 0) { \ 674 printf("kernel level 1 PTE not valid, va 0x%lx " \ 675 "(line %d)\n", (va), __LINE__); \ 676 panic("PMAP_KERNEL_PTE"); \ 677 } \ 678 l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_); \ 679 if (pmap_pte_v(l2pte_) == 0) { \ 680 printf("kernel level 2 PTE not valid, va 0x%lx " \ 681 "(line %d)\n", (va), __LINE__); \ 682 panic("PMAP_KERNEL_PTE"); \ 683 } \ 684 pmap_l3pte(pmap_kernel(), va, l2pte_); \ 685 }) 686 #else 687 #define PMAP_KERNEL_PTE(va) (&VPT[VPT_INDEX((va))]) 688 #endif 689 690 /* 691 * PMAP_SET_PTE: 692 * 693 * Set a PTE to a specified value. 694 */ 695 #define PMAP_SET_PTE(ptep, val) *(ptep) = (val) 696 697 /* 698 * PMAP_STAT_{INCR,DECR}: 699 * 700 * Increment or decrement a pmap statistic. 701 */ 702 #define PMAP_STAT_INCR(s, v) atomic_add_ulong((unsigned long *)(&(s)), (v)) 703 #define PMAP_STAT_DECR(s, v) atomic_sub_ulong((unsigned long *)(&(s)), (v)) 704 705 /* 706 * pmap_bootstrap: 707 * 708 * Bootstrap the system to run with virtual memory. 709 * 710 * Note: no locking is necessary in this function. 711 */ 712 void 713 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) 714 { 715 vsize_t lev2mapsize, lev3mapsize; 716 pt_entry_t *lev2map, *lev3map; 717 pt_entry_t pte; 718 int i; 719 #ifdef MULTIPROCESSOR 720 int j; 721 #endif 722 723 #ifdef DEBUG 724 if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP)) 725 printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn); 726 #endif 727 728 /* 729 * Compute the number of pages kmem_map will have. 730 */ 731 kmeminit_nkmempages(); 732 733 /* 734 * Figure out how many PTEs are necessary to map the kernel. 735 */ 736 lev3mapsize = (VM_PHYS_SIZE + 16 * NCARGS + PAGER_MAP_SIZE) / 737 PAGE_SIZE + (maxthread * UPAGES) + nkmempages; 738 739 #ifdef SYSVSHM 740 lev3mapsize += shminfo.shmall; 741 #endif 742 lev3mapsize = roundup(lev3mapsize, NPTEPG); 743 744 /* 745 * Allocate a level 1 PTE table for the kernel. 746 * This is always one page long. 747 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 748 */ 749 kernel_lev1map = (pt_entry_t *) 750 pmap_steal_memory(sizeof(pt_entry_t) * NPTEPG, NULL, NULL); 751 752 /* 753 * Allocate a level 2 PTE table for the kernel. 754 * These must map all of the level3 PTEs. 755 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 
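	 *
	 * As a worked example (numbers for illustration only, assuming
	 * the usual 8 KB Alpha page size, so NPTEPG == 1024): if
	 * lev3mapsize comes to 65536 level 3 PTEs, then
	 *
	 *	howmany(65536, 1024) == 64	level 3 PT pages to map
	 *	roundup(64, 1024) == 1024	level 2 PTEs to allocate
	 *
	 * i.e. lev2mapsize is rounded up to one full page of level 2 PTEs.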
756 */ 757 lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG); 758 lev2map = (pt_entry_t *) 759 pmap_steal_memory(sizeof(pt_entry_t) * lev2mapsize, NULL, NULL); 760 761 /* 762 * Allocate a level 3 PTE table for the kernel. 763 * Contains lev3mapsize PTEs. 764 */ 765 lev3map = (pt_entry_t *) 766 pmap_steal_memory(sizeof(pt_entry_t) * lev3mapsize, NULL, NULL); 767 768 /* 769 * Set up level 1 page table 770 */ 771 772 /* Map all of the level 2 pte pages */ 773 for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) { 774 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) + 775 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 776 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 777 kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS + 778 (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte; 779 } 780 781 /* Map the virtual page table */ 782 pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT) 783 << PG_SHIFT; 784 pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */ 785 kernel_lev1map[l1pte_index(VPTBASE)] = pte; 786 VPT = (pt_entry_t *)VPTBASE; 787 788 /* 789 * Set up level 2 page table. 790 */ 791 /* Map all of the level 3 pte pages */ 792 for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) { 793 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) + 794 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 795 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 796 lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+ 797 (i*PAGE_SIZE*NPTEPG))] = pte; 798 } 799 800 /* Initialize the pmap_growkernel_mtx. */ 801 mtx_init(&pmap_growkernel_mtx, IPL_NONE); 802 803 /* 804 * Set up level three page table (lev3map) 805 */ 806 /* Nothing to do; it's already zeroed */ 807 808 /* 809 * Initialize `FYI' variables. Note we're relying on 810 * the fact that BSEARCH sorts the vm_physmem[] array 811 * for us. 812 */ 813 avail_start = ptoa(vm_physmem[0].start); 814 avail_end = ptoa(vm_physmem[vm_nphysseg - 1].end); 815 816 pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE; 817 818 #if 0 819 printf("avail_start = 0x%lx\n", avail_start); 820 printf("avail_end = 0x%lx\n", avail_end); 821 #endif 822 823 /* 824 * Initialize the pmap pools and list. 825 */ 826 pmap_ncpuids = ncpuids; 827 pool_init(&pmap_pmap_pool, PMAP_SIZEOF(pmap_ncpuids), 0, IPL_NONE, 0, 828 "pmappl", &pool_allocator_single); 829 pool_init(&pmap_l1pt_pool, PAGE_SIZE, 0, IPL_VM, 0, 830 "l1ptpl", &pmap_l1pt_allocator); 831 pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0, 832 "pvpl", &pmap_pv_page_allocator); 833 834 TAILQ_INIT(&pmap_all_pmaps); 835 836 /* 837 * Initialize the ASN logic. 838 */ 839 pmap_max_asn = maxasn; 840 for (i = 0; i < ALPHA_MAXPROCS; i++) { 841 pmap_asn_info[i].pma_asn = 1; 842 pmap_asn_info[i].pma_asngen = 0; 843 } 844 845 /* 846 * Initialize the locks. 847 */ 848 mtx_init(&pmap_all_pmaps_mtx, IPL_NONE); 849 850 /* 851 * Initialize kernel pmap. Note that all kernel mappings 852 * have PG_ASM set, so the ASN doesn't really matter for 853 * the kernel pmap. Also, since the kernel pmap always 854 * references kernel_lev1map, it always has an invalid ASN 855 * generation. 
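	 *
	 * (For reference, the kernel PTEs built above all have the shape
	 * sketched below (valid, wired, kernel read/write, and matching
	 * any ASN):
	 *
	 *	pte = (ALPHA_K0SEG_TO_PHYS(va) >> PGSHIFT) << PG_SHIFT;
	 *	pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
	 *
	 * The lone exception is the Virtual Page Table self-mapping, which
	 * deliberately omits PG_ASM.)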
856 */ 857 memset(pmap_kernel(), 0, sizeof(pmap_kernel())); 858 pmap_kernel()->pm_lev1map = kernel_lev1map; 859 pmap_kernel()->pm_count = 1; 860 for (i = 0; i < ALPHA_MAXPROCS; i++) { 861 pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 862 pmap_kernel()->pm_asni[i].pma_asngen = 863 pmap_asn_info[i].pma_asngen; 864 } 865 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); 866 mtx_init(&pmap_kernel()->pm_mtx, IPL_VM); 867 868 #if defined(MULTIPROCESSOR) 869 /* 870 * Initialize the TLB shootdown queues. 871 */ 872 for (i = 0; i < ALPHA_MAXPROCS; i++) { 873 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); 874 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_free); 875 for (j = 0; j < PMAP_TLB_SHOOTDOWN_MAXJOBS; j++) 876 TAILQ_INSERT_TAIL(&pmap_tlb_shootdown_q[i].pq_free, 877 &pmap_tlb_shootdown_q[i].pq_jobs[j], pj_list); 878 mtx_init(&pmap_tlb_shootdown_q[i].pq_mtx, IPL_IPI); 879 } 880 #endif 881 882 /* 883 * Set up proc0's PCB such that the ptbr points to the right place 884 * and has the kernel pmap's (really unused) ASN. 885 */ 886 proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr = 887 ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT; 888 proc0.p_addr->u_pcb.pcb_hw.apcb_asn = 889 pmap_kernel()->pm_asni[cpu_number()].pma_asn; 890 891 /* 892 * Mark the kernel pmap `active' on this processor. 893 */ 894 atomic_setbits_ulong(&pmap_kernel()->pm_cpus, 895 (1UL << cpu_number())); 896 } 897 898 /* 899 * pmap_steal_memory: [ INTERFACE ] 900 * 901 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()). 902 * This function allows for early dynamic memory allocation until the 903 * virtual memory system has been bootstrapped. After that point, either 904 * kmem_alloc or malloc should be used. This function works by stealing 905 * pages from the (to be) managed page pool, then implicitly mapping the 906 * pages (by using their k0seg addresses) and zeroing them. 907 * 908 * It may be used once the physical memory segments have been pre-loaded 909 * into the vm_physmem[] array. Early memory allocation MUST use this 910 * interface! This cannot be used after vm_page_startup(), and will 911 * generate a panic if tried. 912 * 913 * Note that this memory will never be freed, and in essence it is wired 914 * down. 915 * 916 * Note: no locking is necessary in this function. 917 */ 918 vaddr_t 919 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp) 920 { 921 int bank, npgs, x; 922 vaddr_t va; 923 paddr_t pa; 924 925 size = round_page(size); 926 npgs = atop(size); 927 928 #if 0 929 printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs); 930 #endif 931 932 for (bank = 0; bank < vm_nphysseg; bank++) { 933 if (uvm.page_init_done == TRUE) 934 panic("pmap_steal_memory: called _after_ bootstrap"); 935 936 #if 0 937 printf(" bank %d: avail_start 0x%lx, start 0x%lx, " 938 "avail_end 0x%lx\n", bank, vm_physmem[bank].avail_start, 939 vm_physmem[bank].start, vm_physmem[bank].avail_end); 940 #endif 941 942 if (vm_physmem[bank].avail_start != vm_physmem[bank].start || 943 vm_physmem[bank].avail_start >= vm_physmem[bank].avail_end) 944 continue; 945 946 #if 0 947 printf(" avail_end - avail_start = 0x%lx\n", 948 vm_physmem[bank].avail_end - vm_physmem[bank].avail_start); 949 #endif 950 951 if ((vm_physmem[bank].avail_end - vm_physmem[bank].avail_start) 952 < npgs) 953 continue; 954 955 /* 956 * There are enough pages here; steal them! 
957 */ 958 pa = ptoa(vm_physmem[bank].avail_start); 959 vm_physmem[bank].avail_start += npgs; 960 vm_physmem[bank].start += npgs; 961 962 /* 963 * Have we used up this segment? 964 */ 965 if (vm_physmem[bank].avail_start == vm_physmem[bank].end) { 966 if (vm_nphysseg == 1) 967 panic("pmap_steal_memory: out of memory!"); 968 969 /* Remove this segment from the list. */ 970 vm_nphysseg--; 971 for (x = bank; x < vm_nphysseg; x++) { 972 /* structure copy */ 973 vm_physmem[x] = vm_physmem[x + 1]; 974 } 975 } 976 977 /* 978 * Fill these in for the caller; we don't modify them, 979 * but the upper layers still want to know. 980 */ 981 if (vstartp) 982 *vstartp = VM_MIN_KERNEL_ADDRESS; 983 if (vendp) 984 *vendp = VM_MAX_KERNEL_ADDRESS; 985 986 va = ALPHA_PHYS_TO_K0SEG(pa); 987 memset((caddr_t)va, 0, size); 988 pmap_pages_stolen += npgs; 989 return (va); 990 } 991 992 /* 993 * If we got here, this was no memory left. 994 */ 995 panic("pmap_steal_memory: no memory to steal"); 996 } 997 998 /* 999 * pmap_init: [ INTERFACE ] 1000 * 1001 * Initialize the pmap module. Called by uvm_init(), to initialize any 1002 * structures that the pmap system needs to map virtual memory. 1003 * 1004 * Note: no locking is necessary in this function. 1005 */ 1006 void 1007 pmap_init(void) 1008 { 1009 1010 #ifdef DEBUG 1011 if (pmapdebug & PDB_FOLLOW) 1012 printf("pmap_init()\n"); 1013 #endif 1014 1015 /* initialize protection array */ 1016 alpha_protection_init(); 1017 1018 /* 1019 * Set a low water mark on the pv_entry pool, so that we are 1020 * more likely to have these around even in extreme memory 1021 * starvation. 1022 */ 1023 pool_setlowat(&pmap_pv_pool, pmap_pv_lowat); 1024 1025 /* 1026 * Now it is safe to enable pv entry recording. 1027 */ 1028 pmap_initialized = TRUE; 1029 1030 #if 0 1031 for (bank = 0; bank < vm_nphysseg; bank++) { 1032 printf("bank %d\n", bank); 1033 printf("\tstart = 0x%x\n", ptoa(vm_physmem[bank].start)); 1034 printf("\tend = 0x%x\n", ptoa(vm_physmem[bank].end)); 1035 printf("\tavail_start = 0x%x\n", 1036 ptoa(vm_physmem[bank].avail_start)); 1037 printf("\tavail_end = 0x%x\n", 1038 ptoa(vm_physmem[bank].avail_end)); 1039 } 1040 #endif 1041 } 1042 1043 /* 1044 * pmap_create: [ INTERFACE ] 1045 * 1046 * Create and return a physical map. 1047 */ 1048 pmap_t 1049 pmap_create(void) 1050 { 1051 pmap_t pmap; 1052 int i; 1053 1054 #ifdef DEBUG 1055 if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) 1056 printf("pmap_create()\n"); 1057 #endif 1058 1059 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO); 1060 1061 pmap->pm_count = 1; 1062 for (i = 0; i < pmap_ncpuids; i++) { 1063 pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 1064 /* XXX Locking? */ 1065 pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; 1066 } 1067 mtx_init(&pmap->pm_mtx, IPL_VM); 1068 1069 for (;;) { 1070 mtx_enter(&pmap_growkernel_mtx); 1071 i = pmap_lev1map_create(pmap, cpu_number()); 1072 mtx_leave(&pmap_growkernel_mtx); 1073 if (i == 0) 1074 break; 1075 uvm_wait(__func__); 1076 } 1077 1078 mtx_enter(&pmap_all_pmaps_mtx); 1079 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); 1080 mtx_leave(&pmap_all_pmaps_mtx); 1081 1082 return (pmap); 1083 } 1084 1085 /* 1086 * pmap_destroy: [ INTERFACE ] 1087 * 1088 * Drop the reference count on the specified pmap, releasing 1089 * all resources if the reference count drops to zero. 
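 *
 * For illustration, the reference counting protocol looks like this
 * (pmap_page_protect() below uses the same pattern to keep a pmap
 * alive while it temporarily drops the pv list lock):
 *
 *	pmap = pmap_create();		pm_count == 1
 *	pmap_reference(pmap);		pm_count == 2
 *	pmap_destroy(pmap);		pm_count == 1, nothing freed
 *	pmap_destroy(pmap);		pm_count == 0, pmap is freed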
1090 */ 1091 void 1092 pmap_destroy(pmap_t pmap) 1093 { 1094 int refs; 1095 1096 #ifdef DEBUG 1097 if (pmapdebug & PDB_FOLLOW) 1098 printf("pmap_destroy(%p)\n", pmap); 1099 #endif 1100 1101 refs = atomic_dec_int_nv(&pmap->pm_count); 1102 if (refs > 0) 1103 return; 1104 1105 /* 1106 * Remove it from the global list of all pmaps. 1107 */ 1108 mtx_enter(&pmap_all_pmaps_mtx); 1109 TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); 1110 mtx_leave(&pmap_all_pmaps_mtx); 1111 1112 mtx_enter(&pmap_growkernel_mtx); 1113 pmap_lev1map_destroy(pmap); 1114 mtx_leave(&pmap_growkernel_mtx); 1115 1116 pool_put(&pmap_pmap_pool, pmap); 1117 } 1118 1119 /* 1120 * pmap_reference: [ INTERFACE ] 1121 * 1122 * Add a reference to the specified pmap. 1123 */ 1124 void 1125 pmap_reference(pmap_t pmap) 1126 { 1127 1128 #ifdef DEBUG 1129 if (pmapdebug & PDB_FOLLOW) 1130 printf("pmap_reference(%p)\n", pmap); 1131 #endif 1132 1133 atomic_inc_int(&pmap->pm_count); 1134 } 1135 1136 /* 1137 * pmap_remove: [ INTERFACE ] 1138 * 1139 * Remove the given range of addresses from the specified map. 1140 * 1141 * It is assumed that the start and end are properly 1142 * rounded to the page size. 1143 */ 1144 void 1145 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 1146 { 1147 1148 #ifdef DEBUG 1149 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1150 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1151 #endif 1152 1153 pmap_do_remove(pmap, sva, eva, TRUE); 1154 } 1155 1156 /* 1157 * pmap_do_remove: 1158 * 1159 * This actually removes the range of addresses from the 1160 * specified map. It is used by pmap_collect() (does not 1161 * want to remove wired mappings) and pmap_remove() (does 1162 * want to remove wired mappings). 1163 */ 1164 void 1165 pmap_do_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, boolean_t dowired) 1166 { 1167 pt_entry_t *l1pte, *l2pte, *l3pte; 1168 pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte; 1169 vaddr_t l1eva, l2eva, vptva; 1170 boolean_t needisync = FALSE; 1171 cpuid_t cpu_id = cpu_number(); 1172 1173 #ifdef DEBUG 1174 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1175 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1176 #endif 1177 1178 /* 1179 * If this is the kernel pmap, we can use a faster method 1180 * for accessing the PTEs (since the PT pages are always 1181 * resident). 1182 * 1183 * Note that this routine should NEVER be called from an 1184 * interrupt context; pmap_kremove() is used for that. 1185 */ 1186 if (pmap == pmap_kernel()) { 1187 PMAP_LOCK(pmap); 1188 1189 KASSERT(dowired == TRUE); 1190 1191 while (sva < eva) { 1192 l3pte = PMAP_KERNEL_PTE(sva); 1193 if (pmap_pte_v(l3pte)) { 1194 #ifdef DIAGNOSTIC 1195 if (PAGE_IS_MANAGED(pmap_pte_pa(l3pte)) && 1196 pmap_pte_pv(l3pte) == 0) 1197 panic("pmap_remove: managed page " 1198 "without PG_PVLIST for 0x%lx", 1199 sva); 1200 #endif 1201 needisync |= pmap_remove_mapping(pmap, sva, 1202 l3pte, TRUE, cpu_id); 1203 } 1204 sva += PAGE_SIZE; 1205 } 1206 1207 PMAP_UNLOCK(pmap); 1208 1209 if (needisync) 1210 PMAP_SYNC_ISTREAM_KERNEL(); 1211 return; 1212 } 1213 1214 #ifdef DIAGNOSTIC 1215 if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS) 1216 panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel " 1217 "address range", sva, eva); 1218 #endif 1219 1220 PMAP_LOCK(pmap); 1221 1222 /* 1223 * If we're already referencing the kernel_lev1map, there 1224 * is no work for us to do. 
1225 */ 1226 if (pmap->pm_lev1map == kernel_lev1map) 1227 goto out; 1228 1229 saved_l1pte = l1pte = pmap_l1pte(pmap, sva); 1230 1231 /* 1232 * Add a reference to the L1 table to it won't get 1233 * removed from under us. 1234 */ 1235 pmap_physpage_addref(saved_l1pte); 1236 1237 for (; sva < eva; sva = l1eva, l1pte++) { 1238 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1239 if (pmap_pte_v(l1pte)) { 1240 saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte); 1241 1242 /* 1243 * Add a reference to the L2 table so it won't 1244 * get removed from under us. 1245 */ 1246 pmap_physpage_addref(saved_l2pte); 1247 1248 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1249 l2eva = 1250 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1251 if (pmap_pte_v(l2pte)) { 1252 saved_l3pte = l3pte = 1253 pmap_l3pte(pmap, sva, l2pte); 1254 1255 /* 1256 * Add a reference to the L3 table so 1257 * it won't get removed from under us. 1258 */ 1259 pmap_physpage_addref(saved_l3pte); 1260 1261 /* 1262 * Remember this sva; if the L3 table 1263 * gets removed, we need to invalidate 1264 * the VPT TLB entry for it. 1265 */ 1266 vptva = sva; 1267 1268 for (; sva < l2eva && sva < eva; 1269 sva += PAGE_SIZE, l3pte++) { 1270 if (pmap_pte_v(l3pte) && 1271 (dowired == TRUE || 1272 pmap_pte_w(l3pte) == 0)) { 1273 needisync |= 1274 pmap_remove_mapping( 1275 pmap, sva, 1276 l3pte, TRUE, 1277 cpu_id); 1278 } 1279 } 1280 1281 /* 1282 * Remove the reference to the L3 1283 * table that we added above. This 1284 * may free the L3 table. 1285 */ 1286 pmap_l3pt_delref(pmap, vptva, 1287 saved_l3pte, cpu_id); 1288 } 1289 } 1290 1291 /* 1292 * Remove the reference to the L2 table that we 1293 * added above. This may free the L2 table. 1294 */ 1295 pmap_l2pt_delref(pmap, l1pte, saved_l2pte); 1296 } 1297 } 1298 1299 /* 1300 * Remove the reference to the L1 table that we added above. 1301 * This may free the L1 table. 1302 */ 1303 pmap_l1pt_delref(pmap, saved_l1pte); 1304 1305 if (needisync) 1306 PMAP_SYNC_ISTREAM_USER(pmap); 1307 1308 out: 1309 PMAP_UNLOCK(pmap); 1310 } 1311 1312 /* 1313 * pmap_page_protect: [ INTERFACE ] 1314 * 1315 * Lower the permission for all mappings to a given page to 1316 * the permissions specified. 
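 *
 * For illustration, the only two cases that do real work here (see the
 * switch statement below) are:
 *
 *	pmap_page_protect(pg, PROT_READ);	write-protect all mappings
 *						(e.g. for copy-on-write)
 *	pmap_page_protect(pg, PROT_NONE);	remove all mappings of pg
 *
 * Requests that still allow writing are a no-op.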
1317 */ 1318 void 1319 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 1320 { 1321 pmap_t pmap; 1322 pv_entry_t pv; 1323 boolean_t needkisync = FALSE; 1324 cpuid_t cpu_id = cpu_number(); 1325 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1326 1327 #ifdef DEBUG 1328 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) || 1329 (prot == PROT_NONE && (pmapdebug & PDB_REMOVE))) 1330 printf("pmap_page_protect(%p, %x)\n", pg, prot); 1331 #endif 1332 1333 switch (prot) { 1334 case PROT_READ | PROT_WRITE | PROT_EXEC: 1335 case PROT_READ | PROT_WRITE: 1336 return; 1337 1338 /* copy_on_write */ 1339 case PROT_READ | PROT_EXEC: 1340 case PROT_READ: 1341 mtx_enter(&pg->mdpage.pvh_mtx); 1342 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) { 1343 if (*pv->pv_pte & (PG_KWE | PG_UWE)) { 1344 *pv->pv_pte &= ~(PG_KWE | PG_UWE); 1345 PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va, 1346 pmap_pte_asm(pv->pv_pte), 1347 PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id); 1348 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va, 1349 pmap_pte_asm(pv->pv_pte)); 1350 } 1351 } 1352 mtx_leave(&pg->mdpage.pvh_mtx); 1353 PMAP_TLB_SHOOTNOW(); 1354 return; 1355 1356 /* remove_all */ 1357 default: 1358 break; 1359 } 1360 1361 mtx_enter(&pg->mdpage.pvh_mtx); 1362 while ((pv = pg->mdpage.pvh_list) != NULL) { 1363 pmap_reference(pv->pv_pmap); 1364 pmap = pv->pv_pmap; 1365 mtx_leave(&pg->mdpage.pvh_mtx); 1366 1367 PMAP_LOCK(pmap); 1368 1369 /* 1370 * We dropped the pvlist lock before grabbing the pmap 1371 * lock to avoid lock ordering problems. This means 1372 * we have to check the pvlist again since somebody 1373 * else might have modified it. All we care about is 1374 * that the pvlist entry matches the pmap we just 1375 * locked. If it doesn't, unlock the pmap and try 1376 * again. 1377 */ 1378 mtx_enter(&pg->mdpage.pvh_mtx); 1379 if ((pv = pg->mdpage.pvh_list) == NULL || 1380 pv->pv_pmap != pmap) { 1381 mtx_leave(&pg->mdpage.pvh_mtx); 1382 PMAP_UNLOCK(pmap); 1383 pmap_destroy(pmap); 1384 mtx_enter(&pg->mdpage.pvh_mtx); 1385 continue; 1386 } 1387 1388 #ifdef DEBUG 1389 if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 || 1390 pmap_pte_pa(pv->pv_pte) != VM_PAGE_TO_PHYS(pg)) 1391 panic("pmap_page_protect: bad mapping"); 1392 #endif 1393 if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte, 1394 FALSE, cpu_id) == TRUE) { 1395 if (pmap == pmap_kernel()) 1396 needkisync |= TRUE; 1397 else 1398 PMAP_SYNC_ISTREAM_USER(pmap); 1399 } 1400 mtx_leave(&pg->mdpage.pvh_mtx); 1401 PMAP_UNLOCK(pmap); 1402 pmap_destroy(pmap); 1403 mtx_enter(&pg->mdpage.pvh_mtx); 1404 } 1405 mtx_leave(&pg->mdpage.pvh_mtx); 1406 1407 if (needkisync) 1408 PMAP_SYNC_ISTREAM_KERNEL(); 1409 } 1410 1411 /* 1412 * pmap_protect: [ INTERFACE ] 1413 * 1414 * Set the physical protection on the specified range of this map 1415 * as requested. 
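 *
 * For example (sketch only), write-protecting a single page of a map:
 *
 *	pmap_protect(pmap, va, va + PAGE_SIZE, PROT_READ);
 *
 * Removing read permission altogether is handled by simply calling
 * pmap_remove() on the range (see the PROT_READ check below).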
1416 */ 1417 void 1418 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1419 { 1420 pt_entry_t *l1pte, *l2pte, *l3pte, bits; 1421 boolean_t isactive; 1422 boolean_t hadasm; 1423 vaddr_t l1eva, l2eva; 1424 cpuid_t cpu_id = cpu_number(); 1425 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1426 1427 #ifdef DEBUG 1428 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) 1429 printf("pmap_protect(%p, %lx, %lx, %x)\n", 1430 pmap, sva, eva, prot); 1431 #endif 1432 1433 if ((prot & PROT_READ) == PROT_NONE) { 1434 pmap_remove(pmap, sva, eva); 1435 return; 1436 } 1437 1438 PMAP_LOCK(pmap); 1439 1440 bits = pte_prot(pmap, prot); 1441 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1442 1443 l1pte = pmap_l1pte(pmap, sva); 1444 for (; sva < eva; sva = l1eva, l1pte++) { 1445 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1446 if (!pmap_pte_v(l1pte)) 1447 continue; 1448 1449 l2pte = pmap_l2pte(pmap, sva, l1pte); 1450 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1451 l2eva = alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1452 if (!pmap_pte_v(l2pte)) 1453 continue; 1454 1455 l3pte = pmap_l3pte(pmap, sva, l2pte); 1456 for (; sva < l2eva && sva < eva; 1457 sva += PAGE_SIZE, l3pte++) { 1458 if (!pmap_pte_v(l3pte)) 1459 continue; 1460 1461 if (pmap_pte_prot_chg(l3pte, bits)) { 1462 hadasm = (pmap_pte_asm(l3pte) != 0); 1463 pmap_pte_set_prot(l3pte, bits); 1464 PMAP_INVALIDATE_TLB(pmap, sva, hadasm, 1465 isactive, cpu_id); 1466 PMAP_TLB_SHOOTDOWN(pmap, sva, 1467 hadasm ? PG_ASM : 0); 1468 } 1469 } 1470 } 1471 } 1472 1473 PMAP_TLB_SHOOTNOW(); 1474 1475 if (prot & PROT_EXEC) 1476 PMAP_SYNC_ISTREAM(pmap); 1477 1478 PMAP_UNLOCK(pmap); 1479 } 1480 1481 /* 1482 * pmap_enter: [ INTERFACE ] 1483 * 1484 * Insert the given physical page (p) at 1485 * the specified virtual address (v) in the 1486 * target physical map with the protection requested. 1487 * 1488 * If specified, the page will be wired down, meaning 1489 * that the related pte can not be reclaimed. 1490 * 1491 * Note: This is the only routine which MAY NOT lazy-evaluate 1492 * or lose information. That is, this routine must actually 1493 * insert this page into the given map NOW. 1494 */ 1495 int 1496 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) 1497 { 1498 struct vm_page *pg; 1499 pt_entry_t *pte, npte, opte; 1500 paddr_t opa; 1501 boolean_t tflush = TRUE; 1502 boolean_t hadasm = FALSE; /* XXX gcc -Wuninitialized */ 1503 boolean_t needisync = FALSE; 1504 boolean_t setisync = FALSE; 1505 boolean_t isactive; 1506 boolean_t wired; 1507 cpuid_t cpu_id = cpu_number(); 1508 int error = 0; 1509 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1510 1511 #ifdef DEBUG 1512 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1513 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n", 1514 pmap, va, pa, prot, flags); 1515 #endif 1516 pg = PHYS_TO_VM_PAGE(pa); 1517 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1518 wired = (flags & PMAP_WIRED) != 0; 1519 1520 /* 1521 * Determine what we need to do about the I-stream. If 1522 * PROT_EXEC is set, we mark a user pmap as needing 1523 * an I-sync on the way back out to userspace. We always 1524 * need an immediate I-sync for the kernel pmap. 1525 */ 1526 if (prot & PROT_EXEC) { 1527 if (pmap == pmap_kernel()) 1528 needisync = TRUE; 1529 else { 1530 setisync = TRUE; 1531 needisync = (pmap->pm_cpus != 0); 1532 } 1533 } 1534 1535 PMAP_LOCK(pmap); 1536 1537 if (pmap == pmap_kernel()) { 1538 #ifdef DIAGNOSTIC 1539 /* 1540 * Sanity check the virtual address. 
1541 */ 1542 if (va < VM_MIN_KERNEL_ADDRESS) 1543 panic("pmap_enter: kernel pmap, invalid va 0x%lx", va); 1544 #endif 1545 pte = PMAP_KERNEL_PTE(va); 1546 } else { 1547 pt_entry_t *l1pte, *l2pte; 1548 1549 #ifdef DIAGNOSTIC 1550 /* 1551 * Sanity check the virtual address. 1552 */ 1553 if (va >= VM_MAXUSER_ADDRESS) 1554 panic("pmap_enter: user pmap, invalid va 0x%lx", va); 1555 #endif 1556 1557 KASSERT(pmap->pm_lev1map != kernel_lev1map); 1558 1559 /* 1560 * Check to see if the level 1 PTE is valid, and 1561 * allocate a new level 2 page table page if it's not. 1562 * A reference will be added to the level 2 table when 1563 * the level 3 table is created. 1564 */ 1565 l1pte = pmap_l1pte(pmap, va); 1566 if (pmap_pte_v(l1pte) == 0) { 1567 pmap_physpage_addref(l1pte); 1568 error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT); 1569 if (error) { 1570 pmap_l1pt_delref(pmap, l1pte); 1571 if (flags & PMAP_CANFAIL) 1572 goto out; 1573 panic("pmap_enter: unable to create L2 PT " 1574 "page"); 1575 } 1576 #ifdef DEBUG 1577 if (pmapdebug & PDB_PTPAGE) 1578 printf("pmap_enter: new level 2 table at " 1579 "0x%lx\n", pmap_pte_pa(l1pte)); 1580 #endif 1581 } 1582 1583 /* 1584 * Check to see if the level 2 PTE is valid, and 1585 * allocate a new level 3 page table page if it's not. 1586 * A reference will be added to the level 3 table when 1587 * the mapping is validated. 1588 */ 1589 l2pte = pmap_l2pte(pmap, va, l1pte); 1590 if (pmap_pte_v(l2pte) == 0) { 1591 pmap_physpage_addref(l2pte); 1592 error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT); 1593 if (error) { 1594 pmap_l2pt_delref(pmap, l1pte, l2pte); 1595 if (flags & PMAP_CANFAIL) 1596 goto out; 1597 panic("pmap_enter: unable to create L3 PT " 1598 "page"); 1599 } 1600 #ifdef DEBUG 1601 if (pmapdebug & PDB_PTPAGE) 1602 printf("pmap_enter: new level 3 table at " 1603 "0x%lx\n", pmap_pte_pa(l2pte)); 1604 #endif 1605 } 1606 1607 /* 1608 * Get the PTE that will map the page. 1609 */ 1610 pte = pmap_l3pte(pmap, va, l2pte); 1611 } 1612 1613 /* Remember all of the old PTE; used for TBI check later. */ 1614 opte = *pte; 1615 1616 /* 1617 * Check to see if the old mapping is valid. If not, validate the 1618 * new one immediately. 1619 */ 1620 if (pmap_pte_v(pte) == 0) { 1621 /* 1622 * No need to invalidate the TLB in this case; an invalid 1623 * mapping won't be in the TLB, and a previously valid 1624 * mapping would have been flushed when it was invalidated. 1625 */ 1626 tflush = FALSE; 1627 1628 /* 1629 * No need to synchronize the I-stream, either, for basically 1630 * the same reason. 1631 */ 1632 setisync = needisync = FALSE; 1633 1634 if (pmap != pmap_kernel()) { 1635 /* 1636 * New mappings gain a reference on the level 3 1637 * table. 1638 */ 1639 pmap_physpage_addref(pte); 1640 } 1641 goto validate_enterpv; 1642 } 1643 1644 opa = pmap_pte_pa(pte); 1645 hadasm = (pmap_pte_asm(pte) != 0); 1646 1647 if (opa == pa) { 1648 /* 1649 * Mapping has not changed; must be a protection or 1650 * wiring change. 1651 */ 1652 if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) { 1653 #ifdef DEBUG 1654 if (pmapdebug & PDB_ENTER) 1655 printf("pmap_enter: wiring change -> %d\n", 1656 wired); 1657 #endif 1658 /* 1659 * Adjust the wiring count. 1660 */ 1661 if (wired) 1662 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1663 else 1664 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1665 } 1666 1667 /* 1668 * Set the PTE. 1669 */ 1670 goto validate; 1671 } 1672 1673 /* 1674 * The mapping has changed. We need to invalidate the 1675 * old mapping before creating the new one. 
1676 */ 1677 #ifdef DEBUG 1678 if (pmapdebug & PDB_ENTER) 1679 printf("pmap_enter: removing old mapping 0x%lx\n", va); 1680 #endif 1681 if (pmap != pmap_kernel()) { 1682 /* 1683 * Gain an extra reference on the level 3 table. 1684 * pmap_remove_mapping() will delete a reference, 1685 * and we don't want the table to be erroneously 1686 * freed. 1687 */ 1688 pmap_physpage_addref(pte); 1689 } 1690 needisync |= pmap_remove_mapping(pmap, va, pte, TRUE, cpu_id); 1691 1692 validate_enterpv: 1693 /* 1694 * Enter the mapping into the pv_table if appropriate. 1695 */ 1696 if (pg != NULL) { 1697 error = pmap_pv_enter(pmap, pg, va, pte, TRUE); 1698 if (error) { 1699 pmap_l3pt_delref(pmap, va, pte, cpu_id); 1700 if (flags & PMAP_CANFAIL) 1701 goto out; 1702 panic("pmap_enter: unable to enter mapping in PV " 1703 "table"); 1704 } 1705 } 1706 1707 /* 1708 * Increment counters. 1709 */ 1710 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1711 if (wired) 1712 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1713 1714 validate: 1715 /* 1716 * Build the new PTE. 1717 */ 1718 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V; 1719 if (pg != NULL) { 1720 int attrs; 1721 1722 #ifdef DIAGNOSTIC 1723 if ((flags & PROT_MASK) & ~prot) 1724 panic("pmap_enter: access type exceeds prot"); 1725 #endif 1726 if (flags & PROT_WRITE) 1727 atomic_setbits_int(&pg->pg_flags, 1728 PG_PMAP_REF | PG_PMAP_MOD); 1729 else if (flags & PROT_MASK) 1730 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF); 1731 1732 /* 1733 * Set up referenced/modified emulation for new mapping. 1734 */ 1735 attrs = pg->pg_flags; 1736 if ((attrs & PG_PMAP_REF) == 0) 1737 npte |= PG_FOR | PG_FOW | PG_FOE; 1738 else if ((attrs & PG_PMAP_MOD) == 0) 1739 npte |= PG_FOW; 1740 1741 /* 1742 * Mapping was entered on PV list. 1743 */ 1744 npte |= PG_PVLIST; 1745 } 1746 if (wired) 1747 npte |= PG_WIRED; 1748 #ifdef DEBUG 1749 if (pmapdebug & PDB_ENTER) 1750 printf("pmap_enter: new pte = 0x%lx\n", npte); 1751 #endif 1752 1753 /* 1754 * If the PALcode portion of the new PTE is the same as the 1755 * old PTE, no TBI is necessary. 1756 */ 1757 if (PG_PALCODE(opte) == PG_PALCODE(npte)) 1758 tflush = FALSE; 1759 1760 /* 1761 * Set the new PTE. 1762 */ 1763 PMAP_SET_PTE(pte, npte); 1764 1765 /* 1766 * Invalidate the TLB entry for this VA and any appropriate 1767 * caches. 1768 */ 1769 if (tflush) { 1770 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 1771 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 1772 PMAP_TLB_SHOOTNOW(); 1773 } 1774 if (setisync) 1775 PMAP_SET_NEEDISYNC(pmap); 1776 if (needisync) 1777 PMAP_SYNC_ISTREAM(pmap); 1778 1779 out: 1780 PMAP_UNLOCK(pmap); 1781 1782 return error; 1783 } 1784 1785 /* 1786 * pmap_kenter_pa: [ INTERFACE ] 1787 * 1788 * Enter a va -> pa mapping into the kernel pmap without any 1789 * physical->virtual tracking. 1790 * 1791 * Note: no locking is necessary in this function. 1792 */ 1793 void 1794 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) 1795 { 1796 pt_entry_t *pte, npte; 1797 cpuid_t cpu_id = cpu_number(); 1798 boolean_t needisync = FALSE; 1799 pmap_t pmap = pmap_kernel(); 1800 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1801 1802 #ifdef DEBUG 1803 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1804 printf("pmap_kenter_pa(%lx, %lx, %x)\n", 1805 va, pa, prot); 1806 #endif 1807 1808 #ifdef DIAGNOSTIC 1809 /* 1810 * Sanity check the virtual address. 
1811 */ 1812 if (va < VM_MIN_KERNEL_ADDRESS) 1813 panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va); 1814 #endif 1815 1816 pte = PMAP_KERNEL_PTE(va); 1817 1818 if (pmap_pte_v(pte) == 0) 1819 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1820 if (pmap_pte_w(pte) == 0) 1821 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1822 1823 if ((prot & PROT_EXEC) != 0 || pmap_pte_exec(pte)) 1824 needisync = TRUE; 1825 1826 /* 1827 * Build the new PTE. 1828 */ 1829 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) | 1830 PG_V | PG_WIRED; 1831 1832 /* 1833 * Set the new PTE. 1834 */ 1835 PMAP_SET_PTE(pte, npte); 1836 #if defined(MULTIPROCESSOR) 1837 alpha_mb(); /* XXX alpha_wmb()? */ 1838 #endif 1839 1840 /* 1841 * Invalidate the TLB entry for this VA and any appropriate 1842 * caches. 1843 */ 1844 PMAP_INVALIDATE_TLB(pmap, va, TRUE, TRUE, cpu_id); 1845 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM); 1846 PMAP_TLB_SHOOTNOW(); 1847 1848 if (needisync) 1849 PMAP_SYNC_ISTREAM_KERNEL(); 1850 } 1851 1852 /* 1853 * pmap_kremove: [ INTERFACE ] 1854 * 1855 * Remove a mapping entered with pmap_kenter_pa() starting at va, 1856 * for size bytes (assumed to be page rounded). 1857 */ 1858 void 1859 pmap_kremove(vaddr_t va, vsize_t size) 1860 { 1861 pt_entry_t *pte; 1862 boolean_t needisync = FALSE; 1863 cpuid_t cpu_id = cpu_number(); 1864 pmap_t pmap = pmap_kernel(); 1865 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1866 1867 #ifdef DEBUG 1868 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1869 printf("pmap_kremove(%lx, %lx)\n", 1870 va, size); 1871 #endif 1872 1873 #ifdef DIAGNOSTIC 1874 if (va < VM_MIN_KERNEL_ADDRESS) 1875 panic("pmap_kremove: user address"); 1876 #endif 1877 1878 for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) { 1879 pte = PMAP_KERNEL_PTE(va); 1880 if (pmap_pte_v(pte)) { 1881 #ifdef DIAGNOSTIC 1882 if (pmap_pte_pv(pte)) 1883 panic("pmap_kremove: PG_PVLIST mapping for " 1884 "0x%lx", va); 1885 #endif 1886 if (pmap_pte_exec(pte)) 1887 needisync = TRUE; 1888 1889 /* Zap the mapping. */ 1890 PMAP_SET_PTE(pte, PG_NV); 1891 #if defined(MULTIPROCESSOR) 1892 alpha_mb(); /* XXX alpha_wmb()? */ 1893 #endif 1894 PMAP_INVALIDATE_TLB(pmap, va, TRUE, TRUE, cpu_id); 1895 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM); 1896 1897 /* Update stats. */ 1898 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 1899 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1900 } 1901 } 1902 1903 PMAP_TLB_SHOOTNOW(); 1904 1905 if (needisync) 1906 PMAP_SYNC_ISTREAM_KERNEL(); 1907 } 1908 1909 /* 1910 * pmap_unwire: [ INTERFACE ] 1911 * 1912 * Clear the wired attribute for a map/virtual-address pair. 1913 * 1914 * The mapping must already exist in the pmap. 1915 */ 1916 void 1917 pmap_unwire(pmap_t pmap, vaddr_t va) 1918 { 1919 pt_entry_t *pte; 1920 1921 #ifdef DEBUG 1922 if (pmapdebug & PDB_FOLLOW) 1923 printf("pmap_unwire(%p, %lx)\n", pmap, va); 1924 #endif 1925 1926 PMAP_LOCK(pmap); 1927 1928 pte = pmap_l3pte(pmap, va, NULL); 1929 #ifdef DIAGNOSTIC 1930 if (pte == NULL || pmap_pte_v(pte) == 0) 1931 panic("pmap_unwire"); 1932 #endif 1933 1934 /* 1935 * If wiring actually changed (always?) clear the wire bit and 1936 * update the wire count. Note that wiring is not a hardware 1937 * characteristic so there is no need to invalidate the TLB. 
1938 */ 1939 if (pmap_pte_w_chg(pte, 0)) { 1940 pmap_pte_set_w(pte, FALSE); 1941 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1942 } 1943 #ifdef DIAGNOSTIC 1944 else { 1945 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 1946 "didn't change!\n", pmap, va); 1947 } 1948 #endif 1949 1950 PMAP_UNLOCK(pmap); 1951 } 1952 1953 /* 1954 * pmap_extract: [ INTERFACE ] 1955 * 1956 * Extract the physical address associated with the given 1957 * pmap/virtual address pair. 1958 */ 1959 boolean_t 1960 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 1961 { 1962 pt_entry_t *l1pte, *l2pte, *l3pte; 1963 boolean_t rv = FALSE; 1964 paddr_t pa; 1965 1966 #ifdef DEBUG 1967 if (pmapdebug & PDB_FOLLOW) 1968 printf("pmap_extract(%p, %lx) -> ", pmap, va); 1969 #endif 1970 1971 if (pmap == pmap_kernel()) { 1972 if (va < ALPHA_K0SEG_BASE) { 1973 /* nothing */ 1974 } else if (va <= ALPHA_K0SEG_END) { 1975 pa = ALPHA_K0SEG_TO_PHYS(va); 1976 *pap = pa; 1977 rv = TRUE; 1978 } else { 1979 l3pte = PMAP_KERNEL_PTE(va); 1980 if (pmap_pte_v(l3pte)) { 1981 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 1982 *pap = pa; 1983 rv = TRUE; 1984 } 1985 } 1986 goto out_nolock; 1987 } 1988 1989 PMAP_LOCK(pmap); 1990 1991 l1pte = pmap_l1pte(pmap, va); 1992 if (pmap_pte_v(l1pte) == 0) 1993 goto out; 1994 1995 l2pte = pmap_l2pte(pmap, va, l1pte); 1996 if (pmap_pte_v(l2pte) == 0) 1997 goto out; 1998 1999 l3pte = pmap_l3pte(pmap, va, l2pte); 2000 if (pmap_pte_v(l3pte) == 0) 2001 goto out; 2002 2003 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 2004 *pap = pa; 2005 rv = TRUE; 2006 out: 2007 PMAP_UNLOCK(pmap); 2008 out_nolock: 2009 #ifdef DEBUG 2010 if (pmapdebug & PDB_FOLLOW) { 2011 if (rv) 2012 printf("0x%lx\n", pa); 2013 else 2014 printf("failed\n"); 2015 } 2016 #endif 2017 return (rv); 2018 } 2019 2020 /* 2021 * pmap_collect: [ INTERFACE ] 2022 * 2023 * Garbage collects the physical map system for pages which are no 2024 * longer used. Success need not be guaranteed -- that is, there 2025 * may well be pages which are not referenced, but others may be 2026 * collected. 2027 * 2028 * Called by the pageout daemon when pages are scarce. 2029 */ 2030 void 2031 pmap_collect(pmap_t pmap) 2032 { 2033 2034 #ifdef DEBUG 2035 if (pmapdebug & PDB_FOLLOW) 2036 printf("pmap_collect(%p)\n", pmap); 2037 #endif 2038 2039 /* 2040 * If called for the kernel pmap, just return. We 2041 * handle this case in the event that we ever want 2042 * to have swappable kernel threads. 2043 */ 2044 if (pmap == pmap_kernel()) 2045 return; 2046 2047 /* 2048 * This process is about to be swapped out; free all of 2049 * the PT pages by removing the physical mappings for its 2050 * entire address space. Note: pmap_do_remove() performs 2051 * all necessary locking. 2052 */ 2053 pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, FALSE); 2054 } 2055 2056 /* 2057 * pmap_activate: [ INTERFACE ] 2058 * 2059 * Activate the pmap used by the specified process. This includes 2060 * reloading the MMU context if the current process, and marking 2061 * the pmap in use by the processor. 2062 * 2063 * Note: We may use only spin locks here, since we are called 2064 * by a critical section in cpu_switch()! 2065 */ 2066 void 2067 pmap_activate(struct proc *p) 2068 { 2069 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 2070 cpuid_t cpu_id = cpu_number(); 2071 2072 #ifdef DEBUG 2073 if (pmapdebug & PDB_FOLLOW) 2074 printf("pmap_activate(%p)\n", p); 2075 #endif 2076 2077 /* Mark the pmap in use by this processor. 
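* pm_cpus has one bit per processor; PMAP_ISACTIVE() and the TLB
* shootdown code consult it to decide which processors must be
* notified about this pmap.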
*/ 2078 atomic_setbits_ulong(&pmap->pm_cpus, (1UL << cpu_id)); 2079 2080 /* Allocate an ASN. */ 2081 pmap_asn_alloc(pmap, cpu_id); 2082 2083 PMAP_ACTIVATE(pmap, p, cpu_id); 2084 } 2085 2086 /* 2087 * pmap_deactivate: [ INTERFACE ] 2088 * 2089 * Mark that the pmap used by the specified process is no longer 2090 * in use by the processor. 2091 * 2092 * The comment above pmap_activate() wrt. locking applies here, 2093 * as well. Note that we use only a single `atomic' operation, 2094 * so no locking is necessary. 2095 */ 2096 void 2097 pmap_deactivate(struct proc *p) 2098 { 2099 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 2100 2101 #ifdef DEBUG 2102 if (pmapdebug & PDB_FOLLOW) 2103 printf("pmap_deactivate(%p)\n", p); 2104 #endif 2105 2106 /* 2107 * Mark the pmap no longer in use by this processor. 2108 */ 2109 atomic_clearbits_ulong(&pmap->pm_cpus, (1UL << cpu_number())); 2110 } 2111 2112 /* 2113 * pmap_zero_page: [ INTERFACE ] 2114 * 2115 * Zero the specified (machine independent) page by mapping the page 2116 * into virtual memory and clear its contents, one machine dependent 2117 * page at a time. 2118 * 2119 * Note: no locking is necessary in this function. 2120 */ 2121 void 2122 pmap_zero_page(struct vm_page *pg) 2123 { 2124 paddr_t phys = VM_PAGE_TO_PHYS(pg); 2125 u_long *p0, *p1, *pend; 2126 2127 #ifdef DEBUG 2128 if (pmapdebug & PDB_FOLLOW) 2129 printf("pmap_zero_page(%lx)\n", phys); 2130 #endif 2131 2132 p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys); 2133 p1 = NULL; 2134 pend = (u_long *)((u_long)p0 + PAGE_SIZE); 2135 2136 /* 2137 * Unroll the loop a bit, doing 16 quadwords per iteration. 2138 * Do only 8 back-to-back stores, and alternate registers. 2139 */ 2140 do { 2141 __asm volatile( 2142 "# BEGIN loop body\n" 2143 " addq %2, (8 * 8), %1 \n" 2144 " stq $31, (0 * 8)(%0) \n" 2145 " stq $31, (1 * 8)(%0) \n" 2146 " stq $31, (2 * 8)(%0) \n" 2147 " stq $31, (3 * 8)(%0) \n" 2148 " stq $31, (4 * 8)(%0) \n" 2149 " stq $31, (5 * 8)(%0) \n" 2150 " stq $31, (6 * 8)(%0) \n" 2151 " stq $31, (7 * 8)(%0) \n" 2152 " \n" 2153 " addq %3, (8 * 8), %0 \n" 2154 " stq $31, (0 * 8)(%1) \n" 2155 " stq $31, (1 * 8)(%1) \n" 2156 " stq $31, (2 * 8)(%1) \n" 2157 " stq $31, (3 * 8)(%1) \n" 2158 " stq $31, (4 * 8)(%1) \n" 2159 " stq $31, (5 * 8)(%1) \n" 2160 " stq $31, (6 * 8)(%1) \n" 2161 " stq $31, (7 * 8)(%1) \n" 2162 " # END loop body" 2163 : "=r" (p0), "=r" (p1) 2164 : "0" (p0), "1" (p1) 2165 : "memory"); 2166 } while (p0 < pend); 2167 } 2168 2169 /* 2170 * pmap_copy_page: [ INTERFACE ] 2171 * 2172 * Copy the specified (machine independent) page by mapping the page 2173 * into virtual memory and using memcpy to copy the page, one machine 2174 * dependent page at a time. 2175 * 2176 * Note: no locking is necessary in this function. 2177 */ 2178 void 2179 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) 2180 { 2181 paddr_t src = VM_PAGE_TO_PHYS(srcpg); 2182 paddr_t dst = VM_PAGE_TO_PHYS(dstpg); 2183 caddr_t s, d; 2184 2185 #ifdef DEBUG 2186 if (pmapdebug & PDB_FOLLOW) 2187 printf("pmap_copy_page(%lx, %lx)\n", src, dst); 2188 #endif 2189 s = (caddr_t)ALPHA_PHYS_TO_K0SEG(src); 2190 d = (caddr_t)ALPHA_PHYS_TO_K0SEG(dst); 2191 memcpy(d, s, PAGE_SIZE); 2192 } 2193 2194 /* 2195 * pmap_clear_modify: [ INTERFACE ] 2196 * 2197 * Clear the modify bits on the specified physical page. 
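*
* Clearing the modify state also turns fault-on-write back on for
* every existing mapping of the page (the PG_FOW argument to
* pmap_changebit() below), so the next store will refault and set
* PG_PMAP_MOD again.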
2198 */ 2199 boolean_t 2200 pmap_clear_modify(struct vm_page *pg) 2201 { 2202 boolean_t rv = FALSE; 2203 cpuid_t cpu_id = cpu_number(); 2204 2205 #ifdef DEBUG 2206 if (pmapdebug & PDB_FOLLOW) 2207 printf("pmap_clear_modify(%p)\n", pg); 2208 #endif 2209 2210 mtx_enter(&pg->mdpage.pvh_mtx); 2211 if (pg->pg_flags & PG_PMAP_MOD) { 2212 rv = TRUE; 2213 pmap_changebit(pg, PG_FOW, ~0, cpu_id); 2214 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD); 2215 } 2216 mtx_leave(&pg->mdpage.pvh_mtx); 2217 2218 return (rv); 2219 } 2220 2221 /* 2222 * pmap_clear_reference: [ INTERFACE ] 2223 * 2224 * Clear the reference bit on the specified physical page. 2225 */ 2226 boolean_t 2227 pmap_clear_reference(struct vm_page *pg) 2228 { 2229 boolean_t rv = FALSE; 2230 cpuid_t cpu_id = cpu_number(); 2231 2232 #ifdef DEBUG 2233 if (pmapdebug & PDB_FOLLOW) 2234 printf("pmap_clear_reference(%p)\n", pg); 2235 #endif 2236 2237 mtx_enter(&pg->mdpage.pvh_mtx); 2238 if (pg->pg_flags & PG_PMAP_REF) { 2239 rv = TRUE; 2240 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id); 2241 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF); 2242 } 2243 mtx_leave(&pg->mdpage.pvh_mtx); 2244 2245 return (rv); 2246 } 2247 2248 /* 2249 * pmap_is_referenced: [ INTERFACE ] 2250 * 2251 * Return whether or not the specified physical page is referenced 2252 * by any physical maps. 2253 */ 2254 boolean_t 2255 pmap_is_referenced(struct vm_page *pg) 2256 { 2257 boolean_t rv; 2258 2259 rv = ((pg->pg_flags & PG_PMAP_REF) != 0); 2260 #ifdef DEBUG 2261 if (pmapdebug & PDB_FOLLOW) { 2262 printf("pmap_is_referenced(%p) -> %c\n", pg, "FT"[rv]); 2263 } 2264 #endif 2265 return (rv); 2266 } 2267 2268 /* 2269 * pmap_is_modified: [ INTERFACE ] 2270 * 2271 * Return whether or not the specified physical page is modified 2272 * by any physical maps. 2273 */ 2274 boolean_t 2275 pmap_is_modified(struct vm_page *pg) 2276 { 2277 boolean_t rv; 2278 2279 rv = ((pg->pg_flags & PG_PMAP_MOD) != 0); 2280 #ifdef DEBUG 2281 if (pmapdebug & PDB_FOLLOW) { 2282 printf("pmap_is_modified(%p) -> %c\n", pg, "FT"[rv]); 2283 } 2284 #endif 2285 return (rv); 2286 } 2287 2288 /* 2289 * Miscellaneous support routines follow 2290 */ 2291 2292 /* 2293 * alpha_protection_init: 2294 * 2295 * Initialize Alpha protection code array. 2296 * 2297 * Note: no locking is necessary in this function. 2298 */ 2299 void 2300 alpha_protection_init(void) 2301 { 2302 int prot, *kp, *up; 2303 2304 kp = protection_codes[0]; 2305 up = protection_codes[1]; 2306 2307 for (prot = 0; prot < 8; prot++) { 2308 kp[prot] = PG_ASM; 2309 up[prot] = 0; 2310 2311 if (prot & PROT_READ) { 2312 kp[prot] |= PG_KRE; 2313 up[prot] |= PG_KRE | PG_URE; 2314 } 2315 if (prot & PROT_WRITE) { 2316 kp[prot] |= PG_KWE; 2317 up[prot] |= PG_KWE | PG_UWE; 2318 } 2319 if (prot & PROT_EXEC) { 2320 kp[prot] |= PG_EXEC | PG_KRE; 2321 up[prot] |= PG_EXEC | PG_KRE | PG_URE; 2322 } else { 2323 kp[prot] |= PG_FOE; 2324 up[prot] |= PG_FOE; 2325 } 2326 } 2327 } 2328 2329 /* 2330 * pmap_remove_mapping: 2331 * 2332 * Invalidate a single page denoted by pmap/va. 2333 * 2334 * If (pte != NULL), it is the already computed PTE for the page. 2335 * 2336 * Note: locking in this function is complicated by the fact 2337 * that we can be called when the PV list is already locked. 2338 * (pmap_page_protect()). In this case, the caller must be 2339 * careful to get the next PV entry while we remove this entry 2340 * from beneath it. We assume that the pmap itself is already 2341 * locked; dolock applies only to the PV list. 
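* (Concretely: callers that already hold pg->mdpage.pvh_mtx, such
* as pmap_page_protect(), pass dolock == FALSE; all other callers
* pass TRUE and pmap_pv_remove() takes the mutex itself.)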
2342 *
2343 * Returns TRUE or FALSE, indicating if an I-stream sync needs
2344 * to be initiated (for this CPU or for other CPUs).
2345 */
2346 boolean_t
2347 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2348 boolean_t dolock, cpuid_t cpu_id)
2349 {
2350 paddr_t pa;
2351 struct vm_page *pg;
2352 boolean_t onpv;
2353 boolean_t hadasm;
2354 boolean_t isactive;
2355 boolean_t needisync = FALSE;
2356 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2357
2358 #ifdef DEBUG
2359 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2360 printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2361 pmap, va, pte, dolock, cpu_id);
2362 #endif
2363
2364 /*
2365 * PTE not provided, compute it from pmap and va.
2366 */
2367 if (pte == PT_ENTRY_NULL) {
2368 pte = pmap_l3pte(pmap, va, NULL);
2369 if (pmap_pte_v(pte) == 0)
2370 return (FALSE);
2371 }
2372
2373 pa = pmap_pte_pa(pte);
2374 onpv = (pmap_pte_pv(pte) != 0);
2375 if (onpv) {
2376 /*
2377 * Remove it from the PV table such that nobody will
2378 * attempt to modify the PTE behind our back.
2379 */
2380 pg = PHYS_TO_VM_PAGE(pa);
2381 KASSERT(pg != NULL);
2382 pmap_pv_remove(pmap, pg, va, dolock);
2383 }
2384
2385 hadasm = (pmap_pte_asm(pte) != 0);
2386 isactive = PMAP_ISACTIVE(pmap, cpu_id);
2387
2388 /*
2389 * Determine what we need to do about the I-stream. If
2390 * PG_EXEC was set, we mark a user pmap as needing an
2391 * I-sync on the way out to userspace. We always need
2392 * an immediate I-sync for the kernel pmap.
2393 */
2394 if (pmap_pte_exec(pte)) {
2395 if (pmap == pmap_kernel())
2396 needisync = TRUE;
2397 else {
2398 PMAP_SET_NEEDISYNC(pmap);
2399 needisync = (pmap->pm_cpus != 0);
2400 }
2401 }
2402
2403 /*
2404 * Update statistics.
2405 */
2406 if (pmap_pte_w(pte))
2407 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2408 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2409
2410 /*
2411 * Invalidate the PTE after saving the reference/modify info.
2412 */
2413 #ifdef DEBUG
2414 if (pmapdebug & PDB_REMOVE)
2415 printf("remove: invalidating pte at %p\n", pte);
2416 #endif
2417 PMAP_SET_PTE(pte, PG_NV);
2418
2419 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2420 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2421 PMAP_TLB_SHOOTNOW();
2422
2423 /*
2424 * If we're removing a user mapping, check to see if we
2425 * can free page table pages.
2426 */
2427 if (pmap != pmap_kernel()) {
2428 /*
2429 * Delete the reference on the level 3 table. It will
2430 * delete references on the level 2 and 1 tables as
2431 * appropriate.
2432 */
2433 pmap_l3pt_delref(pmap, va, pte, cpu_id);
2434 }
2435
2436 return (needisync);
2437 }
2438
2439 /*
2440 * pmap_changebit:
2441 *
2442 * Set or clear the specified PTE bits for all mappings on the
2443 * specified page.
2444 *
2445 * Note: we assume that the pvlist is already locked. There is no
2446 * need to lock the pmap itself as a mapping cannot be removed while
2447 * we are holding the pvlist lock.
2448 */
2449 void
2450 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, cpuid_t cpu_id)
2451 {
2452 pv_entry_t pv;
2453 pt_entry_t *pte, npte;
2454 vaddr_t va;
2455 boolean_t hadasm, isactive;
2456 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2457
2458 #ifdef DEBUG
2459 if (pmapdebug & PDB_BITS)
2460 printf("pmap_changebit(0x%lx, 0x%lx, 0x%lx)\n",
2461 VM_PAGE_TO_PHYS(pg), set, mask);
2462 #endif
2463
2464 MUTEX_ASSERT_LOCKED(&pg->mdpage.pvh_mtx);
2465
2466 /*
2467 * Loop over all current mappings, setting/clearing as appropriate.
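* (Typical calls: pmap_clear_reference() passes set = PG_FOR |
* PG_FOW | PG_FOE, mask = ~0 to re-arm every fault-on bit, while
* pmap_emulate_reference() passes set = 0, mask = ~faultoff to
* clear the bits it has just emulated.)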
2468 */ 2469 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) { 2470 va = pv->pv_va; 2471 2472 pte = pv->pv_pte; 2473 npte = (*pte | set) & mask; 2474 if (*pte != npte) { 2475 hadasm = (pmap_pte_asm(pte) != 0); 2476 isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id); 2477 PMAP_SET_PTE(pte, npte); 2478 PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive, 2479 cpu_id); 2480 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va, 2481 hadasm ? PG_ASM : 0); 2482 } 2483 } 2484 2485 PMAP_TLB_SHOOTNOW(); 2486 } 2487 2488 /* 2489 * pmap_emulate_reference: 2490 * 2491 * Emulate reference and/or modified bit hits. 2492 * Return non-zero if this was an execute fault on a non-exec mapping, 2493 * otherwise return 0. 2494 */ 2495 int 2496 pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type) 2497 { 2498 struct pmap *pmap; 2499 pt_entry_t faultoff, *pte; 2500 struct vm_page *pg; 2501 paddr_t pa; 2502 boolean_t didlock = FALSE; 2503 boolean_t exec = FALSE; 2504 cpuid_t cpu_id = cpu_number(); 2505 2506 #ifdef DEBUG 2507 if (pmapdebug & PDB_FOLLOW) 2508 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n", 2509 p, v, user, type); 2510 #endif 2511 2512 /* 2513 * Convert process and virtual address to physical address. 2514 */ 2515 if (v >= VM_MIN_KERNEL_ADDRESS) { 2516 if (user) 2517 panic("pmap_emulate_reference: user ref to kernel"); 2518 /* 2519 * No need to lock here; kernel PT pages never go away. 2520 */ 2521 pte = PMAP_KERNEL_PTE(v); 2522 } else { 2523 #ifdef DIAGNOSTIC 2524 if (p == NULL) 2525 panic("pmap_emulate_reference: bad proc"); 2526 if (p->p_vmspace == NULL) 2527 panic("pmap_emulate_reference: bad p_vmspace"); 2528 #endif 2529 pmap = p->p_vmspace->vm_map.pmap; 2530 PMAP_LOCK(pmap); 2531 didlock = TRUE; 2532 pte = pmap_l3pte(pmap, v, NULL); 2533 /* 2534 * We'll unlock below where we're done with the PTE. 2535 */ 2536 } 2537 if (pte == NULL || !pmap_pte_v(pte)) { 2538 if (didlock) 2539 PMAP_UNLOCK(pmap); 2540 return (0); 2541 } 2542 exec = pmap_pte_exec(pte); 2543 if (!exec && type == ALPHA_MMCSR_FOE) { 2544 if (didlock) 2545 PMAP_UNLOCK(pmap); 2546 return (1); 2547 } 2548 #ifdef DEBUG 2549 if (pmapdebug & PDB_FOLLOW) { 2550 printf("\tpte = %p, ", pte); 2551 printf("*pte = 0x%lx\n", *pte); 2552 } 2553 #endif 2554 #ifdef DEBUG /* These checks are more expensive */ 2555 #ifndef MULTIPROCESSOR 2556 /* 2557 * Quoting the Alpha ARM 14.3.1.4/5/6: 2558 * ``The Translation Buffer may reload and cache the old PTE value 2559 * between the time the FOR (resp. FOW, FOE) fault invalidates the 2560 * old value from the Translation Buffer and the time software 2561 * updates the PTE in memory. Software that depends on the 2562 * processor-provided invalidate must thus be prepared to take 2563 * another FOR (resp. FOW, FOE) fault on a page after clearing the 2564 * page's PTE<FOR(resp. FOW, FOE)> bit. The second fault will 2565 * invalidate the stale PTE from the Translation Buffer, and the 2566 * processor cannot load another stale copy. Thus, in the worst case, 2567 * a multiprocessor system will take an initial FOR (resp. FOW, FOE) 2568 * fault and then an additional FOR (resp. FOW, FOE) fault on each 2569 * processor. In practice, even a single repetition is unlikely.'' 2570 * 2571 * In practice, spurious faults on the other processors happen, at 2572 * least on fast 21264 or better processors. 2573 */ 2574 if (type == ALPHA_MMCSR_FOW) { 2575 if (!(*pte & (user ? 
PG_UWE : PG_UWE | PG_KWE))) { 2576 panic("pmap_emulate_reference(%d,%d): " 2577 "write but unwritable pte 0x%lx", 2578 user, type, *pte); 2579 } 2580 if (!(*pte & PG_FOW)) { 2581 panic("pmap_emulate_reference(%d,%d): " 2582 "write but not FOW pte 0x%lx", 2583 user, type, *pte); 2584 } 2585 } else { 2586 if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE))) { 2587 panic("pmap_emulate_reference(%d,%d): " 2588 "!write but unreadable pte 0x%lx", 2589 user, type, *pte); 2590 } 2591 if (!(*pte & (PG_FOR | PG_FOE))) { 2592 panic("pmap_emulate_reference(%d,%d): " 2593 "!write but not FOR|FOE pte 0x%lx", 2594 user, type, *pte); 2595 } 2596 } 2597 #endif /* MULTIPROCESSOR */ 2598 /* Other diagnostics? */ 2599 #endif 2600 pa = pmap_pte_pa(pte); 2601 2602 /* 2603 * We're now done with the PTE. If it was a user pmap, unlock 2604 * it now. 2605 */ 2606 if (didlock) 2607 PMAP_UNLOCK(pmap); 2608 2609 #ifdef DEBUG 2610 if (pmapdebug & PDB_FOLLOW) 2611 printf("\tpa = 0x%lx\n", pa); 2612 #endif 2613 2614 pg = PHYS_TO_VM_PAGE(pa); 2615 2616 #ifdef DIAGNOSTIC 2617 if (pg == NULL) { 2618 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): " 2619 "pa 0x%lx (pte %p 0x%08lx) not managed", 2620 p, v, user, type, pa, pte, *pte); 2621 } 2622 #endif 2623 2624 /* 2625 * Twiddle the appropriate bits to reflect the reference 2626 * and/or modification.. 2627 * 2628 * The rules: 2629 * (1) always mark page as used, and 2630 * (2) if it was a write fault, mark page as modified. 2631 */ 2632 2633 mtx_enter(&pg->mdpage.pvh_mtx); 2634 if (type == ALPHA_MMCSR_FOW) { 2635 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF | PG_PMAP_MOD); 2636 faultoff = PG_FOR | PG_FOW; 2637 } else { 2638 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF); 2639 faultoff = PG_FOR; 2640 if (exec) { 2641 faultoff |= PG_FOE; 2642 } 2643 } 2644 pmap_changebit(pg, 0, ~faultoff, cpu_id); 2645 mtx_leave(&pg->mdpage.pvh_mtx); 2646 2647 return (0); 2648 } 2649 2650 #ifdef DEBUG 2651 /* 2652 * pmap_pv_dump: 2653 * 2654 * Dump the physical->virtual data for the specified page. 2655 */ 2656 void 2657 pmap_pv_dump(paddr_t pa) 2658 { 2659 struct vm_page *pg; 2660 pv_entry_t pv; 2661 2662 pg = PHYS_TO_VM_PAGE(pa); 2663 2664 printf("pa 0x%lx (attrs = 0x%x):\n", 2665 pa, pg->pg_flags & (PG_PMAP_REF | PG_PMAP_MOD)); 2666 mtx_enter(&pg->mdpage.pvh_mtx); 2667 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) 2668 printf(" pmap %p, va 0x%lx\n", 2669 pv->pv_pmap, pv->pv_va); 2670 mtx_leave(&pg->mdpage.pvh_mtx); 2671 printf("\n"); 2672 } 2673 #endif 2674 2675 /* 2676 * vtophys: 2677 * 2678 * Return the physical address corresponding to the K0SEG or 2679 * K1SEG address provided. 2680 * 2681 * Note: no locking is necessary in this function. 2682 */ 2683 paddr_t 2684 vtophys(vaddr_t vaddr) 2685 { 2686 pt_entry_t *pte; 2687 paddr_t paddr = 0; 2688 2689 if (vaddr < ALPHA_K0SEG_BASE) 2690 printf("vtophys: invalid vaddr 0x%lx", vaddr); 2691 else if (vaddr <= ALPHA_K0SEG_END) 2692 paddr = ALPHA_K0SEG_TO_PHYS(vaddr); 2693 else { 2694 pte = PMAP_KERNEL_PTE(vaddr); 2695 if (pmap_pte_v(pte)) 2696 paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET); 2697 } 2698 2699 #if 0 2700 printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr); 2701 #endif 2702 2703 return (paddr); 2704 } 2705 2706 /******************** pv_entry management ********************/ 2707 2708 /* 2709 * pmap_pv_enter: 2710 * 2711 * Add a physical->virtual entry to the pv_table. 
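*
* Returns 0 on success, or ENOMEM if no pv_entry could be
* allocated; pmap_enter() treats that as a recoverable failure
* only when the caller passed PMAP_CANFAIL.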
2712 */ 2713 int 2714 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, 2715 boolean_t dolock) 2716 { 2717 pv_entry_t newpv; 2718 2719 /* 2720 * Allocate and fill in the new pv_entry. 2721 */ 2722 newpv = pmap_pv_alloc(); 2723 if (newpv == NULL) 2724 return (ENOMEM); 2725 newpv->pv_va = va; 2726 newpv->pv_pmap = pmap; 2727 newpv->pv_pte = pte; 2728 2729 if (dolock) 2730 mtx_enter(&pg->mdpage.pvh_mtx); 2731 2732 #ifdef DEBUG 2733 { 2734 pv_entry_t pv; 2735 /* 2736 * Make sure the entry doesn't already exist. 2737 */ 2738 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) { 2739 if (pmap == pv->pv_pmap && va == pv->pv_va) { 2740 printf("pmap = %p, va = 0x%lx\n", pmap, va); 2741 panic("pmap_pv_enter: already in pv table"); 2742 } 2743 } 2744 } 2745 #endif 2746 2747 /* 2748 * ...and put it in the list. 2749 */ 2750 newpv->pv_next = pg->mdpage.pvh_list; 2751 pg->mdpage.pvh_list = newpv; 2752 2753 if (dolock) 2754 mtx_leave(&pg->mdpage.pvh_mtx); 2755 2756 return (0); 2757 } 2758 2759 /* 2760 * pmap_pv_remove: 2761 * 2762 * Remove a physical->virtual entry from the pv_table. 2763 */ 2764 void 2765 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, boolean_t dolock) 2766 { 2767 pv_entry_t pv, *pvp; 2768 2769 if (dolock) 2770 mtx_enter(&pg->mdpage.pvh_mtx); 2771 2772 /* 2773 * Find the entry to remove. 2774 */ 2775 for (pvp = &pg->mdpage.pvh_list, pv = *pvp; 2776 pv != NULL; pvp = &pv->pv_next, pv = *pvp) 2777 if (pmap == pv->pv_pmap && va == pv->pv_va) 2778 break; 2779 2780 #ifdef DEBUG 2781 if (pv == NULL) 2782 panic("pmap_pv_remove: not in pv table"); 2783 #endif 2784 2785 *pvp = pv->pv_next; 2786 2787 if (dolock) 2788 mtx_leave(&pg->mdpage.pvh_mtx); 2789 2790 pmap_pv_free(pv); 2791 } 2792 2793 /* 2794 * pmap_pv_page_alloc: 2795 * 2796 * Allocate a page for the pv_entry pool. 2797 */ 2798 void * 2799 pmap_pv_page_alloc(struct pool *pp, int flags, int *slowdown) 2800 { 2801 paddr_t pg; 2802 2803 *slowdown = 0; 2804 if (pmap_physpage_alloc(PGU_PVENT, &pg)) 2805 return ((void *)ALPHA_PHYS_TO_K0SEG(pg)); 2806 return (NULL); 2807 } 2808 2809 /* 2810 * pmap_pv_page_free: 2811 * 2812 * Free a pv_entry pool page. 2813 */ 2814 void 2815 pmap_pv_page_free(struct pool *pp, void *v) 2816 { 2817 2818 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v)); 2819 } 2820 2821 /******************** misc. functions ********************/ 2822 2823 /* 2824 * pmap_physpage_alloc: 2825 * 2826 * Allocate a single page from the VM system and return the 2827 * physical address for that page. 2828 */ 2829 boolean_t 2830 pmap_physpage_alloc(int usage, paddr_t *pap) 2831 { 2832 struct vm_page *pg; 2833 paddr_t pa; 2834 2835 /* 2836 * Don't ask for a zeroed page in the L1PT case -- we will 2837 * properly initialize it in the constructor. 2838 */ 2839 2840 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ? 2841 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2842 if (pg != NULL) { 2843 pa = VM_PAGE_TO_PHYS(pg); 2844 2845 #ifdef DIAGNOSTIC 2846 if (pg->wire_count != 0) { 2847 printf("pmap_physpage_alloc: page 0x%lx has " 2848 "%d references\n", pa, pg->wire_count); 2849 panic("pmap_physpage_alloc"); 2850 } 2851 #endif 2852 *pap = pa; 2853 return (TRUE); 2854 } 2855 return (FALSE); 2856 } 2857 2858 /* 2859 * pmap_physpage_free: 2860 * 2861 * Free the single page table page at the specified physical address. 
2862 */ 2863 void 2864 pmap_physpage_free(paddr_t pa) 2865 { 2866 struct vm_page *pg; 2867 2868 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) 2869 panic("pmap_physpage_free: bogus physical page address"); 2870 2871 #ifdef DIAGNOSTIC 2872 if (pg->wire_count != 0) 2873 panic("pmap_physpage_free: page still has references"); 2874 #endif 2875 2876 uvm_pagefree(pg); 2877 } 2878 2879 /* 2880 * pmap_physpage_addref: 2881 * 2882 * Add a reference to the specified special use page. 2883 */ 2884 int 2885 pmap_physpage_addref(void *kva) 2886 { 2887 struct vm_page *pg; 2888 paddr_t pa; 2889 int rval; 2890 2891 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2892 pg = PHYS_TO_VM_PAGE(pa); 2893 2894 rval = ++pg->wire_count; 2895 2896 return (rval); 2897 } 2898 2899 /* 2900 * pmap_physpage_delref: 2901 * 2902 * Delete a reference to the specified special use page. 2903 */ 2904 int 2905 pmap_physpage_delref(void *kva) 2906 { 2907 struct vm_page *pg; 2908 paddr_t pa; 2909 int rval; 2910 2911 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2912 pg = PHYS_TO_VM_PAGE(pa); 2913 2914 #ifdef DIAGNOSTIC 2915 /* 2916 * Make sure we never have a negative reference count. 2917 */ 2918 if (pg->wire_count == 0) 2919 panic("pmap_physpage_delref: reference count already zero"); 2920 #endif 2921 2922 rval = --pg->wire_count; 2923 2924 return (rval); 2925 } 2926 2927 /******************** page table page management ********************/ 2928 2929 /* 2930 * pmap_growkernel: [ INTERFACE ] 2931 * 2932 * Grow the kernel address space. This is a hint from the 2933 * upper layer to pre-allocate more kernel PT pages. 2934 */ 2935 vaddr_t 2936 pmap_growkernel(vaddr_t maxkvaddr) 2937 { 2938 struct pmap *kpm = pmap_kernel(), *pm; 2939 paddr_t ptaddr; 2940 pt_entry_t *l1pte, *l2pte, pte; 2941 vaddr_t va; 2942 int l1idx; 2943 2944 mtx_enter(&pmap_growkernel_mtx); 2945 2946 if (maxkvaddr <= pmap_maxkvaddr) 2947 goto out; /* we are OK */ 2948 2949 va = pmap_maxkvaddr; 2950 2951 while (va < maxkvaddr) { 2952 /* 2953 * If there is no valid L1 PTE (i.e. no L2 PT page), 2954 * allocate a new L2 PT page and insert it into the 2955 * L1 map. 2956 */ 2957 l1pte = pmap_l1pte(kpm, va); 2958 if (pmap_pte_v(l1pte) == 0) { 2959 /* 2960 * XXX PGU_NORMAL? It's not a "traditional" PT page. 2961 */ 2962 if (uvm.page_init_done == FALSE) { 2963 /* 2964 * We're growing the kernel pmap early (from 2965 * uvm_pageboot_alloc()). This case must 2966 * be handled a little differently. 2967 */ 2968 ptaddr = ALPHA_K0SEG_TO_PHYS( 2969 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 2970 } else if (pmap_physpage_alloc(PGU_NORMAL, 2971 &ptaddr) == FALSE) 2972 goto die; 2973 pte = (atop(ptaddr) << PG_SHIFT) | 2974 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 2975 *l1pte = pte; 2976 2977 l1idx = l1pte_index(va); 2978 2979 /* Update all the user pmaps. */ 2980 mtx_enter(&pmap_all_pmaps_mtx); 2981 for (pm = TAILQ_FIRST(&pmap_all_pmaps); 2982 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { 2983 /* Skip the kernel pmap. */ 2984 if (pm == pmap_kernel()) 2985 continue; 2986 2987 PMAP_LOCK(pm); 2988 KDASSERT(pm->pm_lev1map != kernel_lev1map); 2989 pm->pm_lev1map[l1idx] = pte; 2990 PMAP_UNLOCK(pm); 2991 } 2992 mtx_leave(&pmap_all_pmaps_mtx); 2993 } 2994 2995 /* 2996 * Have an L2 PT page now, add the L3 PT page. 2997 */ 2998 l2pte = pmap_l2pte(kpm, va, l1pte); 2999 KASSERT(pmap_pte_v(l2pte) == 0); 3000 if (uvm.page_init_done == FALSE) { 3001 /* 3002 * See above. 
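* (That is, steal the page directly from boot memory with
* pmap_steal_memory(), since pmap_physpage_alloc() cannot use
* uvm_pagealloc() before uvm.page_init_done is set.)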
3003 */
3004 ptaddr = ALPHA_K0SEG_TO_PHYS(
3005 pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3006 } else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == FALSE)
3007 goto die;
3008 *l2pte = (atop(ptaddr) << PG_SHIFT) |
3009 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3010 va += ALPHA_L2SEG_SIZE;
3011 }
3012
3013 #if 0
3014 /* Invalidate the L1 PT cache. */
3015 pool_cache_invalidate(&pmap_l1pt_cache);
3016 #endif
3017
3018 pmap_maxkvaddr = va;
3019
3020 out:
3021 mtx_leave(&pmap_growkernel_mtx);
3022
3023 return (pmap_maxkvaddr);
3024
3025 die:
3026 mtx_leave(&pmap_growkernel_mtx);
3027 panic("pmap_growkernel: out of memory");
3028 }
3029
3030 /*
3031 * pmap_lev1map_create:
3032 *
3033 * Create a new level 1 page table for the specified pmap.
3034 *
3035 * Note: the growkernel lock must already be held and the pmap either
3036 * already locked or unreferenced globally.
3037 */
3038 int
3039 pmap_lev1map_create(pmap_t pmap, cpuid_t cpu_id)
3040 {
3041 pt_entry_t *l1pt;
3042
3043 KASSERT(pmap != pmap_kernel());
3044 KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3045
3046 /* Don't sleep -- we're called with locks held. */
3047 l1pt = pool_get(&pmap_l1pt_pool, PR_NOWAIT);
3048 if (l1pt == NULL)
3049 return (ENOMEM);
3050
3051 pmap_l1pt_ctor(l1pt);
3052 pmap->pm_lev1map = l1pt;
3053
3054 return (0);
3055 }
3056
3057 /*
3058 * pmap_lev1map_destroy:
3059 *
3060 * Destroy the level 1 page table for the specified pmap.
3061 *
3062 * Note: the growkernel lock must already be held and the pmap either
3063 * already locked or unreferenced globally.
3064 */
3065 void
3066 pmap_lev1map_destroy(pmap_t pmap)
3067 {
3068 pt_entry_t *l1pt = pmap->pm_lev1map;
3069
3070 KASSERT(pmap != pmap_kernel());
3071
3072 /*
3073 * Go back to referencing the global kernel_lev1map.
3074 */
3075 pmap->pm_lev1map = kernel_lev1map;
3076
3077 /*
3078 * Free the old level 1 page table page.
3079 */
3080 pool_put(&pmap_l1pt_pool, l1pt);
3081 }
3082
3083 /*
3084 * pmap_l1pt_ctor:
3085 *
3086 * Constructor for L1 PT pages.
3087 */
3088 void
3089 pmap_l1pt_ctor(pt_entry_t *l1pt)
3090 {
3091 pt_entry_t pte;
3092 int i;
3093
3094 /*
3095 * Initialize the new level 1 table by zeroing the
3096 * user portion and copying the kernel mappings into
3097 * the kernel portion.
3098 */
3099 for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3100 l1pt[i] = 0;
3101
3102 for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3103 i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3104 l1pt[i] = kernel_lev1map[i];
3105
3106 /*
3107 * Now, map the new virtual page table. NOTE: NO ASM!
3108 */
3109 pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3110 PG_V | PG_KRE | PG_KWE;
3111 l1pt[l1pte_index(VPTBASE)] = pte;
3112 }
3113
3114 /*
3115 * pmap_l1pt_alloc:
3116 *
3117 * Page allocator for L1 PT pages.
3118 *
3119 * Note: The growkernel lock is held across allocations
3120 * from this pool, so we don't need to acquire it
3121 * ourselves.
3122 */
3123 void *
3124 pmap_l1pt_alloc(struct pool *pp, int flags, int *slowdown)
3125 {
3126 paddr_t ptpa;
3127
3128 /*
3129 * Attempt to allocate a free page.
3130 */
3131 *slowdown = 0;
3132 if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == FALSE)
3133 return (NULL);
3134
3135 return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3136 }
3137
3138 /*
3139 * pmap_l1pt_free:
3140 *
3141 * Page freer for L1 PT pages.
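*
* pmap_l1pt_alloc() and pmap_l1pt_free() have the pool page
* allocator signature and are presumably the backing allocator for
* pmap_l1pt_pool, so L1 PT pages come straight from
* pmap_physpage_alloc()/pmap_physpage_free() rather than from the
* default pool page allocator.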
3142 */ 3143 void 3144 pmap_l1pt_free(struct pool *pp, void *v) 3145 { 3146 3147 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v)); 3148 } 3149 3150 /* 3151 * pmap_ptpage_alloc: 3152 * 3153 * Allocate a level 2 or level 3 page table page, and 3154 * initialize the PTE that references it. 3155 * 3156 * Note: the pmap must already be locked. 3157 */ 3158 int 3159 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage) 3160 { 3161 paddr_t ptpa; 3162 3163 /* 3164 * Allocate the page table page. 3165 */ 3166 if (pmap_physpage_alloc(usage, &ptpa) == FALSE) 3167 return (ENOMEM); 3168 3169 /* 3170 * Initialize the referencing PTE. 3171 */ 3172 PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) | 3173 PG_V | PG_KRE | PG_KWE | PG_WIRED | 3174 (pmap == pmap_kernel() ? PG_ASM : 0)); 3175 3176 return (0); 3177 } 3178 3179 /* 3180 * pmap_ptpage_free: 3181 * 3182 * Free the level 2 or level 3 page table page referenced 3183 * be the provided PTE. 3184 * 3185 * Note: the pmap must already be locked. 3186 */ 3187 void 3188 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte) 3189 { 3190 paddr_t ptpa; 3191 3192 /* 3193 * Extract the physical address of the page from the PTE 3194 * and clear the entry. 3195 */ 3196 ptpa = pmap_pte_pa(pte); 3197 PMAP_SET_PTE(pte, PG_NV); 3198 3199 #ifdef DEBUG 3200 pmap_zero_page(PHYS_TO_VM_PAGE(ptpa)); 3201 #endif 3202 pmap_physpage_free(ptpa); 3203 } 3204 3205 /* 3206 * pmap_l3pt_delref: 3207 * 3208 * Delete a reference on a level 3 PT page. If the reference drops 3209 * to zero, free it. 3210 * 3211 * Note: the pmap must already be locked. 3212 */ 3213 void 3214 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, cpuid_t cpu_id) 3215 { 3216 pt_entry_t *l1pte, *l2pte; 3217 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 3218 3219 l1pte = pmap_l1pte(pmap, va); 3220 l2pte = pmap_l2pte(pmap, va, l1pte); 3221 3222 #ifdef DIAGNOSTIC 3223 if (pmap == pmap_kernel()) 3224 panic("pmap_l3pt_delref: kernel pmap"); 3225 #endif 3226 3227 if (pmap_physpage_delref(l3pte) == 0) { 3228 /* 3229 * No more mappings; we can free the level 3 table. 3230 */ 3231 #ifdef DEBUG 3232 if (pmapdebug & PDB_PTPAGE) 3233 printf("pmap_l3pt_delref: freeing level 3 table at " 3234 "0x%lx\n", pmap_pte_pa(l2pte)); 3235 #endif 3236 pmap_ptpage_free(pmap, l2pte); 3237 3238 /* 3239 * We've freed a level 3 table, so we must 3240 * invalidate the TLB entry for that PT page 3241 * in the Virtual Page Table VA range, because 3242 * otherwise the PALcode will service a TLB 3243 * miss using the stale VPT TLB entry it entered 3244 * behind our back to shortcut to the VA's PTE. 3245 */ 3246 PMAP_INVALIDATE_TLB(pmap, 3247 (vaddr_t)(&VPT[VPT_INDEX(va)]), FALSE, 3248 PMAP_ISACTIVE(pmap, cpu_id), cpu_id); 3249 PMAP_TLB_SHOOTDOWN(pmap, 3250 (vaddr_t)(&VPT[VPT_INDEX(va)]), 0); 3251 PMAP_TLB_SHOOTNOW(); 3252 3253 /* 3254 * We've freed a level 3 table, so delete the reference 3255 * on the level 2 table. 3256 */ 3257 pmap_l2pt_delref(pmap, l1pte, l2pte); 3258 } 3259 } 3260 3261 /* 3262 * pmap_l2pt_delref: 3263 * 3264 * Delete a reference on a level 2 PT page. If the reference drops 3265 * to zero, free it. 3266 * 3267 * Note: the pmap must already be locked. 3268 */ 3269 void 3270 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte) 3271 { 3272 KASSERT(pmap != pmap_kernel()); 3273 if (pmap_physpage_delref(l2pte) == 0) { 3274 /* 3275 * No more mappings in this segment; we can free the 3276 * level 2 table. 
3277 */ 3278 #ifdef DEBUG 3279 if (pmapdebug & PDB_PTPAGE) 3280 printf("pmap_l2pt_delref: freeing level 2 table at " 3281 "0x%lx\n", pmap_pte_pa(l1pte)); 3282 #endif 3283 pmap_ptpage_free(pmap, l1pte); 3284 3285 /* 3286 * We've freed a level 2 table, so delete the reference 3287 * on the level 1 table. 3288 */ 3289 pmap_l1pt_delref(pmap, l1pte); 3290 } 3291 } 3292 3293 /* 3294 * pmap_l1pt_delref: 3295 * 3296 * Delete a reference on a level 1 PT page. If the reference drops 3297 * to zero, free it. 3298 * 3299 * Note: the pmap must already be locked. 3300 */ 3301 void 3302 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte) 3303 { 3304 KASSERT(pmap != pmap_kernel()); 3305 pmap_physpage_delref(l1pte); 3306 } 3307 3308 /******************** Address Space Number management ********************/ 3309 3310 /* 3311 * pmap_asn_alloc: 3312 * 3313 * Allocate and assign an ASN to the specified pmap. 3314 * 3315 * Note: the pmap must already be locked. This may be called from 3316 * an interprocessor interrupt, and in that case, the sender of 3317 * the IPI has the pmap lock. 3318 */ 3319 void 3320 pmap_asn_alloc(pmap_t pmap, cpuid_t cpu_id) 3321 { 3322 struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id]; 3323 struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id]; 3324 3325 #ifdef DEBUG 3326 if (pmapdebug & (PDB_FOLLOW|PDB_ASN)) 3327 printf("pmap_asn_alloc(%p)\n", pmap); 3328 #endif 3329 3330 /* 3331 * If the pmap is still using the global kernel_lev1map, there 3332 * is no need to assign an ASN at this time, because only 3333 * kernel mappings exist in that map, and all kernel mappings 3334 * have PG_ASM set. If the pmap eventually gets its own 3335 * lev1map, an ASN will be allocated at that time. 3336 * 3337 * Only the kernel pmap will reference kernel_lev1map. Do the 3338 * same old fixups, but note that we no longer need the pmap 3339 * to be locked if we're in this mode, since pm_lev1map will 3340 * never change. 3341 */ 3342 if (pmap->pm_lev1map == kernel_lev1map) { 3343 #ifdef DEBUG 3344 if (pmapdebug & PDB_ASN) 3345 printf("pmap_asn_alloc: still references " 3346 "kernel_lev1map\n"); 3347 #endif 3348 #if defined(MULTIPROCESSOR) 3349 /* 3350 * In a multiprocessor system, it's possible to 3351 * get here without having PMAP_ASN_RESERVED in 3352 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy(). 3353 * 3354 * So, what we do here, is simply assign the reserved 3355 * ASN for kernel_lev1map users and let things 3356 * continue on. We do, however, let uniprocessor 3357 * configurations continue to make its assertion. 3358 */ 3359 pma->pma_asn = PMAP_ASN_RESERVED; 3360 #else 3361 KASSERT(pma->pma_asn == PMAP_ASN_RESERVED); 3362 #endif /* MULTIPROCESSOR */ 3363 return; 3364 } 3365 3366 /* 3367 * On processors which do not implement ASNs, the swpctx PALcode 3368 * operation will automatically invalidate the TLB and I-cache, 3369 * so we don't need to do that here. 3370 */ 3371 if (pmap_max_asn == 0) { 3372 /* 3373 * Refresh the pmap's generation number, to 3374 * simplify logic elsewhere. 3375 */ 3376 pma->pma_asngen = cpma->pma_asngen; 3377 #ifdef DEBUG 3378 if (pmapdebug & PDB_ASN) 3379 printf("pmap_asn_alloc: no ASNs, using asngen %lu\n", 3380 pma->pma_asngen); 3381 #endif 3382 return; 3383 } 3384 3385 /* 3386 * Hopefully, we can continue using the one we have... 3387 */ 3388 if (pma->pma_asn != PMAP_ASN_RESERVED && 3389 pma->pma_asngen == cpma->pma_asngen) { 3390 /* 3391 * ASN is still in the current generation; keep on using it. 
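* (The generation scheme: each CPU keeps its current ASN counter
* and generation in pmap_asn_info[]. When the counter runs past
* pmap_max_asn, the TLB is flushed, the counter restarts at 1 and
* the generation is bumped, implicitly invalidating every ASN that
* was handed out under the old generation.)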
3392 */ 3393 #ifdef DEBUG 3394 if (pmapdebug & PDB_ASN) 3395 printf("pmap_asn_alloc: same generation, keeping %u\n", 3396 pma->pma_asn); 3397 #endif 3398 return; 3399 } 3400 3401 /* 3402 * Need to assign a new ASN. Grab the next one, incrementing 3403 * the generation number if we have to. 3404 */ 3405 if (cpma->pma_asn > pmap_max_asn) { 3406 /* 3407 * Invalidate all non-PG_ASM TLB entries and the 3408 * I-cache, and bump the generation number. 3409 */ 3410 ALPHA_TBIAP(); 3411 alpha_pal_imb(); 3412 3413 cpma->pma_asn = 1; 3414 cpma->pma_asngen++; 3415 #ifdef DIAGNOSTIC 3416 if (cpma->pma_asngen == 0) { 3417 /* 3418 * The generation number has wrapped. We could 3419 * handle this scenario by traversing all of 3420 * the pmaps, and invalidating the generation 3421 * number on those which are not currently 3422 * in use by this processor. 3423 * 3424 * However... considering that we're using 3425 * an unsigned 64-bit integer for generation 3426 * numbers, on non-ASN CPUs, we won't wrap 3427 * for approx. 585 million years, or 75 billion 3428 * years on a 128-ASN CPU (assuming 1000 switch 3429 * operations per second). 3430 * 3431 * So, we don't bother. 3432 */ 3433 panic("pmap_asn_alloc: too much uptime"); 3434 } 3435 #endif 3436 #ifdef DEBUG 3437 if (pmapdebug & PDB_ASN) 3438 printf("pmap_asn_alloc: generation bumped to %lu\n", 3439 cpma->pma_asngen); 3440 #endif 3441 } 3442 3443 /* 3444 * Assign the new ASN and validate the generation number. 3445 */ 3446 pma->pma_asn = cpma->pma_asn++; 3447 pma->pma_asngen = cpma->pma_asngen; 3448 3449 #ifdef DEBUG 3450 if (pmapdebug & PDB_ASN) 3451 printf("pmap_asn_alloc: assigning %u to pmap %p\n", 3452 pma->pma_asn, pmap); 3453 #endif 3454 3455 /* 3456 * Have a new ASN, so there's no need to sync the I-stream 3457 * on the way back out to userspace. 3458 */ 3459 atomic_clearbits_ulong(&pmap->pm_needisync, (1UL << cpu_id)); 3460 } 3461 3462 #if defined(MULTIPROCESSOR) 3463 /******************** TLB shootdown code ********************/ 3464 3465 /* 3466 * pmap_tlb_shootdown: 3467 * 3468 * Cause the TLB entry for pmap/va to be shot down. 3469 * 3470 * NOTE: The pmap must be locked here. 3471 */ 3472 void 3473 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) 3474 { 3475 struct pmap_tlb_shootdown_q *pq; 3476 struct pmap_tlb_shootdown_job *pj; 3477 struct cpu_info *ci, *self = curcpu(); 3478 u_long cpumask; 3479 CPU_INFO_ITERATOR cii; 3480 #if 0 3481 int s; 3482 #endif 3483 3484 cpumask = 0; 3485 3486 CPU_INFO_FOREACH(cii, ci) { 3487 if (ci == self) 3488 continue; 3489 3490 /* 3491 * The pmap must be locked (unless its the kernel 3492 * pmap, in which case it is okay for it to be 3493 * unlocked), which prevents it from becoming 3494 * active on any additional processors. This makes 3495 * it safe to check for activeness. If it's not 3496 * active on the processor in question, then just 3497 * mark it as needing a new ASN the next time it 3498 * does, saving the IPI. We always have to send 3499 * the IPI for the kernel pmap. 3500 * 3501 * Note if it's marked active now, and it becomes 3502 * inactive by the time the processor receives 3503 * the IPI, that's okay, because it does the right 3504 * thing with it later. 
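* (pmap_do_tlb_shootdown() passes pm_cpus & cpu_mask as the
* "isactive" argument when it finally services the job, so a pmap
* that went inactive in the meantime is still handled correctly.)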
3505 */ 3506 if (pmap != pmap_kernel() && 3507 PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) { 3508 PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid); 3509 continue; 3510 } 3511 3512 cpumask |= 1UL << ci->ci_cpuid; 3513 3514 pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; 3515 3516 PSJQ_LOCK(pq, s); 3517 3518 pq->pq_pte |= pte; 3519 3520 /* 3521 * If a global flush is already pending, we 3522 * don't really have to do anything else. 3523 */ 3524 if (pq->pq_tbia) { 3525 PSJQ_UNLOCK(pq, s); 3526 continue; 3527 } 3528 3529 pj = pmap_tlb_shootdown_job_get(pq); 3530 if (pj == NULL) { 3531 /* 3532 * Couldn't allocate a job entry. Just 3533 * tell the processor to kill everything. 3534 */ 3535 pq->pq_tbia = 1; 3536 } else { 3537 pj->pj_pmap = pmap; 3538 pj->pj_va = va; 3539 pj->pj_pte = pte; 3540 TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list); 3541 } 3542 3543 PSJQ_UNLOCK(pq, s); 3544 } 3545 3546 *cpumaskp |= cpumask; 3547 } 3548 3549 /* 3550 * pmap_tlb_shootnow: 3551 * 3552 * Process the TLB shootdowns that we have been accumulating 3553 * for the specified processor set. 3554 */ 3555 void 3556 pmap_tlb_shootnow(u_long cpumask) 3557 { 3558 3559 alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN); 3560 } 3561 3562 /* 3563 * pmap_do_tlb_shootdown: 3564 * 3565 * Process pending TLB shootdown operations for this processor. 3566 */ 3567 void 3568 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) 3569 { 3570 u_long cpu_id = ci->ci_cpuid; 3571 u_long cpu_mask = (1UL << cpu_id); 3572 struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; 3573 struct pmap_tlb_shootdown_job *pj; 3574 #if 0 3575 int s; 3576 #endif 3577 3578 PSJQ_LOCK(pq, s); 3579 3580 if (pq->pq_tbia) { 3581 if (pq->pq_pte & PG_ASM) 3582 ALPHA_TBIA(); 3583 else 3584 ALPHA_TBIAP(); 3585 pq->pq_tbia = 0; 3586 pmap_tlb_shootdown_q_drain(pq); 3587 } else { 3588 while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) { 3589 TAILQ_REMOVE(&pq->pq_head, pj, pj_list); 3590 PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va, 3591 pj->pj_pte & PG_ASM, 3592 pj->pj_pmap->pm_cpus & cpu_mask, cpu_id); 3593 pmap_tlb_shootdown_job_put(pq, pj); 3594 } 3595 } 3596 pq->pq_pte = 0; 3597 3598 PSJQ_UNLOCK(pq, s); 3599 } 3600 3601 /* 3602 * pmap_tlb_shootdown_q_drain: 3603 * 3604 * Drain a processor's TLB shootdown queue. We do not perform 3605 * the shootdown operations. This is merely a convenience 3606 * function. 3607 * 3608 * Note: We expect the queue to be locked. 3609 */ 3610 void 3611 pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *pq) 3612 { 3613 struct pmap_tlb_shootdown_job *pj; 3614 3615 while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) { 3616 TAILQ_REMOVE(&pq->pq_head, pj, pj_list); 3617 pmap_tlb_shootdown_job_put(pq, pj); 3618 } 3619 } 3620 3621 /* 3622 * pmap_tlb_shootdown_job_get: 3623 * 3624 * Get a TLB shootdown job queue entry. This places a limit on 3625 * the number of outstanding jobs a processor may have. 3626 * 3627 * Note: We expect the queue to be locked. 3628 */ 3629 struct pmap_tlb_shootdown_job * 3630 pmap_tlb_shootdown_job_get(struct pmap_tlb_shootdown_q *pq) 3631 { 3632 struct pmap_tlb_shootdown_job *pj; 3633 3634 pj = TAILQ_FIRST(&pq->pq_free); 3635 if (pj != NULL) 3636 TAILQ_REMOVE(&pq->pq_free, pj, pj_list); 3637 return (pj); 3638 } 3639 3640 /* 3641 * pmap_tlb_shootdown_job_put: 3642 * 3643 * Put a TLB shootdown job queue entry onto the free list. 3644 * 3645 * Note: We expect the queue to be locked. 
3646 */ 3647 void 3648 pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *pq, 3649 struct pmap_tlb_shootdown_job *pj) 3650 { 3651 TAILQ_INSERT_TAIL(&pq->pq_free, pj, pj_list); 3652 } 3653 #endif /* MULTIPROCESSOR */ 3654
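/*
 * A minimal, non-compiled usage sketch of the kenter/kremove interface
 * above.  It assumes a page-aligned kernel virtual address obtained
 * elsewhere (e.g. from km_alloc()) and a page-aligned physical address;
 * the function name is illustrative only and is not part of the pmap
 * interface.
 */
#if 0
void
pmap_example_temporary_mapping(vaddr_t va, paddr_t pa)
{
	paddr_t check;

	/* Establish a wired kernel mapping with no PV tracking. */
	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);

	/* Look the translation back up through pmap_extract(). */
	if (pmap_extract(pmap_kernel(), va, &check) == FALSE || check != pa)
		panic("pmap example: lookup mismatch");

	/* Tear the mapping down again; the size must be page-rounded. */
	pmap_kremove(va, PAGE_SIZE);
}
#endif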