1 /* $NetBSD: pmap.c,v 1.261 2016/12/23 07:15:27 cherry Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center and by Chris G. Demetriou. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)pmap.c 8.6 (Berkeley) 5/27/94 66 */ 67 68 /* 69 * DEC Alpha physical map management code. 70 * 71 * History: 72 * 73 * This pmap started life as a Motorola 68851/68030 pmap, 74 * written by Mike Hibler at the University of Utah. 75 * 76 * It was modified for the DEC Alpha by Chris Demetriou 77 * at Carnegie Mellon University. 78 * 79 * Support for non-contiguous physical memory was added by 80 * Jason R. Thorpe of the Numerical Aerospace Simulation 81 * Facility, NASA Ames Research Center and Chris Demetriou. 82 * 83 * Page table management and a major cleanup were undertaken 84 * by Jason R. Thorpe, with lots of help from Ross Harvey of 85 * Avalon Computer Systems and from Chris Demetriou. 86 * 87 * Support for the new UVM pmap interface was written by 88 * Jason R. Thorpe. 89 * 90 * Support for ASNs was written by Jason R. Thorpe, again 91 * with help from Chris Demetriou and Ross Harvey. 92 * 93 * The locking protocol was written by Jason R. Thorpe, 94 * using Chuck Cranor's i386 pmap for UVM as a model. 95 * 96 * TLB shootdown code was written by Jason R. Thorpe. 97 * 98 * Multiprocessor modifications by Andrew Doran. 99 * 100 * Notes: 101 * 102 * All page table access is done via K0SEG. The one exception 103 * to this is for kernel mappings. Since all kernel page 104 * tables are pre-allocated, we can use the Virtual Page Table 105 * to access PTEs that map K1SEG addresses. 106 * 107 * Kernel page table pages are statically allocated in 108 * pmap_bootstrap(), and are never freed. In the future, 109 * support for dynamically adding additional kernel page 110 * table pages may be added. User page table pages are 111 * dynamically allocated and freed. 112 * 113 * Bugs/misfeatures: 114 * 115 * - Some things could be optimized. 116 */ 117 118 /* 119 * Manages physical address maps. 120 * 121 * Since the information managed by this module is 122 * also stored by the logical address mapping module, 123 * this module may throw away valid virtual-to-physical 124 * mappings at almost any time. However, invalidations 125 * of virtual-to-physical mappings must be done as 126 * requested. 127 * 128 * In order to cope with hardware architectures which 129 * make virtual-to-physical map invalidates expensive, 130 * this module may delay invalidate or reduced protection 131 * operations until such time as they are actually 132 * necessary. This module is given full information as 133 * to which processors are currently using which maps, 134 * and to when physical maps must be made correct. 
135 */ 136 137 #include "opt_lockdebug.h" 138 #include "opt_sysv.h" 139 #include "opt_multiprocessor.h" 140 141 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 142 143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.261 2016/12/23 07:15:27 cherry Exp $"); 144 145 #include <sys/param.h> 146 #include <sys/systm.h> 147 #include <sys/kernel.h> 148 #include <sys/proc.h> 149 #include <sys/malloc.h> 150 #include <sys/pool.h> 151 #include <sys/buf.h> 152 #include <sys/atomic.h> 153 #include <sys/cpu.h> 154 155 #include <uvm/uvm.h> 156 157 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR) 158 #include <machine/rpb.h> 159 #endif 160 161 #ifdef DEBUG 162 #define PDB_FOLLOW 0x0001 163 #define PDB_INIT 0x0002 164 #define PDB_ENTER 0x0004 165 #define PDB_REMOVE 0x0008 166 #define PDB_CREATE 0x0010 167 #define PDB_PTPAGE 0x0020 168 #define PDB_ASN 0x0040 169 #define PDB_BITS 0x0080 170 #define PDB_COLLECT 0x0100 171 #define PDB_PROTECT 0x0200 172 #define PDB_BOOTSTRAP 0x1000 173 #define PDB_PARANOIA 0x2000 174 #define PDB_WIRING 0x4000 175 #define PDB_PVDUMP 0x8000 176 177 int debugmap = 0; 178 int pmapdebug = PDB_PARANOIA; 179 #endif 180 181 /* 182 * Given a map and a machine independent protection code, 183 * convert to an alpha protection code. 184 */ 185 #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p]) 186 static int protection_codes[2][8]; 187 188 /* 189 * kernel_lev1map: 190 * 191 * Kernel level 1 page table. This maps all kernel level 2 192 * page table pages, and is used as a template for all user 193 * pmap level 1 page tables. When a new user level 1 page 194 * table is allocated, all kernel_lev1map PTEs for kernel 195 * addresses are copied to the new map. 196 * 197 * The kernel also has an initial set of kernel level 2 page 198 * table pages. These map the kernel level 3 page table pages. 199 * As kernel level 3 page table pages are added, more level 2 200 * page table pages may be added to map them. These pages are 201 * never freed. 202 * 203 * Finally, the kernel also has an initial set of kernel level 204 * 3 page table pages. These map pages in K1SEG. More level 205 * 3 page table pages may be added at run-time if additional 206 * K1SEG address space is required. These pages are never freed. 207 * 208 * NOTE: When mappings are inserted into the kernel pmap, all 209 * level 2 and level 3 page table pages must already be allocated 210 * and mapped into the parent page table. 211 */ 212 pt_entry_t *kernel_lev1map; 213 214 /* 215 * Virtual Page Table. 216 */ 217 static pt_entry_t *VPT; 218 219 static struct { 220 struct pmap k_pmap; 221 struct pmap_asn_info k_asni[ALPHA_MAXPROCS]; 222 } kernel_pmap_store; 223 224 struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap; 225 226 paddr_t avail_start; /* PA of first available physical page */ 227 paddr_t avail_end; /* PA of last available physical page */ 228 static vaddr_t virtual_end; /* VA of last avail page (end of kernel AS) */ 229 230 static bool pmap_initialized; /* Has pmap_init completed? */ 231 232 u_long pmap_pages_stolen; /* instrumentation */ 233 234 /* 235 * This variable contains the number of CPU IDs we need to allocate 236 * space for when allocating the pmap structure. It is used to 237 * size a per-CPU array of ASN and ASN Generation number. 238 */ 239 static u_long pmap_ncpuids; 240 241 #ifndef PMAP_PV_LOWAT 242 #define PMAP_PV_LOWAT 16 243 #endif 244 int pmap_pv_lowat = PMAP_PV_LOWAT; 245 246 /* 247 * List of all pmaps, used to update them when e.g. 
additional kernel 248 * page tables are allocated. This list is kept LRU-ordered by 249 * pmap_activate(). 250 */ 251 static TAILQ_HEAD(, pmap) pmap_all_pmaps; 252 253 /* 254 * The pools from which pmap structures and sub-structures are allocated. 255 */ 256 static struct pool_cache pmap_pmap_cache; 257 static struct pool_cache pmap_l1pt_cache; 258 static struct pool_cache pmap_pv_cache; 259 260 /* 261 * Address Space Numbers. 262 * 263 * On many implementations of the Alpha architecture, the TLB entries and 264 * I-cache blocks are tagged with a unique number within an implementation- 265 * specified range. When a process context becomes active, the ASN is used 266 * to match TLB entries; if a TLB entry for a particular VA does not match 267 * the current ASN, it is ignored (one could think of the processor as 268 * having a collection of <max ASN> separate TLBs). This allows operating 269 * system software to skip the TLB flush that would otherwise be necessary 270 * at context switch time. 271 * 272 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that 273 * causes TLB entries to match any ASN. The PALcode also provides 274 * a TBI (Translation Buffer Invalidate) operation that flushes all 275 * TLB entries that _do not_ have PG_ASM. We use this bit for kernel 276 * mappings, so that invalidation of all user mappings does not invalidate 277 * kernel mappings (which are consistent across all processes). 278 * 279 * pmap_next_asn always indicates the next ASN to use. When 280 * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation. 281 * 282 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM) 283 * TLB entries and the I-cache are flushed, the generation number is bumped, 284 * and pmap_next_asn is changed to indicate the first non-reserved ASN. 285 * 286 * We reserve ASN #0 for pmaps that use the global kernel_lev1map. This 287 * prevents the following scenario: 288 * 289 * * New ASN generation starts, and process A is given ASN #0. 290 * 291 * * A new process B (and thus new pmap) is created. The ASN, 292 * for lack of a better value, is initialized to 0. 293 * 294 * * Process B runs. It is now using the TLB entries tagged 295 * by process A. *poof* 296 * 297 * In the scenario above, in addition to the processor using incorrect 298 * TLB entries, the PALcode might use incorrect information to service a 299 * TLB miss. (The PALcode uses the recursively mapped Virtual Page Table 300 * to locate the PTE for a faulting address, and tagged TLB entries exist 301 * for the Virtual Page Table addresses in order to speed up this procedure, 302 * as well.) 303 * 304 * By reserving an ASN for kernel_lev1map users, we are guaranteeing that 305 * new pmaps will initially run with no TLB entries for user addresses 306 * or VPT mappings that map user page tables. Since kernel_lev1map only 307 * contains mappings for kernel addresses, and since those mappings 308 * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is 309 * safe (since PG_ASM mappings match any ASN). 310 * 311 * On processors that do not support ASNs, the PALcode invalidates 312 * the TLB and I-cache automatically on swpctx. We still go 313 * through the motions of assigning an ASN (really, just refreshing 314 * the ASN generation in this particular case) to keep the logic sane 315 * in other parts of the code.
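 *
 * In outline, the allocation policy described above amounts to the
 * following per-CPU sketch (illustrative only; pmap_asn_alloc() later
 * in this file is the authoritative version):
 *
 *	if (pmap->pm_lev1map == kernel_lev1map)
 *		pmap->pm_asni[cpu].pma_asn = PMAP_ASN_RESERVED;
 *	else if (pmap->pm_asni[cpu].pma_asngen !=
 *		 pmap_asn_info[cpu].pma_asngen) {
 *		if (pmap_asn_info[cpu].pma_asn > pmap_max_asn)
 *			... new generation: flush the non-PG_ASM TLB
 *			    entries and the I-cache, bump pma_asngen,
 *			    restart pma_asn at the first ASN after
 *			    PMAP_ASN_RESERVED ...
 *		pmap->pm_asni[cpu].pma_asn = pmap_asn_info[cpu].pma_asn++;
 *		pmap->pm_asni[cpu].pma_asngen = pmap_asn_info[cpu].pma_asngen;
 *	}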
316 */ 317 static u_int pmap_max_asn; /* max ASN supported by the system */ 318 /* next ASN and cur ASN generation */ 319 static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; 320 321 /* 322 * Locking: 323 * 324 * READ/WRITE LOCKS 325 * ---------------- 326 * 327 * * pmap_main_lock - This lock is used to prevent deadlock and/or 328 * provide mutex access to the pmap module. Most operations lock 329 * the pmap first, then PV lists as needed. However, some operations, 330 * such as pmap_page_protect(), lock the PV lists before locking 331 * the pmaps. To prevent deadlock, we require a mutex lock on the 332 * pmap module if locking in the PV->pmap direction. This is 333 * implemented by acquiring a (shared) read lock on pmap_main_lock 334 * if locking pmap->PV and an (exclusive) write lock if locking in 335 * the PV->pmap direction. Since only one thread can hold a write 336 * lock at a time, this provides the mutex. 337 * 338 * MUTEXES 339 * ------- 340 * 341 * * pm_lock (per-pmap) - This lock protects all of the members 342 * of the pmap structure itself. This lock will be asserted 343 * in pmap_activate() and pmap_deactivate() from a critical 344 * section of mi_switch(), and must never sleep. Note that 345 * in the case of the kernel pmap, interrupts which cause 346 * memory allocation *must* be blocked while this lock is 347 * asserted. 348 * 349 * * pvh_lock (global hash) - These locks protect the PV lists 350 * for managed pages. 351 * 352 * * pmap_all_pmaps_lock - This lock protects the global list of 353 * all pmaps. Note that a pm_lock must never be held while this 354 * lock is held. 355 * 356 * * pmap_growkernel_lock - This lock protects pmap_growkernel() 357 * and the virtual_end variable. 358 * 359 * There is a lock ordering constraint for pmap_growkernel_lock. 360 * pmap_growkernel() acquires the locks in the following order: 361 * 362 * pmap_growkernel_lock (write) -> pmap_all_pmaps_lock -> 363 * pmap->pm_lock 364 * 365 * We need to ensure consistency between user pmaps and the 366 * kernel_lev1map. For this reason, pmap_growkernel_lock must 367 * be held to prevent kernel_lev1map changing across pmaps 368 * being added to / removed from the global pmaps list. 369 * 370 * Address space number management (global ASN counters and per-pmap 371 * ASN state) is not locked; it uses arrays of values indexed 372 * per-processor. 373 * 374 * All internal functions which operate on a pmap are called 375 * with the pmap already locked by the caller (which will be 376 * an interface function). 377 */ 378 static krwlock_t pmap_main_lock; 379 static kmutex_t pmap_all_pmaps_lock; 380 static krwlock_t pmap_growkernel_lock; 381 382 #define PMAP_MAP_TO_HEAD_LOCK() rw_enter(&pmap_main_lock, RW_READER) 383 #define PMAP_MAP_TO_HEAD_UNLOCK() rw_exit(&pmap_main_lock) 384 #define PMAP_HEAD_TO_MAP_LOCK() rw_enter(&pmap_main_lock, RW_WRITER) 385 #define PMAP_HEAD_TO_MAP_UNLOCK() rw_exit(&pmap_main_lock) 386 387 struct { 388 kmutex_t lock; 389 } __aligned(64) static pmap_pvh_locks[64] __aligned(64); 390 391 static inline kmutex_t * 392 pmap_pvh_lock(struct vm_page *pg) 393 { 394 395 /* Cut bits 11-6 out of page address and use directly as offset. */ 396 return (kmutex_t *)((uintptr_t)&pmap_pvh_locks + 397 ((uintptr_t)pg & (63 << 6))); 398 } 399 400 #if defined(MULTIPROCESSOR) 401 /* 402 * TLB Shootdown: 403 * 404 * When a mapping is changed in a pmap, the TLB entry corresponding to 405 * the virtual address must be invalidated on all processors.
In order 406 * to accomplish this on systems with multiple processors, messages are 407 * sent from the processor which performs the mapping change to all 408 * processors on which the pmap is active. For the other processors, the 409 * pmap's ASN generation number for that processor is invalidated, so that 410 * the next time the pmap is activated on that processor, a new ASN 411 * will be allocated (which implicitly invalidates all TLB entries). 412 * 413 * Note, we can use the pool allocator to allocate job entries 414 * since pool pages are mapped with K0SEG, not with the TLB. 415 */ 416 struct pmap_tlb_shootdown_job { 417 TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; 418 vaddr_t pj_va; /* virtual address */ 419 pmap_t pj_pmap; /* the pmap which maps the address */ 420 pt_entry_t pj_pte; /* the PTE bits */ 421 }; 422 423 static struct pmap_tlb_shootdown_q { 424 TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; /* queue 16b */ 425 kmutex_t pq_lock; /* spin lock on queue 16b */ 426 int pq_pte; /* aggregate PTE bits 4b */ 427 int pq_count; /* number of pending requests 4b */ 428 int pq_tbia; /* pending global flush 4b */ 429 uint8_t pq_pad[64-16-16-4-4-4]; /* pad to 64 bytes */ 430 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE); 431 432 /* If we have more pending jobs than this, we just nail the whole TLB. */ 433 #define PMAP_TLB_SHOOTDOWN_MAXJOBS 6 434 435 static struct pool_cache pmap_tlb_shootdown_job_cache; 436 #endif /* MULTIPROCESSOR */ 437 438 /* 439 * Internal routines 440 */ 441 static void alpha_protection_init(void); 442 static bool pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long); 443 static void pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long); 444 445 /* 446 * PT page management functions. 447 */ 448 static int pmap_lev1map_create(pmap_t, long); 449 static void pmap_lev1map_destroy(pmap_t, long); 450 static int pmap_ptpage_alloc(pmap_t, pt_entry_t *, int); 451 static void pmap_ptpage_free(pmap_t, pt_entry_t *); 452 static void pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long); 453 static void pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long); 454 static void pmap_l1pt_delref(pmap_t, pt_entry_t *, long); 455 456 static void *pmap_l1pt_alloc(struct pool *, int); 457 static void pmap_l1pt_free(struct pool *, void *); 458 459 static struct pool_allocator pmap_l1pt_allocator = { 460 pmap_l1pt_alloc, pmap_l1pt_free, 0, 461 }; 462 463 static int pmap_l1pt_ctor(void *, void *, int); 464 465 /* 466 * PV table management functions. 467 */ 468 static int pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *, 469 bool); 470 static void pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool); 471 static void *pmap_pv_page_alloc(struct pool *, int); 472 static void pmap_pv_page_free(struct pool *, void *); 473 474 static struct pool_allocator pmap_pv_page_allocator = { 475 pmap_pv_page_alloc, pmap_pv_page_free, 0, 476 }; 477 478 #ifdef DEBUG 479 void pmap_pv_dump(paddr_t); 480 #endif 481 482 #define pmap_pv_alloc() pool_cache_get(&pmap_pv_cache, PR_NOWAIT) 483 #define pmap_pv_free(pv) pool_cache_put(&pmap_pv_cache, (pv)) 484 485 /* 486 * ASN management functions. 487 */ 488 static void pmap_asn_alloc(pmap_t, long); 489 490 /* 491 * Misc. functions.
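 *
 * pmap_physpage_alloc() and pmap_physpage_free() hand out and reclaim
 * the raw physical pages used for page table pages, while
 * pmap_physpage_addref() and pmap_physpage_delref() maintain the
 * per-page reference counts that the L1/L2/L3 walks in pmap_remove()
 * and pmap_enter() take before descending into a table, so a page
 * table page cannot be freed out from under them.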
492 */ 493 static bool pmap_physpage_alloc(int, paddr_t *); 494 static void pmap_physpage_free(paddr_t); 495 static int pmap_physpage_addref(void *); 496 static int pmap_physpage_delref(void *); 497 498 /* 499 * PMAP_ISACTIVE{,_TEST}: 500 * 501 * Check to see if a pmap is active on the current processor. 502 */ 503 #define PMAP_ISACTIVE_TEST(pm, cpu_id) \ 504 (((pm)->pm_cpus & (1UL << (cpu_id))) != 0) 505 506 #if defined(DEBUG) && !defined(MULTIPROCESSOR) 507 #define PMAP_ISACTIVE(pm, cpu_id) \ 508 ({ \ 509 /* \ 510 * XXX This test is not MP-safe. \ 511 */ \ 512 int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id); \ 513 \ 514 if ((curlwp->l_flag & LW_IDLE) != 0 && \ 515 curproc->p_vmspace != NULL && \ 516 ((curproc->p_sflag & PS_WEXIT) == 0) && \ 517 (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap))) \ 518 panic("PMAP_ISACTIVE"); \ 519 (isactive_); \ 520 }) 521 #else 522 #define PMAP_ISACTIVE(pm, cpu_id) PMAP_ISACTIVE_TEST(pm, cpu_id) 523 #endif /* DEBUG && !MULTIPROCESSOR */ 524 525 /* 526 * PMAP_ACTIVATE_ASN_SANITY: 527 * 528 * DEBUG sanity checks for ASNs within PMAP_ACTIVATE. 529 */ 530 #ifdef DEBUG 531 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) \ 532 do { \ 533 struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)]; \ 534 struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)]; \ 535 \ 536 if ((pmap)->pm_lev1map == kernel_lev1map) { \ 537 /* \ 538 * This pmap implementation also ensures that pmaps \ 539 * referencing kernel_lev1map use a reserved ASN \ 540 * ASN to prevent the PALcode from servicing a TLB \ 541 * miss with the wrong PTE. \ 542 */ \ 543 if (__pma->pma_asn != PMAP_ASN_RESERVED) { \ 544 printf("kernel_lev1map with non-reserved ASN " \ 545 "(line %d)\n", __LINE__); \ 546 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 547 } \ 548 } else { \ 549 if (__pma->pma_asngen != __cpma->pma_asngen) { \ 550 /* \ 551 * ASN generation number isn't valid! \ 552 */ \ 553 printf("pmap asngen %lu, current %lu " \ 554 "(line %d)\n", \ 555 __pma->pma_asngen, \ 556 __cpma->pma_asngen, \ 557 __LINE__); \ 558 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 559 } \ 560 if (__pma->pma_asn == PMAP_ASN_RESERVED) { \ 561 /* \ 562 * DANGER WILL ROBINSON! We're going to \ 563 * pollute the VPT TLB entries! \ 564 */ \ 565 printf("Using reserved ASN! (line %d)\n", \ 566 __LINE__); \ 567 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 568 } \ 569 } \ 570 } while (/*CONSTCOND*/0) 571 #else 572 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) /* nothing */ 573 #endif 574 575 /* 576 * PMAP_ACTIVATE: 577 * 578 * This is essentially the guts of pmap_activate(), without 579 * ASN allocation. This is used by pmap_activate(), 580 * pmap_lev1map_create(), and pmap_lev1map_destroy(). 581 * 582 * This is called only when it is known that a pmap is "active" 583 * on the current processor; the ASN must already be valid. 584 */ 585 #define PMAP_ACTIVATE(pmap, l, cpu_id) \ 586 do { \ 587 struct pcb *pcb = lwp_getpcb(l); \ 588 PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id); \ 589 \ 590 pcb->pcb_hw.apcb_ptbr = \ 591 ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \ 592 pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn; \ 593 \ 594 if ((l) == curlwp) { \ 595 /* \ 596 * Page table base register has changed; switch to \ 597 * our own context again so that it will take effect. \ 598 */ \ 599 (void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr); \ 600 } \ 601 } while (/*CONSTCOND*/0) 602 603 /* 604 * PMAP_SET_NEEDISYNC: 605 * 606 * Mark that a user pmap needs an I-stream synch on its 607 * way back out to userspace. 
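 *
 * pm_needisync is used as a per-CPU bitmask, so setting it to ~0UL
 * flags every processor.  The mark is consumed on the way back to
 * userspace (note the "will happen in userret()" comments in
 * PMAP_SYNC_ISTREAM_USER below); the expectation is that each CPU
 * finding its bit set issues an IMB and clears the bit, but that
 * consumer lives in the machine-dependent userret path, not here.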
608 */ 609 #define PMAP_SET_NEEDISYNC(pmap) (pmap)->pm_needisync = ~0UL 610 611 /* 612 * PMAP_SYNC_ISTREAM: 613 * 614 * Synchronize the I-stream for the specified pmap. For user 615 * pmaps, this is deferred until a process using the pmap returns 616 * to userspace. 617 */ 618 #if defined(MULTIPROCESSOR) 619 #define PMAP_SYNC_ISTREAM_KERNEL() \ 620 do { \ 621 alpha_pal_imb(); \ 622 alpha_broadcast_ipi(ALPHA_IPI_IMB); \ 623 } while (/*CONSTCOND*/0) 624 625 #define PMAP_SYNC_ISTREAM_USER(pmap) \ 626 do { \ 627 alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST); \ 628 /* for curcpu, will happen in userret() */ \ 629 } while (/*CONSTCOND*/0) 630 #else 631 #define PMAP_SYNC_ISTREAM_KERNEL() alpha_pal_imb() 632 #define PMAP_SYNC_ISTREAM_USER(pmap) /* will happen in userret() */ 633 #endif /* MULTIPROCESSOR */ 634 635 #define PMAP_SYNC_ISTREAM(pmap) \ 636 do { \ 637 if ((pmap) == pmap_kernel()) \ 638 PMAP_SYNC_ISTREAM_KERNEL(); \ 639 else \ 640 PMAP_SYNC_ISTREAM_USER(pmap); \ 641 } while (/*CONSTCOND*/0) 642 643 /* 644 * PMAP_INVALIDATE_ASN: 645 * 646 * Invalidate the specified pmap's ASN, so as to force allocation 647 * of a new one the next time pmap_asn_alloc() is called. 648 * 649 * NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING 650 * CONDITIONS ARE true: 651 * 652 * (1) The pmap references the global kernel_lev1map. 653 * 654 * (2) The pmap is not active on the current processor. 655 */ 656 #define PMAP_INVALIDATE_ASN(pmap, cpu_id) \ 657 do { \ 658 (pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED; \ 659 } while (/*CONSTCOND*/0) 660 661 /* 662 * PMAP_INVALIDATE_TLB: 663 * 664 * Invalidate the TLB entry for the pmap/va pair. 665 */ 666 #define PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id) \ 667 do { \ 668 if ((hadasm) || (isactive)) { \ 669 /* \ 670 * Simply invalidating the TLB entry and I-cache \ 671 * works in this case. \ 672 */ \ 673 ALPHA_TBIS((va)); \ 674 } else if ((pmap)->pm_asni[(cpu_id)].pma_asngen == \ 675 pmap_asn_info[(cpu_id)].pma_asngen) { \ 676 /* \ 677 * We can't directly invalidate the TLB entry \ 678 * in this case, so we have to force allocation \ 679 * of a new ASN the next time this pmap becomes \ 680 * active. \ 681 */ \ 682 PMAP_INVALIDATE_ASN((pmap), (cpu_id)); \ 683 } \ 684 /* \ 685 * Nothing to do in this case; the next time the \ 686 * pmap becomes active on this processor, a new \ 687 * ASN will be allocated anyway. \ 688 */ \ 689 } while (/*CONSTCOND*/0) 690 691 /* 692 * PMAP_KERNEL_PTE: 693 * 694 * Get a kernel PTE. 695 * 696 * If debugging, do a table walk. If not debugging, just use 697 * the Virtual Page Table, since all kernel page tables are 698 * pre-allocated and mapped in. 699 */ 700 #ifdef DEBUG 701 #define PMAP_KERNEL_PTE(va) \ 702 ({ \ 703 pt_entry_t *l1pte_, *l2pte_; \ 704 \ 705 l1pte_ = pmap_l1pte(pmap_kernel(), va); \ 706 if (pmap_pte_v(l1pte_) == 0) { \ 707 printf("kernel level 1 PTE not valid, va 0x%lx " \ 708 "(line %d)\n", (va), __LINE__); \ 709 panic("PMAP_KERNEL_PTE"); \ 710 } \ 711 l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_); \ 712 if (pmap_pte_v(l2pte_) == 0) { \ 713 printf("kernel level 2 PTE not valid, va 0x%lx " \ 714 "(line %d)\n", (va), __LINE__); \ 715 panic("PMAP_KERNEL_PTE"); \ 716 } \ 717 pmap_l3pte(pmap_kernel(), va, l2pte_); \ 718 }) 719 #else 720 #define PMAP_KERNEL_PTE(va) (&VPT[VPT_INDEX((va))]) 721 #endif 722 723 /* 724 * PMAP_SET_PTE: 725 * 726 * Set a PTE to a specified value. 
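 *
 * This is a plain, non-atomic store.  On MULTIPROCESSOR kernels,
 * callers that need the new PTE to be visible to other processors
 * before they post a TLB shootdown follow it with an explicit
 * alpha_mb(); see pmap_kenter_pa() and pmap_kremove() below.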
727 */ 728 #define PMAP_SET_PTE(ptep, val) *(ptep) = (val) 729 730 /* 731 * PMAP_STAT_{INCR,DECR}: 732 * 733 * Increment or decrement a pmap statistic. 734 */ 735 #define PMAP_STAT_INCR(s, v) atomic_add_long((unsigned long *)(&(s)), (v)) 736 #define PMAP_STAT_DECR(s, v) atomic_add_long((unsigned long *)(&(s)), -(v)) 737 738 /* 739 * pmap_bootstrap: 740 * 741 * Bootstrap the system to run with virtual memory. 742 * 743 * Note: no locking is necessary in this function. 744 */ 745 void 746 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) 747 { 748 vsize_t lev2mapsize, lev3mapsize; 749 pt_entry_t *lev2map, *lev3map; 750 pt_entry_t pte; 751 vsize_t bufsz; 752 struct pcb *pcb; 753 int i; 754 755 #ifdef DEBUG 756 if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP)) 757 printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn); 758 #endif 759 760 /* 761 * Compute the number of pages kmem_arena will have. 762 */ 763 kmeminit_nkmempages(); 764 765 /* 766 * Figure out how many initial PTE's are necessary to map the 767 * kernel. We also reserve space for kmem_alloc_pageable() 768 * for vm_fork(). 769 */ 770 771 /* Get size of buffer cache and set an upper limit */ 772 bufsz = buf_memcalc(); 773 buf_setvalimit(bufsz); 774 775 lev3mapsize = 776 (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) + 777 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE + 778 (maxproc * UPAGES) + nkmempages; 779 780 lev3mapsize = roundup(lev3mapsize, NPTEPG); 781 782 /* 783 * Initialize `FYI' variables. Note we're relying on 784 * the fact that BSEARCH sorts the vm_physmem[] array 785 * for us. 786 */ 787 avail_start = ptoa(uvm_physseg_get_avail_start(uvm_physseg_get_first())); 788 avail_end = ptoa(uvm_physseg_get_avail_end(uvm_physseg_get_last())); 789 virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE; 790 791 #if 0 792 printf("avail_start = 0x%lx\n", avail_start); 793 printf("avail_end = 0x%lx\n", avail_end); 794 printf("virtual_end = 0x%lx\n", virtual_end); 795 #endif 796 797 /* 798 * Allocate a level 1 PTE table for the kernel. 799 * This is always one page long. 800 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 801 */ 802 kernel_lev1map = (pt_entry_t *) 803 uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG); 804 805 /* 806 * Allocate a level 2 PTE table for the kernel. 807 * These must map all of the level3 PTEs. 808 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 809 */ 810 lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG); 811 lev2map = (pt_entry_t *) 812 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize); 813 814 /* 815 * Allocate a level 3 PTE table for the kernel. 816 * Contains lev3mapsize PTEs. 
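 *
 * As a worked example of the sizing arithmetic: with 8 KB pages and
 * 8-byte PTEs (the NetBSD/alpha values), NPTEPG is 1024, so one level
 * 3 page maps 1024 * 8 KB = 8 MB of KVA and one level 2 page maps
 * 1024 of those, or 8 GB.  lev2mapsize above is therefore just
 * lev3mapsize / NPTEPG, rounded up to a full page of level 2 PTEs,
 * and the strides used to fill in the level 1 and level 2 tables
 * below (PAGE_SIZE*NPTEPG*NPTEPG and PAGE_SIZE*NPTEPG) follow
 * directly from the same numbers.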
817 */ 818 lev3map = (pt_entry_t *) 819 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize); 820 821 /* 822 * Set up level 1 page table 823 */ 824 825 /* Map all of the level 2 pte pages */ 826 for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) { 827 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) + 828 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 829 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 830 kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS + 831 (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte; 832 } 833 834 /* Map the virtual page table */ 835 pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT) 836 << PG_SHIFT; 837 pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */ 838 kernel_lev1map[l1pte_index(VPTBASE)] = pte; 839 VPT = (pt_entry_t *)VPTBASE; 840 841 #ifdef _PMAP_MAY_USE_PROM_CONSOLE 842 { 843 extern pt_entry_t prom_pte; /* XXX */ 844 extern int prom_mapped; /* XXX */ 845 846 if (pmap_uses_prom_console()) { 847 /* 848 * XXX Save old PTE so we can remap the PROM, if 849 * XXX necessary. 850 */ 851 prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM; 852 } 853 prom_mapped = 0; 854 855 /* 856 * Actually, this code lies. The prom is still mapped, and will 857 * remain so until the context switch after alpha_init() returns. 858 */ 859 } 860 #endif 861 862 /* 863 * Set up level 2 page table. 864 */ 865 /* Map all of the level 3 pte pages */ 866 for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) { 867 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) + 868 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 869 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 870 lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+ 871 (i*PAGE_SIZE*NPTEPG))] = pte; 872 } 873 874 /* Initialize the pmap_growkernel_lock. */ 875 rw_init(&pmap_growkernel_lock); 876 877 /* 878 * Set up level three page table (lev3map) 879 */ 880 /* Nothing to do; it's already zero'd */ 881 882 /* 883 * Initialize the pmap pools and list. 884 */ 885 pmap_ncpuids = ncpuids; 886 pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0, 887 0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL); 888 pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt", 889 &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL); 890 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 891 PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL, 892 NULL, NULL); 893 894 TAILQ_INIT(&pmap_all_pmaps); 895 896 /* 897 * Initialize the ASN logic. 898 */ 899 pmap_max_asn = maxasn; 900 for (i = 0; i < ALPHA_MAXPROCS; i++) { 901 pmap_asn_info[i].pma_asn = 1; 902 pmap_asn_info[i].pma_asngen = 0; 903 } 904 905 /* 906 * Initialize the locks. 907 */ 908 rw_init(&pmap_main_lock); 909 mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 910 for (i = 0; i < __arraycount(pmap_pvh_locks); i++) { 911 mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE); 912 } 913 914 /* 915 * Initialize kernel pmap. Note that all kernel mappings 916 * have PG_ASM set, so the ASN doesn't really matter for 917 * the kernel pmap. Also, since the kernel pmap always 918 * references kernel_lev1map, it always has an invalid ASN 919 * generation. 
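 *
 * (Because every kernel PTE carries PG_ASM, kernel translations match
 * under any ASN and survive the per-ASN invalidations that happen
 * when a new ASN generation starts; only an explicit per-VA or global
 * flush removes them.  That is why the kernel pmap can sit on
 * PMAP_ASN_RESERVED below and never needs the ASN machinery run on
 * its behalf.)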
920 */ 921 memset(pmap_kernel(), 0, sizeof(struct pmap)); 922 pmap_kernel()->pm_lev1map = kernel_lev1map; 923 pmap_kernel()->pm_count = 1; 924 for (i = 0; i < ALPHA_MAXPROCS; i++) { 925 pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 926 pmap_kernel()->pm_asni[i].pma_asngen = 927 pmap_asn_info[i].pma_asngen; 928 } 929 mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE); 930 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); 931 932 #if defined(MULTIPROCESSOR) 933 /* 934 * Initialize the TLB shootdown queues. 935 */ 936 pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache, 937 sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE, 938 0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL); 939 for (i = 0; i < ALPHA_MAXPROCS; i++) { 940 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); 941 mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT, 942 IPL_SCHED); 943 } 944 #endif 945 946 /* 947 * Set up lwp0's PCB such that the ptbr points to the right place 948 * and has the kernel pmap's (really unused) ASN. 949 */ 950 pcb = lwp_getpcb(&lwp0); 951 pcb->pcb_hw.apcb_ptbr = 952 ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT; 953 pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn; 954 955 /* 956 * Mark the kernel pmap `active' on this processor. 957 */ 958 atomic_or_ulong(&pmap_kernel()->pm_cpus, 959 (1UL << cpu_number())); 960 } 961 962 #ifdef _PMAP_MAY_USE_PROM_CONSOLE 963 int 964 pmap_uses_prom_console(void) 965 { 966 967 return (cputype == ST_DEC_21000); 968 } 969 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */ 970 971 /* 972 * pmap_virtual_space: [ INTERFACE ] 973 * 974 * Define the initial bounds of the kernel virtual address space. 975 */ 976 void 977 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) 978 { 979 980 *vstartp = VM_MIN_KERNEL_ADDRESS; /* kernel is in K0SEG */ 981 *vendp = VM_MAX_KERNEL_ADDRESS; /* we use pmap_growkernel */ 982 } 983 984 /* 985 * pmap_steal_memory: [ INTERFACE ] 986 * 987 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()). 988 * This function allows for early dynamic memory allocation until the 989 * virtual memory system has been bootstrapped. After that point, either 990 * kmem_alloc or malloc should be used. This function works by stealing 991 * pages from the (to be) managed page pool, then implicitly mapping the 992 * pages (by using their k0seg addresses) and zeroing them. 993 * 994 * It may be used once the physical memory segments have been pre-loaded 995 * into the vm_physmem[] array. Early memory allocation MUST use this 996 * interface! This cannot be used after vm_page_startup(), and will 997 * generate a panic if tried. 998 * 999 * Note that this memory will never be freed, and in essence it is wired 1000 * down. 1001 * 1002 * We must adjust *vstartp and/or *vendp iff we use address space 1003 * from the kernel virtual address range defined by pmap_virtual_space(). 1004 * 1005 * Note: no locking is necessary in this function. 
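 *
 * The interface amounts to the following (an illustrative sketch
 * only):
 *
 *	vaddr_t va = pmap_steal_memory(sz, NULL, NULL);
 *
 * which returns at least sz bytes (rounded up to whole pages) of
 * zeroed, permanently wired memory addressed through K0SEG.  Passing
 * NULL for vstartp/vendp is fine with this implementation because it
 * never adjusts them; the stolen pages consume no kernel virtual
 * address space.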
1006 */ 1007 vaddr_t 1008 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp) 1009 { 1010 int npgs; 1011 vaddr_t va; 1012 paddr_t pa; 1013 1014 uvm_physseg_t bank; 1015 1016 size = round_page(size); 1017 npgs = atop(size); 1018 1019 #if 0 1020 printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs); 1021 #endif 1022 1023 for (bank = uvm_physseg_get_first(); 1024 uvm_physseg_valid_p(bank); 1025 bank = uvm_physseg_get_next(bank)) { 1026 if (uvm.page_init_done == true) 1027 panic("pmap_steal_memory: called _after_ bootstrap"); 1028 1029 #if 0 1030 printf(" bank %d: avail_start 0x%"PRIxPADDR", start 0x%"PRIxPADDR", " 1031 "avail_end 0x%"PRIxPADDR"\n", bank, VM_PHYSMEM_PTR(bank)->avail_start, 1032 VM_PHYSMEM_PTR(bank)->start, VM_PHYSMEM_PTR(bank)->avail_end); 1033 #endif 1034 1035 if (uvm_physseg_get_avail_start(bank) != uvm_physseg_get_start(bank) || 1036 uvm_physseg_get_avail_start(bank) >= uvm_physseg_get_avail_end(bank)) 1037 continue; 1038 1039 #if 0 1040 printf(" avail_end - avail_start = 0x%"PRIxPADDR"\n", 1041 VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start); 1042 #endif 1043 1044 if (uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank) 1045 < npgs) 1046 continue; 1047 1048 /* 1049 * There are enough pages here; steal them! 1050 */ 1051 pa = ptoa(uvm_physseg_get_start(bank)); 1052 uvm_physseg_unplug(atop(pa), npgs); 1053 1054 va = ALPHA_PHYS_TO_K0SEG(pa); 1055 memset((void *)va, 0, size); 1056 pmap_pages_stolen += npgs; 1057 return (va); 1058 } 1059 1060 /* 1061 * If we got here, this was no memory left. 1062 */ 1063 panic("pmap_steal_memory: no memory to steal"); 1064 } 1065 1066 /* 1067 * pmap_init: [ INTERFACE ] 1068 * 1069 * Initialize the pmap module. Called by vm_init(), to initialize any 1070 * structures that the pmap system needs to map virtual memory. 1071 * 1072 * Note: no locking is necessary in this function. 1073 */ 1074 void 1075 pmap_init(void) 1076 { 1077 1078 #ifdef DEBUG 1079 if (pmapdebug & PDB_FOLLOW) 1080 printf("pmap_init()\n"); 1081 #endif 1082 1083 /* initialize protection array */ 1084 alpha_protection_init(); 1085 1086 /* 1087 * Set a low water mark on the pv_entry pool, so that we are 1088 * more likely to have these around even in extreme memory 1089 * starvation. 1090 */ 1091 pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat); 1092 1093 /* 1094 * Now it is safe to enable pv entry recording. 1095 */ 1096 pmap_initialized = true; 1097 1098 #if 0 1099 for (bank = 0; bank < vm_nphysseg; bank++) { 1100 printf("bank %d\n", bank); 1101 printf("\tstart = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->start)); 1102 printf("\tend = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->end)); 1103 printf("\tavail_start = 0x%x\n", 1104 ptoa(VM_PHYSMEM_PTR(bank)->avail_start)); 1105 printf("\tavail_end = 0x%x\n", 1106 ptoa(VM_PHYSMEM_PTR(bank)->avail_end)); 1107 } 1108 #endif 1109 } 1110 1111 /* 1112 * pmap_create: [ INTERFACE ] 1113 * 1114 * Create and return a physical map. 1115 * 1116 * Note: no locking is necessary in this function. 1117 */ 1118 pmap_t 1119 pmap_create(void) 1120 { 1121 pmap_t pmap; 1122 int i; 1123 1124 #ifdef DEBUG 1125 if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) 1126 printf("pmap_create()\n"); 1127 #endif 1128 1129 pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK); 1130 memset(pmap, 0, sizeof(*pmap)); 1131 1132 /* 1133 * Defer allocation of a new level 1 page table until 1134 * the first new mapping is entered; just take a reference 1135 * to the kernel kernel_lev1map. 
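 *
 * (Note that the deferral described above is not what the code below
 * actually does any more: this revision calls pmap_lev1map_create()
 * right away, retrying with kpause() until the allocation succeeds,
 * so a user pmap leaves pmap_create() with its own level 1 table.
 * See also the KASSERT in pmap_enter().)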
1136 */ 1137 pmap->pm_lev1map = kernel_lev1map; 1138 1139 pmap->pm_count = 1; 1140 for (i = 0; i < pmap_ncpuids; i++) { 1141 pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 1142 /* XXX Locking? */ 1143 pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; 1144 } 1145 mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); 1146 1147 try_again: 1148 rw_enter(&pmap_growkernel_lock, RW_READER); 1149 1150 if (pmap_lev1map_create(pmap, cpu_number()) != 0) { 1151 rw_exit(&pmap_growkernel_lock); 1152 (void) kpause("pmap_create", false, hz >> 2, NULL); 1153 goto try_again; 1154 } 1155 1156 mutex_enter(&pmap_all_pmaps_lock); 1157 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); 1158 mutex_exit(&pmap_all_pmaps_lock); 1159 1160 rw_exit(&pmap_growkernel_lock); 1161 1162 return (pmap); 1163 } 1164 1165 /* 1166 * pmap_destroy: [ INTERFACE ] 1167 * 1168 * Drop the reference count on the specified pmap, releasing 1169 * all resources if the reference count drops to zero. 1170 */ 1171 void 1172 pmap_destroy(pmap_t pmap) 1173 { 1174 1175 #ifdef DEBUG 1176 if (pmapdebug & PDB_FOLLOW) 1177 printf("pmap_destroy(%p)\n", pmap); 1178 #endif 1179 1180 if (atomic_dec_uint_nv(&pmap->pm_count) > 0) 1181 return; 1182 1183 rw_enter(&pmap_growkernel_lock, RW_READER); 1184 1185 /* 1186 * Remove it from the global list of all pmaps. 1187 */ 1188 mutex_enter(&pmap_all_pmaps_lock); 1189 TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); 1190 mutex_exit(&pmap_all_pmaps_lock); 1191 1192 pmap_lev1map_destroy(pmap, cpu_number()); 1193 1194 rw_exit(&pmap_growkernel_lock); 1195 1196 /* 1197 * Since the pmap is supposed to contain no valid 1198 * mappings at this point, we should always see 1199 * kernel_lev1map here. 1200 */ 1201 KASSERT(pmap->pm_lev1map == kernel_lev1map); 1202 1203 mutex_destroy(&pmap->pm_lock); 1204 pool_cache_put(&pmap_pmap_cache, pmap); 1205 } 1206 1207 /* 1208 * pmap_reference: [ INTERFACE ] 1209 * 1210 * Add a reference to the specified pmap. 1211 */ 1212 void 1213 pmap_reference(pmap_t pmap) 1214 { 1215 1216 #ifdef DEBUG 1217 if (pmapdebug & PDB_FOLLOW) 1218 printf("pmap_reference(%p)\n", pmap); 1219 #endif 1220 1221 atomic_inc_uint(&pmap->pm_count); 1222 } 1223 1224 /* 1225 * pmap_remove: [ INTERFACE ] 1226 * 1227 * Remove the given range of addresses from the specified map. 1228 * 1229 * It is assumed that the start and end are properly 1230 * rounded to the page size. 1231 */ 1232 void 1233 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 1234 { 1235 pt_entry_t *l1pte, *l2pte, *l3pte; 1236 pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte; 1237 vaddr_t l1eva, l2eva, vptva; 1238 bool needisync = false; 1239 long cpu_id = cpu_number(); 1240 1241 #ifdef DEBUG 1242 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1243 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1244 #endif 1245 1246 /* 1247 * If this is the kernel pmap, we can use a faster method 1248 * for accessing the PTEs (since the PT pages are always 1249 * resident). 1250 * 1251 * Note that this routine should NEVER be called from an 1252 * interrupt context; pmap_kremove() is used for that. 
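 *
 * The faster method is PMAP_KERNEL_PTE(), i.e. a single Virtual Page
 * Table lookup, which is valid for any kernel VA precisely because
 * kernel page table pages are never paged out or freed.  User pmaps,
 * handled further below, must do the full L1/L2/L3 walk and take and
 * drop a reference on each page table page along the way.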
1253 */ 1254 if (pmap == pmap_kernel()) { 1255 PMAP_MAP_TO_HEAD_LOCK(); 1256 PMAP_LOCK(pmap); 1257 1258 while (sva < eva) { 1259 l3pte = PMAP_KERNEL_PTE(sva); 1260 if (pmap_pte_v(l3pte)) { 1261 #ifdef DIAGNOSTIC 1262 if (uvm_pageismanaged(pmap_pte_pa(l3pte)) && 1263 pmap_pte_pv(l3pte) == 0) 1264 panic("pmap_remove: managed page " 1265 "without PG_PVLIST for 0x%lx", 1266 sva); 1267 #endif 1268 needisync |= pmap_remove_mapping(pmap, sva, 1269 l3pte, true, cpu_id); 1270 } 1271 sva += PAGE_SIZE; 1272 } 1273 1274 PMAP_UNLOCK(pmap); 1275 PMAP_MAP_TO_HEAD_UNLOCK(); 1276 1277 if (needisync) 1278 PMAP_SYNC_ISTREAM_KERNEL(); 1279 return; 1280 } 1281 1282 #ifdef DIAGNOSTIC 1283 if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS) 1284 panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel " 1285 "address range", sva, eva); 1286 #endif 1287 1288 PMAP_MAP_TO_HEAD_LOCK(); 1289 PMAP_LOCK(pmap); 1290 1291 /* 1292 * If we're already referencing the kernel_lev1map, there 1293 * is no work for us to do. 1294 */ 1295 if (pmap->pm_lev1map == kernel_lev1map) 1296 goto out; 1297 1298 saved_l1pte = l1pte = pmap_l1pte(pmap, sva); 1299 1300 /* 1301 * Add a reference to the L1 table to it won't get 1302 * removed from under us. 1303 */ 1304 pmap_physpage_addref(saved_l1pte); 1305 1306 for (; sva < eva; sva = l1eva, l1pte++) { 1307 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1308 if (pmap_pte_v(l1pte)) { 1309 saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte); 1310 1311 /* 1312 * Add a reference to the L2 table so it won't 1313 * get removed from under us. 1314 */ 1315 pmap_physpage_addref(saved_l2pte); 1316 1317 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1318 l2eva = 1319 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1320 if (pmap_pte_v(l2pte)) { 1321 saved_l3pte = l3pte = 1322 pmap_l3pte(pmap, sva, l2pte); 1323 1324 /* 1325 * Add a reference to the L3 table so 1326 * it won't get removed from under us. 1327 */ 1328 pmap_physpage_addref(saved_l3pte); 1329 1330 /* 1331 * Remember this sva; if the L3 table 1332 * gets removed, we need to invalidate 1333 * the VPT TLB entry for it. 1334 */ 1335 vptva = sva; 1336 1337 for (; sva < l2eva && sva < eva; 1338 sva += PAGE_SIZE, l3pte++) { 1339 if (!pmap_pte_v(l3pte)) { 1340 continue; 1341 } 1342 needisync |= 1343 pmap_remove_mapping( 1344 pmap, sva, 1345 l3pte, true, 1346 cpu_id); 1347 } 1348 1349 /* 1350 * Remove the reference to the L3 1351 * table that we added above. This 1352 * may free the L3 table. 1353 */ 1354 pmap_l3pt_delref(pmap, vptva, 1355 saved_l3pte, cpu_id); 1356 } 1357 } 1358 1359 /* 1360 * Remove the reference to the L2 table that we 1361 * added above. This may free the L2 table. 1362 */ 1363 pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id); 1364 } 1365 } 1366 1367 /* 1368 * Remove the reference to the L1 table that we added above. 1369 * This may free the L1 table. 1370 */ 1371 pmap_l1pt_delref(pmap, saved_l1pte, cpu_id); 1372 1373 if (needisync) 1374 PMAP_SYNC_ISTREAM_USER(pmap); 1375 1376 out: 1377 PMAP_UNLOCK(pmap); 1378 PMAP_MAP_TO_HEAD_UNLOCK(); 1379 } 1380 1381 /* 1382 * pmap_page_protect: [ INTERFACE ] 1383 * 1384 * Lower the permission for all mappings to a given page to 1385 * the permissions specified. 
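 *
 * Only two downgrades are meaningful here, mirroring the switch
 * below.  Dropping write permission (e.g. for copy-on-write):
 *
 *	pmap_page_protect(pg, VM_PROT_READ);
 *
 * clears PG_KWE/PG_UWE in every mapping of the page.  Revoking all
 * access:
 *
 *	pmap_page_protect(pg, VM_PROT_NONE);
 *
 * removes every mapping of the page outright.  Requests that still
 * include write permission are a no-op.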
1386 */ 1387 void 1388 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 1389 { 1390 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1391 pmap_t pmap; 1392 pv_entry_t pv, nextpv; 1393 bool needkisync = false; 1394 long cpu_id = cpu_number(); 1395 kmutex_t *lock; 1396 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1397 #ifdef DEBUG 1398 paddr_t pa = VM_PAGE_TO_PHYS(pg); 1399 1400 1401 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) || 1402 (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE))) 1403 printf("pmap_page_protect(%p, %x)\n", pg, prot); 1404 #endif 1405 1406 switch (prot) { 1407 case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE: 1408 case VM_PROT_READ|VM_PROT_WRITE: 1409 return; 1410 1411 /* copy_on_write */ 1412 case VM_PROT_READ|VM_PROT_EXECUTE: 1413 case VM_PROT_READ: 1414 PMAP_HEAD_TO_MAP_LOCK(); 1415 lock = pmap_pvh_lock(pg); 1416 mutex_enter(lock); 1417 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 1418 PMAP_LOCK(pv->pv_pmap); 1419 if (*pv->pv_pte & (PG_KWE | PG_UWE)) { 1420 *pv->pv_pte &= ~(PG_KWE | PG_UWE); 1421 PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va, 1422 pmap_pte_asm(pv->pv_pte), 1423 PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id); 1424 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va, 1425 pmap_pte_asm(pv->pv_pte)); 1426 } 1427 PMAP_UNLOCK(pv->pv_pmap); 1428 } 1429 mutex_exit(lock); 1430 PMAP_HEAD_TO_MAP_UNLOCK(); 1431 PMAP_TLB_SHOOTNOW(); 1432 return; 1433 1434 /* remove_all */ 1435 default: 1436 break; 1437 } 1438 1439 PMAP_HEAD_TO_MAP_LOCK(); 1440 lock = pmap_pvh_lock(pg); 1441 mutex_enter(lock); 1442 for (pv = md->pvh_list; pv != NULL; pv = nextpv) { 1443 nextpv = pv->pv_next; 1444 pmap = pv->pv_pmap; 1445 1446 PMAP_LOCK(pmap); 1447 #ifdef DEBUG 1448 if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 || 1449 pmap_pte_pa(pv->pv_pte) != pa) 1450 panic("pmap_page_protect: bad mapping"); 1451 #endif 1452 if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte, 1453 false, cpu_id) == true) { 1454 if (pmap == pmap_kernel()) 1455 needkisync |= true; 1456 else 1457 PMAP_SYNC_ISTREAM_USER(pmap); 1458 } 1459 PMAP_UNLOCK(pmap); 1460 } 1461 1462 if (needkisync) 1463 PMAP_SYNC_ISTREAM_KERNEL(); 1464 1465 mutex_exit(lock); 1466 PMAP_HEAD_TO_MAP_UNLOCK(); 1467 } 1468 1469 /* 1470 * pmap_protect: [ INTERFACE ] 1471 * 1472 * Set the physical protection on the specified range of this map 1473 * as requested. 
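 *
 * Two cases in the code below are worth noting: a request that
 * removes read access entirely degenerates to pmap_remove() on the
 * range, and any request that includes VM_PROT_EXECUTE finishes with
 * a PMAP_SYNC_ISTREAM() so the I-stream cannot keep using stale
 * translations.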
1474 */ 1475 void 1476 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1477 { 1478 pt_entry_t *l1pte, *l2pte, *l3pte, bits; 1479 bool isactive; 1480 bool hadasm; 1481 vaddr_t l1eva, l2eva; 1482 long cpu_id = cpu_number(); 1483 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1484 1485 #ifdef DEBUG 1486 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) 1487 printf("pmap_protect(%p, %lx, %lx, %x)\n", 1488 pmap, sva, eva, prot); 1489 #endif 1490 1491 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1492 pmap_remove(pmap, sva, eva); 1493 return; 1494 } 1495 1496 PMAP_LOCK(pmap); 1497 1498 bits = pte_prot(pmap, prot); 1499 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1500 1501 l1pte = pmap_l1pte(pmap, sva); 1502 for (; sva < eva; sva = l1eva, l1pte++) { 1503 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1504 if (pmap_pte_v(l1pte)) { 1505 l2pte = pmap_l2pte(pmap, sva, l1pte); 1506 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1507 l2eva = 1508 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1509 if (pmap_pte_v(l2pte)) { 1510 l3pte = pmap_l3pte(pmap, sva, l2pte); 1511 for (; sva < l2eva && sva < eva; 1512 sva += PAGE_SIZE, l3pte++) { 1513 if (pmap_pte_v(l3pte) && 1514 pmap_pte_prot_chg(l3pte, 1515 bits)) { 1516 hadasm = 1517 (pmap_pte_asm(l3pte) 1518 != 0); 1519 pmap_pte_set_prot(l3pte, 1520 bits); 1521 PMAP_INVALIDATE_TLB( 1522 pmap, sva, hadasm, 1523 isactive, cpu_id); 1524 PMAP_TLB_SHOOTDOWN( 1525 pmap, sva, 1526 hadasm ? PG_ASM : 0); 1527 } 1528 } 1529 } 1530 } 1531 } 1532 } 1533 1534 PMAP_TLB_SHOOTNOW(); 1535 1536 if (prot & VM_PROT_EXECUTE) 1537 PMAP_SYNC_ISTREAM(pmap); 1538 1539 PMAP_UNLOCK(pmap); 1540 } 1541 1542 /* 1543 * pmap_enter: [ INTERFACE ] 1544 * 1545 * Insert the given physical page (p) at 1546 * the specified virtual address (v) in the 1547 * target physical map with the protection requested. 1548 * 1549 * If specified, the page will be wired down, meaning 1550 * that the related pte can not be reclaimed. 1551 * 1552 * Note: This is the only routine which MAY NOT lazy-evaluate 1553 * or lose information. That is, this routine must actually 1554 * insert this page into the given map NOW. 1555 */ 1556 int 1557 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1558 { 1559 struct vm_page *pg; /* if != NULL, managed page */ 1560 pt_entry_t *pte, npte, opte; 1561 paddr_t opa; 1562 bool tflush = true; 1563 bool hadasm = false; /* XXX gcc -Wuninitialized */ 1564 bool needisync = false; 1565 bool setisync = false; 1566 bool isactive; 1567 bool wired; 1568 long cpu_id = cpu_number(); 1569 int error = 0; 1570 kmutex_t *lock; 1571 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1572 1573 #ifdef DEBUG 1574 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1575 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n", 1576 pmap, va, pa, prot, flags); 1577 #endif 1578 pg = PHYS_TO_VM_PAGE(pa); 1579 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1580 wired = (flags & PMAP_WIRED) != 0; 1581 1582 /* 1583 * Determine what we need to do about the I-stream. If 1584 * VM_PROT_EXECUTE is set, we mark a user pmap as needing 1585 * an I-sync on the way back out to userspace. We always 1586 * need an immediate I-sync for the kernel pmap. 1587 */ 1588 if (prot & VM_PROT_EXECUTE) { 1589 if (pmap == pmap_kernel()) 1590 needisync = true; 1591 else { 1592 setisync = true; 1593 needisync = (pmap->pm_cpus != 0); 1594 } 1595 } 1596 1597 PMAP_MAP_TO_HEAD_LOCK(); 1598 PMAP_LOCK(pmap); 1599 1600 if (pmap == pmap_kernel()) { 1601 #ifdef DIAGNOSTIC 1602 /* 1603 * Sanity check the virtual address. 
1604 */ 1605 if (va < VM_MIN_KERNEL_ADDRESS) 1606 panic("pmap_enter: kernel pmap, invalid va 0x%lx", va); 1607 #endif 1608 pte = PMAP_KERNEL_PTE(va); 1609 } else { 1610 pt_entry_t *l1pte, *l2pte; 1611 1612 #ifdef DIAGNOSTIC 1613 /* 1614 * Sanity check the virtual address. 1615 */ 1616 if (va >= VM_MAXUSER_ADDRESS) 1617 panic("pmap_enter: user pmap, invalid va 0x%lx", va); 1618 #endif 1619 1620 KASSERT(pmap->pm_lev1map != kernel_lev1map); 1621 1622 /* 1623 * Check to see if the level 1 PTE is valid, and 1624 * allocate a new level 2 page table page if it's not. 1625 * A reference will be added to the level 2 table when 1626 * the level 3 table is created. 1627 */ 1628 l1pte = pmap_l1pte(pmap, va); 1629 if (pmap_pte_v(l1pte) == 0) { 1630 pmap_physpage_addref(l1pte); 1631 error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT); 1632 if (error) { 1633 pmap_l1pt_delref(pmap, l1pte, cpu_id); 1634 if (flags & PMAP_CANFAIL) 1635 goto out; 1636 panic("pmap_enter: unable to create L2 PT " 1637 "page"); 1638 } 1639 #ifdef DEBUG 1640 if (pmapdebug & PDB_PTPAGE) 1641 printf("pmap_enter: new level 2 table at " 1642 "0x%lx\n", pmap_pte_pa(l1pte)); 1643 #endif 1644 } 1645 1646 /* 1647 * Check to see if the level 2 PTE is valid, and 1648 * allocate a new level 3 page table page if it's not. 1649 * A reference will be added to the level 3 table when 1650 * the mapping is validated. 1651 */ 1652 l2pte = pmap_l2pte(pmap, va, l1pte); 1653 if (pmap_pte_v(l2pte) == 0) { 1654 pmap_physpage_addref(l2pte); 1655 error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT); 1656 if (error) { 1657 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id); 1658 if (flags & PMAP_CANFAIL) 1659 goto out; 1660 panic("pmap_enter: unable to create L3 PT " 1661 "page"); 1662 } 1663 #ifdef DEBUG 1664 if (pmapdebug & PDB_PTPAGE) 1665 printf("pmap_enter: new level 3 table at " 1666 "0x%lx\n", pmap_pte_pa(l2pte)); 1667 #endif 1668 } 1669 1670 /* 1671 * Get the PTE that will map the page. 1672 */ 1673 pte = pmap_l3pte(pmap, va, l2pte); 1674 } 1675 1676 /* Remember all of the old PTE; used for TBI check later. */ 1677 opte = *pte; 1678 1679 /* 1680 * Check to see if the old mapping is valid. If not, validate the 1681 * new one immediately. 1682 */ 1683 if (pmap_pte_v(pte) == 0) { 1684 /* 1685 * No need to invalidate the TLB in this case; an invalid 1686 * mapping won't be in the TLB, and a previously valid 1687 * mapping would have been flushed when it was invalidated. 1688 */ 1689 tflush = false; 1690 1691 /* 1692 * No need to synchronize the I-stream, either, for basically 1693 * the same reason. 1694 */ 1695 setisync = needisync = false; 1696 1697 if (pmap != pmap_kernel()) { 1698 /* 1699 * New mappings gain a reference on the level 3 1700 * table. 1701 */ 1702 pmap_physpage_addref(pte); 1703 } 1704 goto validate_enterpv; 1705 } 1706 1707 opa = pmap_pte_pa(pte); 1708 hadasm = (pmap_pte_asm(pte) != 0); 1709 1710 if (opa == pa) { 1711 /* 1712 * Mapping has not changed; must be a protection or 1713 * wiring change. 1714 */ 1715 if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) { 1716 #ifdef DEBUG 1717 if (pmapdebug & PDB_ENTER) 1718 printf("pmap_enter: wiring change -> %d\n", 1719 wired); 1720 #endif 1721 /* 1722 * Adjust the wiring count. 1723 */ 1724 if (wired) 1725 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1726 else 1727 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1728 } 1729 1730 /* 1731 * Set the PTE. 1732 */ 1733 goto validate; 1734 } 1735 1736 /* 1737 * The mapping has changed. We need to invalidate the 1738 * old mapping before creating the new one. 
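 *
 * To summarize the three paths through pmap_enter():
 *
 *	old PTE invalid   -> no TLB or I-stream work needed; take a
 *	                     level 3 table reference (user pmaps) and
 *	                     jump to validate_enterpv
 *	same PA as before -> protection and/or wiring change only;
 *	                     jump straight to validate
 *	different PA      -> remove the old mapping here, then fall
 *	                     through to validate_enterpv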
1739 */ 1740 #ifdef DEBUG 1741 if (pmapdebug & PDB_ENTER) 1742 printf("pmap_enter: removing old mapping 0x%lx\n", va); 1743 #endif 1744 if (pmap != pmap_kernel()) { 1745 /* 1746 * Gain an extra reference on the level 3 table. 1747 * pmap_remove_mapping() will delete a reference, 1748 * and we don't want the table to be erroneously 1749 * freed. 1750 */ 1751 pmap_physpage_addref(pte); 1752 } 1753 needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id); 1754 1755 validate_enterpv: 1756 /* 1757 * Enter the mapping into the pv_table if appropriate. 1758 */ 1759 if (pg != NULL) { 1760 error = pmap_pv_enter(pmap, pg, va, pte, true); 1761 if (error) { 1762 pmap_l3pt_delref(pmap, va, pte, cpu_id); 1763 if (flags & PMAP_CANFAIL) 1764 goto out; 1765 panic("pmap_enter: unable to enter mapping in PV " 1766 "table"); 1767 } 1768 } 1769 1770 /* 1771 * Increment counters. 1772 */ 1773 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1774 if (wired) 1775 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1776 1777 validate: 1778 /* 1779 * Build the new PTE. 1780 */ 1781 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V; 1782 if (pg != NULL) { 1783 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1784 int attrs; 1785 1786 #ifdef DIAGNOSTIC 1787 if ((flags & VM_PROT_ALL) & ~prot) 1788 panic("pmap_enter: access type exceeds prot"); 1789 #endif 1790 lock = pmap_pvh_lock(pg); 1791 mutex_enter(lock); 1792 if (flags & VM_PROT_WRITE) 1793 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 1794 else if (flags & VM_PROT_ALL) 1795 md->pvh_attrs |= PGA_REFERENCED; 1796 attrs = md->pvh_attrs; 1797 mutex_exit(lock); 1798 1799 /* 1800 * Set up referenced/modified emulation for new mapping. 1801 */ 1802 if ((attrs & PGA_REFERENCED) == 0) 1803 npte |= PG_FOR | PG_FOW | PG_FOE; 1804 else if ((attrs & PGA_MODIFIED) == 0) 1805 npte |= PG_FOW; 1806 1807 /* 1808 * Mapping was entered on PV list. 1809 */ 1810 npte |= PG_PVLIST; 1811 } 1812 if (wired) 1813 npte |= PG_WIRED; 1814 #ifdef DEBUG 1815 if (pmapdebug & PDB_ENTER) 1816 printf("pmap_enter: new pte = 0x%lx\n", npte); 1817 #endif 1818 1819 /* 1820 * If the PALcode portion of the new PTE is the same as the 1821 * old PTE, no TBI is necessary. 1822 */ 1823 if (PG_PALCODE(opte) == PG_PALCODE(npte)) 1824 tflush = false; 1825 1826 /* 1827 * Set the new PTE. 1828 */ 1829 PMAP_SET_PTE(pte, npte); 1830 1831 /* 1832 * Invalidate the TLB entry for this VA and any appropriate 1833 * caches. 1834 */ 1835 if (tflush) { 1836 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 1837 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 1838 PMAP_TLB_SHOOTNOW(); 1839 } 1840 if (setisync) 1841 PMAP_SET_NEEDISYNC(pmap); 1842 if (needisync) 1843 PMAP_SYNC_ISTREAM(pmap); 1844 1845 out: 1846 PMAP_UNLOCK(pmap); 1847 PMAP_MAP_TO_HEAD_UNLOCK(); 1848 1849 return error; 1850 } 1851 1852 /* 1853 * pmap_kenter_pa: [ INTERFACE ] 1854 * 1855 * Enter a va -> pa mapping into the kernel pmap without any 1856 * physical->virtual tracking. 1857 * 1858 * Note: no locking is necessary in this function. 1859 */ 1860 void 1861 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1862 { 1863 pt_entry_t *pte, npte; 1864 long cpu_id = cpu_number(); 1865 bool needisync = false; 1866 pmap_t pmap = pmap_kernel(); 1867 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1868 1869 #ifdef DEBUG 1870 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1871 printf("pmap_kenter_pa(%lx, %lx, %x)\n", 1872 va, pa, prot); 1873 #endif 1874 1875 #ifdef DIAGNOSTIC 1876 /* 1877 * Sanity check the virtual address. 
1878 */
1879 	if (va < VM_MIN_KERNEL_ADDRESS)
1880 		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1881 #endif
1882 
1883 	pte = PMAP_KERNEL_PTE(va);
1884 
1885 	if (pmap_pte_v(pte) == 0)
1886 		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1887 	if (pmap_pte_w(pte) == 0)
1888 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1889 
1890 	if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte))
1891 		needisync = true;
1892 
1893 	/*
1894 	 * Build the new PTE.
1895 	 */
1896 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1897 	    PG_V | PG_WIRED;
1898 
1899 	/*
1900 	 * Set the new PTE.
1901 	 */
1902 	PMAP_SET_PTE(pte, npte);
1903 #if defined(MULTIPROCESSOR)
1904 	alpha_mb();		/* XXX alpha_wmb()? */
1905 #endif
1906 
1907 	/*
1908 	 * Invalidate the TLB entry for this VA and any appropriate
1909 	 * caches.
1910 	 */
1911 	PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1912 	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1913 	PMAP_TLB_SHOOTNOW();
1914 
1915 	if (needisync)
1916 		PMAP_SYNC_ISTREAM_KERNEL();
1917 }
1918 
1919 /*
1920  * pmap_kremove:		[ INTERFACE ]
1921  *
1922  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1923  *	for size bytes (assumed to be page rounded).
1924  */
1925 void
1926 pmap_kremove(vaddr_t va, vsize_t size)
1927 {
1928 	pt_entry_t *pte;
1929 	bool needisync = false;
1930 	long cpu_id = cpu_number();
1931 	pmap_t pmap = pmap_kernel();
1932 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1933 
1934 #ifdef DEBUG
1935 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1936 		printf("pmap_kremove(%lx, %lx)\n",
1937 		    va, size);
1938 #endif
1939 
1940 #ifdef DIAGNOSTIC
1941 	if (va < VM_MIN_KERNEL_ADDRESS)
1942 		panic("pmap_kremove: user address");
1943 #endif
1944 
1945 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
1946 		pte = PMAP_KERNEL_PTE(va);
1947 		if (pmap_pte_v(pte)) {
1948 #ifdef DIAGNOSTIC
1949 			if (pmap_pte_pv(pte))
1950 				panic("pmap_kremove: PG_PVLIST mapping for "
1951 				    "0x%lx", va);
1952 #endif
1953 			if (pmap_pte_exec(pte))
1954 				needisync = true;
1955 
1956 			/* Zap the mapping. */
1957 			PMAP_SET_PTE(pte, PG_NV);
1958 #if defined(MULTIPROCESSOR)
1959 			alpha_mb();		/* XXX alpha_wmb()? */
1960 #endif
1961 			PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1962 			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1963 
1964 			/* Update stats. */
1965 			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
1966 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1967 		}
1968 	}
1969 
1970 	PMAP_TLB_SHOOTNOW();
1971 
1972 	if (needisync)
1973 		PMAP_SYNC_ISTREAM_KERNEL();
1974 }
1975 
1976 /*
1977  * pmap_unwire:		[ INTERFACE ]
1978  *
1979  *	Clear the wired attribute for a map/virtual-address pair.
1980  *
1981  *	The mapping must already exist in the pmap.
1982  */
1983 void
1984 pmap_unwire(pmap_t pmap, vaddr_t va)
1985 {
1986 	pt_entry_t *pte;
1987 
1988 #ifdef DEBUG
1989 	if (pmapdebug & PDB_FOLLOW)
1990 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
1991 #endif
1992 
1993 	PMAP_LOCK(pmap);
1994 
1995 	pte = pmap_l3pte(pmap, va, NULL);
1996 #ifdef DIAGNOSTIC
1997 	if (pte == NULL || pmap_pte_v(pte) == 0)
1998 		panic("pmap_unwire");
1999 #endif
2000 
2001 	/*
2002 	 * If the wiring actually changed (it should always have), clear
2003 	 * the wire bit and update the wire count.  Note that wiring is not
2004 	 * a hardware characteristic, so there is no need to invalidate the TLB.
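	 *
	 * Illustration (assumed semantics, inferred only from how the
	 * macro is used in this file): pmap_pte_w_chg(pte, nw) is taken
	 * to report whether the wired bit would change, roughly
	 *
	 *	pmap_pte_w_chg(pte, nw)  ==  ((nw) ^ (*(pte) & PG_WIRED))
	 *
	 * so the block below only touches the PTE and the wired count
	 * when the mapping really was wired.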
2005 */ 2006 if (pmap_pte_w_chg(pte, 0)) { 2007 pmap_pte_set_w(pte, false); 2008 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2009 } 2010 #ifdef DIAGNOSTIC 2011 else { 2012 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 2013 "didn't change!\n", pmap, va); 2014 } 2015 #endif 2016 2017 PMAP_UNLOCK(pmap); 2018 } 2019 2020 /* 2021 * pmap_extract: [ INTERFACE ] 2022 * 2023 * Extract the physical address associated with the given 2024 * pmap/virtual address pair. 2025 */ 2026 bool 2027 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 2028 { 2029 pt_entry_t *l1pte, *l2pte, *l3pte; 2030 paddr_t pa; 2031 2032 #ifdef DEBUG 2033 if (pmapdebug & PDB_FOLLOW) 2034 printf("pmap_extract(%p, %lx) -> ", pmap, va); 2035 #endif 2036 2037 /* 2038 * Take a faster path for the kernel pmap. Avoids locking, 2039 * handles K0SEG. 2040 */ 2041 if (pmap == pmap_kernel()) { 2042 pa = vtophys(va); 2043 if (pap != NULL) 2044 *pap = pa; 2045 #ifdef DEBUG 2046 if (pmapdebug & PDB_FOLLOW) 2047 printf("0x%lx (kernel vtophys)\n", pa); 2048 #endif 2049 return (pa != 0); /* XXX */ 2050 } 2051 2052 PMAP_LOCK(pmap); 2053 2054 l1pte = pmap_l1pte(pmap, va); 2055 if (pmap_pte_v(l1pte) == 0) 2056 goto out; 2057 2058 l2pte = pmap_l2pte(pmap, va, l1pte); 2059 if (pmap_pte_v(l2pte) == 0) 2060 goto out; 2061 2062 l3pte = pmap_l3pte(pmap, va, l2pte); 2063 if (pmap_pte_v(l3pte) == 0) 2064 goto out; 2065 2066 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 2067 PMAP_UNLOCK(pmap); 2068 if (pap != NULL) 2069 *pap = pa; 2070 #ifdef DEBUG 2071 if (pmapdebug & PDB_FOLLOW) 2072 printf("0x%lx\n", pa); 2073 #endif 2074 return (true); 2075 2076 out: 2077 PMAP_UNLOCK(pmap); 2078 #ifdef DEBUG 2079 if (pmapdebug & PDB_FOLLOW) 2080 printf("failed\n"); 2081 #endif 2082 return (false); 2083 } 2084 2085 /* 2086 * pmap_copy: [ INTERFACE ] 2087 * 2088 * Copy the mapping range specified by src_addr/len 2089 * from the source map to the range dst_addr/len 2090 * in the destination map. 2091 * 2092 * This routine is only advisory and need not do anything. 2093 */ 2094 /* call deleted in <machine/pmap.h> */ 2095 2096 /* 2097 * pmap_update: [ INTERFACE ] 2098 * 2099 * Require that all active physical maps contain no 2100 * incorrect entries NOW, by processing any deferred 2101 * pmap operations. 2102 */ 2103 /* call deleted in <machine/pmap.h> */ 2104 2105 /* 2106 * pmap_activate: [ INTERFACE ] 2107 * 2108 * Activate the pmap used by the specified process. This includes 2109 * reloading the MMU context if the current process, and marking 2110 * the pmap in use by the processor. 2111 */ 2112 void 2113 pmap_activate(struct lwp *l) 2114 { 2115 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2116 long cpu_id = cpu_number(); 2117 2118 #ifdef DEBUG 2119 if (pmapdebug & PDB_FOLLOW) 2120 printf("pmap_activate(%p)\n", l); 2121 #endif 2122 2123 /* Mark the pmap in use by this processor. */ 2124 atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id)); 2125 2126 /* Allocate an ASN. */ 2127 pmap_asn_alloc(pmap, cpu_id); 2128 2129 PMAP_ACTIVATE(pmap, l, cpu_id); 2130 } 2131 2132 /* 2133 * pmap_deactivate: [ INTERFACE ] 2134 * 2135 * Mark that the pmap used by the specified process is no longer 2136 * in use by the processor. 2137 * 2138 * The comment above pmap_activate() wrt. locking applies here, 2139 * as well. Note that we use only a single `atomic' operation, 2140 * so no locking is necessary. 
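 *
 *	Illustration (a summary of usage elsewhere in this file, not a
 *	new interface): pm_cpus is a bitmask with one bit per CPU on
 *	which the pmap is currently active, maintained with
 *
 *		atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id));
 *		atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_id));
 *
 *	in pmap_activate() and below.  pmap_remove_mapping() and the
 *	TLB shootdown code consult this set to decide whether other
 *	processors need an IPI or an I-stream sync.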
2141 */
2142 void
2143 pmap_deactivate(struct lwp *l)
2144 {
2145 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2146 
2147 #ifdef DEBUG
2148 	if (pmapdebug & PDB_FOLLOW)
2149 		printf("pmap_deactivate(%p)\n", l);
2150 #endif
2151 
2152 	/*
2153 	 * Mark the pmap no longer in use by this processor.
2154 	 */
2155 	atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
2156 }
2157 
2158 /*
2159  * pmap_zero_page:		[ INTERFACE ]
2160  *
2161  *	Zero the specified (machine independent) page by mapping the page
2162  *	into virtual memory and clearing its contents, one machine dependent
2163  *	page at a time.
2164  *
2165  *	Note: no locking is necessary in this function.
2166  */
2167 void
2168 pmap_zero_page(paddr_t phys)
2169 {
2170 	u_long *p0, *p1, *pend;
2171 
2172 #ifdef DEBUG
2173 	if (pmapdebug & PDB_FOLLOW)
2174 		printf("pmap_zero_page(%lx)\n", phys);
2175 #endif
2176 
2177 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2178 	p1 = NULL;
2179 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2180 
2181 	/*
2182 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2183 	 * Do only 8 back-to-back stores, and alternate registers.
2184 	 */
2185 	do {
2186 		__asm volatile(
2187 		"# BEGIN loop body\n"
2188 		"	addq	%2, (8 * 8), %1		\n"
2189 		"	stq	$31, (0 * 8)(%0)	\n"
2190 		"	stq	$31, (1 * 8)(%0)	\n"
2191 		"	stq	$31, (2 * 8)(%0)	\n"
2192 		"	stq	$31, (3 * 8)(%0)	\n"
2193 		"	stq	$31, (4 * 8)(%0)	\n"
2194 		"	stq	$31, (5 * 8)(%0)	\n"
2195 		"	stq	$31, (6 * 8)(%0)	\n"
2196 		"	stq	$31, (7 * 8)(%0)	\n"
2197 		"	\n"
2198 		"	addq	%3, (8 * 8), %0		\n"
2199 		"	stq	$31, (0 * 8)(%1)	\n"
2200 		"	stq	$31, (1 * 8)(%1)	\n"
2201 		"	stq	$31, (2 * 8)(%1)	\n"
2202 		"	stq	$31, (3 * 8)(%1)	\n"
2203 		"	stq	$31, (4 * 8)(%1)	\n"
2204 		"	stq	$31, (5 * 8)(%1)	\n"
2205 		"	stq	$31, (6 * 8)(%1)	\n"
2206 		"	stq	$31, (7 * 8)(%1)	\n"
2207 		"	# END loop body"
2208 		: "=r" (p0), "=r" (p1)
2209 		: "0" (p0), "1" (p1)
2210 		: "memory");
2211 	} while (p0 < pend);
2212 }
2213 
2214 /*
2215  * pmap_copy_page:		[ INTERFACE ]
2216  *
2217  *	Copy the specified (machine independent) page by mapping the page
2218  *	into virtual memory and using memcpy to copy the page, one machine
2219  *	dependent page at a time.
2220  *
2221  *	Note: no locking is necessary in this function.
2222  */
2223 void
2224 pmap_copy_page(paddr_t src, paddr_t dst)
2225 {
2226 	const void *s;
2227 	void *d;
2228 
2229 #ifdef DEBUG
2230 	if (pmapdebug & PDB_FOLLOW)
2231 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2232 #endif
2233 	s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
2234 	d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
2235 	memcpy(d, s, PAGE_SIZE);
2236 }
2237 
2238 /*
2239  * pmap_pageidlezero:	[ INTERFACE ]
2240  *
2241  *	Page zero'er for the idle loop.  Returns true if the
2242  *	page was zero'd, false if we aborted for some reason.
2243  */
2244 bool
2245 pmap_pageidlezero(paddr_t pa)
2246 {
2247 	u_long *ptr;
2248 	int i, cnt = PAGE_SIZE / sizeof(u_long);
2249 
2250 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2251 		if (sched_curcpu_runnable_p()) {
2252 			/*
2253 			 * An LWP has become ready.  Abort now,
2254 			 * so we don't keep it waiting while we
2255 			 * finish zeroing the page.
2256 			 */
2257 			return (false);
2258 		}
2259 		*ptr++ = 0;
2260 	}
2261 
2262 	return (true);
2263 }
2264 
2265 /*
2266  * pmap_clear_modify:	[ INTERFACE ]
2267  *
2268  *	Clear the modify bits on the specified physical page.
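 *
 *	Illustration (background only, no new behaviour): the Alpha has
 *	no hardware modified bit, so "clearing" it below means re-arming
 *	the fault-on-write trap in every mapping of the page:
 *
 *		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
 *		md->pvh_attrs &= ~PGA_MODIFIED;
 *
 *	The next store through any mapping then takes an FOW fault and
 *	pmap_emulate_reference() marks the page modified again.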
2269 */ 2270 bool 2271 pmap_clear_modify(struct vm_page *pg) 2272 { 2273 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2274 bool rv = false; 2275 long cpu_id = cpu_number(); 2276 kmutex_t *lock; 2277 2278 #ifdef DEBUG 2279 if (pmapdebug & PDB_FOLLOW) 2280 printf("pmap_clear_modify(%p)\n", pg); 2281 #endif 2282 2283 PMAP_HEAD_TO_MAP_LOCK(); 2284 lock = pmap_pvh_lock(pg); 2285 mutex_enter(lock); 2286 2287 if (md->pvh_attrs & PGA_MODIFIED) { 2288 rv = true; 2289 pmap_changebit(pg, PG_FOW, ~0, cpu_id); 2290 md->pvh_attrs &= ~PGA_MODIFIED; 2291 } 2292 2293 mutex_exit(lock); 2294 PMAP_HEAD_TO_MAP_UNLOCK(); 2295 2296 return (rv); 2297 } 2298 2299 /* 2300 * pmap_clear_reference: [ INTERFACE ] 2301 * 2302 * Clear the reference bit on the specified physical page. 2303 */ 2304 bool 2305 pmap_clear_reference(struct vm_page *pg) 2306 { 2307 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2308 bool rv = false; 2309 long cpu_id = cpu_number(); 2310 kmutex_t *lock; 2311 2312 #ifdef DEBUG 2313 if (pmapdebug & PDB_FOLLOW) 2314 printf("pmap_clear_reference(%p)\n", pg); 2315 #endif 2316 2317 PMAP_HEAD_TO_MAP_LOCK(); 2318 lock = pmap_pvh_lock(pg); 2319 mutex_enter(lock); 2320 2321 if (md->pvh_attrs & PGA_REFERENCED) { 2322 rv = true; 2323 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id); 2324 md->pvh_attrs &= ~PGA_REFERENCED; 2325 } 2326 2327 mutex_exit(lock); 2328 PMAP_HEAD_TO_MAP_UNLOCK(); 2329 2330 return (rv); 2331 } 2332 2333 /* 2334 * pmap_is_referenced: [ INTERFACE ] 2335 * 2336 * Return whether or not the specified physical page is referenced 2337 * by any physical maps. 2338 */ 2339 /* See <machine/pmap.h> */ 2340 2341 /* 2342 * pmap_is_modified: [ INTERFACE ] 2343 * 2344 * Return whether or not the specified physical page is modified 2345 * by any physical maps. 2346 */ 2347 /* See <machine/pmap.h> */ 2348 2349 /* 2350 * pmap_phys_address: [ INTERFACE ] 2351 * 2352 * Return the physical address corresponding to the specified 2353 * cookie. Used by the device pager to decode a device driver's 2354 * mmap entry point return value. 2355 * 2356 * Note: no locking is necessary in this function. 2357 */ 2358 paddr_t 2359 pmap_phys_address(paddr_t ppn) 2360 { 2361 2362 return (alpha_ptob(ppn)); 2363 } 2364 2365 /* 2366 * Miscellaneous support routines follow 2367 */ 2368 2369 /* 2370 * alpha_protection_init: 2371 * 2372 * Initialize Alpha protection code array. 2373 * 2374 * Note: no locking is necessary in this function. 2375 */ 2376 static void 2377 alpha_protection_init(void) 2378 { 2379 int prot, *kp, *up; 2380 2381 kp = protection_codes[0]; 2382 up = protection_codes[1]; 2383 2384 for (prot = 0; prot < 8; prot++) { 2385 kp[prot] = PG_ASM; 2386 up[prot] = 0; 2387 2388 if (prot & VM_PROT_READ) { 2389 kp[prot] |= PG_KRE; 2390 up[prot] |= PG_KRE | PG_URE; 2391 } 2392 if (prot & VM_PROT_WRITE) { 2393 kp[prot] |= PG_KWE; 2394 up[prot] |= PG_KWE | PG_UWE; 2395 } 2396 if (prot & VM_PROT_EXECUTE) { 2397 kp[prot] |= PG_EXEC | PG_KRE; 2398 up[prot] |= PG_EXEC | PG_KRE | PG_URE; 2399 } else { 2400 kp[prot] |= PG_FOE; 2401 up[prot] |= PG_FOE; 2402 } 2403 } 2404 } 2405 2406 /* 2407 * pmap_remove_mapping: 2408 * 2409 * Invalidate a single page denoted by pmap/va. 2410 * 2411 * If (pte != NULL), it is the already computed PTE for the page. 2412 * 2413 * Note: locking in this function is complicated by the fact 2414 * that we can be called when the PV list is already locked. 2415 * (pmap_page_protect()). 
In this case, the caller must be 2416 * careful to get the next PV entry while we remove this entry 2417 * from beneath it. We assume that the pmap itself is already 2418 * locked; dolock applies only to the PV list. 2419 * 2420 * Returns true or false, indicating if an I-stream sync needs 2421 * to be initiated (for this CPU or for other CPUs). 2422 */ 2423 static bool 2424 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte, 2425 bool dolock, long cpu_id) 2426 { 2427 paddr_t pa; 2428 struct vm_page *pg; /* if != NULL, page is managed */ 2429 bool onpv; 2430 bool hadasm; 2431 bool isactive; 2432 bool needisync = false; 2433 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2434 2435 #ifdef DEBUG 2436 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 2437 printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n", 2438 pmap, va, pte, dolock, cpu_id); 2439 #endif 2440 2441 /* 2442 * PTE not provided, compute it from pmap and va. 2443 */ 2444 if (pte == NULL) { 2445 pte = pmap_l3pte(pmap, va, NULL); 2446 if (pmap_pte_v(pte) == 0) 2447 return (false); 2448 } 2449 2450 pa = pmap_pte_pa(pte); 2451 onpv = (pmap_pte_pv(pte) != 0); 2452 hadasm = (pmap_pte_asm(pte) != 0); 2453 isactive = PMAP_ISACTIVE(pmap, cpu_id); 2454 2455 /* 2456 * Determine what we need to do about the I-stream. If 2457 * PG_EXEC was set, we mark a user pmap as needing an 2458 * I-sync on the way out to userspace. We always need 2459 * an immediate I-sync for the kernel pmap. 2460 */ 2461 if (pmap_pte_exec(pte)) { 2462 if (pmap == pmap_kernel()) 2463 needisync = true; 2464 else { 2465 PMAP_SET_NEEDISYNC(pmap); 2466 needisync = (pmap->pm_cpus != 0); 2467 } 2468 } 2469 2470 /* 2471 * Update statistics 2472 */ 2473 if (pmap_pte_w(pte)) 2474 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2475 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 2476 2477 /* 2478 * Invalidate the PTE after saving the reference modify info. 2479 */ 2480 #ifdef DEBUG 2481 if (pmapdebug & PDB_REMOVE) 2482 printf("remove: invalidating pte at %p\n", pte); 2483 #endif 2484 PMAP_SET_PTE(pte, PG_NV); 2485 2486 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 2487 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 2488 PMAP_TLB_SHOOTNOW(); 2489 2490 /* 2491 * If we're removing a user mapping, check to see if we 2492 * can free page table pages. 2493 */ 2494 if (pmap != pmap_kernel()) { 2495 /* 2496 * Delete the reference on the level 3 table. It will 2497 * delete references on the level 2 and 1 tables as 2498 * appropriate. 2499 */ 2500 pmap_l3pt_delref(pmap, va, pte, cpu_id); 2501 } 2502 2503 /* 2504 * If the mapping wasn't entered on the PV list, we're all done. 2505 */ 2506 if (onpv == false) 2507 return (needisync); 2508 2509 /* 2510 * Remove it from the PV table. 2511 */ 2512 pg = PHYS_TO_VM_PAGE(pa); 2513 KASSERT(pg != NULL); 2514 pmap_pv_remove(pmap, pg, va, dolock); 2515 2516 return (needisync); 2517 } 2518 2519 /* 2520 * pmap_changebit: 2521 * 2522 * Set or clear the specified PTE bits for all mappings on the 2523 * specified page. 2524 * 2525 * Note: we assume that the pv_head is already locked, and that 2526 * the caller has acquired a PV->pmap mutex so that we can lock 2527 * the pmaps as we encounter them. 
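 *
 *	Illustration (calls taken from this file): the set/mask pair is
 *	applied as "npte = (*pte | set) & mask", for example
 *
 *		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
 *		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
 *		pmap_changebit(pg, 0, ~faultoff, cpu_id);
 *
 *	from pmap_clear_modify(), pmap_clear_reference() and
 *	pmap_emulate_reference() respectively: "set" re-arms fault-on
 *	bits, "mask" strips bits while a fault is being serviced.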
2528 */ 2529 static void 2530 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id) 2531 { 2532 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2533 pv_entry_t pv; 2534 pt_entry_t *pte, npte; 2535 vaddr_t va; 2536 bool hadasm, isactive; 2537 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2538 2539 #ifdef DEBUG 2540 if (pmapdebug & PDB_BITS) 2541 printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n", 2542 pg, set, mask); 2543 #endif 2544 2545 /* 2546 * Loop over all current mappings setting/clearing as apropos. 2547 */ 2548 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2549 va = pv->pv_va; 2550 2551 PMAP_LOCK(pv->pv_pmap); 2552 2553 pte = pv->pv_pte; 2554 npte = (*pte | set) & mask; 2555 if (*pte != npte) { 2556 hadasm = (pmap_pte_asm(pte) != 0); 2557 isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id); 2558 PMAP_SET_PTE(pte, npte); 2559 PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive, 2560 cpu_id); 2561 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va, 2562 hadasm ? PG_ASM : 0); 2563 } 2564 PMAP_UNLOCK(pv->pv_pmap); 2565 } 2566 2567 PMAP_TLB_SHOOTNOW(); 2568 } 2569 2570 /* 2571 * pmap_emulate_reference: 2572 * 2573 * Emulate reference and/or modified bit hits. 2574 * Return 1 if this was an execute fault on a non-exec mapping, 2575 * otherwise return 0. 2576 */ 2577 int 2578 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) 2579 { 2580 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2581 pt_entry_t faultoff, *pte; 2582 struct vm_page *pg; 2583 paddr_t pa; 2584 bool didlock = false; 2585 bool exec = false; 2586 long cpu_id = cpu_number(); 2587 kmutex_t *lock; 2588 2589 #ifdef DEBUG 2590 if (pmapdebug & PDB_FOLLOW) 2591 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n", 2592 l, v, user, type); 2593 #endif 2594 2595 /* 2596 * Convert process and virtual address to physical address. 2597 */ 2598 if (v >= VM_MIN_KERNEL_ADDRESS) { 2599 if (user) 2600 panic("pmap_emulate_reference: user ref to kernel"); 2601 /* 2602 * No need to lock here; kernel PT pages never go away. 2603 */ 2604 pte = PMAP_KERNEL_PTE(v); 2605 } else { 2606 #ifdef DIAGNOSTIC 2607 if (l == NULL) 2608 panic("pmap_emulate_reference: bad proc"); 2609 if (l->l_proc->p_vmspace == NULL) 2610 panic("pmap_emulate_reference: bad p_vmspace"); 2611 #endif 2612 PMAP_LOCK(pmap); 2613 didlock = true; 2614 pte = pmap_l3pte(pmap, v, NULL); 2615 /* 2616 * We'll unlock below where we're done with the PTE. 2617 */ 2618 } 2619 exec = pmap_pte_exec(pte); 2620 if (!exec && type == ALPHA_MMCSR_FOE) { 2621 if (didlock) 2622 PMAP_UNLOCK(pmap); 2623 return (1); 2624 } 2625 #ifdef DEBUG 2626 if (pmapdebug & PDB_FOLLOW) { 2627 printf("\tpte = %p, ", pte); 2628 printf("*pte = 0x%lx\n", *pte); 2629 } 2630 #endif 2631 #ifdef DEBUG /* These checks are more expensive */ 2632 if (!pmap_pte_v(pte)) 2633 panic("pmap_emulate_reference: invalid pte"); 2634 if (type == ALPHA_MMCSR_FOW) { 2635 if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE))) 2636 panic("pmap_emulate_reference: write but unwritable"); 2637 if (!(*pte & PG_FOW)) 2638 panic("pmap_emulate_reference: write but not FOW"); 2639 } else { 2640 if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE))) 2641 panic("pmap_emulate_reference: !write but unreadable"); 2642 if (!(*pte & (PG_FOR | PG_FOE))) 2643 panic("pmap_emulate_reference: !write but not FOR|FOE"); 2644 } 2645 /* Other diagnostics? */ 2646 #endif 2647 pa = pmap_pte_pa(pte); 2648 2649 /* 2650 * We're now done with the PTE. If it was a user pmap, unlock 2651 * it now. 
2652 */ 2653 if (didlock) 2654 PMAP_UNLOCK(pmap); 2655 2656 #ifdef DEBUG 2657 if (pmapdebug & PDB_FOLLOW) 2658 printf("\tpa = 0x%lx\n", pa); 2659 #endif 2660 #ifdef DIAGNOSTIC 2661 if (!uvm_pageismanaged(pa)) 2662 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): " 2663 "pa 0x%lx not managed", l, v, user, type, pa); 2664 #endif 2665 2666 /* 2667 * Twiddle the appropriate bits to reflect the reference 2668 * and/or modification.. 2669 * 2670 * The rules: 2671 * (1) always mark page as used, and 2672 * (2) if it was a write fault, mark page as modified. 2673 */ 2674 pg = PHYS_TO_VM_PAGE(pa); 2675 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2676 2677 PMAP_HEAD_TO_MAP_LOCK(); 2678 lock = pmap_pvh_lock(pg); 2679 mutex_enter(lock); 2680 2681 if (type == ALPHA_MMCSR_FOW) { 2682 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 2683 faultoff = PG_FOR | PG_FOW; 2684 } else { 2685 md->pvh_attrs |= PGA_REFERENCED; 2686 faultoff = PG_FOR; 2687 if (exec) { 2688 faultoff |= PG_FOE; 2689 } 2690 } 2691 pmap_changebit(pg, 0, ~faultoff, cpu_id); 2692 2693 mutex_exit(lock); 2694 PMAP_HEAD_TO_MAP_UNLOCK(); 2695 return (0); 2696 } 2697 2698 #ifdef DEBUG 2699 /* 2700 * pmap_pv_dump: 2701 * 2702 * Dump the physical->virtual data for the specified page. 2703 */ 2704 void 2705 pmap_pv_dump(paddr_t pa) 2706 { 2707 struct vm_page *pg; 2708 struct vm_page_md *md; 2709 pv_entry_t pv; 2710 kmutex_t *lock; 2711 2712 pg = PHYS_TO_VM_PAGE(pa); 2713 md = VM_PAGE_TO_MD(pg); 2714 2715 lock = pmap_pvh_lock(pg); 2716 mutex_enter(lock); 2717 2718 printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs); 2719 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) 2720 printf(" pmap %p, va 0x%lx\n", 2721 pv->pv_pmap, pv->pv_va); 2722 printf("\n"); 2723 2724 mutex_exit(lock); 2725 } 2726 #endif 2727 2728 /* 2729 * vtophys: 2730 * 2731 * Return the physical address corresponding to the K0SEG or 2732 * K1SEG address provided. 2733 * 2734 * Note: no locking is necessary in this function. 2735 */ 2736 paddr_t 2737 vtophys(vaddr_t vaddr) 2738 { 2739 pt_entry_t *pte; 2740 paddr_t paddr = 0; 2741 2742 if (vaddr < ALPHA_K0SEG_BASE) 2743 printf("vtophys: invalid vaddr 0x%lx", vaddr); 2744 else if (vaddr <= ALPHA_K0SEG_END) 2745 paddr = ALPHA_K0SEG_TO_PHYS(vaddr); 2746 else { 2747 pte = PMAP_KERNEL_PTE(vaddr); 2748 if (pmap_pte_v(pte)) 2749 paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET); 2750 } 2751 2752 #if 0 2753 printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr); 2754 #endif 2755 2756 return (paddr); 2757 } 2758 2759 /******************** pv_entry management ********************/ 2760 2761 /* 2762 * pmap_pv_enter: 2763 * 2764 * Add a physical->virtual entry to the pv_table. 2765 */ 2766 static int 2767 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, 2768 bool dolock) 2769 { 2770 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2771 pv_entry_t newpv; 2772 kmutex_t *lock; 2773 2774 /* 2775 * Allocate and fill in the new pv_entry. 2776 */ 2777 newpv = pmap_pv_alloc(); 2778 if (newpv == NULL) 2779 return ENOMEM; 2780 newpv->pv_va = va; 2781 newpv->pv_pmap = pmap; 2782 newpv->pv_pte = pte; 2783 2784 if (dolock) { 2785 lock = pmap_pvh_lock(pg); 2786 mutex_enter(lock); 2787 } 2788 2789 #ifdef DEBUG 2790 { 2791 pv_entry_t pv; 2792 /* 2793 * Make sure the entry doesn't already exist. 
2794 */ 2795 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2796 if (pmap == pv->pv_pmap && va == pv->pv_va) { 2797 printf("pmap = %p, va = 0x%lx\n", pmap, va); 2798 panic("pmap_pv_enter: already in pv table"); 2799 } 2800 } 2801 } 2802 #endif 2803 2804 /* 2805 * ...and put it in the list. 2806 */ 2807 newpv->pv_next = md->pvh_list; 2808 md->pvh_list = newpv; 2809 2810 if (dolock) { 2811 mutex_exit(lock); 2812 } 2813 2814 return 0; 2815 } 2816 2817 /* 2818 * pmap_pv_remove: 2819 * 2820 * Remove a physical->virtual entry from the pv_table. 2821 */ 2822 static void 2823 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) 2824 { 2825 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2826 pv_entry_t pv, *pvp; 2827 kmutex_t *lock; 2828 2829 if (dolock) { 2830 lock = pmap_pvh_lock(pg); 2831 mutex_enter(lock); 2832 } else { 2833 lock = NULL; /* XXX stupid gcc */ 2834 } 2835 2836 /* 2837 * Find the entry to remove. 2838 */ 2839 for (pvp = &md->pvh_list, pv = *pvp; 2840 pv != NULL; pvp = &pv->pv_next, pv = *pvp) 2841 if (pmap == pv->pv_pmap && va == pv->pv_va) 2842 break; 2843 2844 #ifdef DEBUG 2845 if (pv == NULL) 2846 panic("pmap_pv_remove: not in pv table"); 2847 #endif 2848 2849 *pvp = pv->pv_next; 2850 2851 if (dolock) { 2852 mutex_exit(lock); 2853 } 2854 2855 pmap_pv_free(pv); 2856 } 2857 2858 /* 2859 * pmap_pv_page_alloc: 2860 * 2861 * Allocate a page for the pv_entry pool. 2862 */ 2863 static void * 2864 pmap_pv_page_alloc(struct pool *pp, int flags) 2865 { 2866 paddr_t pg; 2867 2868 if (pmap_physpage_alloc(PGU_PVENT, &pg)) 2869 return ((void *)ALPHA_PHYS_TO_K0SEG(pg)); 2870 return (NULL); 2871 } 2872 2873 /* 2874 * pmap_pv_page_free: 2875 * 2876 * Free a pv_entry pool page. 2877 */ 2878 static void 2879 pmap_pv_page_free(struct pool *pp, void *v) 2880 { 2881 2882 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v)); 2883 } 2884 2885 /******************** misc. functions ********************/ 2886 2887 /* 2888 * pmap_physpage_alloc: 2889 * 2890 * Allocate a single page from the VM system and return the 2891 * physical address for that page. 2892 */ 2893 static bool 2894 pmap_physpage_alloc(int usage, paddr_t *pap) 2895 { 2896 struct vm_page *pg; 2897 paddr_t pa; 2898 2899 /* 2900 * Don't ask for a zero'd page in the L1PT case -- we will 2901 * properly initialize it in the constructor. 2902 */ 2903 2904 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ? 2905 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2906 if (pg != NULL) { 2907 pa = VM_PAGE_TO_PHYS(pg); 2908 #ifdef DEBUG 2909 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2910 if (md->pvh_refcnt != 0) { 2911 printf("pmap_physpage_alloc: page 0x%lx has " 2912 "%d references\n", pa, md->pvh_refcnt); 2913 panic("pmap_physpage_alloc"); 2914 } 2915 #endif 2916 *pap = pa; 2917 return (true); 2918 } 2919 return (false); 2920 } 2921 2922 /* 2923 * pmap_physpage_free: 2924 * 2925 * Free the single page table page at the specified physical address. 2926 */ 2927 static void 2928 pmap_physpage_free(paddr_t pa) 2929 { 2930 struct vm_page *pg; 2931 2932 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) 2933 panic("pmap_physpage_free: bogus physical page address"); 2934 2935 #ifdef DEBUG 2936 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2937 if (md->pvh_refcnt != 0) 2938 panic("pmap_physpage_free: page still has references"); 2939 #endif 2940 2941 uvm_pagefree(pg); 2942 } 2943 2944 /* 2945 * pmap_physpage_addref: 2946 * 2947 * Add a reference to the specified special use page. 
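 *
 *	Illustration (restating the code below, not a new contract):
 *	"kva" is any K0SEG pointer into the page of interest, typically
 *	a PTE within a page table page; the page is recovered with
 *
 *		pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
 *
 *	and its pvh_refcnt counts the users of that page table page, so
 *	that pmap_l3pt_delref()/pmap_l2pt_delref()/pmap_l1pt_delref()
 *	can free the page when the count drops to zero.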
2948 */ 2949 static int 2950 pmap_physpage_addref(void *kva) 2951 { 2952 struct vm_page *pg; 2953 struct vm_page_md *md; 2954 paddr_t pa; 2955 2956 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2957 pg = PHYS_TO_VM_PAGE(pa); 2958 md = VM_PAGE_TO_MD(pg); 2959 2960 KASSERT((int)md->pvh_refcnt >= 0); 2961 2962 return atomic_inc_uint_nv(&md->pvh_refcnt); 2963 } 2964 2965 /* 2966 * pmap_physpage_delref: 2967 * 2968 * Delete a reference to the specified special use page. 2969 */ 2970 static int 2971 pmap_physpage_delref(void *kva) 2972 { 2973 struct vm_page *pg; 2974 struct vm_page_md *md; 2975 paddr_t pa; 2976 2977 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2978 pg = PHYS_TO_VM_PAGE(pa); 2979 md = VM_PAGE_TO_MD(pg); 2980 2981 KASSERT((int)md->pvh_refcnt > 0); 2982 2983 return atomic_dec_uint_nv(&md->pvh_refcnt); 2984 } 2985 2986 /******************** page table page management ********************/ 2987 2988 /* 2989 * pmap_growkernel: [ INTERFACE ] 2990 * 2991 * Grow the kernel address space. This is a hint from the 2992 * upper layer to pre-allocate more kernel PT pages. 2993 */ 2994 vaddr_t 2995 pmap_growkernel(vaddr_t maxkvaddr) 2996 { 2997 struct pmap *kpm = pmap_kernel(), *pm; 2998 paddr_t ptaddr; 2999 pt_entry_t *l1pte, *l2pte, pte; 3000 vaddr_t va; 3001 int l1idx; 3002 3003 rw_enter(&pmap_growkernel_lock, RW_WRITER); 3004 3005 if (maxkvaddr <= virtual_end) 3006 goto out; /* we are OK */ 3007 3008 va = virtual_end; 3009 3010 while (va < maxkvaddr) { 3011 /* 3012 * If there is no valid L1 PTE (i.e. no L2 PT page), 3013 * allocate a new L2 PT page and insert it into the 3014 * L1 map. 3015 */ 3016 l1pte = pmap_l1pte(kpm, va); 3017 if (pmap_pte_v(l1pte) == 0) { 3018 /* 3019 * XXX PGU_NORMAL? It's not a "traditional" PT page. 3020 */ 3021 if (uvm.page_init_done == false) { 3022 /* 3023 * We're growing the kernel pmap early (from 3024 * uvm_pageboot_alloc()). This case must 3025 * be handled a little differently. 3026 */ 3027 ptaddr = ALPHA_K0SEG_TO_PHYS( 3028 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3029 } else if (pmap_physpage_alloc(PGU_NORMAL, 3030 &ptaddr) == false) 3031 goto die; 3032 pte = (atop(ptaddr) << PG_SHIFT) | 3033 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3034 *l1pte = pte; 3035 3036 l1idx = l1pte_index(va); 3037 3038 /* Update all the user pmaps. */ 3039 mutex_enter(&pmap_all_pmaps_lock); 3040 for (pm = TAILQ_FIRST(&pmap_all_pmaps); 3041 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { 3042 /* Skip the kernel pmap. */ 3043 if (pm == pmap_kernel()) 3044 continue; 3045 3046 PMAP_LOCK(pm); 3047 if (pm->pm_lev1map == kernel_lev1map) { 3048 PMAP_UNLOCK(pm); 3049 continue; 3050 } 3051 pm->pm_lev1map[l1idx] = pte; 3052 PMAP_UNLOCK(pm); 3053 } 3054 mutex_exit(&pmap_all_pmaps_lock); 3055 } 3056 3057 /* 3058 * Have an L2 PT page now, add the L3 PT page. 3059 */ 3060 l2pte = pmap_l2pte(kpm, va, l1pte); 3061 KASSERT(pmap_pte_v(l2pte) == 0); 3062 if (uvm.page_init_done == false) { 3063 /* 3064 * See above. 3065 */ 3066 ptaddr = ALPHA_K0SEG_TO_PHYS( 3067 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3068 } else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false) 3069 goto die; 3070 *l2pte = (atop(ptaddr) << PG_SHIFT) | 3071 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3072 va += ALPHA_L2SEG_SIZE; 3073 } 3074 3075 /* Invalidate the L1 PT cache. 
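	 * Idle L1 pages sitting in pmap_l1pt_cache were stamped by
	 * pmap_l1pt_ctor() with a copy of the old kernel entries and are
	 * not on the pmap_all_pmaps list, so the loop above could not
	 * fix them up; draining the cache forces future allocations to
	 * re-copy kernel_lev1map.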
*/ 3076 pool_cache_invalidate(&pmap_l1pt_cache); 3077 3078 virtual_end = va; 3079 3080 out: 3081 rw_exit(&pmap_growkernel_lock); 3082 3083 return (virtual_end); 3084 3085 die: 3086 panic("pmap_growkernel: out of memory"); 3087 } 3088 3089 /* 3090 * pmap_lev1map_create: 3091 * 3092 * Create a new level 1 page table for the specified pmap. 3093 * 3094 * Note: growkernel must already be held and the pmap either 3095 * already locked or unreferenced globally. 3096 */ 3097 static int 3098 pmap_lev1map_create(pmap_t pmap, long cpu_id) 3099 { 3100 pt_entry_t *l1pt; 3101 3102 KASSERT(pmap != pmap_kernel()); 3103 3104 KASSERT(pmap->pm_lev1map == kernel_lev1map); 3105 KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED); 3106 3107 /* Don't sleep -- we're called with locks held. */ 3108 l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT); 3109 if (l1pt == NULL) 3110 return (ENOMEM); 3111 3112 pmap->pm_lev1map = l1pt; 3113 return (0); 3114 } 3115 3116 /* 3117 * pmap_lev1map_destroy: 3118 * 3119 * Destroy the level 1 page table for the specified pmap. 3120 * 3121 * Note: growkernel must be held and the pmap must already be 3122 * locked or not globally referenced. 3123 */ 3124 static void 3125 pmap_lev1map_destroy(pmap_t pmap, long cpu_id) 3126 { 3127 pt_entry_t *l1pt = pmap->pm_lev1map; 3128 3129 KASSERT(pmap != pmap_kernel()); 3130 3131 /* 3132 * Go back to referencing the global kernel_lev1map. 3133 */ 3134 pmap->pm_lev1map = kernel_lev1map; 3135 3136 /* 3137 * Free the old level 1 page table page. 3138 */ 3139 pool_cache_put(&pmap_l1pt_cache, l1pt); 3140 } 3141 3142 /* 3143 * pmap_l1pt_ctor: 3144 * 3145 * Pool cache constructor for L1 PT pages. 3146 * 3147 * Note: The growkernel lock is held across allocations 3148 * from our pool_cache, so we don't need to acquire it 3149 * ourselves. 3150 */ 3151 static int 3152 pmap_l1pt_ctor(void *arg, void *object, int flags) 3153 { 3154 pt_entry_t *l1pt = object, pte; 3155 int i; 3156 3157 /* 3158 * Initialize the new level 1 table by zeroing the 3159 * user portion and copying the kernel mappings into 3160 * the kernel portion. 3161 */ 3162 for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++) 3163 l1pt[i] = 0; 3164 3165 for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS); 3166 i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++) 3167 l1pt[i] = kernel_lev1map[i]; 3168 3169 /* 3170 * Now, map the new virtual page table. NOTE: NO ASM! 3171 */ 3172 pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) | 3173 PG_V | PG_KRE | PG_KWE; 3174 l1pt[l1pte_index(VPTBASE)] = pte; 3175 3176 return (0); 3177 } 3178 3179 /* 3180 * pmap_l1pt_alloc: 3181 * 3182 * Page alloctaor for L1 PT pages. 3183 */ 3184 static void * 3185 pmap_l1pt_alloc(struct pool *pp, int flags) 3186 { 3187 paddr_t ptpa; 3188 3189 /* 3190 * Attempt to allocate a free page. 3191 */ 3192 if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false) 3193 return (NULL); 3194 3195 return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa)); 3196 } 3197 3198 /* 3199 * pmap_l1pt_free: 3200 * 3201 * Page freer for L1 PT pages. 3202 */ 3203 static void 3204 pmap_l1pt_free(struct pool *pp, void *v) 3205 { 3206 3207 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v)); 3208 } 3209 3210 /* 3211 * pmap_ptpage_alloc: 3212 * 3213 * Allocate a level 2 or level 3 page table page, and 3214 * initialize the PTE that references it. 3215 * 3216 * Note: the pmap must already be locked. 
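 *
 *	Illustration (usage taken from pmap_enter() above):
 *
 *		error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
 *		error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
 *
 *	On success the referencing PTE is made valid, kernel read/write
 *	and wired, with PG_ASM added when the pmap is the kernel pmap.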
3217 */ 3218 static int 3219 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage) 3220 { 3221 paddr_t ptpa; 3222 3223 /* 3224 * Allocate the page table page. 3225 */ 3226 if (pmap_physpage_alloc(usage, &ptpa) == false) 3227 return (ENOMEM); 3228 3229 /* 3230 * Initialize the referencing PTE. 3231 */ 3232 PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) | 3233 PG_V | PG_KRE | PG_KWE | PG_WIRED | 3234 (pmap == pmap_kernel() ? PG_ASM : 0)); 3235 3236 return (0); 3237 } 3238 3239 /* 3240 * pmap_ptpage_free: 3241 * 3242 * Free the level 2 or level 3 page table page referenced 3243 * be the provided PTE. 3244 * 3245 * Note: the pmap must already be locked. 3246 */ 3247 static void 3248 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte) 3249 { 3250 paddr_t ptpa; 3251 3252 /* 3253 * Extract the physical address of the page from the PTE 3254 * and clear the entry. 3255 */ 3256 ptpa = pmap_pte_pa(pte); 3257 PMAP_SET_PTE(pte, PG_NV); 3258 3259 #ifdef DEBUG 3260 pmap_zero_page(ptpa); 3261 #endif 3262 pmap_physpage_free(ptpa); 3263 } 3264 3265 /* 3266 * pmap_l3pt_delref: 3267 * 3268 * Delete a reference on a level 3 PT page. If the reference drops 3269 * to zero, free it. 3270 * 3271 * Note: the pmap must already be locked. 3272 */ 3273 static void 3274 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id) 3275 { 3276 pt_entry_t *l1pte, *l2pte; 3277 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 3278 3279 l1pte = pmap_l1pte(pmap, va); 3280 l2pte = pmap_l2pte(pmap, va, l1pte); 3281 3282 #ifdef DIAGNOSTIC 3283 if (pmap == pmap_kernel()) 3284 panic("pmap_l3pt_delref: kernel pmap"); 3285 #endif 3286 3287 if (pmap_physpage_delref(l3pte) == 0) { 3288 /* 3289 * No more mappings; we can free the level 3 table. 3290 */ 3291 #ifdef DEBUG 3292 if (pmapdebug & PDB_PTPAGE) 3293 printf("pmap_l3pt_delref: freeing level 3 table at " 3294 "0x%lx\n", pmap_pte_pa(l2pte)); 3295 #endif 3296 pmap_ptpage_free(pmap, l2pte); 3297 3298 /* 3299 * We've freed a level 3 table, so we must 3300 * invalidate the TLB entry for that PT page 3301 * in the Virtual Page Table VA range, because 3302 * otherwise the PALcode will service a TLB 3303 * miss using the stale VPT TLB entry it entered 3304 * behind our back to shortcut to the VA's PTE. 3305 */ 3306 PMAP_INVALIDATE_TLB(pmap, 3307 (vaddr_t)(&VPT[VPT_INDEX(va)]), false, 3308 PMAP_ISACTIVE(pmap, cpu_id), cpu_id); 3309 PMAP_TLB_SHOOTDOWN(pmap, 3310 (vaddr_t)(&VPT[VPT_INDEX(va)]), 0); 3311 PMAP_TLB_SHOOTNOW(); 3312 3313 /* 3314 * We've freed a level 3 table, so delete the reference 3315 * on the level 2 table. 3316 */ 3317 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id); 3318 } 3319 } 3320 3321 /* 3322 * pmap_l2pt_delref: 3323 * 3324 * Delete a reference on a level 2 PT page. If the reference drops 3325 * to zero, free it. 3326 * 3327 * Note: the pmap must already be locked. 3328 */ 3329 static void 3330 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte, 3331 long cpu_id) 3332 { 3333 3334 #ifdef DIAGNOSTIC 3335 if (pmap == pmap_kernel()) 3336 panic("pmap_l2pt_delref: kernel pmap"); 3337 #endif 3338 3339 if (pmap_physpage_delref(l2pte) == 0) { 3340 /* 3341 * No more mappings in this segment; we can free the 3342 * level 2 table. 3343 */ 3344 #ifdef DEBUG 3345 if (pmapdebug & PDB_PTPAGE) 3346 printf("pmap_l2pt_delref: freeing level 2 table at " 3347 "0x%lx\n", pmap_pte_pa(l1pte)); 3348 #endif 3349 pmap_ptpage_free(pmap, l1pte); 3350 3351 /* 3352 * We've freed a level 2 table, so delete the reference 3353 * on the level 1 table. 
3354 */ 3355 pmap_l1pt_delref(pmap, l1pte, cpu_id); 3356 } 3357 } 3358 3359 /* 3360 * pmap_l1pt_delref: 3361 * 3362 * Delete a reference on a level 1 PT page. If the reference drops 3363 * to zero, free it. 3364 * 3365 * Note: the pmap must already be locked. 3366 */ 3367 static void 3368 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id) 3369 { 3370 3371 #ifdef DIAGNOSTIC 3372 if (pmap == pmap_kernel()) 3373 panic("pmap_l1pt_delref: kernel pmap"); 3374 #endif 3375 3376 (void)pmap_physpage_delref(l1pte); 3377 } 3378 3379 /******************** Address Space Number management ********************/ 3380 3381 /* 3382 * pmap_asn_alloc: 3383 * 3384 * Allocate and assign an ASN to the specified pmap. 3385 * 3386 * Note: the pmap must already be locked. This may be called from 3387 * an interprocessor interrupt, and in that case, the sender of 3388 * the IPI has the pmap lock. 3389 */ 3390 static void 3391 pmap_asn_alloc(pmap_t pmap, long cpu_id) 3392 { 3393 struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id]; 3394 struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id]; 3395 3396 #ifdef DEBUG 3397 if (pmapdebug & (PDB_FOLLOW|PDB_ASN)) 3398 printf("pmap_asn_alloc(%p)\n", pmap); 3399 #endif 3400 3401 /* 3402 * If the pmap is still using the global kernel_lev1map, there 3403 * is no need to assign an ASN at this time, because only 3404 * kernel mappings exist in that map, and all kernel mappings 3405 * have PG_ASM set. If the pmap eventually gets its own 3406 * lev1map, an ASN will be allocated at that time. 3407 * 3408 * Only the kernel pmap will reference kernel_lev1map. Do the 3409 * same old fixups, but note that we no longer need the pmap 3410 * to be locked if we're in this mode, since pm_lev1map will 3411 * never change. 3412 * #endif 3413 */ 3414 if (pmap->pm_lev1map == kernel_lev1map) { 3415 #ifdef DEBUG 3416 if (pmapdebug & PDB_ASN) 3417 printf("pmap_asn_alloc: still references " 3418 "kernel_lev1map\n"); 3419 #endif 3420 #if defined(MULTIPROCESSOR) 3421 /* 3422 * In a multiprocessor system, it's possible to 3423 * get here without having PMAP_ASN_RESERVED in 3424 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy(). 3425 * 3426 * So, what we do here, is simply assign the reserved 3427 * ASN for kernel_lev1map users and let things 3428 * continue on. We do, however, let uniprocessor 3429 * configurations continue to make its assertion. 3430 */ 3431 pma->pma_asn = PMAP_ASN_RESERVED; 3432 #else 3433 KASSERT(pma->pma_asn == PMAP_ASN_RESERVED); 3434 #endif /* MULTIPROCESSOR */ 3435 return; 3436 } 3437 3438 /* 3439 * On processors which do not implement ASNs, the swpctx PALcode 3440 * operation will automatically invalidate the TLB and I-cache, 3441 * so we don't need to do that here. 3442 */ 3443 if (pmap_max_asn == 0) { 3444 /* 3445 * Refresh the pmap's generation number, to 3446 * simplify logic elsewhere. 3447 */ 3448 pma->pma_asngen = cpma->pma_asngen; 3449 #ifdef DEBUG 3450 if (pmapdebug & PDB_ASN) 3451 printf("pmap_asn_alloc: no ASNs, using asngen %lu\n", 3452 pma->pma_asngen); 3453 #endif 3454 return; 3455 } 3456 3457 /* 3458 * Hopefully, we can continue using the one we have... 3459 */ 3460 if (pma->pma_asn != PMAP_ASN_RESERVED && 3461 pma->pma_asngen == cpma->pma_asngen) { 3462 /* 3463 * ASN is still in the current generation; keep on using it. 3464 */ 3465 #ifdef DEBUG 3466 if (pmapdebug & PDB_ASN) 3467 printf("pmap_asn_alloc: same generation, keeping %u\n", 3468 pma->pma_asn); 3469 #endif 3470 return; 3471 } 3472 3473 /* 3474 * Need to assign a new ASN. 
Grab the next one, incrementing 3475 * the generation number if we have to. 3476 */ 3477 if (cpma->pma_asn > pmap_max_asn) { 3478 /* 3479 * Invalidate all non-PG_ASM TLB entries and the 3480 * I-cache, and bump the generation number. 3481 */ 3482 ALPHA_TBIAP(); 3483 alpha_pal_imb(); 3484 3485 cpma->pma_asn = 1; 3486 cpma->pma_asngen++; 3487 #ifdef DIAGNOSTIC 3488 if (cpma->pma_asngen == 0) { 3489 /* 3490 * The generation number has wrapped. We could 3491 * handle this scenario by traversing all of 3492 * the pmaps, and invalidating the generation 3493 * number on those which are not currently 3494 * in use by this processor. 3495 * 3496 * However... considering that we're using 3497 * an unsigned 64-bit integer for generation 3498 * numbers, on non-ASN CPUs, we won't wrap 3499 * for approx. 585 million years, or 75 billion 3500 * years on a 128-ASN CPU (assuming 1000 switch 3501 * operations per second). 3502 * 3503 * So, we don't bother. 3504 */ 3505 panic("pmap_asn_alloc: too much uptime"); 3506 } 3507 #endif 3508 #ifdef DEBUG 3509 if (pmapdebug & PDB_ASN) 3510 printf("pmap_asn_alloc: generation bumped to %lu\n", 3511 cpma->pma_asngen); 3512 #endif 3513 } 3514 3515 /* 3516 * Assign the new ASN and validate the generation number. 3517 */ 3518 pma->pma_asn = cpma->pma_asn++; 3519 pma->pma_asngen = cpma->pma_asngen; 3520 3521 #ifdef DEBUG 3522 if (pmapdebug & PDB_ASN) 3523 printf("pmap_asn_alloc: assigning %u to pmap %p\n", 3524 pma->pma_asn, pmap); 3525 #endif 3526 3527 /* 3528 * Have a new ASN, so there's no need to sync the I-stream 3529 * on the way back out to userspace. 3530 */ 3531 atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id)); 3532 } 3533 3534 #if defined(MULTIPROCESSOR) 3535 /******************** TLB shootdown code ********************/ 3536 3537 /* 3538 * pmap_tlb_shootdown: 3539 * 3540 * Cause the TLB entry for pmap/va to be shot down. 3541 * 3542 * NOTE: The pmap must be locked here. 3543 */ 3544 void 3545 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) 3546 { 3547 struct pmap_tlb_shootdown_q *pq; 3548 struct pmap_tlb_shootdown_job *pj; 3549 struct cpu_info *ci, *self = curcpu(); 3550 u_long cpumask; 3551 CPU_INFO_ITERATOR cii; 3552 3553 KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock)); 3554 3555 cpumask = 0; 3556 3557 for (CPU_INFO_FOREACH(cii, ci)) { 3558 if (ci == self) 3559 continue; 3560 3561 /* 3562 * The pmap must be locked (unless its the kernel 3563 * pmap, in which case it is okay for it to be 3564 * unlocked), which prevents it from becoming 3565 * active on any additional processors. This makes 3566 * it safe to check for activeness. If it's not 3567 * active on the processor in question, then just 3568 * mark it as needing a new ASN the next time it 3569 * does, saving the IPI. We always have to send 3570 * the IPI for the kernel pmap. 3571 * 3572 * Note if it's marked active now, and it becomes 3573 * inactive by the time the processor receives 3574 * the IPI, that's okay, because it does the right 3575 * thing with it later. 3576 */ 3577 if (pmap != pmap_kernel() && 3578 PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) { 3579 PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid); 3580 continue; 3581 } 3582 3583 cpumask |= 1UL << ci->ci_cpuid; 3584 3585 pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; 3586 mutex_spin_enter(&pq->pq_lock); 3587 3588 /* 3589 * Allocate a job. 
3590 */ 3591 if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) { 3592 pj = pool_cache_get(&pmap_tlb_shootdown_job_cache, 3593 PR_NOWAIT); 3594 } else { 3595 pj = NULL; 3596 } 3597 3598 /* 3599 * If a global flush is already pending, we 3600 * don't really have to do anything else. 3601 */ 3602 pq->pq_pte |= pte; 3603 if (pq->pq_tbia) { 3604 mutex_spin_exit(&pq->pq_lock); 3605 if (pj != NULL) { 3606 pool_cache_put(&pmap_tlb_shootdown_job_cache, 3607 pj); 3608 } 3609 continue; 3610 } 3611 if (pj == NULL) { 3612 /* 3613 * Couldn't allocate a job entry. Just 3614 * tell the processor to kill everything. 3615 */ 3616 pq->pq_tbia = 1; 3617 } else { 3618 pj->pj_pmap = pmap; 3619 pj->pj_va = va; 3620 pj->pj_pte = pte; 3621 pq->pq_count++; 3622 TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list); 3623 } 3624 mutex_spin_exit(&pq->pq_lock); 3625 } 3626 3627 *cpumaskp |= cpumask; 3628 } 3629 3630 /* 3631 * pmap_tlb_shootnow: 3632 * 3633 * Process the TLB shootdowns that we have been accumulating 3634 * for the specified processor set. 3635 */ 3636 void 3637 pmap_tlb_shootnow(u_long cpumask) 3638 { 3639 3640 alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN); 3641 } 3642 3643 /* 3644 * pmap_do_tlb_shootdown: 3645 * 3646 * Process pending TLB shootdown operations for this processor. 3647 */ 3648 void 3649 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) 3650 { 3651 u_long cpu_id = ci->ci_cpuid; 3652 u_long cpu_mask = (1UL << cpu_id); 3653 struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; 3654 struct pmap_tlb_shootdown_job *pj, *next; 3655 TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs; 3656 3657 TAILQ_INIT(&jobs); 3658 3659 mutex_spin_enter(&pq->pq_lock); 3660 TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list); 3661 if (pq->pq_tbia) { 3662 if (pq->pq_pte & PG_ASM) 3663 ALPHA_TBIA(); 3664 else 3665 ALPHA_TBIAP(); 3666 pq->pq_tbia = 0; 3667 pq->pq_pte = 0; 3668 } else { 3669 TAILQ_FOREACH(pj, &jobs, pj_list) { 3670 PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va, 3671 pj->pj_pte & PG_ASM, 3672 pj->pj_pmap->pm_cpus & cpu_mask, cpu_id); 3673 } 3674 pq->pq_pte = 0; 3675 } 3676 pq->pq_count = 0; 3677 mutex_spin_exit(&pq->pq_lock); 3678 3679 /* Free jobs back to the cache. */ 3680 for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) { 3681 next = TAILQ_NEXT(pj, pj_list); 3682 pool_cache_put(&pmap_tlb_shootdown_job_cache, pj); 3683 } 3684 } 3685 #endif /* MULTIPROCESSOR */ 3686