1 /* $NetBSD: pmap.c,v 1.259 2014/01/01 22:35:54 matt Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center and by Chris G. Demetriou. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)pmap.c 8.6 (Berkeley) 5/27/94 66 */ 67 68 /* 69 * DEC Alpha physical map management code. 70 * 71 * History: 72 * 73 * This pmap started life as a Motorola 68851/68030 pmap, 74 * written by Mike Hibler at the University of Utah. 75 * 76 * It was modified for the DEC Alpha by Chris Demetriou 77 * at Carnegie Mellon University. 78 * 79 * Support for non-contiguous physical memory was added by 80 * Jason R. Thorpe of the Numerical Aerospace Simulation 81 * Facility, NASA Ames Research Center and Chris Demetriou. 82 * 83 * Page table management and a major cleanup were undertaken 84 * by Jason R. Thorpe, with lots of help from Ross Harvey of 85 * Avalon Computer Systems and from Chris Demetriou. 86 * 87 * Support for the new UVM pmap interface was written by 88 * Jason R. Thorpe. 89 * 90 * Support for ASNs was written by Jason R. Thorpe, again 91 * with help from Chris Demetriou and Ross Harvey. 92 * 93 * The locking protocol was written by Jason R. Thorpe, 94 * using Chuck Cranor's i386 pmap for UVM as a model. 95 * 96 * TLB shootdown code was written by Jason R. Thorpe. 97 * 98 * Multiprocessor modifications by Andrew Doran. 99 * 100 * Notes: 101 * 102 * All page table access is done via K0SEG. The one exception 103 * to this is for kernel mappings. Since all kernel page 104 * tables are pre-allocated, we can use the Virtual Page Table 105 * to access PTEs that map K1SEG addresses. 106 * 107 * Kernel page table pages are statically allocated in 108 * pmap_bootstrap(), and are never freed. In the future, 109 * support for dynamically adding additional kernel page 110 * table pages may be added. User page table pages are 111 * dynamically allocated and freed. 112 * 113 * Bugs/misfeatures: 114 * 115 * - Some things could be optimized. 116 */ 117 118 /* 119 * Manages physical address maps. 120 * 121 * Since the information managed by this module is 122 * also stored by the logical address mapping module, 123 * this module may throw away valid virtual-to-physical 124 * mappings at almost any time. However, invalidations 125 * of virtual-to-physical mappings must be done as 126 * requested. 127 * 128 * In order to cope with hardware architectures which 129 * make virtual-to-physical map invalidates expensive, 130 * this module may delay invalidate or reduced protection 131 * operations until such time as they are actually 132 * necessary. This module is given full information as 133 * to which processors are currently using which maps, 134 * and to when physical maps must be made correct. 
135 */ 136 137 #include "opt_lockdebug.h" 138 #include "opt_sysv.h" 139 #include "opt_multiprocessor.h" 140 141 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 142 143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.259 2014/01/01 22:35:54 matt Exp $"); 144 145 #include <sys/param.h> 146 #include <sys/systm.h> 147 #include <sys/kernel.h> 148 #include <sys/proc.h> 149 #include <sys/malloc.h> 150 #include <sys/pool.h> 151 #include <sys/buf.h> 152 #include <sys/shm.h> 153 #include <sys/atomic.h> 154 #include <sys/cpu.h> 155 156 #include <uvm/uvm.h> 157 158 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR) 159 #include <machine/rpb.h> 160 #endif 161 162 #ifdef DEBUG 163 #define PDB_FOLLOW 0x0001 164 #define PDB_INIT 0x0002 165 #define PDB_ENTER 0x0004 166 #define PDB_REMOVE 0x0008 167 #define PDB_CREATE 0x0010 168 #define PDB_PTPAGE 0x0020 169 #define PDB_ASN 0x0040 170 #define PDB_BITS 0x0080 171 #define PDB_COLLECT 0x0100 172 #define PDB_PROTECT 0x0200 173 #define PDB_BOOTSTRAP 0x1000 174 #define PDB_PARANOIA 0x2000 175 #define PDB_WIRING 0x4000 176 #define PDB_PVDUMP 0x8000 177 178 int debugmap = 0; 179 int pmapdebug = PDB_PARANOIA; 180 #endif 181 182 /* 183 * Given a map and a machine independent protection code, 184 * convert to an alpha protection code. 185 */ 186 #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p]) 187 static int protection_codes[2][8]; 188 189 /* 190 * kernel_lev1map: 191 * 192 * Kernel level 1 page table. This maps all kernel level 2 193 * page table pages, and is used as a template for all user 194 * pmap level 1 page tables. When a new user level 1 page 195 * table is allocated, all kernel_lev1map PTEs for kernel 196 * addresses are copied to the new map. 197 * 198 * The kernel also has an initial set of kernel level 2 page 199 * table pages. These map the kernel level 3 page table pages. 200 * As kernel level 3 page table pages are added, more level 2 201 * page table pages may be added to map them. These pages are 202 * never freed. 203 * 204 * Finally, the kernel also has an initial set of kernel level 205 * 3 page table pages. These map pages in K1SEG. More level 206 * 3 page table pages may be added at run-time if additional 207 * K1SEG address space is required. These pages are never freed. 208 * 209 * NOTE: When mappings are inserted into the kernel pmap, all 210 * level 2 and level 3 page table pages must already be allocated 211 * and mapped into the parent page table. 212 */ 213 pt_entry_t *kernel_lev1map; 214 215 /* 216 * Virtual Page Table. 217 */ 218 static pt_entry_t *VPT; 219 220 static struct { 221 struct pmap k_pmap; 222 struct pmap_asn_info k_asni[ALPHA_MAXPROCS]; 223 } kernel_pmap_store; 224 225 struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap; 226 227 paddr_t avail_start; /* PA of first available physical page */ 228 paddr_t avail_end; /* PA of last available physical page */ 229 static vaddr_t virtual_end; /* VA of last avail page (end of kernel AS) */ 230 231 static bool pmap_initialized; /* Has pmap_init completed? */ 232 233 u_long pmap_pages_stolen; /* instrumentation */ 234 235 /* 236 * This variable contains the number of CPU IDs we need to allocate 237 * space for when allocating the pmap structure. It is used to 238 * size a per-CPU array of ASN and ASN Generation number. 239 */ 240 static u_long pmap_ncpuids; 241 242 #ifndef PMAP_PV_LOWAT 243 #define PMAP_PV_LOWAT 16 244 #endif 245 int pmap_pv_lowat = PMAP_PV_LOWAT; 246 247 /* 248 * List of all pmaps, used to update them when e.g. 
additional kernel
 * page tables are allocated. This list is kept LRU-ordered by
 * pmap_activate().
 */
static TAILQ_HEAD(, pmap) pmap_all_pmaps;

/*
 * The pools from which pmap structures and sub-structures are allocated.
 */
static struct pool_cache pmap_pmap_cache;
static struct pool_cache pmap_l1pt_cache;
static struct pool_cache pmap_pv_cache;

/*
 * Address Space Numbers.
 *
 * On many implementations of the Alpha architecture, the TLB entries and
 * I-cache blocks are tagged with a unique number within an implementation-
 * specified range. When a process context becomes active, the ASN is used
 * to match TLB entries; if a TLB entry for a particular VA does not match
 * the current ASN, it is ignored (one could think of the processor as
 * having a collection of <max ASN> separate TLBs). This allows operating
 * system software to skip the TLB flush that would otherwise be necessary
 * at context switch time.
 *
 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
 * causes TLB entries to match any ASN. The PALcode also provides
 * a TBI (Translation Buffer Invalidate) operation that flushes all
 * TLB entries that _do not_ have PG_ASM. We use this bit for kernel
 * mappings, so that invalidation of all user mappings does not invalidate
 * kernel mappings (which are consistent across all processes).
 *
 * pmap_next_asn always indicates the next ASN to use. When
 * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
 *
 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
 * TLB entries and the I-cache are flushed, the generation number is bumped,
 * and pmap_next_asn is changed to indicate the first non-reserved ASN.
 *
 * We reserve ASN #0 for pmaps that use the global kernel_lev1map. This
 * prevents the following scenario:
 *
 *	* New ASN generation starts, and process A is given ASN #0.
 *
 *	* A new process B (and thus new pmap) is created. The ASN,
 *	  for lack of a better value, is initialized to 0.
 *
 *	* Process B runs. It is now using the TLB entries tagged
 *	  by process A. *poof*
 *
 * In the scenario above, in addition to the processor using incorrect
 * TLB entries, the PALcode might use incorrect information to service a
 * TLB miss. (The PALcode uses the recursively mapped Virtual Page Table
 * to locate the PTE for a faulting address, and tagged TLB entries exist
 * for the Virtual Page Table addresses in order to speed up this procedure,
 * as well.)
 *
 * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
 * new pmaps will initially run with no TLB entries for user addresses
 * or VPT mappings that map user page tables. Since kernel_lev1map only
 * contains mappings for kernel addresses, and since those mappings
 * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
 * safe (since PG_ASM mappings match any ASN).
 *
 * On processors that do not support ASNs, the PALcode invalidates
 * the TLB and I-cache automatically on swpctx. We still go
 * through the motions of assigning an ASN (really, just refreshing
 * the ASN generation in this particular case) to keep the logic sane
 * in other parts of the code.
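 *
 * As an illustrative sketch only (the real work is done in
 * pmap_asn_alloc(), below, whose details differ; cpu_id and pmap here
 * stand for the caller's arguments), the per-processor allocation step
 * described above looks roughly like the following. On rollover the
 * non-PG_ASM TLB entries and the I-cache are flushed and the
 * generation is bumped:
 *
 *	struct pmap_asn_info *pma = &pmap_asn_info[cpu_id];
 *
 *	if (pma->pma_asn > pmap_max_asn) {
 *		ALPHA_TBIAP();
 *		alpha_pal_imb();
 *		pma->pma_asngen++;
 *		pma->pma_asn = PMAP_ASN_RESERVED + 1;
 *	}
 *	pmap->pm_asni[cpu_id].pma_asn = pma->pma_asn++;
 *	pmap->pm_asni[cpu_id].pma_asngen = pma->pma_asngen;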
 */
static u_int	pmap_max_asn;		/* max ASN supported by the system */
					/* next ASN and cur ASN generation */
static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];

/*
 * Locking:
 *
 *	READ/WRITE LOCKS
 *	----------------
 *
 *	* pmap_main_lock - This lock is used to prevent deadlock and/or
 *	  provide mutex access to the pmap module. Most operations lock
 *	  the pmap first, then PV lists as needed. However, some operations,
 *	  such as pmap_page_protect(), lock the PV lists before locking
 *	  the pmaps. To prevent deadlock, we require a mutex lock on the
 *	  pmap module if locking in the PV->pmap direction. This is
 *	  implemented by acquiring a (shared) read lock on pmap_main_lock
 *	  if locking pmap->PV and an (exclusive) write lock if locking in
 *	  the PV->pmap direction. Since only one thread can hold a write
 *	  lock at a time, this provides the mutex.
 *
 *	MUTEXES
 *	-------
 *
 *	* pm_lock (per-pmap) - This lock protects all of the members
 *	  of the pmap structure itself. This lock will be asserted
 *	  in pmap_activate() and pmap_deactivate() from a critical
 *	  section of mi_switch(), and must never sleep. Note that
 *	  in the case of the kernel pmap, interrupts which cause
 *	  memory allocation *must* be blocked while this lock is
 *	  asserted.
 *
 *	* pvh_lock (global hash) - These locks protect the PV lists
 *	  for managed pages.
 *
 *	* pmap_all_pmaps_lock - This lock protects the global list of
 *	  all pmaps. Note that a pm_lock must never be held while this
 *	  lock is held.
 *
 *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
 *	  and the virtual_end variable.
 *
 *	  There is a lock ordering constraint for pmap_growkernel_lock.
 *	  pmap_growkernel() acquires the locks in the following order:
 *
 *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
 *		    pmap->pm_lock
 *
 *	  We need to ensure consistency between user pmaps and the
 *	  kernel_lev1map. For this reason, pmap_growkernel_lock must
 *	  be held to prevent kernel_lev1map changing across pmaps
 *	  being added to / removed from the global pmaps list.
 *
 *	Address space number management (global ASN counters and per-pmap
 *	ASN state) is not locked; these are arrays of values indexed
 *	per-processor.
 *
 *	All internal functions which operate on a pmap are called
 *	with the pmap already locked by the caller (which will be
 *	an interface function).
 */
static krwlock_t pmap_main_lock;
static kmutex_t pmap_all_pmaps_lock;
static krwlock_t pmap_growkernel_lock;

#define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
#define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
#define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
#define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)

struct {
	kmutex_t lock;
} __aligned(64) static pmap_pvh_locks[64] __aligned(64);

static inline kmutex_t *
pmap_pvh_lock(struct vm_page *pg)
{

	/* Cut bits 11-6 out of page address and use directly as offset. */
	return (kmutex_t *)((uintptr_t)&pmap_pvh_locks +
	    ((uintptr_t)pg & (63 << 6)));
}

#if defined(MULTIPROCESSOR)
/*
 * TLB Shootdown:
 *
 * When a mapping is changed in a pmap, the TLB entry corresponding to
 * the virtual address must be invalidated on all processors.
In order 407 * to accomplish this on systems with multiple processors, messages are 408 * sent from the processor which performs the mapping change to all 409 * processors on which the pmap is active. For other processors, the 410 * ASN generation numbers for that processor is invalidated, so that 411 * the next time the pmap is activated on that processor, a new ASN 412 * will be allocated (which implicitly invalidates all TLB entries). 413 * 414 * Note, we can use the pool allocator to allocate job entries 415 * since pool pages are mapped with K0SEG, not with the TLB. 416 */ 417 struct pmap_tlb_shootdown_job { 418 TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; 419 vaddr_t pj_va; /* virtual address */ 420 pmap_t pj_pmap; /* the pmap which maps the address */ 421 pt_entry_t pj_pte; /* the PTE bits */ 422 }; 423 424 static struct pmap_tlb_shootdown_q { 425 TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; /* queue 16b */ 426 kmutex_t pq_lock; /* spin lock on queue 16b */ 427 int pq_pte; /* aggregate PTE bits 4b */ 428 int pq_count; /* number of pending requests 4b */ 429 int pq_tbia; /* pending global flush 4b */ 430 uint8_t pq_pad[64-16-16-4-4-4]; /* pad to 64 bytes */ 431 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE); 432 433 /* If we have more pending jobs than this, we just nail the whole TLB. */ 434 #define PMAP_TLB_SHOOTDOWN_MAXJOBS 6 435 436 static struct pool_cache pmap_tlb_shootdown_job_cache; 437 #endif /* MULTIPROCESSOR */ 438 439 /* 440 * Internal routines 441 */ 442 static void alpha_protection_init(void); 443 static bool pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long); 444 static void pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long); 445 446 /* 447 * PT page management functions. 448 */ 449 static int pmap_lev1map_create(pmap_t, long); 450 static void pmap_lev1map_destroy(pmap_t, long); 451 static int pmap_ptpage_alloc(pmap_t, pt_entry_t *, int); 452 static void pmap_ptpage_free(pmap_t, pt_entry_t *); 453 static void pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long); 454 static void pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long); 455 static void pmap_l1pt_delref(pmap_t, pt_entry_t *, long); 456 457 static void *pmap_l1pt_alloc(struct pool *, int); 458 static void pmap_l1pt_free(struct pool *, void *); 459 460 static struct pool_allocator pmap_l1pt_allocator = { 461 pmap_l1pt_alloc, pmap_l1pt_free, 0, 462 }; 463 464 static int pmap_l1pt_ctor(void *, void *, int); 465 466 /* 467 * PV table management functions. 468 */ 469 static int pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *, 470 bool); 471 static void pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool); 472 static void *pmap_pv_page_alloc(struct pool *, int); 473 static void pmap_pv_page_free(struct pool *, void *); 474 475 static struct pool_allocator pmap_pv_page_allocator = { 476 pmap_pv_page_alloc, pmap_pv_page_free, 0, 477 }; 478 479 #ifdef DEBUG 480 void pmap_pv_dump(paddr_t); 481 #endif 482 483 #define pmap_pv_alloc() pool_cache_get(&pmap_pv_cache, PR_NOWAIT) 484 #define pmap_pv_free(pv) pool_cache_put(&pmap_pv_cache, (pv)) 485 486 /* 487 * ASN management functions. 488 */ 489 static void pmap_asn_alloc(pmap_t, long); 490 491 /* 492 * Misc. functions. 
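 *
 * As an illustration only (condensed from the way pmap_enter() below
 * uses these helpers), the physical-page reference counting routines
 * declared here are used in pairs around page table page operations;
 * the matching "delref" may free the page:
 *
 *	pmap_physpage_addref(l1pte);
 *	error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
 *	if (error)
 *		pmap_l1pt_delref(pmap, l1pte, cpu_id);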
493 */ 494 static bool pmap_physpage_alloc(int, paddr_t *); 495 static void pmap_physpage_free(paddr_t); 496 static int pmap_physpage_addref(void *); 497 static int pmap_physpage_delref(void *); 498 499 /* 500 * PMAP_ISACTIVE{,_TEST}: 501 * 502 * Check to see if a pmap is active on the current processor. 503 */ 504 #define PMAP_ISACTIVE_TEST(pm, cpu_id) \ 505 (((pm)->pm_cpus & (1UL << (cpu_id))) != 0) 506 507 #if defined(DEBUG) && !defined(MULTIPROCESSOR) 508 #define PMAP_ISACTIVE(pm, cpu_id) \ 509 ({ \ 510 /* \ 511 * XXX This test is not MP-safe. \ 512 */ \ 513 int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id); \ 514 \ 515 if ((curlwp->l_flag & LW_IDLE) != 0 && \ 516 curproc->p_vmspace != NULL && \ 517 ((curproc->p_sflag & PS_WEXIT) == 0) && \ 518 (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap))) \ 519 panic("PMAP_ISACTIVE"); \ 520 (isactive_); \ 521 }) 522 #else 523 #define PMAP_ISACTIVE(pm, cpu_id) PMAP_ISACTIVE_TEST(pm, cpu_id) 524 #endif /* DEBUG && !MULTIPROCESSOR */ 525 526 /* 527 * PMAP_ACTIVATE_ASN_SANITY: 528 * 529 * DEBUG sanity checks for ASNs within PMAP_ACTIVATE. 530 */ 531 #ifdef DEBUG 532 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) \ 533 do { \ 534 struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)]; \ 535 struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)]; \ 536 \ 537 if ((pmap)->pm_lev1map == kernel_lev1map) { \ 538 /* \ 539 * This pmap implementation also ensures that pmaps \ 540 * referencing kernel_lev1map use a reserved ASN \ 541 * ASN to prevent the PALcode from servicing a TLB \ 542 * miss with the wrong PTE. \ 543 */ \ 544 if (__pma->pma_asn != PMAP_ASN_RESERVED) { \ 545 printf("kernel_lev1map with non-reserved ASN " \ 546 "(line %d)\n", __LINE__); \ 547 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 548 } \ 549 } else { \ 550 if (__pma->pma_asngen != __cpma->pma_asngen) { \ 551 /* \ 552 * ASN generation number isn't valid! \ 553 */ \ 554 printf("pmap asngen %lu, current %lu " \ 555 "(line %d)\n", \ 556 __pma->pma_asngen, \ 557 __cpma->pma_asngen, \ 558 __LINE__); \ 559 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 560 } \ 561 if (__pma->pma_asn == PMAP_ASN_RESERVED) { \ 562 /* \ 563 * DANGER WILL ROBINSON! We're going to \ 564 * pollute the VPT TLB entries! \ 565 */ \ 566 printf("Using reserved ASN! (line %d)\n", \ 567 __LINE__); \ 568 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 569 } \ 570 } \ 571 } while (/*CONSTCOND*/0) 572 #else 573 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) /* nothing */ 574 #endif 575 576 /* 577 * PMAP_ACTIVATE: 578 * 579 * This is essentially the guts of pmap_activate(), without 580 * ASN allocation. This is used by pmap_activate(), 581 * pmap_lev1map_create(), and pmap_lev1map_destroy(). 582 * 583 * This is called only when it is known that a pmap is "active" 584 * on the current processor; the ASN must already be valid. 585 */ 586 #define PMAP_ACTIVATE(pmap, l, cpu_id) \ 587 do { \ 588 struct pcb *pcb = lwp_getpcb(l); \ 589 PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id); \ 590 \ 591 pcb->pcb_hw.apcb_ptbr = \ 592 ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \ 593 pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn; \ 594 \ 595 if ((l) == curlwp) { \ 596 /* \ 597 * Page table base register has changed; switch to \ 598 * our own context again so that it will take effect. \ 599 */ \ 600 (void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr); \ 601 } \ 602 } while (/*CONSTCOND*/0) 603 604 /* 605 * PMAP_SET_NEEDISYNC: 606 * 607 * Mark that a user pmap needs an I-stream synch on its 608 * way back out to userspace. 
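 *
 * For illustration, pmap_enter() below chooses between an immediate
 * I-stream sync and this deferred one roughly as follows (condensed
 * from the logic there):
 *
 *	if (prot & VM_PROT_EXECUTE) {
 *		if (pmap == pmap_kernel())
 *			needisync = true;
 *		else {
 *			setisync = true;
 *			needisync = (pmap->pm_cpus != 0);
 *		}
 *	}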
609 */ 610 #define PMAP_SET_NEEDISYNC(pmap) (pmap)->pm_needisync = ~0UL 611 612 /* 613 * PMAP_SYNC_ISTREAM: 614 * 615 * Synchronize the I-stream for the specified pmap. For user 616 * pmaps, this is deferred until a process using the pmap returns 617 * to userspace. 618 */ 619 #if defined(MULTIPROCESSOR) 620 #define PMAP_SYNC_ISTREAM_KERNEL() \ 621 do { \ 622 alpha_pal_imb(); \ 623 alpha_broadcast_ipi(ALPHA_IPI_IMB); \ 624 } while (/*CONSTCOND*/0) 625 626 #define PMAP_SYNC_ISTREAM_USER(pmap) \ 627 do { \ 628 alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST); \ 629 /* for curcpu, will happen in userret() */ \ 630 } while (/*CONSTCOND*/0) 631 #else 632 #define PMAP_SYNC_ISTREAM_KERNEL() alpha_pal_imb() 633 #define PMAP_SYNC_ISTREAM_USER(pmap) /* will happen in userret() */ 634 #endif /* MULTIPROCESSOR */ 635 636 #define PMAP_SYNC_ISTREAM(pmap) \ 637 do { \ 638 if ((pmap) == pmap_kernel()) \ 639 PMAP_SYNC_ISTREAM_KERNEL(); \ 640 else \ 641 PMAP_SYNC_ISTREAM_USER(pmap); \ 642 } while (/*CONSTCOND*/0) 643 644 /* 645 * PMAP_INVALIDATE_ASN: 646 * 647 * Invalidate the specified pmap's ASN, so as to force allocation 648 * of a new one the next time pmap_asn_alloc() is called. 649 * 650 * NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING 651 * CONDITIONS ARE true: 652 * 653 * (1) The pmap references the global kernel_lev1map. 654 * 655 * (2) The pmap is not active on the current processor. 656 */ 657 #define PMAP_INVALIDATE_ASN(pmap, cpu_id) \ 658 do { \ 659 (pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED; \ 660 } while (/*CONSTCOND*/0) 661 662 /* 663 * PMAP_INVALIDATE_TLB: 664 * 665 * Invalidate the TLB entry for the pmap/va pair. 666 */ 667 #define PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id) \ 668 do { \ 669 if ((hadasm) || (isactive)) { \ 670 /* \ 671 * Simply invalidating the TLB entry and I-cache \ 672 * works in this case. \ 673 */ \ 674 ALPHA_TBIS((va)); \ 675 } else if ((pmap)->pm_asni[(cpu_id)].pma_asngen == \ 676 pmap_asn_info[(cpu_id)].pma_asngen) { \ 677 /* \ 678 * We can't directly invalidate the TLB entry \ 679 * in this case, so we have to force allocation \ 680 * of a new ASN the next time this pmap becomes \ 681 * active. \ 682 */ \ 683 PMAP_INVALIDATE_ASN((pmap), (cpu_id)); \ 684 } \ 685 /* \ 686 * Nothing to do in this case; the next time the \ 687 * pmap becomes active on this processor, a new \ 688 * ASN will be allocated anyway. \ 689 */ \ 690 } while (/*CONSTCOND*/0) 691 692 /* 693 * PMAP_KERNEL_PTE: 694 * 695 * Get a kernel PTE. 696 * 697 * If debugging, do a table walk. If not debugging, just use 698 * the Virtual Page Table, since all kernel page tables are 699 * pre-allocated and mapped in. 700 */ 701 #ifdef DEBUG 702 #define PMAP_KERNEL_PTE(va) \ 703 ({ \ 704 pt_entry_t *l1pte_, *l2pte_; \ 705 \ 706 l1pte_ = pmap_l1pte(pmap_kernel(), va); \ 707 if (pmap_pte_v(l1pte_) == 0) { \ 708 printf("kernel level 1 PTE not valid, va 0x%lx " \ 709 "(line %d)\n", (va), __LINE__); \ 710 panic("PMAP_KERNEL_PTE"); \ 711 } \ 712 l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_); \ 713 if (pmap_pte_v(l2pte_) == 0) { \ 714 printf("kernel level 2 PTE not valid, va 0x%lx " \ 715 "(line %d)\n", (va), __LINE__); \ 716 panic("PMAP_KERNEL_PTE"); \ 717 } \ 718 pmap_l3pte(pmap_kernel(), va, l2pte_); \ 719 }) 720 #else 721 #define PMAP_KERNEL_PTE(va) (&VPT[VPT_INDEX((va))]) 722 #endif 723 724 /* 725 * PMAP_SET_PTE: 726 * 727 * Set a PTE to a specified value. 
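 *
 * A typical update sequence in this file pairs the PTE store with
 * local and remote TLB invalidation; condensed from pmap_enter()
 * and pmap_kenter_pa() below:
 *
 *	PMAP_SET_PTE(pte, npte);
 *	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
 *	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
 *	PMAP_TLB_SHOOTNOW();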
728 */ 729 #define PMAP_SET_PTE(ptep, val) *(ptep) = (val) 730 731 /* 732 * PMAP_STAT_{INCR,DECR}: 733 * 734 * Increment or decrement a pmap statistic. 735 */ 736 #define PMAP_STAT_INCR(s, v) atomic_add_long((unsigned long *)(&(s)), (v)) 737 #define PMAP_STAT_DECR(s, v) atomic_add_long((unsigned long *)(&(s)), -(v)) 738 739 /* 740 * pmap_bootstrap: 741 * 742 * Bootstrap the system to run with virtual memory. 743 * 744 * Note: no locking is necessary in this function. 745 */ 746 void 747 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) 748 { 749 vsize_t lev2mapsize, lev3mapsize; 750 pt_entry_t *lev2map, *lev3map; 751 pt_entry_t pte; 752 vsize_t bufsz; 753 struct pcb *pcb; 754 int i; 755 756 #ifdef DEBUG 757 if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP)) 758 printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn); 759 #endif 760 761 /* 762 * Compute the number of pages kmem_arena will have. 763 */ 764 kmeminit_nkmempages(); 765 766 /* 767 * Figure out how many initial PTE's are necessary to map the 768 * kernel. We also reserve space for kmem_alloc_pageable() 769 * for vm_fork(). 770 */ 771 772 /* Get size of buffer cache and set an upper limit */ 773 bufsz = buf_memcalc(); 774 buf_setvalimit(bufsz); 775 776 lev3mapsize = 777 (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) + 778 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE + 779 (maxproc * UPAGES) + nkmempages; 780 781 #ifdef SYSVSHM 782 lev3mapsize += shminfo.shmall; 783 #endif 784 lev3mapsize = roundup(lev3mapsize, NPTEPG); 785 786 /* 787 * Initialize `FYI' variables. Note we're relying on 788 * the fact that BSEARCH sorts the vm_physmem[] array 789 * for us. 790 */ 791 avail_start = ptoa(VM_PHYSMEM_PTR(0)->start); 792 avail_end = ptoa(VM_PHYSMEM_PTR(vm_nphysseg - 1)->end); 793 virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE; 794 795 #if 0 796 printf("avail_start = 0x%lx\n", avail_start); 797 printf("avail_end = 0x%lx\n", avail_end); 798 printf("virtual_end = 0x%lx\n", virtual_end); 799 #endif 800 801 /* 802 * Allocate a level 1 PTE table for the kernel. 803 * This is always one page long. 804 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 805 */ 806 kernel_lev1map = (pt_entry_t *) 807 uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG); 808 809 /* 810 * Allocate a level 2 PTE table for the kernel. 811 * These must map all of the level3 PTEs. 812 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 813 */ 814 lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG); 815 lev2map = (pt_entry_t *) 816 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize); 817 818 /* 819 * Allocate a level 3 PTE table for the kernel. 820 * Contains lev3mapsize PTEs. 
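	 *
	 * A worked example of the sizing above, assuming NPTEPG == 1024
	 * (8 KB pages holding 8-byte PTEs, the usual Alpha values):
	 *
	 *	lev3mapsize = 65536 PTEs  -> maps 65536 * 8 KB = 512 MB of KVA
	 *	howmany(65536, NPTEPG) = 64 level 2 PTEs are needed
	 *	lev2mapsize = roundup(64, NPTEPG) = 1024, i.e. one full L2 PT page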
821 */ 822 lev3map = (pt_entry_t *) 823 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize); 824 825 /* 826 * Set up level 1 page table 827 */ 828 829 /* Map all of the level 2 pte pages */ 830 for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) { 831 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) + 832 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 833 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 834 kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS + 835 (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte; 836 } 837 838 /* Map the virtual page table */ 839 pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT) 840 << PG_SHIFT; 841 pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */ 842 kernel_lev1map[l1pte_index(VPTBASE)] = pte; 843 VPT = (pt_entry_t *)VPTBASE; 844 845 #ifdef _PMAP_MAY_USE_PROM_CONSOLE 846 { 847 extern pt_entry_t prom_pte; /* XXX */ 848 extern int prom_mapped; /* XXX */ 849 850 if (pmap_uses_prom_console()) { 851 /* 852 * XXX Save old PTE so we can remap the PROM, if 853 * XXX necessary. 854 */ 855 prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM; 856 } 857 prom_mapped = 0; 858 859 /* 860 * Actually, this code lies. The prom is still mapped, and will 861 * remain so until the context switch after alpha_init() returns. 862 */ 863 } 864 #endif 865 866 /* 867 * Set up level 2 page table. 868 */ 869 /* Map all of the level 3 pte pages */ 870 for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) { 871 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) + 872 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 873 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 874 lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+ 875 (i*PAGE_SIZE*NPTEPG))] = pte; 876 } 877 878 /* Initialize the pmap_growkernel_lock. */ 879 rw_init(&pmap_growkernel_lock); 880 881 /* 882 * Set up level three page table (lev3map) 883 */ 884 /* Nothing to do; it's already zero'd */ 885 886 /* 887 * Initialize the pmap pools and list. 888 */ 889 pmap_ncpuids = ncpuids; 890 pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0, 891 0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL); 892 pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt", 893 &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL); 894 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 895 PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL, 896 NULL, NULL); 897 898 TAILQ_INIT(&pmap_all_pmaps); 899 900 /* 901 * Initialize the ASN logic. 902 */ 903 pmap_max_asn = maxasn; 904 for (i = 0; i < ALPHA_MAXPROCS; i++) { 905 pmap_asn_info[i].pma_asn = 1; 906 pmap_asn_info[i].pma_asngen = 0; 907 } 908 909 /* 910 * Initialize the locks. 911 */ 912 rw_init(&pmap_main_lock); 913 mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 914 for (i = 0; i < __arraycount(pmap_pvh_locks); i++) { 915 mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE); 916 } 917 918 /* 919 * Initialize kernel pmap. Note that all kernel mappings 920 * have PG_ASM set, so the ASN doesn't really matter for 921 * the kernel pmap. Also, since the kernel pmap always 922 * references kernel_lev1map, it always has an invalid ASN 923 * generation. 
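	 *
	 * For reference, the bootstrap loops above compose each wired
	 * kernel PT-page mapping like this (ptpage_va stands for the
	 * K0SEG address of the page table page being entered); PG_ASM
	 * is what lets these entries survive the per-ASN invalidations
	 * described earlier:
	 *
	 *	pte  = (ALPHA_K0SEG_TO_PHYS(ptpage_va) >> PGSHIFT) << PG_SHIFT;
	 *	pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;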
924 */ 925 memset(pmap_kernel(), 0, sizeof(struct pmap)); 926 pmap_kernel()->pm_lev1map = kernel_lev1map; 927 pmap_kernel()->pm_count = 1; 928 for (i = 0; i < ALPHA_MAXPROCS; i++) { 929 pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 930 pmap_kernel()->pm_asni[i].pma_asngen = 931 pmap_asn_info[i].pma_asngen; 932 } 933 mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE); 934 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); 935 936 #if defined(MULTIPROCESSOR) 937 /* 938 * Initialize the TLB shootdown queues. 939 */ 940 pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache, 941 sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE, 942 0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL); 943 for (i = 0; i < ALPHA_MAXPROCS; i++) { 944 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); 945 mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT, 946 IPL_SCHED); 947 } 948 #endif 949 950 /* 951 * Set up lwp0's PCB such that the ptbr points to the right place 952 * and has the kernel pmap's (really unused) ASN. 953 */ 954 pcb = lwp_getpcb(&lwp0); 955 pcb->pcb_hw.apcb_ptbr = 956 ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT; 957 pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn; 958 959 /* 960 * Mark the kernel pmap `active' on this processor. 961 */ 962 atomic_or_ulong(&pmap_kernel()->pm_cpus, 963 (1UL << cpu_number())); 964 } 965 966 #ifdef _PMAP_MAY_USE_PROM_CONSOLE 967 int 968 pmap_uses_prom_console(void) 969 { 970 971 return (cputype == ST_DEC_21000); 972 } 973 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */ 974 975 /* 976 * pmap_virtual_space: [ INTERFACE ] 977 * 978 * Define the initial bounds of the kernel virtual address space. 979 */ 980 void 981 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) 982 { 983 984 *vstartp = VM_MIN_KERNEL_ADDRESS; /* kernel is in K0SEG */ 985 *vendp = VM_MAX_KERNEL_ADDRESS; /* we use pmap_growkernel */ 986 } 987 988 /* 989 * pmap_steal_memory: [ INTERFACE ] 990 * 991 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()). 992 * This function allows for early dynamic memory allocation until the 993 * virtual memory system has been bootstrapped. After that point, either 994 * kmem_alloc or malloc should be used. This function works by stealing 995 * pages from the (to be) managed page pool, then implicitly mapping the 996 * pages (by using their k0seg addresses) and zeroing them. 997 * 998 * It may be used once the physical memory segments have been pre-loaded 999 * into the vm_physmem[] array. Early memory allocation MUST use this 1000 * interface! This cannot be used after vm_page_startup(), and will 1001 * generate a panic if tried. 1002 * 1003 * Note that this memory will never be freed, and in essence it is wired 1004 * down. 1005 * 1006 * We must adjust *vstartp and/or *vendp iff we use address space 1007 * from the kernel virtual address range defined by pmap_virtual_space(). 1008 * 1009 * Note: no locking is necessary in this function. 
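 *
 * An illustrative (hypothetical) bootstrap-time use; count here is a
 * made-up item count, and the returned address is in K0SEG, already
 * zeroed, and permanently wired:
 *
 *	vaddr_t vstart, vend, va;
 *
 *	pmap_virtual_space(&vstart, &vend);
 *	va = pmap_steal_memory(round_page(count * sizeof(pt_entry_t)),
 *	    &vstart, &vend);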
1010 */ 1011 vaddr_t 1012 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp) 1013 { 1014 int bank, npgs, x; 1015 vaddr_t va; 1016 paddr_t pa; 1017 1018 size = round_page(size); 1019 npgs = atop(size); 1020 1021 #if 0 1022 printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs); 1023 #endif 1024 1025 for (bank = 0; bank < vm_nphysseg; bank++) { 1026 if (uvm.page_init_done == true) 1027 panic("pmap_steal_memory: called _after_ bootstrap"); 1028 1029 #if 0 1030 printf(" bank %d: avail_start 0x%lx, start 0x%lx, " 1031 "avail_end 0x%lx\n", bank, VM_PHYSMEM_PTR(bank)->avail_start, 1032 VM_PHYSMEM_PTR(bank)->start, VM_PHYSMEM_PTR(bank)->avail_end); 1033 #endif 1034 1035 if (VM_PHYSMEM_PTR(bank)->avail_start != VM_PHYSMEM_PTR(bank)->start || 1036 VM_PHYSMEM_PTR(bank)->avail_start >= VM_PHYSMEM_PTR(bank)->avail_end) 1037 continue; 1038 1039 #if 0 1040 printf(" avail_end - avail_start = 0x%lx\n", 1041 VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start); 1042 #endif 1043 1044 if ((VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start) 1045 < npgs) 1046 continue; 1047 1048 /* 1049 * There are enough pages here; steal them! 1050 */ 1051 pa = ptoa(VM_PHYSMEM_PTR(bank)->avail_start); 1052 VM_PHYSMEM_PTR(bank)->avail_start += npgs; 1053 VM_PHYSMEM_PTR(bank)->start += npgs; 1054 1055 /* 1056 * Have we used up this segment? 1057 */ 1058 if (VM_PHYSMEM_PTR(bank)->avail_start == VM_PHYSMEM_PTR(bank)->end) { 1059 if (vm_nphysseg == 1) 1060 panic("pmap_steal_memory: out of memory!"); 1061 1062 /* Remove this segment from the list. */ 1063 vm_nphysseg--; 1064 for (x = bank; x < vm_nphysseg; x++) { 1065 /* structure copy */ 1066 VM_PHYSMEM_PTR_SWAP(x, x + 1); 1067 } 1068 } 1069 1070 va = ALPHA_PHYS_TO_K0SEG(pa); 1071 memset((void *)va, 0, size); 1072 pmap_pages_stolen += npgs; 1073 return (va); 1074 } 1075 1076 /* 1077 * If we got here, this was no memory left. 1078 */ 1079 panic("pmap_steal_memory: no memory to steal"); 1080 } 1081 1082 /* 1083 * pmap_init: [ INTERFACE ] 1084 * 1085 * Initialize the pmap module. Called by vm_init(), to initialize any 1086 * structures that the pmap system needs to map virtual memory. 1087 * 1088 * Note: no locking is necessary in this function. 1089 */ 1090 void 1091 pmap_init(void) 1092 { 1093 1094 #ifdef DEBUG 1095 if (pmapdebug & PDB_FOLLOW) 1096 printf("pmap_init()\n"); 1097 #endif 1098 1099 /* initialize protection array */ 1100 alpha_protection_init(); 1101 1102 /* 1103 * Set a low water mark on the pv_entry pool, so that we are 1104 * more likely to have these around even in extreme memory 1105 * starvation. 1106 */ 1107 pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat); 1108 1109 /* 1110 * Now it is safe to enable pv entry recording. 1111 */ 1112 pmap_initialized = true; 1113 1114 #if 0 1115 for (bank = 0; bank < vm_nphysseg; bank++) { 1116 printf("bank %d\n", bank); 1117 printf("\tstart = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->start)); 1118 printf("\tend = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->end)); 1119 printf("\tavail_start = 0x%x\n", 1120 ptoa(VM_PHYSMEM_PTR(bank)->avail_start)); 1121 printf("\tavail_end = 0x%x\n", 1122 ptoa(VM_PHYSMEM_PTR(bank)->avail_end)); 1123 } 1124 #endif 1125 } 1126 1127 /* 1128 * pmap_create: [ INTERFACE ] 1129 * 1130 * Create and return a physical map. 1131 * 1132 * Note: no locking is necessary in this function. 
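 *
 * Illustrative reference-count life cycle (hypothetical caller; in
 * practice UVM drives these calls):
 *
 *	pmap_t pm = pmap_create();	pm_count is 1
 *	pmap_reference(pm);		pm_count is 2
 *	pmap_destroy(pm);		pm_count drops to 1, nothing freed
 *	pmap_destroy(pm);		pm_count drops to 0, resources released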
1133 */ 1134 pmap_t 1135 pmap_create(void) 1136 { 1137 pmap_t pmap; 1138 int i; 1139 1140 #ifdef DEBUG 1141 if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) 1142 printf("pmap_create()\n"); 1143 #endif 1144 1145 pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK); 1146 memset(pmap, 0, sizeof(*pmap)); 1147 1148 /* 1149 * Defer allocation of a new level 1 page table until 1150 * the first new mapping is entered; just take a reference 1151 * to the kernel kernel_lev1map. 1152 */ 1153 pmap->pm_lev1map = kernel_lev1map; 1154 1155 pmap->pm_count = 1; 1156 for (i = 0; i < pmap_ncpuids; i++) { 1157 pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 1158 /* XXX Locking? */ 1159 pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; 1160 } 1161 mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); 1162 1163 try_again: 1164 rw_enter(&pmap_growkernel_lock, RW_READER); 1165 1166 if (pmap_lev1map_create(pmap, cpu_number()) != 0) { 1167 rw_exit(&pmap_growkernel_lock); 1168 (void) kpause("pmap_create", false, hz >> 2, NULL); 1169 goto try_again; 1170 } 1171 1172 mutex_enter(&pmap_all_pmaps_lock); 1173 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); 1174 mutex_exit(&pmap_all_pmaps_lock); 1175 1176 rw_exit(&pmap_growkernel_lock); 1177 1178 return (pmap); 1179 } 1180 1181 /* 1182 * pmap_destroy: [ INTERFACE ] 1183 * 1184 * Drop the reference count on the specified pmap, releasing 1185 * all resources if the reference count drops to zero. 1186 */ 1187 void 1188 pmap_destroy(pmap_t pmap) 1189 { 1190 1191 #ifdef DEBUG 1192 if (pmapdebug & PDB_FOLLOW) 1193 printf("pmap_destroy(%p)\n", pmap); 1194 #endif 1195 1196 if (atomic_dec_uint_nv(&pmap->pm_count) > 0) 1197 return; 1198 1199 rw_enter(&pmap_growkernel_lock, RW_READER); 1200 1201 /* 1202 * Remove it from the global list of all pmaps. 1203 */ 1204 mutex_enter(&pmap_all_pmaps_lock); 1205 TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); 1206 mutex_exit(&pmap_all_pmaps_lock); 1207 1208 pmap_lev1map_destroy(pmap, cpu_number()); 1209 1210 rw_exit(&pmap_growkernel_lock); 1211 1212 /* 1213 * Since the pmap is supposed to contain no valid 1214 * mappings at this point, we should always see 1215 * kernel_lev1map here. 1216 */ 1217 KASSERT(pmap->pm_lev1map == kernel_lev1map); 1218 1219 mutex_destroy(&pmap->pm_lock); 1220 pool_cache_put(&pmap_pmap_cache, pmap); 1221 } 1222 1223 /* 1224 * pmap_reference: [ INTERFACE ] 1225 * 1226 * Add a reference to the specified pmap. 1227 */ 1228 void 1229 pmap_reference(pmap_t pmap) 1230 { 1231 1232 #ifdef DEBUG 1233 if (pmapdebug & PDB_FOLLOW) 1234 printf("pmap_reference(%p)\n", pmap); 1235 #endif 1236 1237 atomic_inc_uint(&pmap->pm_count); 1238 } 1239 1240 /* 1241 * pmap_remove: [ INTERFACE ] 1242 * 1243 * Remove the given range of addresses from the specified map. 1244 * 1245 * It is assumed that the start and end are properly 1246 * rounded to the page size. 1247 */ 1248 void 1249 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 1250 { 1251 pt_entry_t *l1pte, *l2pte, *l3pte; 1252 pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte; 1253 vaddr_t l1eva, l2eva, vptva; 1254 bool needisync = false; 1255 long cpu_id = cpu_number(); 1256 1257 #ifdef DEBUG 1258 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1259 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1260 #endif 1261 1262 /* 1263 * If this is the kernel pmap, we can use a faster method 1264 * for accessing the PTEs (since the PT pages are always 1265 * resident). 
1266 * 1267 * Note that this routine should NEVER be called from an 1268 * interrupt context; pmap_kremove() is used for that. 1269 */ 1270 if (pmap == pmap_kernel()) { 1271 PMAP_MAP_TO_HEAD_LOCK(); 1272 PMAP_LOCK(pmap); 1273 1274 while (sva < eva) { 1275 l3pte = PMAP_KERNEL_PTE(sva); 1276 if (pmap_pte_v(l3pte)) { 1277 #ifdef DIAGNOSTIC 1278 if (uvm_pageismanaged(pmap_pte_pa(l3pte)) && 1279 pmap_pte_pv(l3pte) == 0) 1280 panic("pmap_remove: managed page " 1281 "without PG_PVLIST for 0x%lx", 1282 sva); 1283 #endif 1284 needisync |= pmap_remove_mapping(pmap, sva, 1285 l3pte, true, cpu_id); 1286 } 1287 sva += PAGE_SIZE; 1288 } 1289 1290 PMAP_UNLOCK(pmap); 1291 PMAP_MAP_TO_HEAD_UNLOCK(); 1292 1293 if (needisync) 1294 PMAP_SYNC_ISTREAM_KERNEL(); 1295 return; 1296 } 1297 1298 #ifdef DIAGNOSTIC 1299 if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS) 1300 panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel " 1301 "address range", sva, eva); 1302 #endif 1303 1304 PMAP_MAP_TO_HEAD_LOCK(); 1305 PMAP_LOCK(pmap); 1306 1307 /* 1308 * If we're already referencing the kernel_lev1map, there 1309 * is no work for us to do. 1310 */ 1311 if (pmap->pm_lev1map == kernel_lev1map) 1312 goto out; 1313 1314 saved_l1pte = l1pte = pmap_l1pte(pmap, sva); 1315 1316 /* 1317 * Add a reference to the L1 table to it won't get 1318 * removed from under us. 1319 */ 1320 pmap_physpage_addref(saved_l1pte); 1321 1322 for (; sva < eva; sva = l1eva, l1pte++) { 1323 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1324 if (pmap_pte_v(l1pte)) { 1325 saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte); 1326 1327 /* 1328 * Add a reference to the L2 table so it won't 1329 * get removed from under us. 1330 */ 1331 pmap_physpage_addref(saved_l2pte); 1332 1333 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1334 l2eva = 1335 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1336 if (pmap_pte_v(l2pte)) { 1337 saved_l3pte = l3pte = 1338 pmap_l3pte(pmap, sva, l2pte); 1339 1340 /* 1341 * Add a reference to the L3 table so 1342 * it won't get removed from under us. 1343 */ 1344 pmap_physpage_addref(saved_l3pte); 1345 1346 /* 1347 * Remember this sva; if the L3 table 1348 * gets removed, we need to invalidate 1349 * the VPT TLB entry for it. 1350 */ 1351 vptva = sva; 1352 1353 for (; sva < l2eva && sva < eva; 1354 sva += PAGE_SIZE, l3pte++) { 1355 if (!pmap_pte_v(l3pte)) { 1356 continue; 1357 } 1358 needisync |= 1359 pmap_remove_mapping( 1360 pmap, sva, 1361 l3pte, true, 1362 cpu_id); 1363 } 1364 1365 /* 1366 * Remove the reference to the L3 1367 * table that we added above. This 1368 * may free the L3 table. 1369 */ 1370 pmap_l3pt_delref(pmap, vptva, 1371 saved_l3pte, cpu_id); 1372 } 1373 } 1374 1375 /* 1376 * Remove the reference to the L2 table that we 1377 * added above. This may free the L2 table. 1378 */ 1379 pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id); 1380 } 1381 } 1382 1383 /* 1384 * Remove the reference to the L1 table that we added above. 1385 * This may free the L1 table. 1386 */ 1387 pmap_l1pt_delref(pmap, saved_l1pte, cpu_id); 1388 1389 if (needisync) 1390 PMAP_SYNC_ISTREAM_USER(pmap); 1391 1392 out: 1393 PMAP_UNLOCK(pmap); 1394 PMAP_MAP_TO_HEAD_UNLOCK(); 1395 } 1396 1397 /* 1398 * pmap_page_protect: [ INTERFACE ] 1399 * 1400 * Lower the permission for all mappings to a given page to 1401 * the permissions specified. 
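 *
 * Illustrative calls, matching the cases handled below:
 *
 *	pmap_page_protect(pg, VM_PROT_READ);	write-protect every mapping
 *	pmap_page_protect(pg, VM_PROT_NONE);	remove every mapping of pg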
1402 */ 1403 void 1404 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 1405 { 1406 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1407 pmap_t pmap; 1408 pv_entry_t pv, nextpv; 1409 bool needkisync = false; 1410 long cpu_id = cpu_number(); 1411 kmutex_t *lock; 1412 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1413 #ifdef DEBUG 1414 paddr_t pa = VM_PAGE_TO_PHYS(pg); 1415 1416 1417 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) || 1418 (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE))) 1419 printf("pmap_page_protect(%p, %x)\n", pg, prot); 1420 #endif 1421 1422 switch (prot) { 1423 case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE: 1424 case VM_PROT_READ|VM_PROT_WRITE: 1425 return; 1426 1427 /* copy_on_write */ 1428 case VM_PROT_READ|VM_PROT_EXECUTE: 1429 case VM_PROT_READ: 1430 PMAP_HEAD_TO_MAP_LOCK(); 1431 lock = pmap_pvh_lock(pg); 1432 mutex_enter(lock); 1433 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 1434 PMAP_LOCK(pv->pv_pmap); 1435 if (*pv->pv_pte & (PG_KWE | PG_UWE)) { 1436 *pv->pv_pte &= ~(PG_KWE | PG_UWE); 1437 PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va, 1438 pmap_pte_asm(pv->pv_pte), 1439 PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id); 1440 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va, 1441 pmap_pte_asm(pv->pv_pte)); 1442 } 1443 PMAP_UNLOCK(pv->pv_pmap); 1444 } 1445 mutex_exit(lock); 1446 PMAP_HEAD_TO_MAP_UNLOCK(); 1447 PMAP_TLB_SHOOTNOW(); 1448 return; 1449 1450 /* remove_all */ 1451 default: 1452 break; 1453 } 1454 1455 PMAP_HEAD_TO_MAP_LOCK(); 1456 lock = pmap_pvh_lock(pg); 1457 mutex_enter(lock); 1458 for (pv = md->pvh_list; pv != NULL; pv = nextpv) { 1459 nextpv = pv->pv_next; 1460 pmap = pv->pv_pmap; 1461 1462 PMAP_LOCK(pmap); 1463 #ifdef DEBUG 1464 if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 || 1465 pmap_pte_pa(pv->pv_pte) != pa) 1466 panic("pmap_page_protect: bad mapping"); 1467 #endif 1468 if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte, 1469 false, cpu_id) == true) { 1470 if (pmap == pmap_kernel()) 1471 needkisync |= true; 1472 else 1473 PMAP_SYNC_ISTREAM_USER(pmap); 1474 } 1475 PMAP_UNLOCK(pmap); 1476 } 1477 1478 if (needkisync) 1479 PMAP_SYNC_ISTREAM_KERNEL(); 1480 1481 mutex_exit(lock); 1482 PMAP_HEAD_TO_MAP_UNLOCK(); 1483 } 1484 1485 /* 1486 * pmap_protect: [ INTERFACE ] 1487 * 1488 * Set the physical protection on the specified range of this map 1489 * as requested. 
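 *
 * Illustrative calls (sva/eva are a page-aligned range):
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);	reduce to read-only
 *	pmap_protect(pmap, sva, eva, VM_PROT_NONE);	no read access left,
 *							handled as pmap_remove()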
1490 */ 1491 void 1492 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1493 { 1494 pt_entry_t *l1pte, *l2pte, *l3pte, bits; 1495 bool isactive; 1496 bool hadasm; 1497 vaddr_t l1eva, l2eva; 1498 long cpu_id = cpu_number(); 1499 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1500 1501 #ifdef DEBUG 1502 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) 1503 printf("pmap_protect(%p, %lx, %lx, %x)\n", 1504 pmap, sva, eva, prot); 1505 #endif 1506 1507 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1508 pmap_remove(pmap, sva, eva); 1509 return; 1510 } 1511 1512 PMAP_LOCK(pmap); 1513 1514 bits = pte_prot(pmap, prot); 1515 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1516 1517 l1pte = pmap_l1pte(pmap, sva); 1518 for (; sva < eva; sva = l1eva, l1pte++) { 1519 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1520 if (pmap_pte_v(l1pte)) { 1521 l2pte = pmap_l2pte(pmap, sva, l1pte); 1522 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1523 l2eva = 1524 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1525 if (pmap_pte_v(l2pte)) { 1526 l3pte = pmap_l3pte(pmap, sva, l2pte); 1527 for (; sva < l2eva && sva < eva; 1528 sva += PAGE_SIZE, l3pte++) { 1529 if (pmap_pte_v(l3pte) && 1530 pmap_pte_prot_chg(l3pte, 1531 bits)) { 1532 hadasm = 1533 (pmap_pte_asm(l3pte) 1534 != 0); 1535 pmap_pte_set_prot(l3pte, 1536 bits); 1537 PMAP_INVALIDATE_TLB( 1538 pmap, sva, hadasm, 1539 isactive, cpu_id); 1540 PMAP_TLB_SHOOTDOWN( 1541 pmap, sva, 1542 hadasm ? PG_ASM : 0); 1543 } 1544 } 1545 } 1546 } 1547 } 1548 } 1549 1550 PMAP_TLB_SHOOTNOW(); 1551 1552 if (prot & VM_PROT_EXECUTE) 1553 PMAP_SYNC_ISTREAM(pmap); 1554 1555 PMAP_UNLOCK(pmap); 1556 } 1557 1558 /* 1559 * pmap_enter: [ INTERFACE ] 1560 * 1561 * Insert the given physical page (p) at 1562 * the specified virtual address (v) in the 1563 * target physical map with the protection requested. 1564 * 1565 * If specified, the page will be wired down, meaning 1566 * that the related pte can not be reclaimed. 1567 * 1568 * Note: This is the only routine which MAY NOT lazy-evaluate 1569 * or lose information. That is, this routine must actually 1570 * insert this page into the given map NOW. 1571 */ 1572 int 1573 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1574 { 1575 struct vm_page *pg; /* if != NULL, managed page */ 1576 pt_entry_t *pte, npte, opte; 1577 paddr_t opa; 1578 bool tflush = true; 1579 bool hadasm = false; /* XXX gcc -Wuninitialized */ 1580 bool needisync = false; 1581 bool setisync = false; 1582 bool isactive; 1583 bool wired; 1584 long cpu_id = cpu_number(); 1585 int error = 0; 1586 kmutex_t *lock; 1587 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1588 1589 #ifdef DEBUG 1590 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1591 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n", 1592 pmap, va, pa, prot, flags); 1593 #endif 1594 pg = PHYS_TO_VM_PAGE(pa); 1595 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1596 wired = (flags & PMAP_WIRED) != 0; 1597 1598 /* 1599 * Determine what we need to do about the I-stream. If 1600 * VM_PROT_EXECUTE is set, we mark a user pmap as needing 1601 * an I-sync on the way back out to userspace. We always 1602 * need an immediate I-sync for the kernel pmap. 1603 */ 1604 if (prot & VM_PROT_EXECUTE) { 1605 if (pmap == pmap_kernel()) 1606 needisync = true; 1607 else { 1608 setisync = true; 1609 needisync = (pmap->pm_cpus != 0); 1610 } 1611 } 1612 1613 PMAP_MAP_TO_HEAD_LOCK(); 1614 PMAP_LOCK(pmap); 1615 1616 if (pmap == pmap_kernel()) { 1617 #ifdef DIAGNOSTIC 1618 /* 1619 * Sanity check the virtual address. 
1620 */ 1621 if (va < VM_MIN_KERNEL_ADDRESS) 1622 panic("pmap_enter: kernel pmap, invalid va 0x%lx", va); 1623 #endif 1624 pte = PMAP_KERNEL_PTE(va); 1625 } else { 1626 pt_entry_t *l1pte, *l2pte; 1627 1628 #ifdef DIAGNOSTIC 1629 /* 1630 * Sanity check the virtual address. 1631 */ 1632 if (va >= VM_MAXUSER_ADDRESS) 1633 panic("pmap_enter: user pmap, invalid va 0x%lx", va); 1634 #endif 1635 1636 KASSERT(pmap->pm_lev1map != kernel_lev1map); 1637 1638 /* 1639 * Check to see if the level 1 PTE is valid, and 1640 * allocate a new level 2 page table page if it's not. 1641 * A reference will be added to the level 2 table when 1642 * the level 3 table is created. 1643 */ 1644 l1pte = pmap_l1pte(pmap, va); 1645 if (pmap_pte_v(l1pte) == 0) { 1646 pmap_physpage_addref(l1pte); 1647 error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT); 1648 if (error) { 1649 pmap_l1pt_delref(pmap, l1pte, cpu_id); 1650 if (flags & PMAP_CANFAIL) 1651 goto out; 1652 panic("pmap_enter: unable to create L2 PT " 1653 "page"); 1654 } 1655 #ifdef DEBUG 1656 if (pmapdebug & PDB_PTPAGE) 1657 printf("pmap_enter: new level 2 table at " 1658 "0x%lx\n", pmap_pte_pa(l1pte)); 1659 #endif 1660 } 1661 1662 /* 1663 * Check to see if the level 2 PTE is valid, and 1664 * allocate a new level 3 page table page if it's not. 1665 * A reference will be added to the level 3 table when 1666 * the mapping is validated. 1667 */ 1668 l2pte = pmap_l2pte(pmap, va, l1pte); 1669 if (pmap_pte_v(l2pte) == 0) { 1670 pmap_physpage_addref(l2pte); 1671 error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT); 1672 if (error) { 1673 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id); 1674 if (flags & PMAP_CANFAIL) 1675 goto out; 1676 panic("pmap_enter: unable to create L3 PT " 1677 "page"); 1678 } 1679 #ifdef DEBUG 1680 if (pmapdebug & PDB_PTPAGE) 1681 printf("pmap_enter: new level 3 table at " 1682 "0x%lx\n", pmap_pte_pa(l2pte)); 1683 #endif 1684 } 1685 1686 /* 1687 * Get the PTE that will map the page. 1688 */ 1689 pte = pmap_l3pte(pmap, va, l2pte); 1690 } 1691 1692 /* Remember all of the old PTE; used for TBI check later. */ 1693 opte = *pte; 1694 1695 /* 1696 * Check to see if the old mapping is valid. If not, validate the 1697 * new one immediately. 1698 */ 1699 if (pmap_pte_v(pte) == 0) { 1700 /* 1701 * No need to invalidate the TLB in this case; an invalid 1702 * mapping won't be in the TLB, and a previously valid 1703 * mapping would have been flushed when it was invalidated. 1704 */ 1705 tflush = false; 1706 1707 /* 1708 * No need to synchronize the I-stream, either, for basically 1709 * the same reason. 1710 */ 1711 setisync = needisync = false; 1712 1713 if (pmap != pmap_kernel()) { 1714 /* 1715 * New mappings gain a reference on the level 3 1716 * table. 1717 */ 1718 pmap_physpage_addref(pte); 1719 } 1720 goto validate_enterpv; 1721 } 1722 1723 opa = pmap_pte_pa(pte); 1724 hadasm = (pmap_pte_asm(pte) != 0); 1725 1726 if (opa == pa) { 1727 /* 1728 * Mapping has not changed; must be a protection or 1729 * wiring change. 1730 */ 1731 if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) { 1732 #ifdef DEBUG 1733 if (pmapdebug & PDB_ENTER) 1734 printf("pmap_enter: wiring change -> %d\n", 1735 wired); 1736 #endif 1737 /* 1738 * Adjust the wiring count. 1739 */ 1740 if (wired) 1741 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1742 else 1743 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1744 } 1745 1746 /* 1747 * Set the PTE. 1748 */ 1749 goto validate; 1750 } 1751 1752 /* 1753 * The mapping has changed. We need to invalidate the 1754 * old mapping before creating the new one. 
1755 */ 1756 #ifdef DEBUG 1757 if (pmapdebug & PDB_ENTER) 1758 printf("pmap_enter: removing old mapping 0x%lx\n", va); 1759 #endif 1760 if (pmap != pmap_kernel()) { 1761 /* 1762 * Gain an extra reference on the level 3 table. 1763 * pmap_remove_mapping() will delete a reference, 1764 * and we don't want the table to be erroneously 1765 * freed. 1766 */ 1767 pmap_physpage_addref(pte); 1768 } 1769 needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id); 1770 1771 validate_enterpv: 1772 /* 1773 * Enter the mapping into the pv_table if appropriate. 1774 */ 1775 if (pg != NULL) { 1776 error = pmap_pv_enter(pmap, pg, va, pte, true); 1777 if (error) { 1778 pmap_l3pt_delref(pmap, va, pte, cpu_id); 1779 if (flags & PMAP_CANFAIL) 1780 goto out; 1781 panic("pmap_enter: unable to enter mapping in PV " 1782 "table"); 1783 } 1784 } 1785 1786 /* 1787 * Increment counters. 1788 */ 1789 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1790 if (wired) 1791 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1792 1793 validate: 1794 /* 1795 * Build the new PTE. 1796 */ 1797 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V; 1798 if (pg != NULL) { 1799 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1800 int attrs; 1801 1802 #ifdef DIAGNOSTIC 1803 if ((flags & VM_PROT_ALL) & ~prot) 1804 panic("pmap_enter: access type exceeds prot"); 1805 #endif 1806 lock = pmap_pvh_lock(pg); 1807 mutex_enter(lock); 1808 if (flags & VM_PROT_WRITE) 1809 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 1810 else if (flags & VM_PROT_ALL) 1811 md->pvh_attrs |= PGA_REFERENCED; 1812 attrs = md->pvh_attrs; 1813 mutex_exit(lock); 1814 1815 /* 1816 * Set up referenced/modified emulation for new mapping. 1817 */ 1818 if ((attrs & PGA_REFERENCED) == 0) 1819 npte |= PG_FOR | PG_FOW | PG_FOE; 1820 else if ((attrs & PGA_MODIFIED) == 0) 1821 npte |= PG_FOW; 1822 1823 /* 1824 * Mapping was entered on PV list. 1825 */ 1826 npte |= PG_PVLIST; 1827 } 1828 if (wired) 1829 npte |= PG_WIRED; 1830 #ifdef DEBUG 1831 if (pmapdebug & PDB_ENTER) 1832 printf("pmap_enter: new pte = 0x%lx\n", npte); 1833 #endif 1834 1835 /* 1836 * If the PALcode portion of the new PTE is the same as the 1837 * old PTE, no TBI is necessary. 1838 */ 1839 if (PG_PALCODE(opte) == PG_PALCODE(npte)) 1840 tflush = false; 1841 1842 /* 1843 * Set the new PTE. 1844 */ 1845 PMAP_SET_PTE(pte, npte); 1846 1847 /* 1848 * Invalidate the TLB entry for this VA and any appropriate 1849 * caches. 1850 */ 1851 if (tflush) { 1852 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 1853 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 1854 PMAP_TLB_SHOOTNOW(); 1855 } 1856 if (setisync) 1857 PMAP_SET_NEEDISYNC(pmap); 1858 if (needisync) 1859 PMAP_SYNC_ISTREAM(pmap); 1860 1861 out: 1862 PMAP_UNLOCK(pmap); 1863 PMAP_MAP_TO_HEAD_UNLOCK(); 1864 1865 return error; 1866 } 1867 1868 /* 1869 * pmap_kenter_pa: [ INTERFACE ] 1870 * 1871 * Enter a va -> pa mapping into the kernel pmap without any 1872 * physical->virtual tracking. 1873 * 1874 * Note: no locking is necessary in this function. 1875 */ 1876 void 1877 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1878 { 1879 pt_entry_t *pte, npte; 1880 long cpu_id = cpu_number(); 1881 bool needisync = false; 1882 pmap_t pmap = pmap_kernel(); 1883 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1884 1885 #ifdef DEBUG 1886 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1887 printf("pmap_kenter_pa(%lx, %lx, %x)\n", 1888 va, pa, prot); 1889 #endif 1890 1891 #ifdef DIAGNOSTIC 1892 /* 1893 * Sanity check the virtual address. 
1894 */ 1895 if (va < VM_MIN_KERNEL_ADDRESS) 1896 panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va); 1897 #endif 1898 1899 pte = PMAP_KERNEL_PTE(va); 1900 1901 if (pmap_pte_v(pte) == 0) 1902 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1903 if (pmap_pte_w(pte) == 0) 1904 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1905 1906 if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte)) 1907 needisync = true; 1908 1909 /* 1910 * Build the new PTE. 1911 */ 1912 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) | 1913 PG_V | PG_WIRED; 1914 1915 /* 1916 * Set the new PTE. 1917 */ 1918 PMAP_SET_PTE(pte, npte); 1919 #if defined(MULTIPROCESSOR) 1920 alpha_mb(); /* XXX alpha_wmb()? */ 1921 #endif 1922 1923 /* 1924 * Invalidate the TLB entry for this VA and any appropriate 1925 * caches. 1926 */ 1927 PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id); 1928 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM); 1929 PMAP_TLB_SHOOTNOW(); 1930 1931 if (needisync) 1932 PMAP_SYNC_ISTREAM_KERNEL(); 1933 } 1934 1935 /* 1936 * pmap_kremove: [ INTERFACE ] 1937 * 1938 * Remove a mapping entered with pmap_kenter_pa() starting at va, 1939 * for size bytes (assumed to be page rounded). 1940 */ 1941 void 1942 pmap_kremove(vaddr_t va, vsize_t size) 1943 { 1944 pt_entry_t *pte; 1945 bool needisync = false; 1946 long cpu_id = cpu_number(); 1947 pmap_t pmap = pmap_kernel(); 1948 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1949 1950 #ifdef DEBUG 1951 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1952 printf("pmap_kremove(%lx, %lx)\n", 1953 va, size); 1954 #endif 1955 1956 #ifdef DIAGNOSTIC 1957 if (va < VM_MIN_KERNEL_ADDRESS) 1958 panic("pmap_kremove: user address"); 1959 #endif 1960 1961 for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) { 1962 pte = PMAP_KERNEL_PTE(va); 1963 if (pmap_pte_v(pte)) { 1964 #ifdef DIAGNOSTIC 1965 if (pmap_pte_pv(pte)) 1966 panic("pmap_kremove: PG_PVLIST mapping for " 1967 "0x%lx", va); 1968 #endif 1969 if (pmap_pte_exec(pte)) 1970 needisync = true; 1971 1972 /* Zap the mapping. */ 1973 PMAP_SET_PTE(pte, PG_NV); 1974 #if defined(MULTIPROCESSOR) 1975 alpha_mb(); /* XXX alpha_wmb()? */ 1976 #endif 1977 PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id); 1978 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM); 1979 1980 /* Update stats. */ 1981 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 1982 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1983 } 1984 } 1985 1986 PMAP_TLB_SHOOTNOW(); 1987 1988 if (needisync) 1989 PMAP_SYNC_ISTREAM_KERNEL(); 1990 } 1991 1992 /* 1993 * pmap_unwire: [ INTERFACE ] 1994 * 1995 * Clear the wired attribute for a map/virtual-address pair. 1996 * 1997 * The mapping must already exist in the pmap. 1998 */ 1999 void 2000 pmap_unwire(pmap_t pmap, vaddr_t va) 2001 { 2002 pt_entry_t *pte; 2003 2004 #ifdef DEBUG 2005 if (pmapdebug & PDB_FOLLOW) 2006 printf("pmap_unwire(%p, %lx)\n", pmap, va); 2007 #endif 2008 2009 PMAP_LOCK(pmap); 2010 2011 pte = pmap_l3pte(pmap, va, NULL); 2012 #ifdef DIAGNOSTIC 2013 if (pte == NULL || pmap_pte_v(pte) == 0) 2014 panic("pmap_unwire"); 2015 #endif 2016 2017 /* 2018 * If wiring actually changed (always?) clear the wire bit and 2019 * update the wire count. Note that wiring is not a hardware 2020 * characteristic so there is no need to invalidate the TLB. 
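	 * (The wired bit lives in the PTE's software-use field and is
	 * ignored by the hardware and PALcode, which is why no TLB
	 * invalidation or shootdown is needed here.)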
2021 */ 2022 if (pmap_pte_w_chg(pte, 0)) { 2023 pmap_pte_set_w(pte, false); 2024 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2025 } 2026 #ifdef DIAGNOSTIC 2027 else { 2028 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 2029 "didn't change!\n", pmap, va); 2030 } 2031 #endif 2032 2033 PMAP_UNLOCK(pmap); 2034 } 2035 2036 /* 2037 * pmap_extract: [ INTERFACE ] 2038 * 2039 * Extract the physical address associated with the given 2040 * pmap/virtual address pair. 2041 */ 2042 bool 2043 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 2044 { 2045 pt_entry_t *l1pte, *l2pte, *l3pte; 2046 paddr_t pa; 2047 2048 #ifdef DEBUG 2049 if (pmapdebug & PDB_FOLLOW) 2050 printf("pmap_extract(%p, %lx) -> ", pmap, va); 2051 #endif 2052 2053 /* 2054 * Take a faster path for the kernel pmap. Avoids locking, 2055 * handles K0SEG. 2056 */ 2057 if (pmap == pmap_kernel()) { 2058 pa = vtophys(va); 2059 if (pap != NULL) 2060 *pap = pa; 2061 #ifdef DEBUG 2062 if (pmapdebug & PDB_FOLLOW) 2063 printf("0x%lx (kernel vtophys)\n", pa); 2064 #endif 2065 return (pa != 0); /* XXX */ 2066 } 2067 2068 PMAP_LOCK(pmap); 2069 2070 l1pte = pmap_l1pte(pmap, va); 2071 if (pmap_pte_v(l1pte) == 0) 2072 goto out; 2073 2074 l2pte = pmap_l2pte(pmap, va, l1pte); 2075 if (pmap_pte_v(l2pte) == 0) 2076 goto out; 2077 2078 l3pte = pmap_l3pte(pmap, va, l2pte); 2079 if (pmap_pte_v(l3pte) == 0) 2080 goto out; 2081 2082 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 2083 PMAP_UNLOCK(pmap); 2084 if (pap != NULL) 2085 *pap = pa; 2086 #ifdef DEBUG 2087 if (pmapdebug & PDB_FOLLOW) 2088 printf("0x%lx\n", pa); 2089 #endif 2090 return (true); 2091 2092 out: 2093 PMAP_UNLOCK(pmap); 2094 #ifdef DEBUG 2095 if (pmapdebug & PDB_FOLLOW) 2096 printf("failed\n"); 2097 #endif 2098 return (false); 2099 } 2100 2101 /* 2102 * pmap_copy: [ INTERFACE ] 2103 * 2104 * Copy the mapping range specified by src_addr/len 2105 * from the source map to the range dst_addr/len 2106 * in the destination map. 2107 * 2108 * This routine is only advisory and need not do anything. 2109 */ 2110 /* call deleted in <machine/pmap.h> */ 2111 2112 /* 2113 * pmap_update: [ INTERFACE ] 2114 * 2115 * Require that all active physical maps contain no 2116 * incorrect entries NOW, by processing any deferred 2117 * pmap operations. 2118 */ 2119 /* call deleted in <machine/pmap.h> */ 2120 2121 /* 2122 * pmap_activate: [ INTERFACE ] 2123 * 2124 * Activate the pmap used by the specified process. This includes 2125 * reloading the MMU context if the current process, and marking 2126 * the pmap in use by the processor. 2127 */ 2128 void 2129 pmap_activate(struct lwp *l) 2130 { 2131 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2132 long cpu_id = cpu_number(); 2133 2134 #ifdef DEBUG 2135 if (pmapdebug & PDB_FOLLOW) 2136 printf("pmap_activate(%p)\n", l); 2137 #endif 2138 2139 /* Mark the pmap in use by this processor. */ 2140 atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id)); 2141 2142 /* Allocate an ASN. */ 2143 pmap_asn_alloc(pmap, cpu_id); 2144 2145 PMAP_ACTIVATE(pmap, l, cpu_id); 2146 } 2147 2148 /* 2149 * pmap_deactivate: [ INTERFACE ] 2150 * 2151 * Mark that the pmap used by the specified process is no longer 2152 * in use by the processor. 2153 * 2154 * The comment above pmap_activate() wrt. locking applies here, 2155 * as well. Note that we use only a single `atomic' operation, 2156 * so no locking is necessary. 
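 *	(The single `atomic' operation referred to is the
 *	atomic_and_ulong() below, which clears this CPU's bit in
 *	pm_cpus.)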
2157  */
2158 void
2159 pmap_deactivate(struct lwp *l)
2160 {
2161 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2162 
2163 #ifdef DEBUG
2164 	if (pmapdebug & PDB_FOLLOW)
2165 		printf("pmap_deactivate(%p)\n", l);
2166 #endif
2167 
2168 	/*
2169 	 * Mark the pmap no longer in use by this processor.
2170 	 */
2171 	atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
2172 }
2173 
2174 /*
2175  * pmap_zero_page:		[ INTERFACE ]
2176  *
2177  *	Zero the specified (machine independent) page by mapping the page
2178  *	into virtual memory and clearing its contents, one machine dependent
2179  *	page at a time.
2180  *
2181  *	Note: no locking is necessary in this function.
2182  */
2183 void
2184 pmap_zero_page(paddr_t phys)
2185 {
2186 	u_long *p0, *p1, *pend;
2187 
2188 #ifdef DEBUG
2189 	if (pmapdebug & PDB_FOLLOW)
2190 		printf("pmap_zero_page(%lx)\n", phys);
2191 #endif
2192 
2193 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2194 	p1 = NULL;
2195 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2196 
2197 	/*
2198 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2199 	 * Do only 8 back-to-back stores, and alternate registers.
2200 	 */
2201 	do {
2202 		__asm volatile(
2203 		"# BEGIN loop body\n"
2204 		"	addq	%2, (8 * 8), %1		\n"
2205 		"	stq	$31, (0 * 8)(%0)	\n"
2206 		"	stq	$31, (1 * 8)(%0)	\n"
2207 		"	stq	$31, (2 * 8)(%0)	\n"
2208 		"	stq	$31, (3 * 8)(%0)	\n"
2209 		"	stq	$31, (4 * 8)(%0)	\n"
2210 		"	stq	$31, (5 * 8)(%0)	\n"
2211 		"	stq	$31, (6 * 8)(%0)	\n"
2212 		"	stq	$31, (7 * 8)(%0)	\n"
2213 		"					\n"
2214 		"	addq	%3, (8 * 8), %0		\n"
2215 		"	stq	$31, (0 * 8)(%1)	\n"
2216 		"	stq	$31, (1 * 8)(%1)	\n"
2217 		"	stq	$31, (2 * 8)(%1)	\n"
2218 		"	stq	$31, (3 * 8)(%1)	\n"
2219 		"	stq	$31, (4 * 8)(%1)	\n"
2220 		"	stq	$31, (5 * 8)(%1)	\n"
2221 		"	stq	$31, (6 * 8)(%1)	\n"
2222 		"	stq	$31, (7 * 8)(%1)	\n"
2223 		"	# END loop body"
2224 		: "=r" (p0), "=r" (p1)
2225 		: "0" (p0), "1" (p1)
2226 		: "memory");
2227 	} while (p0 < pend);
2228 }
2229 
2230 /*
2231  * pmap_copy_page:		[ INTERFACE ]
2232  *
2233  *	Copy the specified (machine independent) page by mapping the page
2234  *	into virtual memory and using memcpy to copy the page, one machine
2235  *	dependent page at a time.
2236  *
2237  *	Note: no locking is necessary in this function.
2238  */
2239 void
2240 pmap_copy_page(paddr_t src, paddr_t dst)
2241 {
2242 	const void *s;
2243 	void *d;
2244 
2245 #ifdef DEBUG
2246 	if (pmapdebug & PDB_FOLLOW)
2247 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2248 #endif
2249 	s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
2250 	d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
2251 	memcpy(d, s, PAGE_SIZE);
2252 }
2253 
2254 /*
2255  * pmap_pageidlezero:		[ INTERFACE ]
2256  *
2257  *	Page zero'er for the idle loop.  Returns true if the
2258  *	page was zero'd, false if we aborted for some reason.
2259  */
2260 bool
2261 pmap_pageidlezero(paddr_t pa)
2262 {
2263 	u_long *ptr;
2264 	int i, cnt = PAGE_SIZE / sizeof(u_long);
2265 
2266 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2267 		if (sched_curcpu_runnable_p()) {
2268 			/*
2269 			 * An LWP has become ready.  Abort now,
2270 			 * so we don't keep it waiting while we
2271 			 * finish zeroing the page.
2272 			 */
2273 			return (false);
2274 		}
2275 		*ptr++ = 0;
2276 	}
2277 
2278 	return (true);
2279 }
2280 
2281 /*
2282  * pmap_clear_modify:		[ INTERFACE ]
2283  *
2284  *	Clear the modify bits on the specified physical page.
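 *
 *	Clearing the cached PGA_MODIFIED attribute alone is not enough;
 *	PG_FOW is also set on every existing mapping of the page (via
 *	pmap_changebit()), so that the next write access faults and
 *	pmap_emulate_reference() can mark the page modified again.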
2285 */ 2286 bool 2287 pmap_clear_modify(struct vm_page *pg) 2288 { 2289 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2290 bool rv = false; 2291 long cpu_id = cpu_number(); 2292 kmutex_t *lock; 2293 2294 #ifdef DEBUG 2295 if (pmapdebug & PDB_FOLLOW) 2296 printf("pmap_clear_modify(%p)\n", pg); 2297 #endif 2298 2299 PMAP_HEAD_TO_MAP_LOCK(); 2300 lock = pmap_pvh_lock(pg); 2301 mutex_enter(lock); 2302 2303 if (md->pvh_attrs & PGA_MODIFIED) { 2304 rv = true; 2305 pmap_changebit(pg, PG_FOW, ~0, cpu_id); 2306 md->pvh_attrs &= ~PGA_MODIFIED; 2307 } 2308 2309 mutex_exit(lock); 2310 PMAP_HEAD_TO_MAP_UNLOCK(); 2311 2312 return (rv); 2313 } 2314 2315 /* 2316 * pmap_clear_reference: [ INTERFACE ] 2317 * 2318 * Clear the reference bit on the specified physical page. 2319 */ 2320 bool 2321 pmap_clear_reference(struct vm_page *pg) 2322 { 2323 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2324 bool rv = false; 2325 long cpu_id = cpu_number(); 2326 kmutex_t *lock; 2327 2328 #ifdef DEBUG 2329 if (pmapdebug & PDB_FOLLOW) 2330 printf("pmap_clear_reference(%p)\n", pg); 2331 #endif 2332 2333 PMAP_HEAD_TO_MAP_LOCK(); 2334 lock = pmap_pvh_lock(pg); 2335 mutex_enter(lock); 2336 2337 if (md->pvh_attrs & PGA_REFERENCED) { 2338 rv = true; 2339 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id); 2340 md->pvh_attrs &= ~PGA_REFERENCED; 2341 } 2342 2343 mutex_exit(lock); 2344 PMAP_HEAD_TO_MAP_UNLOCK(); 2345 2346 return (rv); 2347 } 2348 2349 /* 2350 * pmap_is_referenced: [ INTERFACE ] 2351 * 2352 * Return whether or not the specified physical page is referenced 2353 * by any physical maps. 2354 */ 2355 /* See <machine/pmap.h> */ 2356 2357 /* 2358 * pmap_is_modified: [ INTERFACE ] 2359 * 2360 * Return whether or not the specified physical page is modified 2361 * by any physical maps. 2362 */ 2363 /* See <machine/pmap.h> */ 2364 2365 /* 2366 * pmap_phys_address: [ INTERFACE ] 2367 * 2368 * Return the physical address corresponding to the specified 2369 * cookie. Used by the device pager to decode a device driver's 2370 * mmap entry point return value. 2371 * 2372 * Note: no locking is necessary in this function. 2373 */ 2374 paddr_t 2375 pmap_phys_address(paddr_t ppn) 2376 { 2377 2378 return (alpha_ptob(ppn)); 2379 } 2380 2381 /* 2382 * Miscellaneous support routines follow 2383 */ 2384 2385 /* 2386 * alpha_protection_init: 2387 * 2388 * Initialize Alpha protection code array. 2389 * 2390 * Note: no locking is necessary in this function. 2391 */ 2392 static void 2393 alpha_protection_init(void) 2394 { 2395 int prot, *kp, *up; 2396 2397 kp = protection_codes[0]; 2398 up = protection_codes[1]; 2399 2400 for (prot = 0; prot < 8; prot++) { 2401 kp[prot] = PG_ASM; 2402 up[prot] = 0; 2403 2404 if (prot & VM_PROT_READ) { 2405 kp[prot] |= PG_KRE; 2406 up[prot] |= PG_KRE | PG_URE; 2407 } 2408 if (prot & VM_PROT_WRITE) { 2409 kp[prot] |= PG_KWE; 2410 up[prot] |= PG_KWE | PG_UWE; 2411 } 2412 if (prot & VM_PROT_EXECUTE) { 2413 kp[prot] |= PG_EXEC | PG_KRE; 2414 up[prot] |= PG_EXEC | PG_KRE | PG_URE; 2415 } else { 2416 kp[prot] |= PG_FOE; 2417 up[prot] |= PG_FOE; 2418 } 2419 } 2420 } 2421 2422 /* 2423 * pmap_remove_mapping: 2424 * 2425 * Invalidate a single page denoted by pmap/va. 2426 * 2427 * If (pte != NULL), it is the already computed PTE for the page. 2428 * 2429 * Note: locking in this function is complicated by the fact 2430 * that we can be called when the PV list is already locked. 2431 * (pmap_page_protect()). 
In this case, the caller must be 2432 * careful to get the next PV entry while we remove this entry 2433 * from beneath it. We assume that the pmap itself is already 2434 * locked; dolock applies only to the PV list. 2435 * 2436 * Returns true or false, indicating if an I-stream sync needs 2437 * to be initiated (for this CPU or for other CPUs). 2438 */ 2439 static bool 2440 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte, 2441 bool dolock, long cpu_id) 2442 { 2443 paddr_t pa; 2444 struct vm_page *pg; /* if != NULL, page is managed */ 2445 bool onpv; 2446 bool hadasm; 2447 bool isactive; 2448 bool needisync = false; 2449 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2450 2451 #ifdef DEBUG 2452 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 2453 printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n", 2454 pmap, va, pte, dolock, cpu_id); 2455 #endif 2456 2457 /* 2458 * PTE not provided, compute it from pmap and va. 2459 */ 2460 if (pte == NULL) { 2461 pte = pmap_l3pte(pmap, va, NULL); 2462 if (pmap_pte_v(pte) == 0) 2463 return (false); 2464 } 2465 2466 pa = pmap_pte_pa(pte); 2467 onpv = (pmap_pte_pv(pte) != 0); 2468 hadasm = (pmap_pte_asm(pte) != 0); 2469 isactive = PMAP_ISACTIVE(pmap, cpu_id); 2470 2471 /* 2472 * Determine what we need to do about the I-stream. If 2473 * PG_EXEC was set, we mark a user pmap as needing an 2474 * I-sync on the way out to userspace. We always need 2475 * an immediate I-sync for the kernel pmap. 2476 */ 2477 if (pmap_pte_exec(pte)) { 2478 if (pmap == pmap_kernel()) 2479 needisync = true; 2480 else { 2481 PMAP_SET_NEEDISYNC(pmap); 2482 needisync = (pmap->pm_cpus != 0); 2483 } 2484 } 2485 2486 /* 2487 * Update statistics 2488 */ 2489 if (pmap_pte_w(pte)) 2490 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2491 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 2492 2493 /* 2494 * Invalidate the PTE after saving the reference modify info. 2495 */ 2496 #ifdef DEBUG 2497 if (pmapdebug & PDB_REMOVE) 2498 printf("remove: invalidating pte at %p\n", pte); 2499 #endif 2500 PMAP_SET_PTE(pte, PG_NV); 2501 2502 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 2503 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 2504 PMAP_TLB_SHOOTNOW(); 2505 2506 /* 2507 * If we're removing a user mapping, check to see if we 2508 * can free page table pages. 2509 */ 2510 if (pmap != pmap_kernel()) { 2511 /* 2512 * Delete the reference on the level 3 table. It will 2513 * delete references on the level 2 and 1 tables as 2514 * appropriate. 2515 */ 2516 pmap_l3pt_delref(pmap, va, pte, cpu_id); 2517 } 2518 2519 /* 2520 * If the mapping wasn't entered on the PV list, we're all done. 2521 */ 2522 if (onpv == false) 2523 return (needisync); 2524 2525 /* 2526 * Remove it from the PV table. 2527 */ 2528 pg = PHYS_TO_VM_PAGE(pa); 2529 KASSERT(pg != NULL); 2530 pmap_pv_remove(pmap, pg, va, dolock); 2531 2532 return (needisync); 2533 } 2534 2535 /* 2536 * pmap_changebit: 2537 * 2538 * Set or clear the specified PTE bits for all mappings on the 2539 * specified page. 2540 * 2541 * Note: we assume that the pv_head is already locked, and that 2542 * the caller has acquired a PV->pmap mutex so that we can lock 2543 * the pmaps as we encounter them. 
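 *
 *	The new PTE value for each mapping is computed as
 *	(*pte | set) & mask; callers that only want to set bits pass
 *	~0 for mask, and callers that only want to clear bits pass 0
 *	for set and the complement of those bits for mask.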
2544 */ 2545 static void 2546 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id) 2547 { 2548 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2549 pv_entry_t pv; 2550 pt_entry_t *pte, npte; 2551 vaddr_t va; 2552 bool hadasm, isactive; 2553 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2554 2555 #ifdef DEBUG 2556 if (pmapdebug & PDB_BITS) 2557 printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n", 2558 pg, set, mask); 2559 #endif 2560 2561 /* 2562 * Loop over all current mappings setting/clearing as apropos. 2563 */ 2564 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2565 va = pv->pv_va; 2566 2567 PMAP_LOCK(pv->pv_pmap); 2568 2569 pte = pv->pv_pte; 2570 npte = (*pte | set) & mask; 2571 if (*pte != npte) { 2572 hadasm = (pmap_pte_asm(pte) != 0); 2573 isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id); 2574 PMAP_SET_PTE(pte, npte); 2575 PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive, 2576 cpu_id); 2577 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va, 2578 hadasm ? PG_ASM : 0); 2579 } 2580 PMAP_UNLOCK(pv->pv_pmap); 2581 } 2582 2583 PMAP_TLB_SHOOTNOW(); 2584 } 2585 2586 /* 2587 * pmap_emulate_reference: 2588 * 2589 * Emulate reference and/or modified bit hits. 2590 * Return 1 if this was an execute fault on a non-exec mapping, 2591 * otherwise return 0. 2592 */ 2593 int 2594 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) 2595 { 2596 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2597 pt_entry_t faultoff, *pte; 2598 struct vm_page *pg; 2599 paddr_t pa; 2600 bool didlock = false; 2601 bool exec = false; 2602 long cpu_id = cpu_number(); 2603 kmutex_t *lock; 2604 2605 #ifdef DEBUG 2606 if (pmapdebug & PDB_FOLLOW) 2607 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n", 2608 l, v, user, type); 2609 #endif 2610 2611 /* 2612 * Convert process and virtual address to physical address. 2613 */ 2614 if (v >= VM_MIN_KERNEL_ADDRESS) { 2615 if (user) 2616 panic("pmap_emulate_reference: user ref to kernel"); 2617 /* 2618 * No need to lock here; kernel PT pages never go away. 2619 */ 2620 pte = PMAP_KERNEL_PTE(v); 2621 } else { 2622 #ifdef DIAGNOSTIC 2623 if (l == NULL) 2624 panic("pmap_emulate_reference: bad proc"); 2625 if (l->l_proc->p_vmspace == NULL) 2626 panic("pmap_emulate_reference: bad p_vmspace"); 2627 #endif 2628 PMAP_LOCK(pmap); 2629 didlock = true; 2630 pte = pmap_l3pte(pmap, v, NULL); 2631 /* 2632 * We'll unlock below where we're done with the PTE. 2633 */ 2634 } 2635 exec = pmap_pte_exec(pte); 2636 if (!exec && type == ALPHA_MMCSR_FOE) { 2637 if (didlock) 2638 PMAP_UNLOCK(pmap); 2639 return (1); 2640 } 2641 #ifdef DEBUG 2642 if (pmapdebug & PDB_FOLLOW) { 2643 printf("\tpte = %p, ", pte); 2644 printf("*pte = 0x%lx\n", *pte); 2645 } 2646 #endif 2647 #ifdef DEBUG /* These checks are more expensive */ 2648 if (!pmap_pte_v(pte)) 2649 panic("pmap_emulate_reference: invalid pte"); 2650 if (type == ALPHA_MMCSR_FOW) { 2651 if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE))) 2652 panic("pmap_emulate_reference: write but unwritable"); 2653 if (!(*pte & PG_FOW)) 2654 panic("pmap_emulate_reference: write but not FOW"); 2655 } else { 2656 if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE))) 2657 panic("pmap_emulate_reference: !write but unreadable"); 2658 if (!(*pte & (PG_FOR | PG_FOE))) 2659 panic("pmap_emulate_reference: !write but not FOR|FOE"); 2660 } 2661 /* Other diagnostics? */ 2662 #endif 2663 pa = pmap_pte_pa(pte); 2664 2665 /* 2666 * We're now done with the PTE. If it was a user pmap, unlock 2667 * it now. 
2668 */ 2669 if (didlock) 2670 PMAP_UNLOCK(pmap); 2671 2672 #ifdef DEBUG 2673 if (pmapdebug & PDB_FOLLOW) 2674 printf("\tpa = 0x%lx\n", pa); 2675 #endif 2676 #ifdef DIAGNOSTIC 2677 if (!uvm_pageismanaged(pa)) 2678 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): " 2679 "pa 0x%lx not managed", l, v, user, type, pa); 2680 #endif 2681 2682 /* 2683 * Twiddle the appropriate bits to reflect the reference 2684 * and/or modification.. 2685 * 2686 * The rules: 2687 * (1) always mark page as used, and 2688 * (2) if it was a write fault, mark page as modified. 2689 */ 2690 pg = PHYS_TO_VM_PAGE(pa); 2691 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2692 2693 PMAP_HEAD_TO_MAP_LOCK(); 2694 lock = pmap_pvh_lock(pg); 2695 mutex_enter(lock); 2696 2697 if (type == ALPHA_MMCSR_FOW) { 2698 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 2699 faultoff = PG_FOR | PG_FOW; 2700 } else { 2701 md->pvh_attrs |= PGA_REFERENCED; 2702 faultoff = PG_FOR; 2703 if (exec) { 2704 faultoff |= PG_FOE; 2705 } 2706 } 2707 pmap_changebit(pg, 0, ~faultoff, cpu_id); 2708 2709 mutex_exit(lock); 2710 PMAP_HEAD_TO_MAP_UNLOCK(); 2711 return (0); 2712 } 2713 2714 #ifdef DEBUG 2715 /* 2716 * pmap_pv_dump: 2717 * 2718 * Dump the physical->virtual data for the specified page. 2719 */ 2720 void 2721 pmap_pv_dump(paddr_t pa) 2722 { 2723 struct vm_page *pg; 2724 struct vm_page_md *md; 2725 pv_entry_t pv; 2726 kmutex_t *lock; 2727 2728 pg = PHYS_TO_VM_PAGE(pa); 2729 md = VM_PAGE_TO_MD(pg); 2730 2731 lock = pmap_pvh_lock(pg); 2732 mutex_enter(lock); 2733 2734 printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs); 2735 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) 2736 printf(" pmap %p, va 0x%lx\n", 2737 pv->pv_pmap, pv->pv_va); 2738 printf("\n"); 2739 2740 mutex_exit(lock); 2741 } 2742 #endif 2743 2744 /* 2745 * vtophys: 2746 * 2747 * Return the physical address corresponding to the K0SEG or 2748 * K1SEG address provided. 2749 * 2750 * Note: no locking is necessary in this function. 2751 */ 2752 paddr_t 2753 vtophys(vaddr_t vaddr) 2754 { 2755 pt_entry_t *pte; 2756 paddr_t paddr = 0; 2757 2758 if (vaddr < ALPHA_K0SEG_BASE) 2759 printf("vtophys: invalid vaddr 0x%lx", vaddr); 2760 else if (vaddr <= ALPHA_K0SEG_END) 2761 paddr = ALPHA_K0SEG_TO_PHYS(vaddr); 2762 else { 2763 pte = PMAP_KERNEL_PTE(vaddr); 2764 if (pmap_pte_v(pte)) 2765 paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET); 2766 } 2767 2768 #if 0 2769 printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr); 2770 #endif 2771 2772 return (paddr); 2773 } 2774 2775 /******************** pv_entry management ********************/ 2776 2777 /* 2778 * pmap_pv_enter: 2779 * 2780 * Add a physical->virtual entry to the pv_table. 2781 */ 2782 static int 2783 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, 2784 bool dolock) 2785 { 2786 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2787 pv_entry_t newpv; 2788 kmutex_t *lock; 2789 2790 /* 2791 * Allocate and fill in the new pv_entry. 2792 */ 2793 newpv = pmap_pv_alloc(); 2794 if (newpv == NULL) 2795 return ENOMEM; 2796 newpv->pv_va = va; 2797 newpv->pv_pmap = pmap; 2798 newpv->pv_pte = pte; 2799 2800 if (dolock) { 2801 lock = pmap_pvh_lock(pg); 2802 mutex_enter(lock); 2803 } 2804 2805 #ifdef DEBUG 2806 { 2807 pv_entry_t pv; 2808 /* 2809 * Make sure the entry doesn't already exist. 
2810 */ 2811 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2812 if (pmap == pv->pv_pmap && va == pv->pv_va) { 2813 printf("pmap = %p, va = 0x%lx\n", pmap, va); 2814 panic("pmap_pv_enter: already in pv table"); 2815 } 2816 } 2817 } 2818 #endif 2819 2820 /* 2821 * ...and put it in the list. 2822 */ 2823 newpv->pv_next = md->pvh_list; 2824 md->pvh_list = newpv; 2825 2826 if (dolock) { 2827 mutex_exit(lock); 2828 } 2829 2830 return 0; 2831 } 2832 2833 /* 2834 * pmap_pv_remove: 2835 * 2836 * Remove a physical->virtual entry from the pv_table. 2837 */ 2838 static void 2839 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) 2840 { 2841 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2842 pv_entry_t pv, *pvp; 2843 kmutex_t *lock; 2844 2845 if (dolock) { 2846 lock = pmap_pvh_lock(pg); 2847 mutex_enter(lock); 2848 } else { 2849 lock = NULL; /* XXX stupid gcc */ 2850 } 2851 2852 /* 2853 * Find the entry to remove. 2854 */ 2855 for (pvp = &md->pvh_list, pv = *pvp; 2856 pv != NULL; pvp = &pv->pv_next, pv = *pvp) 2857 if (pmap == pv->pv_pmap && va == pv->pv_va) 2858 break; 2859 2860 #ifdef DEBUG 2861 if (pv == NULL) 2862 panic("pmap_pv_remove: not in pv table"); 2863 #endif 2864 2865 *pvp = pv->pv_next; 2866 2867 if (dolock) { 2868 mutex_exit(lock); 2869 } 2870 2871 pmap_pv_free(pv); 2872 } 2873 2874 /* 2875 * pmap_pv_page_alloc: 2876 * 2877 * Allocate a page for the pv_entry pool. 2878 */ 2879 static void * 2880 pmap_pv_page_alloc(struct pool *pp, int flags) 2881 { 2882 paddr_t pg; 2883 2884 if (pmap_physpage_alloc(PGU_PVENT, &pg)) 2885 return ((void *)ALPHA_PHYS_TO_K0SEG(pg)); 2886 return (NULL); 2887 } 2888 2889 /* 2890 * pmap_pv_page_free: 2891 * 2892 * Free a pv_entry pool page. 2893 */ 2894 static void 2895 pmap_pv_page_free(struct pool *pp, void *v) 2896 { 2897 2898 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v)); 2899 } 2900 2901 /******************** misc. functions ********************/ 2902 2903 /* 2904 * pmap_physpage_alloc: 2905 * 2906 * Allocate a single page from the VM system and return the 2907 * physical address for that page. 2908 */ 2909 static bool 2910 pmap_physpage_alloc(int usage, paddr_t *pap) 2911 { 2912 struct vm_page *pg; 2913 paddr_t pa; 2914 2915 /* 2916 * Don't ask for a zero'd page in the L1PT case -- we will 2917 * properly initialize it in the constructor. 2918 */ 2919 2920 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ? 2921 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2922 if (pg != NULL) { 2923 pa = VM_PAGE_TO_PHYS(pg); 2924 #ifdef DEBUG 2925 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2926 if (md->pvh_refcnt != 0) { 2927 printf("pmap_physpage_alloc: page 0x%lx has " 2928 "%d references\n", pa, md->pvh_refcnt); 2929 panic("pmap_physpage_alloc"); 2930 } 2931 #endif 2932 *pap = pa; 2933 return (true); 2934 } 2935 return (false); 2936 } 2937 2938 /* 2939 * pmap_physpage_free: 2940 * 2941 * Free the single page table page at the specified physical address. 2942 */ 2943 static void 2944 pmap_physpage_free(paddr_t pa) 2945 { 2946 struct vm_page *pg; 2947 2948 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) 2949 panic("pmap_physpage_free: bogus physical page address"); 2950 2951 #ifdef DEBUG 2952 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2953 if (md->pvh_refcnt != 0) 2954 panic("pmap_physpage_free: page still has references"); 2955 #endif 2956 2957 uvm_pagefree(pg); 2958 } 2959 2960 /* 2961 * pmap_physpage_addref: 2962 * 2963 * Add a reference to the specified special use page. 
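 *
 *	The kva argument is a K0SEG pointer into the page (typically a
 *	PTE within a page table page); it is rounded down to a page
 *	boundary and converted back to a physical address so that the
 *	reference count kept in the page's vm_page_md can be updated
 *	atomically.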
2964 */ 2965 static int 2966 pmap_physpage_addref(void *kva) 2967 { 2968 struct vm_page *pg; 2969 struct vm_page_md *md; 2970 paddr_t pa; 2971 2972 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2973 pg = PHYS_TO_VM_PAGE(pa); 2974 md = VM_PAGE_TO_MD(pg); 2975 2976 KASSERT((int)md->pvh_refcnt >= 0); 2977 2978 return atomic_inc_uint_nv(&md->pvh_refcnt); 2979 } 2980 2981 /* 2982 * pmap_physpage_delref: 2983 * 2984 * Delete a reference to the specified special use page. 2985 */ 2986 static int 2987 pmap_physpage_delref(void *kva) 2988 { 2989 struct vm_page *pg; 2990 struct vm_page_md *md; 2991 paddr_t pa; 2992 2993 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2994 pg = PHYS_TO_VM_PAGE(pa); 2995 md = VM_PAGE_TO_MD(pg); 2996 2997 KASSERT((int)md->pvh_refcnt > 0); 2998 2999 return atomic_dec_uint_nv(&md->pvh_refcnt); 3000 } 3001 3002 /******************** page table page management ********************/ 3003 3004 /* 3005 * pmap_growkernel: [ INTERFACE ] 3006 * 3007 * Grow the kernel address space. This is a hint from the 3008 * upper layer to pre-allocate more kernel PT pages. 3009 */ 3010 vaddr_t 3011 pmap_growkernel(vaddr_t maxkvaddr) 3012 { 3013 struct pmap *kpm = pmap_kernel(), *pm; 3014 paddr_t ptaddr; 3015 pt_entry_t *l1pte, *l2pte, pte; 3016 vaddr_t va; 3017 int l1idx; 3018 3019 rw_enter(&pmap_growkernel_lock, RW_WRITER); 3020 3021 if (maxkvaddr <= virtual_end) 3022 goto out; /* we are OK */ 3023 3024 va = virtual_end; 3025 3026 while (va < maxkvaddr) { 3027 /* 3028 * If there is no valid L1 PTE (i.e. no L2 PT page), 3029 * allocate a new L2 PT page and insert it into the 3030 * L1 map. 3031 */ 3032 l1pte = pmap_l1pte(kpm, va); 3033 if (pmap_pte_v(l1pte) == 0) { 3034 /* 3035 * XXX PGU_NORMAL? It's not a "traditional" PT page. 3036 */ 3037 if (uvm.page_init_done == false) { 3038 /* 3039 * We're growing the kernel pmap early (from 3040 * uvm_pageboot_alloc()). This case must 3041 * be handled a little differently. 3042 */ 3043 ptaddr = ALPHA_K0SEG_TO_PHYS( 3044 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3045 } else if (pmap_physpage_alloc(PGU_NORMAL, 3046 &ptaddr) == false) 3047 goto die; 3048 pte = (atop(ptaddr) << PG_SHIFT) | 3049 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3050 *l1pte = pte; 3051 3052 l1idx = l1pte_index(va); 3053 3054 /* Update all the user pmaps. */ 3055 mutex_enter(&pmap_all_pmaps_lock); 3056 for (pm = TAILQ_FIRST(&pmap_all_pmaps); 3057 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { 3058 /* Skip the kernel pmap. */ 3059 if (pm == pmap_kernel()) 3060 continue; 3061 3062 PMAP_LOCK(pm); 3063 if (pm->pm_lev1map == kernel_lev1map) { 3064 PMAP_UNLOCK(pm); 3065 continue; 3066 } 3067 pm->pm_lev1map[l1idx] = pte; 3068 PMAP_UNLOCK(pm); 3069 } 3070 mutex_exit(&pmap_all_pmaps_lock); 3071 } 3072 3073 /* 3074 * Have an L2 PT page now, add the L3 PT page. 3075 */ 3076 l2pte = pmap_l2pte(kpm, va, l1pte); 3077 KASSERT(pmap_pte_v(l2pte) == 0); 3078 if (uvm.page_init_done == false) { 3079 /* 3080 * See above. 3081 */ 3082 ptaddr = ALPHA_K0SEG_TO_PHYS( 3083 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3084 } else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false) 3085 goto die; 3086 *l2pte = (atop(ptaddr) << PG_SHIFT) | 3087 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3088 va += ALPHA_L2SEG_SIZE; 3089 } 3090 3091 /* Invalidate the L1 PT cache. 
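	 * Constructed L1 PT pages sitting idle in the pool cache carry a
	 * copy of the kernel portion of kernel_lev1map (made by
	 * pmap_l1pt_ctor()), so any that were constructed before the new
	 * kernel L2 entries were installed above are now stale and must
	 * be discarded.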
	 */
3092 	pool_cache_invalidate(&pmap_l1pt_cache);
3093 
3094 	virtual_end = va;
3095 
3096  out:
3097 	rw_exit(&pmap_growkernel_lock);
3098 
3099 	return (virtual_end);
3100 
3101  die:
3102 	panic("pmap_growkernel: out of memory");
3103 }
3104 
3105 /*
3106  * pmap_lev1map_create:
3107  *
3108  *	Create a new level 1 page table for the specified pmap.
3109  *
3110  *	Note: growkernel must already be held and the pmap either
3111  *	already locked or unreferenced globally.
3112  */
3113 static int
3114 pmap_lev1map_create(pmap_t pmap, long cpu_id)
3115 {
3116 	pt_entry_t *l1pt;
3117 
3118 	KASSERT(pmap != pmap_kernel());
3119 
3120 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
3121 	KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3122 
3123 	/* Don't sleep -- we're called with locks held. */
3124 	l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
3125 	if (l1pt == NULL)
3126 		return (ENOMEM);
3127 
3128 	pmap->pm_lev1map = l1pt;
3129 	return (0);
3130 }
3131 
3132 /*
3133  * pmap_lev1map_destroy:
3134  *
3135  *	Destroy the level 1 page table for the specified pmap.
3136  *
3137  *	Note: growkernel must be held and the pmap must already be
3138  *	locked or not globally referenced.
3139  */
3140 static void
3141 pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
3142 {
3143 	pt_entry_t *l1pt = pmap->pm_lev1map;
3144 
3145 	KASSERT(pmap != pmap_kernel());
3146 
3147 	/*
3148 	 * Go back to referencing the global kernel_lev1map.
3149 	 */
3150 	pmap->pm_lev1map = kernel_lev1map;
3151 
3152 	/*
3153 	 * Free the old level 1 page table page.
3154 	 */
3155 	pool_cache_put(&pmap_l1pt_cache, l1pt);
3156 }
3157 
3158 /*
3159  * pmap_l1pt_ctor:
3160  *
3161  *	Pool cache constructor for L1 PT pages.
3162  *
3163  *	Note: The growkernel lock is held across allocations
3164  *	from our pool_cache, so we don't need to acquire it
3165  *	ourselves.
3166  */
3167 static int
3168 pmap_l1pt_ctor(void *arg, void *object, int flags)
3169 {
3170 	pt_entry_t *l1pt = object, pte;
3171 	int i;
3172 
3173 	/*
3174 	 * Initialize the new level 1 table by zeroing the
3175 	 * user portion and copying the kernel mappings into
3176 	 * the kernel portion.
3177 	 */
3178 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3179 		l1pt[i] = 0;
3180 
3181 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3182 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3183 		l1pt[i] = kernel_lev1map[i];
3184 
3185 	/*
3186 	 * Now, map the new virtual page table.  NOTE: NO ASM!
3187 	 */
3188 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3189 	    PG_V | PG_KRE | PG_KWE;
3190 	l1pt[l1pte_index(VPTBASE)] = pte;
3191 
3192 	return (0);
3193 }
3194 
3195 /*
3196  * pmap_l1pt_alloc:
3197  *
3198  *	Page allocator for L1 PT pages.
3199  */
3200 static void *
3201 pmap_l1pt_alloc(struct pool *pp, int flags)
3202 {
3203 	paddr_t ptpa;
3204 
3205 	/*
3206 	 * Attempt to allocate a free page.
3207 	 */
3208 	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
3209 		return (NULL);
3210 
3211 	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3212 }
3213 
3214 /*
3215  * pmap_l1pt_free:
3216  *
3217  *	Page freer for L1 PT pages.
3218  */
3219 static void
3220 pmap_l1pt_free(struct pool *pp, void *v)
3221 {
3222 
3223 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3224 }
3225 
3226 /*
3227  * pmap_ptpage_alloc:
3228  *
3229  *	Allocate a level 2 or level 3 page table page, and
3230  *	initialize the PTE that references it.
3231  *
3232  *	Note: the pmap must already be locked.
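 *
 *	The caller passes in the (currently invalid) PTE that should
 *	reference the new page table page; it is rewritten here as a
 *	valid, wired, kernel read/write mapping, with PG_ASM added when
 *	the page belongs to the kernel pmap.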
3233  */
3234 static int
3235 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3236 {
3237 	paddr_t ptpa;
3238 
3239 	/*
3240 	 * Allocate the page table page.
3241 	 */
3242 	if (pmap_physpage_alloc(usage, &ptpa) == false)
3243 		return (ENOMEM);
3244 
3245 	/*
3246 	 * Initialize the referencing PTE.
3247 	 */
3248 	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3249 	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
3250 	    (pmap == pmap_kernel() ? PG_ASM : 0));
3251 
3252 	return (0);
3253 }
3254 
3255 /*
3256  * pmap_ptpage_free:
3257  *
3258  *	Free the level 2 or level 3 page table page referenced
3259  *	by the provided PTE.
3260  *
3261  *	Note: the pmap must already be locked.
3262  */
3263 static void
3264 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3265 {
3266 	paddr_t ptpa;
3267 
3268 	/*
3269 	 * Extract the physical address of the page from the PTE
3270 	 * and clear the entry.
3271 	 */
3272 	ptpa = pmap_pte_pa(pte);
3273 	PMAP_SET_PTE(pte, PG_NV);
3274 
3275 #ifdef DEBUG
3276 	pmap_zero_page(ptpa);
3277 #endif
3278 	pmap_physpage_free(ptpa);
3279 }
3280 
3281 /*
3282  * pmap_l3pt_delref:
3283  *
3284  *	Delete a reference on a level 3 PT page.  If the reference drops
3285  *	to zero, free it.
3286  *
3287  *	Note: the pmap must already be locked.
3288  */
3289 static void
3290 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id)
3291 {
3292 	pt_entry_t *l1pte, *l2pte;
3293 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3294 
3295 	l1pte = pmap_l1pte(pmap, va);
3296 	l2pte = pmap_l2pte(pmap, va, l1pte);
3297 
3298 #ifdef DIAGNOSTIC
3299 	if (pmap == pmap_kernel())
3300 		panic("pmap_l3pt_delref: kernel pmap");
3301 #endif
3302 
3303 	if (pmap_physpage_delref(l3pte) == 0) {
3304 		/*
3305 		 * No more mappings; we can free the level 3 table.
3306 		 */
3307 #ifdef DEBUG
3308 		if (pmapdebug & PDB_PTPAGE)
3309 			printf("pmap_l3pt_delref: freeing level 3 table at "
3310 			    "0x%lx\n", pmap_pte_pa(l2pte));
3311 #endif
3312 		pmap_ptpage_free(pmap, l2pte);
3313 
3314 		/*
3315 		 * We've freed a level 3 table, so we must
3316 		 * invalidate the TLB entry for that PT page
3317 		 * in the Virtual Page Table VA range, because
3318 		 * otherwise the PALcode will service a TLB
3319 		 * miss using the stale VPT TLB entry it entered
3320 		 * behind our back to shortcut to the VA's PTE.
3321 		 */
3322 		PMAP_INVALIDATE_TLB(pmap,
3323 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), false,
3324 		    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3325 		PMAP_TLB_SHOOTDOWN(pmap,
3326 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3327 		PMAP_TLB_SHOOTNOW();
3328 
3329 		/*
3330 		 * We've freed a level 3 table, so delete the reference
3331 		 * on the level 2 table.
3332 		 */
3333 		pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
3334 	}
3335 }
3336 
3337 /*
3338  * pmap_l2pt_delref:
3339  *
3340  *	Delete a reference on a level 2 PT page.  If the reference drops
3341  *	to zero, free it.
3342  *
3343  *	Note: the pmap must already be locked.
3344  */
3345 static void
3346 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3347     long cpu_id)
3348 {
3349 
3350 #ifdef DIAGNOSTIC
3351 	if (pmap == pmap_kernel())
3352 		panic("pmap_l2pt_delref: kernel pmap");
3353 #endif
3354 
3355 	if (pmap_physpage_delref(l2pte) == 0) {
3356 		/*
3357 		 * No more mappings in this segment; we can free the
3358 		 * level 2 table.
3359 		 */
3360 #ifdef DEBUG
3361 		if (pmapdebug & PDB_PTPAGE)
3362 			printf("pmap_l2pt_delref: freeing level 2 table at "
3363 			    "0x%lx\n", pmap_pte_pa(l1pte));
3364 #endif
3365 		pmap_ptpage_free(pmap, l1pte);
3366 
3367 		/*
3368 		 * We've freed a level 2 table, so delete the reference
3369 		 * on the level 1 table.
3370 		 */
3371 		pmap_l1pt_delref(pmap, l1pte, cpu_id);
3372 	}
3373 }
3374 
3375 /*
3376  * pmap_l1pt_delref:
3377  *
3378  *	Delete a reference on a level 1 PT page.  If the reference drops
3379  *	to zero, free it.
3380  *
3381  *	Note: the pmap must already be locked.
3382  */
3383 static void
3384 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id)
3385 {
3386 
3387 #ifdef DIAGNOSTIC
3388 	if (pmap == pmap_kernel())
3389 		panic("pmap_l1pt_delref: kernel pmap");
3390 #endif
3391 
3392 	(void)pmap_physpage_delref(l1pte);
3393 }
3394 
3395 /******************** Address Space Number management ********************/
3396 
3397 /*
3398  * pmap_asn_alloc:
3399  *
3400  *	Allocate and assign an ASN to the specified pmap.
3401  *
3402  *	Note: the pmap must already be locked.  This may be called from
3403  *	an interprocessor interrupt, and in that case, the sender of
3404  *	the IPI has the pmap lock.
3405  */
3406 static void
3407 pmap_asn_alloc(pmap_t pmap, long cpu_id)
3408 {
3409 	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3410 	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3411 
3412 #ifdef DEBUG
3413 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3414 		printf("pmap_asn_alloc(%p)\n", pmap);
3415 #endif
3416 
3417 	/*
3418 	 * If the pmap is still using the global kernel_lev1map, there
3419 	 * is no need to assign an ASN at this time, because only
3420 	 * kernel mappings exist in that map, and all kernel mappings
3421 	 * have PG_ASM set.  If the pmap eventually gets its own
3422 	 * lev1map, an ASN will be allocated at that time.
3423 	 *
3424 	 * Only the kernel pmap will reference kernel_lev1map.  Do the
3425 	 * same old fixups, but note that we no longer need the pmap
3426 	 * to be locked if we're in this mode, since pm_lev1map will
3427 	 * never change.
3429 	 */
3430 	if (pmap->pm_lev1map == kernel_lev1map) {
3431 #ifdef DEBUG
3432 		if (pmapdebug & PDB_ASN)
3433 			printf("pmap_asn_alloc: still references "
3434 			    "kernel_lev1map\n");
3435 #endif
3436 #if defined(MULTIPROCESSOR)
3437 		/*
3438 		 * In a multiprocessor system, it's possible to
3439 		 * get here without having PMAP_ASN_RESERVED in
3440 		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3441 		 *
3442 		 * So, what we do here, is simply assign the reserved
3443 		 * ASN for kernel_lev1map users and let things
3444 		 * continue on.  We do, however, let uniprocessor
3445 		 * configurations continue to make the assertion.
3446 		 */
3447 		pma->pma_asn = PMAP_ASN_RESERVED;
3448 #else
3449 		KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
3450 #endif /* MULTIPROCESSOR */
3451 		return;
3452 	}
3453 
3454 	/*
3455 	 * On processors which do not implement ASNs, the swpctx PALcode
3456 	 * operation will automatically invalidate the TLB and I-cache,
3457 	 * so we don't need to do that here.
3458 	 */
3459 	if (pmap_max_asn == 0) {
3460 		/*
3461 		 * Refresh the pmap's generation number, to
3462 		 * simplify logic elsewhere.
3463 		 */
3464 		pma->pma_asngen = cpma->pma_asngen;
3465 #ifdef DEBUG
3466 		if (pmapdebug & PDB_ASN)
3467 			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3468 			    pma->pma_asngen);
3469 #endif
3470 		return;
3471 	}
3472 
3473 	/*
3474 	 * Hopefully, we can continue using the one we have...
3475 	 */
3476 	if (pma->pma_asn != PMAP_ASN_RESERVED &&
3477 	    pma->pma_asngen == cpma->pma_asngen) {
3478 		/*
3479 		 * ASN is still in the current generation; keep on using it.
3480 		 */
3481 #ifdef DEBUG
3482 		if (pmapdebug & PDB_ASN)
3483 			printf("pmap_asn_alloc: same generation, keeping %u\n",
3484 			    pma->pma_asn);
3485 #endif
3486 		return;
3487 	}
3488 
3489 	/*
3490 	 * Need to assign a new ASN.
Grab the next one, incrementing 3491 * the generation number if we have to. 3492 */ 3493 if (cpma->pma_asn > pmap_max_asn) { 3494 /* 3495 * Invalidate all non-PG_ASM TLB entries and the 3496 * I-cache, and bump the generation number. 3497 */ 3498 ALPHA_TBIAP(); 3499 alpha_pal_imb(); 3500 3501 cpma->pma_asn = 1; 3502 cpma->pma_asngen++; 3503 #ifdef DIAGNOSTIC 3504 if (cpma->pma_asngen == 0) { 3505 /* 3506 * The generation number has wrapped. We could 3507 * handle this scenario by traversing all of 3508 * the pmaps, and invalidating the generation 3509 * number on those which are not currently 3510 * in use by this processor. 3511 * 3512 * However... considering that we're using 3513 * an unsigned 64-bit integer for generation 3514 * numbers, on non-ASN CPUs, we won't wrap 3515 * for approx. 585 million years, or 75 billion 3516 * years on a 128-ASN CPU (assuming 1000 switch 3517 * operations per second). 3518 * 3519 * So, we don't bother. 3520 */ 3521 panic("pmap_asn_alloc: too much uptime"); 3522 } 3523 #endif 3524 #ifdef DEBUG 3525 if (pmapdebug & PDB_ASN) 3526 printf("pmap_asn_alloc: generation bumped to %lu\n", 3527 cpma->pma_asngen); 3528 #endif 3529 } 3530 3531 /* 3532 * Assign the new ASN and validate the generation number. 3533 */ 3534 pma->pma_asn = cpma->pma_asn++; 3535 pma->pma_asngen = cpma->pma_asngen; 3536 3537 #ifdef DEBUG 3538 if (pmapdebug & PDB_ASN) 3539 printf("pmap_asn_alloc: assigning %u to pmap %p\n", 3540 pma->pma_asn, pmap); 3541 #endif 3542 3543 /* 3544 * Have a new ASN, so there's no need to sync the I-stream 3545 * on the way back out to userspace. 3546 */ 3547 atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id)); 3548 } 3549 3550 #if defined(MULTIPROCESSOR) 3551 /******************** TLB shootdown code ********************/ 3552 3553 /* 3554 * pmap_tlb_shootdown: 3555 * 3556 * Cause the TLB entry for pmap/va to be shot down. 3557 * 3558 * NOTE: The pmap must be locked here. 3559 */ 3560 void 3561 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) 3562 { 3563 struct pmap_tlb_shootdown_q *pq; 3564 struct pmap_tlb_shootdown_job *pj; 3565 struct cpu_info *ci, *self = curcpu(); 3566 u_long cpumask; 3567 CPU_INFO_ITERATOR cii; 3568 3569 KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock)); 3570 3571 cpumask = 0; 3572 3573 for (CPU_INFO_FOREACH(cii, ci)) { 3574 if (ci == self) 3575 continue; 3576 3577 /* 3578 * The pmap must be locked (unless its the kernel 3579 * pmap, in which case it is okay for it to be 3580 * unlocked), which prevents it from becoming 3581 * active on any additional processors. This makes 3582 * it safe to check for activeness. If it's not 3583 * active on the processor in question, then just 3584 * mark it as needing a new ASN the next time it 3585 * does, saving the IPI. We always have to send 3586 * the IPI for the kernel pmap. 3587 * 3588 * Note if it's marked active now, and it becomes 3589 * inactive by the time the processor receives 3590 * the IPI, that's okay, because it does the right 3591 * thing with it later. 3592 */ 3593 if (pmap != pmap_kernel() && 3594 PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) { 3595 PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid); 3596 continue; 3597 } 3598 3599 cpumask |= 1UL << ci->ci_cpuid; 3600 3601 pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; 3602 mutex_spin_enter(&pq->pq_lock); 3603 3604 /* 3605 * Allocate a job. 
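		 * We hold pq_lock (a spin mutex), so the allocation must
		 * not sleep; PR_NOWAIT is used.  If no job structure is
		 * available, or the queue has already reached
		 * PMAP_TLB_SHOOTDOWN_MAXJOBS, we fall back to asking the
		 * target CPU to invalidate its entire TLB (pq_tbia).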
3606 */ 3607 if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) { 3608 pj = pool_cache_get(&pmap_tlb_shootdown_job_cache, 3609 PR_NOWAIT); 3610 } else { 3611 pj = NULL; 3612 } 3613 3614 /* 3615 * If a global flush is already pending, we 3616 * don't really have to do anything else. 3617 */ 3618 pq->pq_pte |= pte; 3619 if (pq->pq_tbia) { 3620 mutex_spin_exit(&pq->pq_lock); 3621 if (pj != NULL) { 3622 pool_cache_put(&pmap_tlb_shootdown_job_cache, 3623 pj); 3624 } 3625 continue; 3626 } 3627 if (pj == NULL) { 3628 /* 3629 * Couldn't allocate a job entry. Just 3630 * tell the processor to kill everything. 3631 */ 3632 pq->pq_tbia = 1; 3633 } else { 3634 pj->pj_pmap = pmap; 3635 pj->pj_va = va; 3636 pj->pj_pte = pte; 3637 pq->pq_count++; 3638 TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list); 3639 } 3640 mutex_spin_exit(&pq->pq_lock); 3641 } 3642 3643 *cpumaskp |= cpumask; 3644 } 3645 3646 /* 3647 * pmap_tlb_shootnow: 3648 * 3649 * Process the TLB shootdowns that we have been accumulating 3650 * for the specified processor set. 3651 */ 3652 void 3653 pmap_tlb_shootnow(u_long cpumask) 3654 { 3655 3656 alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN); 3657 } 3658 3659 /* 3660 * pmap_do_tlb_shootdown: 3661 * 3662 * Process pending TLB shootdown operations for this processor. 3663 */ 3664 void 3665 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) 3666 { 3667 u_long cpu_id = ci->ci_cpuid; 3668 u_long cpu_mask = (1UL << cpu_id); 3669 struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; 3670 struct pmap_tlb_shootdown_job *pj, *next; 3671 TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs; 3672 3673 TAILQ_INIT(&jobs); 3674 3675 mutex_spin_enter(&pq->pq_lock); 3676 TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list); 3677 if (pq->pq_tbia) { 3678 if (pq->pq_pte & PG_ASM) 3679 ALPHA_TBIA(); 3680 else 3681 ALPHA_TBIAP(); 3682 pq->pq_tbia = 0; 3683 pq->pq_pte = 0; 3684 } else { 3685 TAILQ_FOREACH(pj, &jobs, pj_list) { 3686 PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va, 3687 pj->pj_pte & PG_ASM, 3688 pj->pj_pmap->pm_cpus & cpu_mask, cpu_id); 3689 } 3690 pq->pq_pte = 0; 3691 } 3692 pq->pq_count = 0; 3693 mutex_spin_exit(&pq->pq_lock); 3694 3695 /* Free jobs back to the cache. */ 3696 for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) { 3697 next = TAILQ_NEXT(pj, pj_list); 3698 pool_cache_put(&pmap_tlb_shootdown_job_cache, pj); 3699 } 3700 } 3701 #endif /* MULTIPROCESSOR */ 3702
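/*
 * Illustrative sketch (not compiled code): the intended usage pattern for
 * the shootdown primitives above, mirroring the PMAP_TLB_SHOOTDOWN() /
 * PMAP_TLB_SHOOTNOW() call sites elsewhere in this file, for a caller that
 * already holds the pmap lock (hadasm, isactive and cpu_id are whatever
 * the caller has computed for the mapping in question):
 *
 *	u_long cpumask = 0;
 *
 *	PMAP_SET_PTE(pte, npte);
 *	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
 *	pmap_tlb_shootdown(pmap, va, npte, &cpumask);
 *	pmap_tlb_shootnow(cpumask);
 *
 * pmap_tlb_shootdown() queues per-VA jobs (or requests a full invalidation)
 * for each other CPU on which the pmap may be active, pmap_tlb_shootnow()
 * sends ALPHA_IPI_SHOOTDOWN to that CPU set, and pmap_do_tlb_shootdown()
 * processes the queued jobs on each target CPU.
 */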