1 /* $NetBSD: pmap.c,v 1.276 2021/04/03 15:29:02 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008, 2020 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center, by Andrew Doran and Mindaugas Rasiukevicius, 11 * and by Chris G. Demetriou. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Copyright (c) 1991, 1993 37 * The Regents of the University of California. All rights reserved. 38 * 39 * This code is derived from software contributed to Berkeley by 40 * the Systems Programming Group of the University of Utah Computer 41 * Science Department. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. Neither the name of the University nor the names of its contributors 52 * may be used to endorse or promote products derived from this software 53 * without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 58 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
 */

/*
 * DEC Alpha physical map management code.
 *
 * History:
 *
 *	This pmap started life as a Motorola 68851/68030 pmap,
 *	written by Mike Hibler at the University of Utah.
 *
 *	It was modified for the DEC Alpha by Chris Demetriou
 *	at Carnegie Mellon University.
 *
 *	Support for non-contiguous physical memory was added by
 *	Jason R. Thorpe of the Numerical Aerospace Simulation
 *	Facility, NASA Ames Research Center and Chris Demetriou.
 *
 *	Page table management and a major cleanup were undertaken
 *	by Jason R. Thorpe, with lots of help from Ross Harvey of
 *	Avalon Computer Systems and from Chris Demetriou.
 *
 *	Support for the new UVM pmap interface was written by
 *	Jason R. Thorpe.
 *
 *	Support for ASNs was written by Jason R. Thorpe, again
 *	with help from Chris Demetriou and Ross Harvey.
 *
 *	The locking protocol was written by Jason R. Thorpe,
 *	using Chuck Cranor's i386 pmap for UVM as a model.
 *
 *	TLB shootdown code was written (and then subsequently
 *	rewritten some years later, borrowing some ideas from
 *	the x86 pmap) by Jason R. Thorpe.
 *
 *	Multiprocessor modifications by Andrew Doran and
 *	Jason R. Thorpe.
 *
 * Notes:
 *
 *	All user page table access is done via K0SEG.  Kernel
 *	page table access is done via the recursive Virtual Page
 *	Table because kernel PT pages are pre-allocated and never
 *	freed, so no VPT fault handling is required.
 */

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
130 */ 131 132 #include "opt_lockdebug.h" 133 #include "opt_sysv.h" 134 #include "opt_multiprocessor.h" 135 136 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 137 138 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.276 2021/04/03 15:29:02 thorpej Exp $"); 139 140 #include <sys/param.h> 141 #include <sys/systm.h> 142 #include <sys/kernel.h> 143 #include <sys/proc.h> 144 #include <sys/malloc.h> 145 #include <sys/pool.h> 146 #include <sys/buf.h> 147 #include <sys/evcnt.h> 148 #include <sys/atomic.h> 149 #include <sys/cpu.h> 150 151 #include <uvm/uvm.h> 152 153 #if defined(MULTIPROCESSOR) 154 #include <machine/rpb.h> 155 #endif 156 157 #ifdef DEBUG 158 #define PDB_FOLLOW 0x0001 159 #define PDB_INIT 0x0002 160 #define PDB_ENTER 0x0004 161 #define PDB_REMOVE 0x0008 162 #define PDB_CREATE 0x0010 163 #define PDB_PTPAGE 0x0020 164 #define PDB_ASN 0x0040 165 #define PDB_BITS 0x0080 166 #define PDB_COLLECT 0x0100 167 #define PDB_PROTECT 0x0200 168 #define PDB_BOOTSTRAP 0x1000 169 #define PDB_PARANOIA 0x2000 170 #define PDB_WIRING 0x4000 171 #define PDB_PVDUMP 0x8000 172 173 int debugmap = 0; 174 int pmapdebug = PDB_PARANOIA; 175 #endif 176 177 #if defined(MULTIPROCESSOR) 178 #define PMAP_MP(x) x 179 #else 180 #define PMAP_MP(x) __nothing 181 #endif /* MULTIPROCESSOR */ 182 183 /* 184 * Given a map and a machine independent protection code, 185 * convert to an alpha protection code. 186 */ 187 #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p]) 188 static int protection_codes[2][8] __read_mostly; 189 190 /* 191 * kernel_lev1map: 192 * 193 * Kernel level 1 page table. This maps all kernel level 2 194 * page table pages, and is used as a template for all user 195 * pmap level 1 page tables. When a new user level 1 page 196 * table is allocated, all kernel_lev1map PTEs for kernel 197 * addresses are copied to the new map. 198 * 199 * The kernel also has an initial set of kernel level 2 page 200 * table pages. These map the kernel level 3 page table pages. 201 * As kernel level 3 page table pages are added, more level 2 202 * page table pages may be added to map them. These pages are 203 * never freed. 204 * 205 * Finally, the kernel also has an initial set of kernel level 206 * 3 page table pages. These map pages in K1SEG. More level 207 * 3 page table pages may be added at run-time if additional 208 * K1SEG address space is required. These pages are never freed. 209 * 210 * NOTE: When mappings are inserted into the kernel pmap, all 211 * level 2 and level 3 page table pages must already be allocated 212 * and mapped into the parent page table. 213 */ 214 pt_entry_t *kernel_lev1map __read_mostly; 215 216 /* 217 * Virtual Page Table. 218 */ 219 static pt_entry_t *VPT __read_mostly; 220 221 static struct { 222 struct pmap k_pmap; 223 } kernel_pmap_store __cacheline_aligned; 224 225 struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap; 226 227 /* PA of first available physical page */ 228 paddr_t avail_start __read_mostly; 229 230 /* PA of last available physical page */ 231 paddr_t avail_end __read_mostly; 232 233 /* VA of last avail page (end of kernel AS) */ 234 static vaddr_t virtual_end __read_mostly; 235 236 /* Has pmap_init completed? */ 237 static bool pmap_initialized __read_mostly; 238 239 /* Instrumentation */ 240 u_long pmap_pages_stolen __read_mostly; 241 242 /* 243 * This variable contains the number of CPU IDs we need to allocate 244 * space for when allocating the pmap structure. It is used to 245 * size a per-CPU array of ASN and ASN Generation number. 
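 *
 * Illustrative sketch (this simply mirrors pmap_bootstrap() and
 * pmap_create() below): the pmap pool cache is created with object
 * size PMAP_SIZEOF(pmap_ncpuids), and each new pmap gets one
 * pm_asni[] slot per CPU ID:
 *
 *	pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), ...);
 *	for (i = 0; i < pmap_ncpuids; i++) {
 *		pmap->pm_asni[i].pma_asn = PMAP_ASN_KERNEL;
 *		pmap->pm_asni[i].pma_asngen = PMAP_ASNGEN_INVALID;
 *	}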
 */
static u_long pmap_ncpuids __read_mostly;

#ifndef PMAP_PV_LOWAT
#define	PMAP_PV_LOWAT	16
#endif
int pmap_pv_lowat __read_mostly = PMAP_PV_LOWAT;

/*
 * List of all pmaps, used to update them when e.g. additional kernel
 * page tables are allocated.  This list is kept LRU-ordered by
 * pmap_activate().
 */
static TAILQ_HEAD(, pmap) pmap_all_pmaps __cacheline_aligned;

/*
 * The pools from which pmap structures and sub-structures are allocated.
 */
static struct pool_cache pmap_pmap_cache __read_mostly;
static struct pool_cache pmap_l1pt_cache __read_mostly;
static struct pool_cache pmap_pv_cache __read_mostly;

CTASSERT(offsetof(struct pmap, pm_asni[0]) == COHERENCY_UNIT);
CTASSERT(PMAP_SIZEOF(ALPHA_MAXPROCS) < ALPHA_PGBYTES);
CTASSERT(sizeof(struct pmap_asn_info) == COHERENCY_UNIT);

/*
 * Address Space Numbers.
 *
 * On many implementations of the Alpha architecture, the TLB entries and
 * I-cache blocks are tagged with a unique number within an implementation-
 * specified range.  When a process context becomes active, the ASN is used
 * to match TLB entries; if a TLB entry for a particular VA does not match
 * the current ASN, it is ignored (one could think of the processor as
 * having a collection of <max ASN> separate TLBs).  This allows operating
 * system software to skip the TLB flush that would otherwise be necessary
 * at context switch time.
 *
 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
 * causes TLB entries to match any ASN.  The PALcode also provides
 * a TBI (Translation Buffer Invalidate) operation that flushes all
 * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
 * mappings, so that invalidation of all user mappings does not invalidate
 * kernel mappings (which are consistent across all processes).
 *
 * pmap_next_asn always indicates the next ASN to use.  When
 * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
 *
 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
 * TLB entries and the I-cache are flushed, the generation number is bumped,
 * and pmap_next_asn is changed to indicate the first non-reserved ASN.
 *
 * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
 * ensures that LWPs using the kernel pmap make no accidental accesses
 * to user space.  This is important because the PALcode may use the
 * recursive VPT to service TLB misses.
 *
 * By reserving an ASN for the kernel, we are guaranteeing that an lwp
 * will not see any valid user space TLB entries until it passes through
 * pmap_activate() for the first time.
 *
 * On processors that do not support ASNs, the PALcode invalidates
 * non-ASM TLB entries automatically on swpctx.  We completely skip
 * the ASN machinery in this case because the PALcode neither reads
 * nor writes that field of the HWPCB.
 */

/* max ASN supported by the system */
static u_int pmap_max_asn __read_mostly;

/*
 * Locking:
 *
 *	READ/WRITE LOCKS
 *	----------------
 *
 *	* pmap_main_lock - This lock is used to prevent deadlock and/or
 *	  provide mutex access to the pmap module.  Most operations lock
 *	  the pmap first, then PV lists as needed.  However, some operations,
 *	  such as pmap_page_protect(), lock the PV lists before locking
 *	  the pmaps.
 *	  To prevent deadlock, we require a mutex lock on the pmap
 *	  module if locking in the PV->pmap direction.  This is
 *	  implemented by acquiring a (shared) read lock on pmap_main_lock
 *	  if locking pmap->PV and an (exclusive) write lock if locking in
 *	  the PV->pmap direction.  Since only one thread can hold a write
 *	  lock at a time, this provides the mutex.
 *
 *	MUTEXES
 *	-------
 *
 *	* pmap lock (global hash) - These locks protect the pmap structures.
 *
 *	* pmap activation lock (global hash) - These IPL_SCHED spin locks
 *	  synchronize pmap_activate() and TLB shootdowns.  This has a lock
 *	  ordering constraint with the tlb_lock:
 *
 *		tlb_lock -> pmap activation lock
 *
 *	* pvh_lock (global hash) - These locks protect the PV lists for
 *	  managed pages.
 *
 *	* tlb_lock - This IPL_VM lock serializes local and remote TLB
 *	  invalidation.
 *
 *	* pmap_all_pmaps_lock - This lock protects the global list of
 *	  all pmaps.
 *
 *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
 *	  and the virtual_end variable.
 *
 *	  There is a lock ordering constraint for pmap_growkernel_lock.
 *	  pmap_growkernel() acquires the locks in the following order:
 *
 *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
 *		pmap lock
 *
 *	  We need to ensure consistency between user pmaps and the
 *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
 *	  be held to prevent kernel_lev1map from changing across pmaps
 *	  being added to / removed from the global pmaps list.
 *
 *	Address space number management (global ASN counters and per-pmap
 *	ASN state) is not locked; it uses arrays of values indexed
 *	per-processor.
 *
 *	All internal functions which operate on a pmap are called
 *	with the pmap already locked by the caller (which will be
 *	an interface function).
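 *
 *	Illustrative sketch of the two locking directions described above
 *	(this mirrors pmap_remove_internal() and pmap_page_protect()
 *	below; it is not additional code):
 *
 *		pmap -> PV direction, e.g. removing mappings from one pmap:
 *
 *			PMAP_MAP_TO_HEAD_LOCK();	(shared reader)
 *			PMAP_LOCK(pmap);
 *			... walk PTEs, taking pmap_pvh_lock(pg) as needed ...
 *			PMAP_MAP_TO_HEAD_UNLOCK();
 *			PMAP_UNLOCK(pmap);
 *
 *		PV -> pmap direction, e.g. downgrading all mappings of a page:
 *
 *			PMAP_HEAD_TO_MAP_LOCK();	(exclusive writer)
 *			mutex_enter(pmap_pvh_lock(pg));
 *			for each pv entry:
 *				PMAP_LOCK(pv->pv_pmap);
 *				... adjust or remove the mapping ...
 *				PMAP_UNLOCK(pv->pv_pmap);
 *			mutex_exit(pmap_pvh_lock(pg));
 *			PMAP_HEAD_TO_MAP_UNLOCK();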
 */
static krwlock_t pmap_main_lock __cacheline_aligned;
static kmutex_t pmap_all_pmaps_lock __cacheline_aligned;
static krwlock_t pmap_growkernel_lock __cacheline_aligned;

#define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
#define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
#define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
#define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)

static union {
	kmutex_t	lock;
	uint8_t		pad[COHERENCY_UNIT];
} pmap_pvh_locks[64] __cacheline_aligned;

#define	PVH_LOCK_HASH(pg)						\
	((((uintptr_t)(pg)) >> 6) & 63)

static inline kmutex_t *
pmap_pvh_lock(struct vm_page *pg)
{
	return &pmap_pvh_locks[PVH_LOCK_HASH(pg)].lock;
}

static union {
	struct {
		kmutex_t	lock;
		kmutex_t	activation_lock;
	} locks;
	uint8_t		pad[COHERENCY_UNIT];
} pmap_pmap_locks[64] __cacheline_aligned;

#define	PMAP_LOCK_HASH(pm)						\
	((((uintptr_t)(pm)) >> 6) & 63)

static inline kmutex_t *
pmap_pmap_lock(pmap_t const pmap)
{
	return &pmap_pmap_locks[PMAP_LOCK_HASH(pmap)].locks.lock;
}

static inline kmutex_t *
pmap_activation_lock(pmap_t const pmap)
{
	return &pmap_pmap_locks[PMAP_LOCK_HASH(pmap)].locks.activation_lock;
}

#define	PMAP_LOCK(pmap)		mutex_enter(pmap_pmap_lock(pmap))
#define	PMAP_UNLOCK(pmap)	mutex_exit(pmap_pmap_lock(pmap))

#define	PMAP_ACT_LOCK(pmap)	mutex_spin_enter(pmap_activation_lock(pmap))
#define	PMAP_ACT_TRYLOCK(pmap)	mutex_tryenter(pmap_activation_lock(pmap))
#define	PMAP_ACT_UNLOCK(pmap)	mutex_spin_exit(pmap_activation_lock(pmap))

#if defined(MULTIPROCESSOR)
#define	pmap_all_cpus()		cpus_running
#else
#define	pmap_all_cpus()		~0UL
#endif /* MULTIPROCESSOR */

/*
 * TLB management.
 *
 * TLB invalidations need to be performed on local and remote CPUs
 * whenever parts of the PTE that the hardware or PALcode understands
 * change.  In order to amortize the cost of these operations, we will
 * queue up to 8 addresses to invalidate in a batch.  Any more than
 * that, and we will hit the entire TLB.
 *
 * Some things that add complexity:
 *
 * ==> ASNs. A CPU may have valid TLB entries for other than the current
 *     address space.  We can only invalidate TLB entries for the current
 *     address space, so when asked to invalidate a VA for the non-current
 *     pmap on a given CPU, we simply invalidate the ASN for that (pmap, CPU)
 *     tuple so that a new one is allocated on the next activation on that
 *     CPU.  N.B. that for CPUs that don't implement ASNs, SWPCTX does all
 *     the work necessary, so we can skip some work in the pmap module
 *     itself.
 *
 *     When a pmap is activated on a given CPU, we set a corresponding
 *     bit in pmap::pm_cpus, indicating that it potentially has valid
 *     TLB entries for that address space.  This bitmap is then used to
 *     determine which remote CPUs need to be notified of invalidations.
 *     The bit is cleared when the ASN is invalidated on that CPU.
 *
 *     In order to serialize with activating an address space on a
 *     given CPU (so that we can reliably send notifications only to
 *     relevant remote CPUs), we acquire the pmap lock in pmap_activate()
 *     and also hold the lock while remote shootdowns take place.
 *     This does not apply to the kernel pmap; all CPUs are notified about
 *     invalidations for the kernel pmap, and the pmap lock is not held
 *     in pmap_activate() for the kernel pmap.
 *
 * ==> P->V operations (e.g. pmap_page_protect()) may require sending
 *     invalidations for multiple address spaces.  We only track one
 *     address space at a time, and if we encounter more than one, then
 *     the notification each CPU gets is to hit the entire TLB.  Note
 *     also that we can't serialize with pmap_activate() in this case,
 *     so all CPUs will get the notification, and they check when
 *     processing the notification if the pmap is current on that CPU.
 *
 * Invalidation information is gathered into a pmap_tlb_context structure
 * that includes room for 8 VAs, the pmap the VAs belong to, a bitmap of
 * CPUs to be notified, and a list for PT pages that are freed during
 * removal of mappings.  The number of valid addresses in the list as
 * well as flags are squeezed into the lower bits of the first two VAs.
 * Storage for this structure is allocated on the stack.  We need to be
 * careful to keep the size of this structure under control.
 *
 * When notifying remote CPUs, we acquire the tlb_lock (which also
 * blocks IPIs), record the pointer to our context structure, set a
 * global bitmap of CPUs to be notified, and then send the IPIs to
 * each victim.  While those IPIs are in flight, we perform any
 * invalidations necessary on the local CPU.  Once that is done,
 * we then wait for the global context pointer to be cleared, which
 * will be done by the final remote CPU to complete its work.  This
 * method reduces cache line contention during processing.
 *
 * When removing mappings in user pmaps, this implementation frees page
 * table pages back to the VM system once they contain no valid mappings.
 * As we do this, we must ensure that we invalidate any TLB entries that
 * the CPU might hold for the respective recursive VPT mappings.  This
 * must be done whenever an L1 or L2 PTE is invalidated.  Until these VPT
 * translations are invalidated, the PT pages must not be reused.  For
 * this reason, we keep a list of freed PT pages in the context structure
 * and drain them off once all invalidations are complete.
 *
 * NOTE: The value of TLB_CTX_MAXVA is tuned to accommodate the UBC
 * window size (defined as 64KB on alpha in <machine/vmparam.h>).
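 *
 * Illustrative sketch of the resulting batching pattern (this mirrors
 * what callers such as pmap_remove_internal() below actually do; it is
 * not additional code):
 *
 *	struct pmap_tlb_context tlbctx;
 *
 *	pmap_tlb_context_init(&tlbctx);
 *	... for each mapping changed or removed:
 *		pmap_tlb_shootdown(pmap, va, pte_bits, &tlbctx);
 *	... once the pmap locks have been dropped:
 *	pmap_tlb_shootnow(&tlbctx);
 *	pmap_tlb_ptpage_drain(&tlbctx);		(if PT pages were freed)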
504 */ 505 506 #define TLB_CTX_MAXVA 8 507 #define TLB_CTX_ALLVA PAGE_MASK 508 509 #define TLB_CTX_F_ASM __BIT(0) 510 #define TLB_CTX_F_IMB __BIT(1) 511 #define TLB_CTX_F_KIMB __BIT(2) 512 #define TLB_CTX_F_PV __BIT(3) 513 #define TLB_CTX_F_MULTI __BIT(4) 514 515 #define TLB_CTX_COUNT(ctx) ((ctx)->t_addrdata[0] & PAGE_MASK) 516 #define TLB_CTX_INC_COUNT(ctx) (ctx)->t_addrdata[0]++ 517 #define TLB_CTX_SET_ALLVA(ctx) (ctx)->t_addrdata[0] |= TLB_CTX_ALLVA 518 519 #define TLB_CTX_FLAGS(ctx) ((ctx)->t_addrdata[1] & PAGE_MASK) 520 #define TLB_CTX_SET_FLAG(ctx, f) (ctx)->t_addrdata[1] |= (f) 521 522 #define TLB_CTX_VA(ctx, i) ((ctx)->t_addrdata[(i)] & ~PAGE_MASK) 523 #define TLB_CTX_SETVA(ctx, i, va) \ 524 (ctx)->t_addrdata[(i)] = (va) | ((ctx)->t_addrdata[(i)] & PAGE_MASK) 525 526 struct pmap_tlb_context { 527 uintptr_t t_addrdata[TLB_CTX_MAXVA]; 528 pmap_t t_pmap; 529 LIST_HEAD(, vm_page) t_freeptq; 530 }; 531 532 static struct { 533 kmutex_t lock; 534 struct evcnt events; 535 } tlb_shootdown __cacheline_aligned; 536 #define tlb_lock tlb_shootdown.lock 537 #define tlb_evcnt tlb_shootdown.events 538 #if defined(MULTIPROCESSOR) 539 static const struct pmap_tlb_context *tlb_context __cacheline_aligned; 540 static unsigned long tlb_pending __cacheline_aligned; 541 #endif /* MULTIPROCESSOR */ 542 543 #if defined(TLB_STATS) 544 #define TLB_COUNT_DECL(cnt) static struct evcnt tlb_stat_##cnt 545 #define TLB_COUNT(cnt) atomic_inc_64(&tlb_stat_##cnt .ev_count) 546 #define TLB_COUNT_ATTACH(cnt) \ 547 evcnt_attach_dynamic_nozero(&tlb_stat_##cnt, EVCNT_TYPE_MISC, \ 548 NULL, "TLB", #cnt) 549 550 TLB_COUNT_DECL(invalidate_multi_tbia); 551 TLB_COUNT_DECL(invalidate_multi_tbiap); 552 TLB_COUNT_DECL(invalidate_multi_imb); 553 554 TLB_COUNT_DECL(invalidate_kern_tbia); 555 TLB_COUNT_DECL(invalidate_kern_tbis); 556 TLB_COUNT_DECL(invalidate_kern_imb); 557 558 TLB_COUNT_DECL(invalidate_user_not_current); 559 TLB_COUNT_DECL(invalidate_user_lazy_imb); 560 TLB_COUNT_DECL(invalidate_user_tbiap); 561 TLB_COUNT_DECL(invalidate_user_tbis); 562 563 TLB_COUNT_DECL(shootdown_kernel); 564 TLB_COUNT_DECL(shootdown_user); 565 TLB_COUNT_DECL(shootdown_imb); 566 TLB_COUNT_DECL(shootdown_kimb); 567 TLB_COUNT_DECL(shootdown_overflow); 568 569 TLB_COUNT_DECL(shootdown_all_user); 570 TLB_COUNT_DECL(shootdown_all_user_imb); 571 572 TLB_COUNT_DECL(shootdown_pv); 573 TLB_COUNT_DECL(shootdown_pv_multi); 574 575 TLB_COUNT_DECL(shootnow_over_notify); 576 TLB_COUNT_DECL(shootnow_remote); 577 578 TLB_COUNT_DECL(reason_remove_kernel); 579 TLB_COUNT_DECL(reason_remove_user); 580 TLB_COUNT_DECL(reason_page_protect_read); 581 TLB_COUNT_DECL(reason_page_protect_none); 582 TLB_COUNT_DECL(reason_protect); 583 TLB_COUNT_DECL(reason_enter_kernel); 584 TLB_COUNT_DECL(reason_enter_user); 585 TLB_COUNT_DECL(reason_kenter); 586 TLB_COUNT_DECL(reason_enter_l2pt_delref); 587 TLB_COUNT_DECL(reason_enter_l3pt_delref); 588 TLB_COUNT_DECL(reason_kremove); 589 TLB_COUNT_DECL(reason_clear_modify); 590 TLB_COUNT_DECL(reason_clear_reference); 591 TLB_COUNT_DECL(reason_emulate_reference); 592 593 TLB_COUNT_DECL(asn_reuse); 594 TLB_COUNT_DECL(asn_newgen); 595 TLB_COUNT_DECL(asn_assign); 596 597 TLB_COUNT_DECL(activate_both_change); 598 TLB_COUNT_DECL(activate_asn_change); 599 TLB_COUNT_DECL(activate_ptbr_change); 600 TLB_COUNT_DECL(activate_swpctx); 601 TLB_COUNT_DECL(activate_skip_swpctx); 602 603 #else /* ! 
TLB_STATS */ 604 #define TLB_COUNT(cnt) __nothing 605 #define TLB_COUNT_ATTACH(cnt) __nothing 606 #endif /* TLB_STATS */ 607 608 static void 609 pmap_tlb_init(void) 610 { 611 /* mutex is initialized in pmap_bootstrap(). */ 612 613 evcnt_attach_dynamic_nozero(&tlb_evcnt, EVCNT_TYPE_MISC, 614 NULL, "TLB", "shootdown"); 615 616 TLB_COUNT_ATTACH(invalidate_multi_tbia); 617 TLB_COUNT_ATTACH(invalidate_multi_tbiap); 618 TLB_COUNT_ATTACH(invalidate_multi_imb); 619 620 TLB_COUNT_ATTACH(invalidate_kern_tbia); 621 TLB_COUNT_ATTACH(invalidate_kern_tbis); 622 TLB_COUNT_ATTACH(invalidate_kern_imb); 623 624 TLB_COUNT_ATTACH(invalidate_user_not_current); 625 TLB_COUNT_ATTACH(invalidate_user_lazy_imb); 626 TLB_COUNT_ATTACH(invalidate_user_tbiap); 627 TLB_COUNT_ATTACH(invalidate_user_tbis); 628 629 TLB_COUNT_ATTACH(shootdown_kernel); 630 TLB_COUNT_ATTACH(shootdown_user); 631 TLB_COUNT_ATTACH(shootdown_imb); 632 TLB_COUNT_ATTACH(shootdown_kimb); 633 TLB_COUNT_ATTACH(shootdown_overflow); 634 635 TLB_COUNT_ATTACH(shootdown_all_user); 636 TLB_COUNT_ATTACH(shootdown_all_user_imb); 637 638 TLB_COUNT_ATTACH(shootdown_pv); 639 TLB_COUNT_ATTACH(shootdown_pv_multi); 640 641 TLB_COUNT_ATTACH(shootnow_over_notify); 642 TLB_COUNT_ATTACH(shootnow_remote); 643 644 TLB_COUNT_ATTACH(reason_remove_kernel); 645 TLB_COUNT_ATTACH(reason_remove_user); 646 TLB_COUNT_ATTACH(reason_page_protect_read); 647 TLB_COUNT_ATTACH(reason_page_protect_none); 648 TLB_COUNT_ATTACH(reason_protect); 649 TLB_COUNT_ATTACH(reason_enter_kernel); 650 TLB_COUNT_ATTACH(reason_enter_user); 651 TLB_COUNT_ATTACH(reason_kenter); 652 TLB_COUNT_ATTACH(reason_enter_l2pt_delref); 653 TLB_COUNT_ATTACH(reason_enter_l3pt_delref); 654 TLB_COUNT_ATTACH(reason_kremove); 655 TLB_COUNT_ATTACH(reason_clear_modify); 656 TLB_COUNT_ATTACH(reason_clear_reference); 657 658 TLB_COUNT_ATTACH(asn_reuse); 659 TLB_COUNT_ATTACH(asn_newgen); 660 TLB_COUNT_ATTACH(asn_assign); 661 662 TLB_COUNT_ATTACH(activate_both_change); 663 TLB_COUNT_ATTACH(activate_asn_change); 664 TLB_COUNT_ATTACH(activate_ptbr_change); 665 TLB_COUNT_ATTACH(activate_swpctx); 666 TLB_COUNT_ATTACH(activate_skip_swpctx); 667 } 668 669 static inline void 670 pmap_tlb_context_init(struct pmap_tlb_context * const tlbctx) 671 { 672 /* Initialize the minimum number of fields. */ 673 tlbctx->t_addrdata[0] = 0; 674 tlbctx->t_addrdata[1] = 0; 675 tlbctx->t_pmap = NULL; 676 LIST_INIT(&tlbctx->t_freeptq); 677 } 678 679 static void 680 pmap_tlb_shootdown(pmap_t const pmap, vaddr_t const va, 681 pt_entry_t const pte_bits, struct pmap_tlb_context * const tlbctx) 682 { 683 KASSERT(pmap != NULL); 684 KASSERT((va & PAGE_MASK) == 0); 685 686 /* 687 * Figure out who needs to hear about this, and the scope 688 * of an all-entries invalidate. 689 */ 690 if (pmap == pmap_kernel()) { 691 TLB_COUNT(shootdown_kernel); 692 KASSERT(pte_bits & PG_ASM); 693 TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_ASM); 694 695 /* Note if an I-stream sync is also needed. */ 696 if (pte_bits & PG_EXEC) { 697 TLB_COUNT(shootdown_kimb); 698 TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_KIMB); 699 } 700 } else { 701 TLB_COUNT(shootdown_user); 702 KASSERT((pte_bits & PG_ASM) == 0); 703 704 /* Note if an I-stream sync is also needed. */ 705 if (pte_bits & PG_EXEC) { 706 TLB_COUNT(shootdown_imb); 707 TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_IMB); 708 } 709 } 710 711 KASSERT(tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap); 712 tlbctx->t_pmap = pmap; 713 714 /* 715 * If we're already at the max, just tell each active CPU 716 * to nail everything. 
717 */ 718 const uintptr_t count = TLB_CTX_COUNT(tlbctx); 719 if (count > TLB_CTX_MAXVA) { 720 return; 721 } 722 if (count == TLB_CTX_MAXVA) { 723 TLB_COUNT(shootdown_overflow); 724 TLB_CTX_SET_ALLVA(tlbctx); 725 return; 726 } 727 728 TLB_CTX_SETVA(tlbctx, count, va); 729 TLB_CTX_INC_COUNT(tlbctx); 730 } 731 732 static void 733 pmap_tlb_shootdown_all_user(pmap_t const pmap, pt_entry_t const pte_bits, 734 struct pmap_tlb_context * const tlbctx) 735 { 736 KASSERT(pmap != pmap_kernel()); 737 738 TLB_COUNT(shootdown_all_user); 739 740 /* Note if an I-stream sync is also needed. */ 741 if (pte_bits & PG_EXEC) { 742 TLB_COUNT(shootdown_all_user_imb); 743 TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_IMB); 744 } 745 746 TLB_CTX_SET_ALLVA(tlbctx); 747 } 748 749 static void 750 pmap_tlb_shootdown_pv(const pv_entry_t pv, pt_entry_t const pte_bits, 751 struct pmap_tlb_context * const tlbctx) 752 { 753 uintptr_t flags = TLB_CTX_F_PV; 754 755 TLB_COUNT(shootdown_pv); 756 757 if (tlbctx->t_pmap == NULL || tlbctx->t_pmap == pv->pv_pmap) { 758 if (tlbctx->t_pmap == NULL) { 759 pmap_reference(pv->pv_pmap); 760 } 761 pmap_tlb_shootdown(pv->pv_pmap, pv->pv_va, pte_bits, tlbctx); 762 } else { 763 TLB_COUNT(shootdown_pv_multi); 764 flags |= TLB_CTX_F_MULTI; 765 if (pv->pv_pmap == pmap_kernel()) { 766 KASSERT(pte_bits & PG_ASM); 767 flags |= TLB_CTX_F_ASM; 768 } else { 769 KASSERT((pte_bits & PG_ASM) == 0); 770 } 771 772 /* 773 * No need to distinguish between kernel and user IMB 774 * here; see pmap_tlb_invalidate_multi(). 775 */ 776 if (pte_bits & PG_EXEC) { 777 flags |= TLB_CTX_F_IMB; 778 } 779 TLB_CTX_SET_ALLVA(tlbctx); 780 } 781 TLB_CTX_SET_FLAG(tlbctx, flags); 782 } 783 784 static void 785 pmap_tlb_invalidate_multi(const struct pmap_tlb_context * const tlbctx) 786 { 787 if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) { 788 TLB_COUNT(invalidate_multi_tbia); 789 ALPHA_TBIA(); 790 } else { 791 TLB_COUNT(invalidate_multi_tbiap); 792 ALPHA_TBIAP(); 793 } 794 if (TLB_CTX_FLAGS(tlbctx) & (TLB_CTX_F_IMB | TLB_CTX_F_KIMB)) { 795 TLB_COUNT(invalidate_multi_imb); 796 alpha_pal_imb(); 797 } 798 } 799 800 static void 801 pmap_tlb_invalidate_kernel(const struct pmap_tlb_context * const tlbctx) 802 { 803 const uintptr_t count = TLB_CTX_COUNT(tlbctx); 804 805 if (count == TLB_CTX_ALLVA) { 806 TLB_COUNT(invalidate_kern_tbia); 807 ALPHA_TBIA(); 808 } else { 809 TLB_COUNT(invalidate_kern_tbis); 810 for (uintptr_t i = 0; i < count; i++) { 811 ALPHA_TBIS(TLB_CTX_VA(tlbctx, i)); 812 } 813 } 814 if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_KIMB) { 815 TLB_COUNT(invalidate_kern_imb); 816 alpha_pal_imb(); 817 } 818 } 819 820 static void 821 pmap_tlb_invalidate(const struct pmap_tlb_context * const tlbctx, 822 const struct cpu_info * const ci) 823 { 824 const uintptr_t count = TLB_CTX_COUNT(tlbctx); 825 826 if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_MULTI) { 827 pmap_tlb_invalidate_multi(tlbctx); 828 return; 829 } 830 831 if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) { 832 pmap_tlb_invalidate_kernel(tlbctx); 833 return; 834 } 835 836 KASSERT(kpreempt_disabled()); 837 838 pmap_t const pmap = tlbctx->t_pmap; 839 KASSERT(pmap != NULL); 840 841 const u_long cpu_mask = 1UL << ci->ci_cpuid; 842 843 if (__predict_false(pmap != ci->ci_pmap)) { 844 TLB_COUNT(invalidate_user_not_current); 845 846 /* 847 * For CPUs that don't implement ASNs, the SWPCTX call 848 * does all of the TLB invalidation work for us. 
849 */ 850 if (__predict_false(pmap_max_asn == 0)) { 851 return; 852 } 853 854 /* 855 * We cannot directly invalidate the TLB in this case, 856 * so force allocation of a new ASN when the pmap becomes 857 * active again. 858 */ 859 pmap->pm_asni[ci->ci_cpuid].pma_asngen = PMAP_ASNGEN_INVALID; 860 atomic_and_ulong(&pmap->pm_cpus, ~cpu_mask); 861 862 /* 863 * This isn't strictly necessary; when we allocate a 864 * new ASN, we're going to clear this bit and skip 865 * syncing the I-stream. But we will keep this bit 866 * of accounting for internal consistency. 867 */ 868 if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_IMB) { 869 atomic_or_ulong(&pmap->pm_needisync, cpu_mask); 870 } 871 return; 872 } 873 874 if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_IMB) { 875 TLB_COUNT(invalidate_user_lazy_imb); 876 atomic_or_ulong(&pmap->pm_needisync, cpu_mask); 877 } 878 879 if (count == TLB_CTX_ALLVA) { 880 /* 881 * Another option here for CPUs that implement ASNs is 882 * to allocate a new ASN and do a SWPCTX. That's almost 883 * certainly faster than a TBIAP, but would require us 884 * to synchronize against IPIs in pmap_activate(). 885 */ 886 TLB_COUNT(invalidate_user_tbiap); 887 KASSERT((TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) == 0); 888 ALPHA_TBIAP(); 889 } else { 890 TLB_COUNT(invalidate_user_tbis); 891 for (uintptr_t i = 0; i < count; i++) { 892 ALPHA_TBIS(TLB_CTX_VA(tlbctx, i)); 893 } 894 } 895 } 896 897 static void 898 pmap_tlb_shootnow(const struct pmap_tlb_context * const tlbctx) 899 { 900 901 if (TLB_CTX_COUNT(tlbctx) == 0) { 902 /* No work to do. */ 903 return; 904 } 905 906 /* 907 * Acquire the shootdown mutex. This will also block IPL_VM 908 * interrupts and disable preemption. It is critically important 909 * that IPIs not be blocked in this routine. 910 */ 911 KASSERT((alpha_pal_rdps() & ALPHA_PSL_IPL_MASK) < ALPHA_PSL_IPL_CLOCK); 912 mutex_spin_enter(&tlb_lock); 913 tlb_evcnt.ev_count++; 914 915 const struct cpu_info *ci = curcpu(); 916 const u_long this_cpu = 1UL << ci->ci_cpuid; 917 u_long active_cpus; 918 bool activation_locked, activation_lock_tried; 919 920 /* 921 * Figure out who to notify. If it's for the kernel or 922 * multiple aaddress spaces, we notify everybody. If 923 * it's a single user pmap, then we try to acquire the 924 * activation lock so we can get an accurate accounting 925 * of who needs to be notified. If we can't acquire 926 * the activation lock, then just notify everyone and 927 * let them sort it out when they process the IPI. 928 */ 929 if (TLB_CTX_FLAGS(tlbctx) & (TLB_CTX_F_ASM | TLB_CTX_F_MULTI)) { 930 active_cpus = pmap_all_cpus(); 931 activation_locked = false; 932 activation_lock_tried = false; 933 } else { 934 KASSERT(tlbctx->t_pmap != NULL); 935 activation_locked = PMAP_ACT_TRYLOCK(tlbctx->t_pmap); 936 if (__predict_true(activation_locked)) { 937 active_cpus = tlbctx->t_pmap->pm_cpus; 938 } else { 939 TLB_COUNT(shootnow_over_notify); 940 active_cpus = pmap_all_cpus(); 941 } 942 activation_lock_tried = true; 943 } 944 945 #if defined(MULTIPROCESSOR) 946 /* 947 * If there are remote CPUs that need to do work, get them 948 * started now. 949 */ 950 const u_long remote_cpus = active_cpus & ~this_cpu; 951 KASSERT(tlb_context == NULL); 952 if (remote_cpus) { 953 TLB_COUNT(shootnow_remote); 954 tlb_context = tlbctx; 955 tlb_pending = remote_cpus; 956 alpha_multicast_ipi(remote_cpus, ALPHA_IPI_SHOOTDOWN); 957 } 958 #endif /* MULTIPROCESSOR */ 959 960 /* 961 * Now that the remotes have been notified, release the 962 * activation lock. 
963 */ 964 if (activation_lock_tried) { 965 if (activation_locked) { 966 KASSERT(tlbctx->t_pmap != NULL); 967 PMAP_ACT_UNLOCK(tlbctx->t_pmap); 968 } 969 /* 970 * When we tried to acquire the activation lock, we 971 * raised IPL to IPL_SCHED (even if we ultimately 972 * failed to acquire the lock), which blocks out IPIs. 973 * Force our IPL back down to IPL_VM so that we can 974 * receive IPIs. 975 */ 976 alpha_pal_swpipl(IPL_VM); 977 } 978 979 /* 980 * Do any work that we might need to do. We don't need to 981 * synchronize with activation here because we know that 982 * for the current CPU, activation status will not change. 983 */ 984 if (active_cpus & this_cpu) { 985 pmap_tlb_invalidate(tlbctx, ci); 986 } 987 988 #if defined(MULTIPROCESSOR) 989 /* Wait for remote CPUs to finish. */ 990 if (remote_cpus) { 991 int backoff = SPINLOCK_BACKOFF_MIN; 992 u_int spins = 0; 993 994 while (atomic_load_acquire(&tlb_context) != NULL) { 995 SPINLOCK_BACKOFF(backoff); 996 if (spins++ > 0x0fffffff) { 997 printf("TLB LOCAL MASK = 0x%016lx\n", 998 this_cpu); 999 printf("TLB REMOTE MASK = 0x%016lx\n", 1000 remote_cpus); 1001 printf("TLB REMOTE PENDING = 0x%016lx\n", 1002 tlb_pending); 1003 printf("TLB CONTEXT = %p\n", tlb_context); 1004 printf("TLB LOCAL IPL = %lu\n", 1005 alpha_pal_rdps() & ALPHA_PSL_IPL_MASK); 1006 panic("pmap_tlb_shootnow"); 1007 } 1008 } 1009 } 1010 KASSERT(tlb_context == NULL); 1011 #endif /* MULTIPROCESSOR */ 1012 1013 mutex_spin_exit(&tlb_lock); 1014 1015 if (__predict_false(TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV)) { 1016 /* 1017 * P->V TLB operations may operate on multiple pmaps. 1018 * The shootdown takes a reference on the first pmap it 1019 * encounters, in order to prevent it from disappearing, 1020 * in the hope that we end up with a single-pmap P->V 1021 * operation (instrumentation shows this is not rare). 1022 * 1023 * Once this shootdown is finished globally, we need to 1024 * release this extra reference. 1025 */ 1026 KASSERT(tlbctx->t_pmap != NULL); 1027 pmap_destroy(tlbctx->t_pmap); 1028 } 1029 } 1030 1031 #if defined(MULTIPROCESSOR) 1032 void 1033 pmap_tlb_shootdown_ipi(struct cpu_info * const ci, 1034 struct trapframe * const tf __unused) 1035 { 1036 KASSERT(tlb_context != NULL); 1037 pmap_tlb_invalidate(tlb_context, ci); 1038 if (atomic_and_ulong_nv(&tlb_pending, ~(1UL << ci->ci_cpuid)) == 0) { 1039 atomic_store_release(&tlb_context, NULL); 1040 } 1041 } 1042 #endif /* MULTIPROCESSOR */ 1043 1044 static void 1045 pmap_tlb_physpage_free(paddr_t const ptpa, 1046 struct pmap_tlb_context * const tlbctx) 1047 { 1048 struct vm_page * const pg = PHYS_TO_VM_PAGE(ptpa); 1049 1050 KASSERT(pg != NULL); 1051 1052 #ifdef DEBUG 1053 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1054 KDASSERT(md->pvh_refcnt == 0); 1055 #endif 1056 1057 LIST_INSERT_HEAD(&tlbctx->t_freeptq, pg, pageq.list); 1058 } 1059 1060 static void 1061 pmap_tlb_ptpage_drain(struct pmap_tlb_context * const tlbctx) 1062 { 1063 struct vm_page *pg; 1064 1065 while ((pg = LIST_FIRST(&tlbctx->t_freeptq)) != NULL) { 1066 LIST_REMOVE(pg, pageq.list); 1067 uvm_pagefree(pg); 1068 } 1069 } 1070 1071 /* 1072 * Internal routines 1073 */ 1074 static void alpha_protection_init(void); 1075 static pt_entry_t pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, 1076 pv_entry_t *, 1077 struct pmap_tlb_context *); 1078 static void pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, 1079 struct pmap_tlb_context *); 1080 1081 /* 1082 * PT page management functions. 
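 *
 * For reference (mirroring pmap_remove_internal() and pmap_protect()
 * elsewhere in this file), a user VA is resolved through the three
 * levels as:
 *
 *	l1pte = pmap_l1pte(pmap, va);
 *	l2pte = pmap_l2pte(pmap, va, l1pte);
 *	l3pte = pmap_l3pte(pmap, va, l2pte);
 *
 * The delref routines below drop the references that mappings hold on
 * their PT pages; once a PT page contains no valid mappings it is
 * queued on the TLB context's free list and finally released by
 * pmap_tlb_ptpage_drain().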
1083 */ 1084 static int pmap_ptpage_alloc(pt_entry_t *, int); 1085 static void pmap_ptpage_free(pt_entry_t *, struct pmap_tlb_context *); 1086 static void pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, 1087 struct pmap_tlb_context *); 1088 static void pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, 1089 struct pmap_tlb_context *); 1090 static void pmap_l1pt_delref(pmap_t, pt_entry_t *); 1091 1092 static void *pmap_l1pt_alloc(struct pool *, int); 1093 static void pmap_l1pt_free(struct pool *, void *); 1094 1095 static struct pool_allocator pmap_l1pt_allocator = { 1096 pmap_l1pt_alloc, pmap_l1pt_free, 0, 1097 }; 1098 1099 static int pmap_l1pt_ctor(void *, void *, int); 1100 1101 /* 1102 * PV table management functions. 1103 */ 1104 static int pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *, 1105 bool, pv_entry_t); 1106 static void pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool, 1107 pv_entry_t *); 1108 static void *pmap_pv_page_alloc(struct pool *, int); 1109 static void pmap_pv_page_free(struct pool *, void *); 1110 1111 static struct pool_allocator pmap_pv_page_allocator = { 1112 pmap_pv_page_alloc, pmap_pv_page_free, 0, 1113 }; 1114 1115 #ifdef DEBUG 1116 void pmap_pv_dump(paddr_t); 1117 #endif 1118 1119 #define pmap_pv_alloc() pool_cache_get(&pmap_pv_cache, PR_NOWAIT) 1120 #define pmap_pv_free(pv) pool_cache_put(&pmap_pv_cache, (pv)) 1121 1122 /* 1123 * ASN management functions. 1124 */ 1125 static u_int pmap_asn_alloc(pmap_t, struct cpu_info *); 1126 1127 /* 1128 * Misc. functions. 1129 */ 1130 static bool pmap_physpage_alloc(int, paddr_t *); 1131 static void pmap_physpage_free(paddr_t); 1132 static int pmap_physpage_addref(void *); 1133 static int pmap_physpage_delref(void *); 1134 1135 static bool vtophys_internal(vaddr_t, paddr_t *p); 1136 1137 /* 1138 * PMAP_KERNEL_PTE: 1139 * 1140 * Get a kernel PTE. 1141 * 1142 * If debugging, do a table walk. If not debugging, just use 1143 * the Virtual Page Table, since all kernel page tables are 1144 * pre-allocated and mapped in. 1145 */ 1146 #ifdef DEBUG 1147 #define PMAP_KERNEL_PTE(va) \ 1148 ({ \ 1149 pt_entry_t *l1pte_, *l2pte_; \ 1150 \ 1151 l1pte_ = pmap_l1pte(pmap_kernel(), va); \ 1152 if (pmap_pte_v(l1pte_) == 0) { \ 1153 printf("kernel level 1 PTE not valid, va 0x%lx " \ 1154 "(line %d)\n", (va), __LINE__); \ 1155 panic("PMAP_KERNEL_PTE"); \ 1156 } \ 1157 l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_); \ 1158 if (pmap_pte_v(l2pte_) == 0) { \ 1159 printf("kernel level 2 PTE not valid, va 0x%lx " \ 1160 "(line %d)\n", (va), __LINE__); \ 1161 panic("PMAP_KERNEL_PTE"); \ 1162 } \ 1163 pmap_l3pte(pmap_kernel(), va, l2pte_); \ 1164 }) 1165 #else 1166 #define PMAP_KERNEL_PTE(va) (&VPT[VPT_INDEX((va))]) 1167 #endif 1168 1169 /* 1170 * PMAP_STAT_{INCR,DECR}: 1171 * 1172 * Increment or decrement a pmap statistic. 1173 */ 1174 #define PMAP_STAT_INCR(s, v) atomic_add_long((unsigned long *)(&(s)), (v)) 1175 #define PMAP_STAT_DECR(s, v) atomic_add_long((unsigned long *)(&(s)), -(v)) 1176 1177 /* 1178 * pmap_init_cpu: 1179 * 1180 * Initilize pmap data in the cpu_info. 1181 */ 1182 void 1183 pmap_init_cpu(struct cpu_info * const ci) 1184 { 1185 pmap_t const pmap = pmap_kernel(); 1186 1187 /* All CPUs start out using the kernel pmap. */ 1188 atomic_or_ulong(&pmap->pm_cpus, 1UL << ci->ci_cpuid); 1189 pmap_reference(pmap); 1190 ci->ci_pmap = pmap; 1191 1192 /* Initialize ASN allocation logic. 
*/ 1193 ci->ci_next_asn = PMAP_ASN_FIRST_USER; 1194 ci->ci_asn_gen = PMAP_ASNGEN_INITIAL; 1195 } 1196 1197 /* 1198 * pmap_bootstrap: 1199 * 1200 * Bootstrap the system to run with virtual memory. 1201 * 1202 * Note: no locking is necessary in this function. 1203 */ 1204 void 1205 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) 1206 { 1207 vsize_t lev2mapsize, lev3mapsize; 1208 pt_entry_t *lev2map, *lev3map; 1209 pt_entry_t pte; 1210 vsize_t bufsz; 1211 struct pcb *pcb; 1212 int i; 1213 1214 #ifdef DEBUG 1215 if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP)) 1216 printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn); 1217 #endif 1218 1219 /* 1220 * Compute the number of pages kmem_arena will have. 1221 */ 1222 kmeminit_nkmempages(); 1223 1224 /* 1225 * Figure out how many initial PTE's are necessary to map the 1226 * kernel. We also reserve space for kmem_alloc_pageable() 1227 * for vm_fork(). 1228 */ 1229 1230 /* Get size of buffer cache and set an upper limit */ 1231 bufsz = buf_memcalc(); 1232 buf_setvalimit(bufsz); 1233 1234 lev3mapsize = 1235 (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) + 1236 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE + 1237 (maxproc * UPAGES) + nkmempages; 1238 1239 lev3mapsize = roundup(lev3mapsize, NPTEPG); 1240 1241 /* 1242 * Initialize `FYI' variables. Note we're relying on 1243 * the fact that BSEARCH sorts the vm_physmem[] array 1244 * for us. 1245 */ 1246 avail_start = ptoa(uvm_physseg_get_avail_start(uvm_physseg_get_first())); 1247 avail_end = ptoa(uvm_physseg_get_avail_end(uvm_physseg_get_last())); 1248 virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE; 1249 1250 #if 0 1251 printf("avail_start = 0x%lx\n", avail_start); 1252 printf("avail_end = 0x%lx\n", avail_end); 1253 printf("virtual_end = 0x%lx\n", virtual_end); 1254 #endif 1255 1256 /* 1257 * Allocate a level 1 PTE table for the kernel. 1258 * This is always one page long. 1259 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 1260 */ 1261 kernel_lev1map = (pt_entry_t *) 1262 uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG); 1263 1264 /* 1265 * Allocate a level 2 PTE table for the kernel. 1266 * These must map all of the level3 PTEs. 1267 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 1268 */ 1269 lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG); 1270 lev2map = (pt_entry_t *) 1271 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize); 1272 1273 /* 1274 * Allocate a level 3 PTE table for the kernel. 1275 * Contains lev3mapsize PTEs. 1276 */ 1277 lev3map = (pt_entry_t *) 1278 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize); 1279 1280 /* 1281 * Set up level 1 page table 1282 */ 1283 1284 /* Map all of the level 2 pte pages */ 1285 for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) { 1286 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) + 1287 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 1288 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 1289 kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS + 1290 (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte; 1291 } 1292 1293 /* Map the virtual page table */ 1294 pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT) 1295 << PG_SHIFT; 1296 pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */ 1297 kernel_lev1map[l1pte_index(VPTBASE)] = pte; 1298 VPT = (pt_entry_t *)VPTBASE; 1299 1300 /* 1301 * Set up level 2 page table. 
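 *
 * As with the level 1 entries above, each PTE below is formed by
 * placing the page frame number of the K0SEG-allocated PT page into
 * the PFN field and OR'ing in the valid/protection bits, roughly:
 *
 *	pte = (ALPHA_K0SEG_TO_PHYS(pt_page_va) >> PGSHIFT) << PG_SHIFT;
 *	pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
 *
 * (pt_page_va here is just an illustrative name for the PT page's
 * K0SEG address.)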
1302 */ 1303 /* Map all of the level 3 pte pages */ 1304 for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) { 1305 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) + 1306 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 1307 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 1308 lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+ 1309 (i*PAGE_SIZE*NPTEPG))] = pte; 1310 } 1311 1312 /* Initialize the pmap_growkernel_lock. */ 1313 rw_init(&pmap_growkernel_lock); 1314 1315 /* 1316 * Set up level three page table (lev3map) 1317 */ 1318 /* Nothing to do; it's already zero'd */ 1319 1320 /* 1321 * Initialize the pmap pools and list. 1322 */ 1323 pmap_ncpuids = ncpuids; 1324 pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 1325 COHERENCY_UNIT, 0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL); 1326 pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt", 1327 &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL); 1328 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 1329 PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL, 1330 NULL, NULL); 1331 1332 TAILQ_INIT(&pmap_all_pmaps); 1333 1334 /* Initialize the ASN logic. See also pmap_init_cpu(). */ 1335 pmap_max_asn = maxasn; 1336 1337 /* 1338 * Initialize the locks. 1339 */ 1340 rw_init(&pmap_main_lock); 1341 mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 1342 for (i = 0; i < __arraycount(pmap_pvh_locks); i++) { 1343 mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE); 1344 } 1345 for (i = 0; i < __arraycount(pmap_pvh_locks); i++) { 1346 mutex_init(&pmap_pmap_locks[i].locks.lock, 1347 MUTEX_DEFAULT, IPL_NONE); 1348 mutex_init(&pmap_pmap_locks[i].locks.activation_lock, 1349 MUTEX_SPIN, IPL_SCHED); 1350 } 1351 1352 /* 1353 * This must block any interrupt from which a TLB shootdown 1354 * could be issued, but must NOT block IPIs. 1355 */ 1356 mutex_init(&tlb_lock, MUTEX_SPIN, IPL_VM); 1357 1358 /* 1359 * Initialize kernel pmap. Note that all kernel mappings 1360 * have PG_ASM set, so the ASN doesn't really matter for 1361 * the kernel pmap. Also, since the kernel pmap always 1362 * references kernel_lev1map, it always has an invalid ASN 1363 * generation. 1364 */ 1365 memset(pmap_kernel(), 0, sizeof(struct pmap)); 1366 pmap_kernel()->pm_lev1map = kernel_lev1map; 1367 pmap_kernel()->pm_count = 1; 1368 /* Kernel pmap does not have ASN info. */ 1369 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); 1370 1371 /* 1372 * Set up lwp0's PCB such that the ptbr points to the right place 1373 * and has the kernel pmap's (really unused) ASN. 1374 */ 1375 pcb = lwp_getpcb(&lwp0); 1376 pcb->pcb_hw.apcb_ptbr = 1377 ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT; 1378 pcb->pcb_hw.apcb_asn = PMAP_ASN_KERNEL; 1379 1380 struct cpu_info * const ci = curcpu(); 1381 pmap_init_cpu(ci); 1382 } 1383 1384 /* 1385 * pmap_virtual_space: [ INTERFACE ] 1386 * 1387 * Define the initial bounds of the kernel virtual address space. 1388 */ 1389 void 1390 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) 1391 { 1392 1393 *vstartp = VM_MIN_KERNEL_ADDRESS; /* kernel is in K0SEG */ 1394 *vendp = VM_MAX_KERNEL_ADDRESS; /* we use pmap_growkernel */ 1395 } 1396 1397 /* 1398 * pmap_steal_memory: [ INTERFACE ] 1399 * 1400 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()). 1401 * This function allows for early dynamic memory allocation until the 1402 * virtual memory system has been bootstrapped. After that point, either 1403 * kmem_alloc or malloc should be used. 
This function works by stealing 1404 * pages from the (to be) managed page pool, then implicitly mapping the 1405 * pages (by using their k0seg addresses) and zeroing them. 1406 * 1407 * It may be used once the physical memory segments have been pre-loaded 1408 * into the vm_physmem[] array. Early memory allocation MUST use this 1409 * interface! This cannot be used after vm_page_startup(), and will 1410 * generate a panic if tried. 1411 * 1412 * Note that this memory will never be freed, and in essence it is wired 1413 * down. 1414 * 1415 * We must adjust *vstartp and/or *vendp iff we use address space 1416 * from the kernel virtual address range defined by pmap_virtual_space(). 1417 * 1418 * Note: no locking is necessary in this function. 1419 */ 1420 vaddr_t 1421 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp) 1422 { 1423 int npgs; 1424 vaddr_t va; 1425 paddr_t pa; 1426 1427 uvm_physseg_t bank; 1428 1429 size = round_page(size); 1430 npgs = atop(size); 1431 1432 #if 0 1433 printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs); 1434 #endif 1435 1436 for (bank = uvm_physseg_get_first(); 1437 uvm_physseg_valid_p(bank); 1438 bank = uvm_physseg_get_next(bank)) { 1439 if (uvm.page_init_done == true) 1440 panic("pmap_steal_memory: called _after_ bootstrap"); 1441 1442 #if 0 1443 printf(" bank %d: avail_start 0x%"PRIxPADDR", start 0x%"PRIxPADDR", " 1444 "avail_end 0x%"PRIxPADDR"\n", bank, uvm_physseg_get_avail_start(bank), 1445 uvm_physseg_get_start(bank), uvm_physseg_get_avail_end(bank)); 1446 #endif 1447 1448 if (uvm_physseg_get_avail_start(bank) != uvm_physseg_get_start(bank) || 1449 uvm_physseg_get_avail_start(bank) >= uvm_physseg_get_avail_end(bank)) 1450 continue; 1451 1452 #if 0 1453 printf(" avail_end - avail_start = 0x%"PRIxPADDR"\n", 1454 uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank)); 1455 #endif 1456 1457 if (uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank) 1458 < npgs) 1459 continue; 1460 1461 /* 1462 * There are enough pages here; steal them! 1463 */ 1464 pa = ptoa(uvm_physseg_get_start(bank)); 1465 uvm_physseg_unplug(atop(pa), npgs); 1466 1467 va = ALPHA_PHYS_TO_K0SEG(pa); 1468 memset((void *)va, 0, size); 1469 pmap_pages_stolen += npgs; 1470 return (va); 1471 } 1472 1473 /* 1474 * If we got here, this was no memory left. 1475 */ 1476 panic("pmap_steal_memory: no memory to steal"); 1477 } 1478 1479 /* 1480 * pmap_init: [ INTERFACE ] 1481 * 1482 * Initialize the pmap module. Called by vm_init(), to initialize any 1483 * structures that the pmap system needs to map virtual memory. 1484 * 1485 * Note: no locking is necessary in this function. 1486 */ 1487 void 1488 pmap_init(void) 1489 { 1490 1491 #ifdef DEBUG 1492 if (pmapdebug & PDB_FOLLOW) 1493 printf("pmap_init()\n"); 1494 #endif 1495 1496 /* initialize protection array */ 1497 alpha_protection_init(); 1498 1499 /* Initialize TLB handling. */ 1500 pmap_tlb_init(); 1501 1502 /* 1503 * Set a low water mark on the pv_entry pool, so that we are 1504 * more likely to have these around even in extreme memory 1505 * starvation. 1506 */ 1507 pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat); 1508 1509 /* 1510 * Now it is safe to enable pv entry recording. 
1511 */ 1512 pmap_initialized = true; 1513 1514 #if 0 1515 for (uvm_physseg_t bank = uvm_physseg_get_first(); 1516 uvm_physseg_valid_p(bank); 1517 bank = uvm_physseg_get_next(bank)) { 1518 printf("bank %d\n", bank); 1519 printf("\tstart = 0x%lx\n", ptoa(uvm_physseg_get_start(bank))); 1520 printf("\tend = 0x%lx\n", ptoa(uvm_physseg_get_end(bank))); 1521 printf("\tavail_start = 0x%lx\n", 1522 ptoa(uvm_physseg_get_avail_start(bank))); 1523 printf("\tavail_end = 0x%lx\n", 1524 ptoa(uvm_physseg_get_avail_end(bank))); 1525 } 1526 #endif 1527 } 1528 1529 /* 1530 * pmap_create: [ INTERFACE ] 1531 * 1532 * Create and return a physical map. 1533 * 1534 * Note: no locking is necessary in this function. 1535 */ 1536 pmap_t 1537 pmap_create(void) 1538 { 1539 pmap_t pmap; 1540 int i; 1541 1542 #ifdef DEBUG 1543 if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) 1544 printf("pmap_create()\n"); 1545 #endif 1546 1547 pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK); 1548 memset(pmap, 0, sizeof(*pmap)); 1549 1550 pmap->pm_count = 1; 1551 1552 /* 1553 * There are only kernel mappings at this point; give the pmap 1554 * the kernel ASN. This will be initialized to correct values 1555 * when the pmap is activated. 1556 */ 1557 for (i = 0; i < pmap_ncpuids; i++) { 1558 pmap->pm_asni[i].pma_asn = PMAP_ASN_KERNEL; 1559 pmap->pm_asni[i].pma_asngen = PMAP_ASNGEN_INVALID; 1560 } 1561 1562 try_again: 1563 rw_enter(&pmap_growkernel_lock, RW_READER); 1564 1565 pmap->pm_lev1map = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT); 1566 if (__predict_false(pmap->pm_lev1map == NULL)) { 1567 rw_exit(&pmap_growkernel_lock); 1568 (void) kpause("pmap_create", false, hz >> 2, NULL); 1569 goto try_again; 1570 } 1571 1572 mutex_enter(&pmap_all_pmaps_lock); 1573 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); 1574 mutex_exit(&pmap_all_pmaps_lock); 1575 1576 rw_exit(&pmap_growkernel_lock); 1577 1578 return (pmap); 1579 } 1580 1581 /* 1582 * pmap_destroy: [ INTERFACE ] 1583 * 1584 * Drop the reference count on the specified pmap, releasing 1585 * all resources if the reference count drops to zero. 1586 */ 1587 void 1588 pmap_destroy(pmap_t pmap) 1589 { 1590 1591 #ifdef DEBUG 1592 if (pmapdebug & PDB_FOLLOW) 1593 printf("pmap_destroy(%p)\n", pmap); 1594 #endif 1595 1596 PMAP_MP(membar_exit()); 1597 if (atomic_dec_ulong_nv(&pmap->pm_count) > 0) 1598 return; 1599 1600 rw_enter(&pmap_growkernel_lock, RW_READER); 1601 1602 /* 1603 * Remove it from the global list of all pmaps. 1604 */ 1605 mutex_enter(&pmap_all_pmaps_lock); 1606 TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); 1607 mutex_exit(&pmap_all_pmaps_lock); 1608 1609 pool_cache_put(&pmap_l1pt_cache, pmap->pm_lev1map); 1610 pmap->pm_lev1map = NULL; 1611 1612 rw_exit(&pmap_growkernel_lock); 1613 1614 pool_cache_put(&pmap_pmap_cache, pmap); 1615 } 1616 1617 /* 1618 * pmap_reference: [ INTERFACE ] 1619 * 1620 * Add a reference to the specified pmap. 1621 */ 1622 void 1623 pmap_reference(pmap_t pmap) 1624 { 1625 1626 #ifdef DEBUG 1627 if (pmapdebug & PDB_FOLLOW) 1628 printf("pmap_reference(%p)\n", pmap); 1629 #endif 1630 1631 atomic_inc_ulong(&pmap->pm_count); 1632 PMAP_MP(membar_enter()); 1633 } 1634 1635 /* 1636 * pmap_remove: [ INTERFACE ] 1637 * 1638 * Remove the given range of addresses from the specified map. 1639 * 1640 * It is assumed that the start and end are properly 1641 * rounded to the page size. 
1642 */ 1643 static void 1644 pmap_remove_internal(pmap_t pmap, vaddr_t sva, vaddr_t eva, 1645 struct pmap_tlb_context * const tlbctx) 1646 { 1647 pt_entry_t *l1pte, *l2pte, *l3pte; 1648 pt_entry_t *saved_l2pte, *saved_l3pte; 1649 vaddr_t l1eva, l2eva, l3vptva; 1650 pt_entry_t pte_bits; 1651 1652 #ifdef DEBUG 1653 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1654 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1655 #endif 1656 1657 /* 1658 * If this is the kernel pmap, we can use a faster method 1659 * for accessing the PTEs (since the PT pages are always 1660 * resident). 1661 * 1662 * Note that this routine should NEVER be called from an 1663 * interrupt context; pmap_kremove() is used for that. 1664 */ 1665 if (pmap == pmap_kernel()) { 1666 PMAP_MAP_TO_HEAD_LOCK(); 1667 PMAP_LOCK(pmap); 1668 1669 while (sva < eva) { 1670 l3pte = PMAP_KERNEL_PTE(sva); 1671 if (pmap_pte_v(l3pte)) { 1672 pte_bits = pmap_remove_mapping(pmap, sva, 1673 l3pte, true, NULL, tlbctx); 1674 pmap_tlb_shootdown(pmap, sva, pte_bits, 1675 tlbctx); 1676 } 1677 sva += PAGE_SIZE; 1678 } 1679 1680 PMAP_MAP_TO_HEAD_UNLOCK(); 1681 PMAP_UNLOCK(pmap); 1682 pmap_tlb_shootnow(tlbctx); 1683 pmap_tlb_ptpage_drain(tlbctx); 1684 TLB_COUNT(reason_remove_kernel); 1685 1686 return; 1687 } 1688 1689 KASSERT(sva < VM_MAXUSER_ADDRESS); 1690 KASSERT(eva <= VM_MAXUSER_ADDRESS); 1691 KASSERT(pmap->pm_lev1map != kernel_lev1map); 1692 1693 PMAP_MAP_TO_HEAD_LOCK(); 1694 PMAP_LOCK(pmap); 1695 1696 l1pte = pmap_l1pte(pmap, sva); 1697 1698 for (; sva < eva; sva = l1eva, l1pte++) { 1699 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1700 if (pmap_pte_v(l1pte)) { 1701 saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte); 1702 1703 /* 1704 * Add a reference to the L2 table so it won't 1705 * get removed from under us. 1706 */ 1707 pmap_physpage_addref(saved_l2pte); 1708 1709 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1710 l2eva = 1711 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1712 if (pmap_pte_v(l2pte)) { 1713 saved_l3pte = l3pte = 1714 pmap_l3pte(pmap, sva, l2pte); 1715 1716 /* 1717 * Add a reference to the L3 table so 1718 * it won't get removed from under us. 1719 */ 1720 pmap_physpage_addref(saved_l3pte); 1721 1722 /* 1723 * Remember this sva; if the L3 table 1724 * gets removed, we need to invalidate 1725 * the VPT TLB entry for it. 1726 */ 1727 l3vptva = sva; 1728 1729 for (; sva < l2eva && sva < eva; 1730 sva += PAGE_SIZE, l3pte++) { 1731 if (!pmap_pte_v(l3pte)) { 1732 continue; 1733 } 1734 pte_bits = 1735 pmap_remove_mapping( 1736 pmap, sva, 1737 l3pte, true, 1738 NULL, tlbctx); 1739 pmap_tlb_shootdown(pmap, 1740 sva, pte_bits, tlbctx); 1741 } 1742 1743 /* 1744 * Remove the reference to the L3 1745 * table that we added above. This 1746 * may free the L3 table. 1747 */ 1748 pmap_l3pt_delref(pmap, l3vptva, 1749 saved_l3pte, tlbctx); 1750 } 1751 } 1752 1753 /* 1754 * Remove the reference to the L2 table that we 1755 * added above. This may free the L2 table. 
1756 */ 1757 pmap_l2pt_delref(pmap, l1pte, saved_l2pte, tlbctx); 1758 } 1759 } 1760 1761 PMAP_MAP_TO_HEAD_UNLOCK(); 1762 PMAP_UNLOCK(pmap); 1763 pmap_tlb_shootnow(tlbctx); 1764 pmap_tlb_ptpage_drain(tlbctx); 1765 TLB_COUNT(reason_remove_user); 1766 } 1767 1768 void 1769 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 1770 { 1771 struct pmap_tlb_context tlbctx; 1772 1773 pmap_tlb_context_init(&tlbctx); 1774 pmap_remove_internal(pmap, sva, eva, &tlbctx); 1775 } 1776 1777 /* 1778 * pmap_page_protect: [ INTERFACE ] 1779 * 1780 * Lower the permission for all mappings to a given page to 1781 * the permissions specified. 1782 */ 1783 void 1784 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 1785 { 1786 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1787 pv_entry_t pv, nextpv; 1788 pt_entry_t opte; 1789 kmutex_t *lock; 1790 struct pmap_tlb_context tlbctx; 1791 1792 #ifdef DEBUG 1793 paddr_t pa = VM_PAGE_TO_PHYS(pg); 1794 1795 1796 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) || 1797 (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE))) 1798 printf("pmap_page_protect(%p, %x)\n", pg, prot); 1799 #endif 1800 1801 pmap_tlb_context_init(&tlbctx); 1802 1803 switch (prot) { 1804 case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE: 1805 case VM_PROT_READ|VM_PROT_WRITE: 1806 return; 1807 1808 /* copy_on_write */ 1809 case VM_PROT_READ|VM_PROT_EXECUTE: 1810 case VM_PROT_READ: 1811 PMAP_HEAD_TO_MAP_LOCK(); 1812 lock = pmap_pvh_lock(pg); 1813 mutex_enter(lock); 1814 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 1815 PMAP_LOCK(pv->pv_pmap); 1816 opte = atomic_load_relaxed(pv->pv_pte); 1817 if (opte & (PG_KWE | PG_UWE)) { 1818 atomic_store_relaxed(pv->pv_pte, 1819 opte & ~(PG_KWE | PG_UWE)); 1820 pmap_tlb_shootdown_pv(pv, opte, &tlbctx); 1821 } 1822 PMAP_UNLOCK(pv->pv_pmap); 1823 } 1824 mutex_exit(lock); 1825 PMAP_HEAD_TO_MAP_UNLOCK(); 1826 pmap_tlb_shootnow(&tlbctx); 1827 TLB_COUNT(reason_page_protect_read); 1828 return; 1829 1830 /* remove_all */ 1831 default: 1832 break; 1833 } 1834 1835 PMAP_HEAD_TO_MAP_LOCK(); 1836 lock = pmap_pvh_lock(pg); 1837 mutex_enter(lock); 1838 for (pv = md->pvh_list; pv != NULL; pv = nextpv) { 1839 pt_entry_t pte_bits; 1840 1841 nextpv = pv->pv_next; 1842 1843 PMAP_LOCK(pv->pv_pmap); 1844 pte_bits = pmap_remove_mapping(pv->pv_pmap, pv->pv_va, 1845 pv->pv_pte, false, NULL, &tlbctx); 1846 pmap_tlb_shootdown_pv(pv, pte_bits, &tlbctx); 1847 PMAP_UNLOCK(pv->pv_pmap); 1848 } 1849 mutex_exit(lock); 1850 PMAP_HEAD_TO_MAP_UNLOCK(); 1851 pmap_tlb_shootnow(&tlbctx); 1852 pmap_tlb_ptpage_drain(&tlbctx); 1853 TLB_COUNT(reason_page_protect_none); 1854 } 1855 1856 /* 1857 * pmap_protect: [ INTERFACE ] 1858 * 1859 * Set the physical protection on the specified range of this map 1860 * as requested. 
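 *
 *	Note that a protection that does not include VM_PROT_READ is
 *	treated as a removal of the range; see the pmap_remove_internal()
 *	call at the top of the function.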
1861 */
1862 void
1863 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1864 {
1865 pt_entry_t *l1pte, *l2pte, *l3pte, opte;
1866 vaddr_t l1eva, l2eva;
1867 struct pmap_tlb_context tlbctx;
1868
1869 #ifdef DEBUG
1870 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1871 printf("pmap_protect(%p, %lx, %lx, %x)\n",
1872 pmap, sva, eva, prot);
1873 #endif
1874
1875 pmap_tlb_context_init(&tlbctx);
1876
1877 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1878 pmap_remove_internal(pmap, sva, eva, &tlbctx);
1879 return;
1880 }
1881
1882 const pt_entry_t bits = pte_prot(pmap, prot);
1883
1884 PMAP_LOCK(pmap);
1885
1886 l1pte = pmap_l1pte(pmap, sva);
1887 for (; sva < eva; sva = l1eva, l1pte++) {
1888 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1889 if (pmap_pte_v(l1pte)) {
1890 l2pte = pmap_l2pte(pmap, sva, l1pte);
1891 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1892 l2eva =
1893 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1894 if (pmap_pte_v(l2pte)) {
1895 l3pte = pmap_l3pte(pmap, sva, l2pte);
1896 for (; sva < l2eva && sva < eva;
1897 sva += PAGE_SIZE, l3pte++) {
1898 if (pmap_pte_v(l3pte) &&
1899 pmap_pte_prot_chg(l3pte,
1900 bits)) {
1901 opte = atomic_load_relaxed(l3pte);
1902 pmap_pte_set_prot(l3pte,
1903 bits);
1904 pmap_tlb_shootdown(pmap,
1905 sva, opte, &tlbctx);
1906 }
1907 }
1908 }
1909 }
1910 }
1911 }
1912
1913 PMAP_UNLOCK(pmap);
1914 pmap_tlb_shootnow(&tlbctx);
1915 TLB_COUNT(reason_protect);
1916 }
1917
1918 /*
1919 * pmap_enter_tlb_shootdown:
1920 *
1921 * Carry out a TLB shootdown on behalf of a pmap_enter()
1922 * or a pmap_kenter_pa(). This is factored out separately
1923 * because we do not expect it to be a common case.
1924 */
1925 static void __noinline
1926 pmap_enter_tlb_shootdown(pmap_t const pmap, vaddr_t const va,
1927 pt_entry_t const pte_bits, bool locked)
1928 {
1929 struct pmap_tlb_context tlbctx;
1930
1931 pmap_tlb_context_init(&tlbctx);
1932 pmap_tlb_shootdown(pmap, va, pte_bits, &tlbctx);
1933 if (locked) {
1934 PMAP_UNLOCK(pmap);
1935 }
1936 pmap_tlb_shootnow(&tlbctx);
1937 }
1938
1939 /*
1940 * pmap_enter_l2pt_delref:
1941 *
1942 * Release a reference on an L2 PT page for pmap_enter().
1943 * This is factored out separately because we expect it
1944 * to be a rare case.
1945 */
1946 static void __noinline
1947 pmap_enter_l2pt_delref(pmap_t const pmap, pt_entry_t * const l1pte,
1948 pt_entry_t * const l2pte)
1949 {
1950 struct pmap_tlb_context tlbctx;
1951
1952 /*
1953 * PALcode may have tried to service a TLB miss with
1954 * this L2 PTE, so we need to make sure we don't actually
1955 * free the PT page until we've shot down any TLB entries
1956 * for this VPT index.
1957 */
1958
1959 pmap_tlb_context_init(&tlbctx);
1960 pmap_l2pt_delref(pmap, l1pte, l2pte, &tlbctx);
1961 PMAP_UNLOCK(pmap);
1962 pmap_tlb_shootnow(&tlbctx);
1963 pmap_tlb_ptpage_drain(&tlbctx);
1964 TLB_COUNT(reason_enter_l2pt_delref);
1965 }
1966
1967 /*
1968 * pmap_enter_l3pt_delref:
1969 *
1970 * Release a reference on an L3 PT page for pmap_enter().
1971 * This is factored out separately because we expect it
1972 * to be a rare case.
1973 */
1974 static void __noinline
1975 pmap_enter_l3pt_delref(pmap_t const pmap, vaddr_t const va,
1976 pt_entry_t * const pte)
1977 {
1978 struct pmap_tlb_context tlbctx;
1979
1980 /*
1981 * PALcode may have tried to service a TLB miss with
1982 * this PTE, so we need to make sure we don't actually
1983 * free the PT page until we've shot down any TLB entries
1984 * for this VPT index.
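 *
 * Hence the ordering below: pmap_l3pt_delref() only queues the PT
 * page on the TLB context, pmap_tlb_shootnow() performs the
 * shootdown, and pmap_tlb_ptpage_drain() frees the page afterwards.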
1985 */ 1986 1987 pmap_tlb_context_init(&tlbctx); 1988 pmap_l3pt_delref(pmap, va, pte, &tlbctx); 1989 PMAP_UNLOCK(pmap); 1990 pmap_tlb_shootnow(&tlbctx); 1991 pmap_tlb_ptpage_drain(&tlbctx); 1992 TLB_COUNT(reason_enter_l3pt_delref); 1993 } 1994 1995 /* 1996 * pmap_enter: [ INTERFACE ] 1997 * 1998 * Insert the given physical page (p) at 1999 * the specified virtual address (v) in the 2000 * target physical map with the protection requested. 2001 * 2002 * If specified, the page will be wired down, meaning 2003 * that the related pte can not be reclaimed. 2004 * 2005 * Note: This is the only routine which MAY NOT lazy-evaluate 2006 * or lose information. That is, this routine must actually 2007 * insert this page into the given map NOW. 2008 */ 2009 int 2010 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 2011 { 2012 pt_entry_t *pte, npte, opte; 2013 pv_entry_t opv = NULL; 2014 paddr_t opa; 2015 bool tflush = false; 2016 int error = 0; 2017 kmutex_t *lock; 2018 2019 #ifdef DEBUG 2020 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 2021 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n", 2022 pmap, va, pa, prot, flags); 2023 #endif 2024 struct vm_page * const pg = PHYS_TO_VM_PAGE(pa); 2025 const bool wired = (flags & PMAP_WIRED) != 0; 2026 2027 PMAP_MAP_TO_HEAD_LOCK(); 2028 PMAP_LOCK(pmap); 2029 2030 if (pmap == pmap_kernel()) { 2031 KASSERT(va >= VM_MIN_KERNEL_ADDRESS); 2032 pte = PMAP_KERNEL_PTE(va); 2033 } else { 2034 pt_entry_t *l1pte, *l2pte; 2035 2036 KASSERT(va < VM_MAXUSER_ADDRESS); 2037 KASSERT(pmap->pm_lev1map != kernel_lev1map); 2038 2039 /* 2040 * Check to see if the level 1 PTE is valid, and 2041 * allocate a new level 2 page table page if it's not. 2042 * A reference will be added to the level 2 table when 2043 * the level 3 table is created. 2044 */ 2045 l1pte = pmap_l1pte(pmap, va); 2046 if (pmap_pte_v(l1pte) == 0) { 2047 pmap_physpage_addref(l1pte); 2048 error = pmap_ptpage_alloc(l1pte, PGU_L2PT); 2049 if (error) { 2050 pmap_l1pt_delref(pmap, l1pte); 2051 if (flags & PMAP_CANFAIL) 2052 goto out; 2053 panic("pmap_enter: unable to create L2 PT " 2054 "page"); 2055 } 2056 #ifdef DEBUG 2057 if (pmapdebug & PDB_PTPAGE) 2058 printf("pmap_enter: new level 2 table at " 2059 "0x%lx\n", pmap_pte_pa(l1pte)); 2060 #endif 2061 } 2062 2063 /* 2064 * Check to see if the level 2 PTE is valid, and 2065 * allocate a new level 3 page table page if it's not. 2066 * A reference will be added to the level 3 table when 2067 * the mapping is validated. 2068 */ 2069 l2pte = pmap_l2pte(pmap, va, l1pte); 2070 if (pmap_pte_v(l2pte) == 0) { 2071 pmap_physpage_addref(l2pte); 2072 error = pmap_ptpage_alloc(l2pte, PGU_L3PT); 2073 if (error) { 2074 /* unlocks pmap */ 2075 pmap_enter_l2pt_delref(pmap, l1pte, l2pte); 2076 if (flags & PMAP_CANFAIL) { 2077 PMAP_LOCK(pmap); 2078 goto out; 2079 } 2080 panic("pmap_enter: unable to create L3 PT " 2081 "page"); 2082 } 2083 #ifdef DEBUG 2084 if (pmapdebug & PDB_PTPAGE) 2085 printf("pmap_enter: new level 3 table at " 2086 "0x%lx\n", pmap_pte_pa(l2pte)); 2087 #endif 2088 } 2089 2090 /* 2091 * Get the PTE that will map the page. 2092 */ 2093 pte = pmap_l3pte(pmap, va, l2pte); 2094 } 2095 2096 /* Remember all of the old PTE; used for TBI check later. */ 2097 opte = atomic_load_relaxed(pte); 2098 2099 /* 2100 * Check to see if the old mapping is valid. If not, validate the 2101 * new one immediately. 2102 */ 2103 if ((opte & PG_V) == 0) { 2104 /* No TLB invalidatons needed for new mappings. 
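 * The old PTE was not valid, so no CPU can hold a stale TLB entry
 * for this VA; the TLB is only ever filled from valid PTEs.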
*/ 2105 2106 if (pmap != pmap_kernel()) { 2107 /* 2108 * New mappings gain a reference on the level 3 2109 * table. 2110 */ 2111 pmap_physpage_addref(pte); 2112 } 2113 goto validate_enterpv; 2114 } 2115 2116 opa = pmap_pte_pa(pte); 2117 2118 if (opa == pa) { 2119 /* 2120 * Mapping has not changed; must be a protection or 2121 * wiring change. 2122 */ 2123 if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) { 2124 #ifdef DEBUG 2125 if (pmapdebug & PDB_ENTER) 2126 printf("pmap_enter: wiring change -> %d\n", 2127 wired); 2128 #endif 2129 /* Adjust the wiring count. */ 2130 if (wired) 2131 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 2132 else 2133 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2134 } 2135 2136 /* Set the PTE. */ 2137 goto validate; 2138 } 2139 2140 /* 2141 * The mapping has changed. We need to invalidate the 2142 * old mapping before creating the new one. 2143 */ 2144 #ifdef DEBUG 2145 if (pmapdebug & PDB_ENTER) 2146 printf("pmap_enter: removing old mapping 0x%lx\n", va); 2147 #endif 2148 if (pmap != pmap_kernel()) { 2149 /* 2150 * Gain an extra reference on the level 3 table. 2151 * pmap_remove_mapping() will delete a reference, 2152 * and we don't want the table to be erroneously 2153 * freed. 2154 */ 2155 pmap_physpage_addref(pte); 2156 } 2157 /* Already have the bits from opte above. */ 2158 (void) pmap_remove_mapping(pmap, va, pte, true, &opv, NULL); 2159 2160 validate_enterpv: 2161 /* Enter the mapping into the pv_table if appropriate. */ 2162 if (pg != NULL) { 2163 error = pmap_pv_enter(pmap, pg, va, pte, true, opv); 2164 if (error) { 2165 /* This can only fail if opv == NULL */ 2166 KASSERT(opv == NULL); 2167 2168 /* unlocks pmap */ 2169 pmap_enter_l3pt_delref(pmap, va, pte); 2170 if (flags & PMAP_CANFAIL) { 2171 PMAP_LOCK(pmap); 2172 goto out; 2173 } 2174 panic("pmap_enter: unable to enter mapping in PV " 2175 "table"); 2176 } 2177 opv = NULL; 2178 } 2179 2180 /* Increment counters. */ 2181 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 2182 if (wired) 2183 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 2184 2185 validate: 2186 /* Build the new PTE. */ 2187 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V; 2188 if (pg != NULL) { 2189 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2190 int attrs; 2191 2192 KASSERT(((flags & VM_PROT_ALL) & ~prot) == 0); 2193 2194 lock = pmap_pvh_lock(pg); 2195 mutex_enter(lock); 2196 if (flags & VM_PROT_WRITE) 2197 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 2198 else if (flags & VM_PROT_ALL) 2199 md->pvh_attrs |= PGA_REFERENCED; 2200 attrs = md->pvh_attrs; 2201 mutex_exit(lock); 2202 2203 /* Set up referenced/modified emulation for new mapping. */ 2204 if ((attrs & PGA_REFERENCED) == 0) 2205 npte |= PG_FOR | PG_FOW | PG_FOE; 2206 else if ((attrs & PGA_MODIFIED) == 0) 2207 npte |= PG_FOW; 2208 2209 /* 2210 * Mapping was entered on PV list. 2211 */ 2212 npte |= PG_PVLIST; 2213 } 2214 if (wired) 2215 npte |= PG_WIRED; 2216 #ifdef DEBUG 2217 if (pmapdebug & PDB_ENTER) 2218 printf("pmap_enter: new pte = 0x%lx\n", npte); 2219 #endif 2220 2221 /* 2222 * If the HW / PALcode portion of the new PTE is the same as the 2223 * old PTE, no TBI is necessary. 2224 */ 2225 if (opte & PG_V) { 2226 tflush = PG_PALCODE(opte) != PG_PALCODE(npte); 2227 } 2228 2229 /* Set the new PTE. */ 2230 atomic_store_relaxed(pte, npte); 2231 2232 out: 2233 PMAP_MAP_TO_HEAD_UNLOCK(); 2234 2235 /* 2236 * Invalidate the TLB entry for this VA and any appropriate 2237 * caches. 
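 *
 * tflush is only set when a previously-valid mapping changes its
 * PALcode-visible PTE bits (see the PG_PALCODE() comparison above);
 * brand-new mappings never require a TLB invalidation here.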
2238 */ 2239 if (tflush) { 2240 /* unlocks pmap */ 2241 pmap_enter_tlb_shootdown(pmap, va, opte, true); 2242 if (pmap == pmap_kernel()) { 2243 TLB_COUNT(reason_enter_kernel); 2244 } else { 2245 TLB_COUNT(reason_enter_user); 2246 } 2247 } else { 2248 PMAP_UNLOCK(pmap); 2249 } 2250 2251 if (opv) 2252 pmap_pv_free(opv); 2253 2254 return error; 2255 } 2256 2257 /* 2258 * pmap_kenter_pa: [ INTERFACE ] 2259 * 2260 * Enter a va -> pa mapping into the kernel pmap without any 2261 * physical->virtual tracking. 2262 * 2263 * Note: no locking is necessary in this function. 2264 */ 2265 void 2266 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 2267 { 2268 pmap_t const pmap = pmap_kernel(); 2269 2270 #ifdef DEBUG 2271 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 2272 printf("pmap_kenter_pa(%lx, %lx, %x)\n", 2273 va, pa, prot); 2274 #endif 2275 2276 KASSERT(va >= VM_MIN_KERNEL_ADDRESS); 2277 2278 pt_entry_t * const pte = PMAP_KERNEL_PTE(va); 2279 2280 /* Build the new PTE. */ 2281 const pt_entry_t npte = 2282 ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) | 2283 PG_V | PG_WIRED; 2284 2285 /* Set the new PTE. */ 2286 const pt_entry_t opte = atomic_load_relaxed(pte); 2287 atomic_store_relaxed(pte, npte); 2288 2289 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 2290 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 2291 2292 /* 2293 * There should not have been anything here, previously, 2294 * so we can skip TLB shootdowns, etc. in the common case. 2295 */ 2296 if (__predict_false(opte & PG_V)) { 2297 const pt_entry_t diff = npte ^ opte; 2298 2299 printf_nolog("%s: mapping already present\n", __func__); 2300 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 2301 if (diff & PG_WIRED) 2302 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2303 /* XXX Can't handle this case. */ 2304 if (diff & PG_PVLIST) 2305 panic("pmap_kenter_pa: old mapping was managed"); 2306 2307 pmap_enter_tlb_shootdown(pmap_kernel(), va, opte, false); 2308 TLB_COUNT(reason_kenter); 2309 } 2310 } 2311 2312 /* 2313 * pmap_kremove: [ INTERFACE ] 2314 * 2315 * Remove a mapping entered with pmap_kenter_pa() starting at va, 2316 * for size bytes (assumed to be page rounded). 2317 */ 2318 void 2319 pmap_kremove(vaddr_t va, vsize_t size) 2320 { 2321 pt_entry_t *pte, opte; 2322 pmap_t const pmap = pmap_kernel(); 2323 struct pmap_tlb_context tlbctx; 2324 int count = 0; 2325 2326 #ifdef DEBUG 2327 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 2328 printf("pmap_kremove(%lx, %lx)\n", 2329 va, size); 2330 #endif 2331 2332 pmap_tlb_context_init(&tlbctx); 2333 2334 KASSERT(va >= VM_MIN_KERNEL_ADDRESS); 2335 2336 for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) { 2337 pte = PMAP_KERNEL_PTE(va); 2338 opte = atomic_load_relaxed(pte); 2339 if (opte & PG_V) { 2340 KASSERT((opte & PG_PVLIST) == 0); 2341 2342 /* Zap the mapping. */ 2343 atomic_store_relaxed(pte, PG_NV); 2344 pmap_tlb_shootdown(pmap, va, opte, &tlbctx); 2345 2346 count++; 2347 } 2348 } 2349 2350 /* Update stats. */ 2351 if (__predict_true(count != 0)) { 2352 PMAP_STAT_DECR(pmap->pm_stats.resident_count, count); 2353 PMAP_STAT_DECR(pmap->pm_stats.wired_count, count); 2354 } 2355 2356 pmap_tlb_shootnow(&tlbctx); 2357 TLB_COUNT(reason_kremove); 2358 } 2359 2360 /* 2361 * pmap_unwire: [ INTERFACE ] 2362 * 2363 * Clear the wired attribute for a map/virtual-address pair. 2364 * 2365 * The mapping must already exist in the pmap. 
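 *
 *	The KASSERTs below check this: the level 3 PTE for the VA must
 *	exist and be valid.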
2366 */ 2367 void 2368 pmap_unwire(pmap_t pmap, vaddr_t va) 2369 { 2370 pt_entry_t *pte; 2371 2372 #ifdef DEBUG 2373 if (pmapdebug & PDB_FOLLOW) 2374 printf("pmap_unwire(%p, %lx)\n", pmap, va); 2375 #endif 2376 2377 PMAP_LOCK(pmap); 2378 2379 pte = pmap_l3pte(pmap, va, NULL); 2380 2381 KASSERT(pte != NULL); 2382 KASSERT(pmap_pte_v(pte)); 2383 2384 /* 2385 * If wiring actually changed (always?) clear the wire bit and 2386 * update the wire count. Note that wiring is not a hardware 2387 * characteristic so there is no need to invalidate the TLB. 2388 */ 2389 if (pmap_pte_w_chg(pte, 0)) { 2390 pmap_pte_set_w(pte, false); 2391 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2392 } 2393 #ifdef DEBUG 2394 else { 2395 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 2396 "didn't change!\n", pmap, va); 2397 } 2398 #endif 2399 2400 PMAP_UNLOCK(pmap); 2401 } 2402 2403 /* 2404 * pmap_extract: [ INTERFACE ] 2405 * 2406 * Extract the physical address associated with the given 2407 * pmap/virtual address pair. 2408 */ 2409 bool 2410 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 2411 { 2412 pt_entry_t *l1pte, *l2pte, *l3pte; 2413 paddr_t pa; 2414 2415 #ifdef DEBUG 2416 if (pmapdebug & PDB_FOLLOW) 2417 printf("pmap_extract(%p, %lx) -> ", pmap, va); 2418 #endif 2419 2420 /* 2421 * Take a faster path for the kernel pmap. Avoids locking, 2422 * handles K0SEG. 2423 */ 2424 if (__predict_true(pmap == pmap_kernel())) { 2425 if (__predict_true(vtophys_internal(va, pap))) { 2426 #ifdef DEBUG 2427 if (pmapdebug & PDB_FOLLOW) 2428 printf("0x%lx (kernel vtophys)\n", pa); 2429 #endif 2430 return true; 2431 } 2432 #ifdef DEBUG 2433 if (pmapdebug & PDB_FOLLOW) 2434 printf("failed (kernel vtophys)\n"); 2435 #endif 2436 return false; 2437 } 2438 2439 PMAP_LOCK(pmap); 2440 2441 l1pte = pmap_l1pte(pmap, va); 2442 if (pmap_pte_v(l1pte) == 0) 2443 goto out; 2444 2445 l2pte = pmap_l2pte(pmap, va, l1pte); 2446 if (pmap_pte_v(l2pte) == 0) 2447 goto out; 2448 2449 l3pte = pmap_l3pte(pmap, va, l2pte); 2450 if (pmap_pte_v(l3pte) == 0) 2451 goto out; 2452 2453 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 2454 PMAP_UNLOCK(pmap); 2455 if (pap != NULL) 2456 *pap = pa; 2457 #ifdef DEBUG 2458 if (pmapdebug & PDB_FOLLOW) 2459 printf("0x%lx\n", pa); 2460 #endif 2461 return (true); 2462 2463 out: 2464 PMAP_UNLOCK(pmap); 2465 #ifdef DEBUG 2466 if (pmapdebug & PDB_FOLLOW) 2467 printf("failed\n"); 2468 #endif 2469 return (false); 2470 } 2471 2472 /* 2473 * pmap_copy: [ INTERFACE ] 2474 * 2475 * Copy the mapping range specified by src_addr/len 2476 * from the source map to the range dst_addr/len 2477 * in the destination map. 2478 * 2479 * This routine is only advisory and need not do anything. 2480 */ 2481 /* call deleted in <machine/pmap.h> */ 2482 2483 /* 2484 * pmap_update: [ INTERFACE ] 2485 * 2486 * Require that all active physical maps contain no 2487 * incorrect entries NOW, by processing any deferred 2488 * pmap operations. 2489 */ 2490 /* call deleted in <machine/pmap.h> */ 2491 2492 /* 2493 * pmap_activate: [ INTERFACE ] 2494 * 2495 * Activate the pmap used by the specified process. This includes 2496 * reloading the MMU context of the current process, and marking 2497 * the pmap in use by the processor. 
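 *
 *	Called with kernel preemption disabled, for the LWP that is
 *	(about to be) running on this CPU.  A SWPCTX is issued only if
 *	the ASN or the page table base actually changed.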
2498 */ 2499 void 2500 pmap_activate(struct lwp *l) 2501 { 2502 struct pmap * const pmap = l->l_proc->p_vmspace->vm_map.pmap; 2503 struct pcb * const pcb = lwp_getpcb(l); 2504 2505 #ifdef DEBUG 2506 if (pmapdebug & PDB_FOLLOW) 2507 printf("pmap_activate(%p)\n", l); 2508 #endif 2509 2510 KASSERT(kpreempt_disabled()); 2511 2512 struct cpu_info * const ci = curcpu(); 2513 2514 KASSERT(l == ci->ci_curlwp); 2515 2516 u_long const old_ptbr = pcb->pcb_hw.apcb_ptbr; 2517 u_int const old_asn = pcb->pcb_hw.apcb_asn; 2518 2519 /* 2520 * We hold the activation lock to synchronize with TLB shootdown. 2521 * The kernel pmap does not require those tests because shootdowns 2522 * for the kernel pmap are always sent to all CPUs. 2523 */ 2524 if (pmap != pmap_kernel()) { 2525 PMAP_ACT_LOCK(pmap); 2526 pcb->pcb_hw.apcb_asn = pmap_asn_alloc(pmap, ci); 2527 atomic_or_ulong(&pmap->pm_cpus, (1UL << ci->ci_cpuid)); 2528 } else { 2529 pcb->pcb_hw.apcb_asn = PMAP_ASN_KERNEL; 2530 } 2531 pcb->pcb_hw.apcb_ptbr = 2532 ALPHA_K0SEG_TO_PHYS((vaddr_t)pmap->pm_lev1map) >> PGSHIFT; 2533 2534 /* 2535 * Check to see if the ASN or page table base has changed; if 2536 * so, switch to our own context again so that it will take 2537 * effect. 2538 * 2539 * We test ASN first because it's the most likely value to change. 2540 */ 2541 if (old_asn != pcb->pcb_hw.apcb_asn || 2542 old_ptbr != pcb->pcb_hw.apcb_ptbr) { 2543 if (old_asn != pcb->pcb_hw.apcb_asn && 2544 old_ptbr != pcb->pcb_hw.apcb_ptbr) { 2545 TLB_COUNT(activate_both_change); 2546 } else if (old_asn != pcb->pcb_hw.apcb_asn) { 2547 TLB_COUNT(activate_asn_change); 2548 } else { 2549 TLB_COUNT(activate_ptbr_change); 2550 } 2551 (void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr); 2552 TLB_COUNT(activate_swpctx); 2553 } else { 2554 TLB_COUNT(activate_skip_swpctx); 2555 } 2556 2557 pmap_reference(pmap); 2558 ci->ci_pmap = pmap; 2559 2560 if (pmap != pmap_kernel()) { 2561 PMAP_ACT_UNLOCK(pmap); 2562 } 2563 } 2564 2565 /* 2566 * pmap_deactivate: [ INTERFACE ] 2567 * 2568 * Mark that the pmap used by the specified process is no longer 2569 * in use by the processor. 2570 */ 2571 void 2572 pmap_deactivate(struct lwp *l) 2573 { 2574 struct pmap * const pmap = l->l_proc->p_vmspace->vm_map.pmap; 2575 2576 #ifdef DEBUG 2577 if (pmapdebug & PDB_FOLLOW) 2578 printf("pmap_deactivate(%p)\n", l); 2579 #endif 2580 2581 KASSERT(kpreempt_disabled()); 2582 2583 struct cpu_info * const ci = curcpu(); 2584 2585 KASSERT(l == ci->ci_curlwp); 2586 KASSERT(pmap == ci->ci_pmap); 2587 2588 /* 2589 * There is no need to switch to a different PTBR here, 2590 * because a pmap_activate() or SWPCTX is guaranteed 2591 * before whatever lev1map we're on now is invalidated 2592 * or before user space is accessed again. 2593 * 2594 * Because only kernel mappings will be accessed before the 2595 * next pmap_activate() call, we consider our CPU to be on 2596 * the kernel pmap. 2597 */ 2598 ci->ci_pmap = pmap_kernel(); 2599 KASSERT(atomic_load_relaxed(&pmap->pm_count) > 1); 2600 pmap_destroy(pmap); 2601 } 2602 2603 /* 2604 * pmap_zero_page: [ INTERFACE ] 2605 * 2606 * Zero the specified (machine independent) page by mapping the page 2607 * into virtual memory and clear its contents, one machine dependent 2608 * page at a time. 2609 * 2610 * Note: no locking is necessary in this function. 
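 *
 *	The page is accessed through its K0SEG (direct-mapped) address,
 *	so no temporary mapping and no pmap state are involved.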
2611 */ 2612 void 2613 pmap_zero_page(paddr_t phys) 2614 { 2615 u_long *p0, *p1, *pend; 2616 2617 #ifdef DEBUG 2618 if (pmapdebug & PDB_FOLLOW) 2619 printf("pmap_zero_page(%lx)\n", phys); 2620 #endif 2621 2622 p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys); 2623 p1 = NULL; 2624 pend = (u_long *)((u_long)p0 + PAGE_SIZE); 2625 2626 /* 2627 * Unroll the loop a bit, doing 16 quadwords per iteration. 2628 * Do only 8 back-to-back stores, and alternate registers. 2629 */ 2630 do { 2631 __asm volatile( 2632 "# BEGIN loop body\n" 2633 " addq %2, (8 * 8), %1 \n" 2634 " stq $31, (0 * 8)(%0) \n" 2635 " stq $31, (1 * 8)(%0) \n" 2636 " stq $31, (2 * 8)(%0) \n" 2637 " stq $31, (3 * 8)(%0) \n" 2638 " stq $31, (4 * 8)(%0) \n" 2639 " stq $31, (5 * 8)(%0) \n" 2640 " stq $31, (6 * 8)(%0) \n" 2641 " stq $31, (7 * 8)(%0) \n" 2642 " \n" 2643 " addq %3, (8 * 8), %0 \n" 2644 " stq $31, (0 * 8)(%1) \n" 2645 " stq $31, (1 * 8)(%1) \n" 2646 " stq $31, (2 * 8)(%1) \n" 2647 " stq $31, (3 * 8)(%1) \n" 2648 " stq $31, (4 * 8)(%1) \n" 2649 " stq $31, (5 * 8)(%1) \n" 2650 " stq $31, (6 * 8)(%1) \n" 2651 " stq $31, (7 * 8)(%1) \n" 2652 " # END loop body" 2653 : "=r" (p0), "=r" (p1) 2654 : "0" (p0), "1" (p1) 2655 : "memory"); 2656 } while (p0 < pend); 2657 } 2658 2659 /* 2660 * pmap_copy_page: [ INTERFACE ] 2661 * 2662 * Copy the specified (machine independent) page by mapping the page 2663 * into virtual memory and using memcpy to copy the page, one machine 2664 * dependent page at a time. 2665 * 2666 * Note: no locking is necessary in this function. 2667 */ 2668 void 2669 pmap_copy_page(paddr_t src, paddr_t dst) 2670 { 2671 const void *s; 2672 void *d; 2673 2674 #ifdef DEBUG 2675 if (pmapdebug & PDB_FOLLOW) 2676 printf("pmap_copy_page(%lx, %lx)\n", src, dst); 2677 #endif 2678 s = (const void *)ALPHA_PHYS_TO_K0SEG(src); 2679 d = (void *)ALPHA_PHYS_TO_K0SEG(dst); 2680 memcpy(d, s, PAGE_SIZE); 2681 } 2682 2683 /* 2684 * pmap_pageidlezero: [ INTERFACE ] 2685 * 2686 * Page zero'er for the idle loop. Returns true if the 2687 * page was zero'd, FALSE if we aborted for some reason. 2688 */ 2689 bool 2690 pmap_pageidlezero(paddr_t pa) 2691 { 2692 u_long *ptr; 2693 int i, cnt = PAGE_SIZE / sizeof(u_long); 2694 2695 for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) { 2696 if (sched_curcpu_runnable_p()) { 2697 /* 2698 * An LWP has become ready. Abort now, 2699 * so we don't keep it waiting while we 2700 * finish zeroing the page. 2701 */ 2702 return (false); 2703 } 2704 *ptr++ = 0; 2705 } 2706 2707 return (true); 2708 } 2709 2710 /* 2711 * pmap_clear_modify: [ INTERFACE ] 2712 * 2713 * Clear the modify bits on the specified physical page. 2714 */ 2715 bool 2716 pmap_clear_modify(struct vm_page *pg) 2717 { 2718 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2719 bool rv = false; 2720 kmutex_t *lock; 2721 struct pmap_tlb_context tlbctx; 2722 2723 #ifdef DEBUG 2724 if (pmapdebug & PDB_FOLLOW) 2725 printf("pmap_clear_modify(%p)\n", pg); 2726 #endif 2727 2728 pmap_tlb_context_init(&tlbctx); 2729 2730 PMAP_HEAD_TO_MAP_LOCK(); 2731 lock = pmap_pvh_lock(pg); 2732 mutex_enter(lock); 2733 2734 if (md->pvh_attrs & PGA_MODIFIED) { 2735 rv = true; 2736 pmap_changebit(pg, PG_FOW, ~0UL, &tlbctx); 2737 md->pvh_attrs &= ~PGA_MODIFIED; 2738 } 2739 2740 mutex_exit(lock); 2741 PMAP_HEAD_TO_MAP_UNLOCK(); 2742 2743 pmap_tlb_shootnow(&tlbctx); 2744 TLB_COUNT(reason_clear_modify); 2745 2746 return (rv); 2747 } 2748 2749 /* 2750 * pmap_clear_reference: [ INTERFACE ] 2751 * 2752 * Clear the reference bit on the specified physical page. 
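 *
 *	"Referenced" is emulated in software: clearing it sets the
 *	fault-on-read/write/execute bits in every mapping of the page
 *	(see pmap_changebit() below), so the next access will fault into
 *	pmap_emulate_reference() and mark the page referenced again.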
2753 */ 2754 bool 2755 pmap_clear_reference(struct vm_page *pg) 2756 { 2757 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2758 bool rv = false; 2759 kmutex_t *lock; 2760 struct pmap_tlb_context tlbctx; 2761 2762 #ifdef DEBUG 2763 if (pmapdebug & PDB_FOLLOW) 2764 printf("pmap_clear_reference(%p)\n", pg); 2765 #endif 2766 2767 pmap_tlb_context_init(&tlbctx); 2768 2769 PMAP_HEAD_TO_MAP_LOCK(); 2770 lock = pmap_pvh_lock(pg); 2771 mutex_enter(lock); 2772 2773 if (md->pvh_attrs & PGA_REFERENCED) { 2774 rv = true; 2775 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0UL, &tlbctx); 2776 md->pvh_attrs &= ~PGA_REFERENCED; 2777 } 2778 2779 mutex_exit(lock); 2780 PMAP_HEAD_TO_MAP_UNLOCK(); 2781 2782 pmap_tlb_shootnow(&tlbctx); 2783 TLB_COUNT(reason_clear_reference); 2784 2785 return (rv); 2786 } 2787 2788 /* 2789 * pmap_is_referenced: [ INTERFACE ] 2790 * 2791 * Return whether or not the specified physical page is referenced 2792 * by any physical maps. 2793 */ 2794 /* See <machine/pmap.h> */ 2795 2796 /* 2797 * pmap_is_modified: [ INTERFACE ] 2798 * 2799 * Return whether or not the specified physical page is modified 2800 * by any physical maps. 2801 */ 2802 /* See <machine/pmap.h> */ 2803 2804 /* 2805 * pmap_phys_address: [ INTERFACE ] 2806 * 2807 * Return the physical address corresponding to the specified 2808 * cookie. Used by the device pager to decode a device driver's 2809 * mmap entry point return value. 2810 * 2811 * Note: no locking is necessary in this function. 2812 */ 2813 paddr_t 2814 pmap_phys_address(paddr_t ppn) 2815 { 2816 2817 return (alpha_ptob(ppn)); 2818 } 2819 2820 /* 2821 * Miscellaneous support routines follow 2822 */ 2823 2824 /* 2825 * alpha_protection_init: 2826 * 2827 * Initialize Alpha protection code array. 2828 * 2829 * Note: no locking is necessary in this function. 2830 */ 2831 static void 2832 alpha_protection_init(void) 2833 { 2834 int prot, *kp, *up; 2835 2836 kp = protection_codes[0]; 2837 up = protection_codes[1]; 2838 2839 for (prot = 0; prot < 8; prot++) { 2840 kp[prot] = PG_ASM; 2841 up[prot] = 0; 2842 2843 if (prot & VM_PROT_READ) { 2844 kp[prot] |= PG_KRE; 2845 up[prot] |= PG_KRE | PG_URE; 2846 } 2847 if (prot & VM_PROT_WRITE) { 2848 kp[prot] |= PG_KWE; 2849 up[prot] |= PG_KWE | PG_UWE; 2850 } 2851 if (prot & VM_PROT_EXECUTE) { 2852 kp[prot] |= PG_EXEC | PG_KRE; 2853 up[prot] |= PG_EXEC | PG_KRE | PG_URE; 2854 } else { 2855 kp[prot] |= PG_FOE; 2856 up[prot] |= PG_FOE; 2857 } 2858 } 2859 } 2860 2861 /* 2862 * pmap_remove_mapping: 2863 * 2864 * Invalidate a single page denoted by pmap/va. 2865 * 2866 * If (pte != NULL), it is the already computed PTE for the page. 2867 * 2868 * Note: locking in this function is complicated by the fact 2869 * that we can be called when the PV list is already locked. 2870 * (pmap_page_protect()). In this case, the caller must be 2871 * careful to get the next PV entry while we remove this entry 2872 * from beneath it. We assume that the pmap itself is already 2873 * locked; dolock applies only to the PV list. 2874 * 2875 * Returns important PTE bits that the caller needs to check for 2876 * TLB / I-stream invalidation purposes. 
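 *
 *	Specifically, the return value is the old PTE masked with
 *	(PG_V | PG_ASM | PG_EXEC); see the end of the function.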
2877 */ 2878 static pt_entry_t 2879 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte, 2880 bool dolock, pv_entry_t *opvp, struct pmap_tlb_context * const tlbctx) 2881 { 2882 pt_entry_t opte; 2883 paddr_t pa; 2884 struct vm_page *pg; /* if != NULL, page is managed */ 2885 2886 #ifdef DEBUG 2887 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 2888 printf("pmap_remove_mapping(%p, %lx, %p, %d, %p)\n", 2889 pmap, va, pte, dolock, opvp); 2890 #endif 2891 2892 /* 2893 * PTE not provided, compute it from pmap and va. 2894 */ 2895 if (pte == NULL) { 2896 pte = pmap_l3pte(pmap, va, NULL); 2897 if (pmap_pte_v(pte) == 0) 2898 return 0; 2899 } 2900 2901 opte = *pte; 2902 2903 pa = PG_PFNUM(opte) << PGSHIFT; 2904 2905 /* 2906 * Update statistics 2907 */ 2908 if (pmap_pte_w(pte)) 2909 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2910 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 2911 2912 /* 2913 * Invalidate the PTE after saving the reference modify info. 2914 */ 2915 #ifdef DEBUG 2916 if (pmapdebug & PDB_REMOVE) 2917 printf("remove: invalidating pte at %p\n", pte); 2918 #endif 2919 atomic_store_relaxed(pte, PG_NV); 2920 2921 /* 2922 * If we're removing a user mapping, check to see if we 2923 * can free page table pages. 2924 */ 2925 if (pmap != pmap_kernel()) { 2926 /* 2927 * Delete the reference on the level 3 table. It will 2928 * delete references on the level 2 and 1 tables as 2929 * appropriate. 2930 */ 2931 pmap_l3pt_delref(pmap, va, pte, tlbctx); 2932 } 2933 2934 if (opte & PG_PVLIST) { 2935 /* 2936 * Remove it from the PV table. 2937 */ 2938 pg = PHYS_TO_VM_PAGE(pa); 2939 KASSERT(pg != NULL); 2940 pmap_pv_remove(pmap, pg, va, dolock, opvp); 2941 KASSERT(opvp == NULL || *opvp != NULL); 2942 } 2943 2944 return opte & (PG_V | PG_ASM | PG_EXEC); 2945 } 2946 2947 /* 2948 * pmap_changebit: 2949 * 2950 * Set or clear the specified PTE bits for all mappings on the 2951 * specified page. 2952 * 2953 * Note: we assume that the pv_head is already locked, and that 2954 * the caller has acquired a PV->pmap mutex so that we can lock 2955 * the pmaps as we encounter them. 2956 */ 2957 static void 2958 pmap_changebit(struct vm_page *pg, pt_entry_t set, pt_entry_t mask, 2959 struct pmap_tlb_context * const tlbctx) 2960 { 2961 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2962 pv_entry_t pv; 2963 pt_entry_t *pte, npte, opte; 2964 2965 #ifdef DEBUG 2966 if (pmapdebug & PDB_BITS) 2967 printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n", 2968 pg, set, mask); 2969 #endif 2970 2971 /* 2972 * Loop over all current mappings setting/clearing as apropos. 2973 */ 2974 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2975 PMAP_LOCK(pv->pv_pmap); 2976 2977 pte = pv->pv_pte; 2978 2979 opte = atomic_load_relaxed(pte); 2980 npte = (opte | set) & mask; 2981 if (npte != opte) { 2982 atomic_store_relaxed(pte, npte); 2983 pmap_tlb_shootdown_pv(pv, opte, tlbctx); 2984 } 2985 PMAP_UNLOCK(pv->pv_pmap); 2986 } 2987 } 2988 2989 /* 2990 * pmap_emulate_reference: 2991 * 2992 * Emulate reference and/or modified bit hits. 2993 * Return 1 if this was an execute fault on a non-exec mapping, 2994 * otherwise return 0. 
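 *
 *	"type" is one of the ALPHA_MMCSR_FO* codes (fault-on-read,
 *	-write, or -execute) passed in by the trap handler.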
2995 */ 2996 int 2997 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) 2998 { 2999 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 3000 pt_entry_t faultoff, *pte; 3001 struct vm_page *pg; 3002 paddr_t pa; 3003 bool didlock = false; 3004 bool exec = false; 3005 kmutex_t *lock; 3006 3007 #ifdef DEBUG 3008 if (pmapdebug & PDB_FOLLOW) 3009 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n", 3010 l, v, user, type); 3011 #endif 3012 3013 /* 3014 * Convert process and virtual address to physical address. 3015 */ 3016 if (v >= VM_MIN_KERNEL_ADDRESS) { 3017 if (user) 3018 panic("pmap_emulate_reference: user ref to kernel"); 3019 /* 3020 * No need to lock here; kernel PT pages never go away. 3021 */ 3022 pte = PMAP_KERNEL_PTE(v); 3023 } else { 3024 #ifdef DIAGNOSTIC 3025 if (l == NULL) 3026 panic("pmap_emulate_reference: bad proc"); 3027 if (l->l_proc->p_vmspace == NULL) 3028 panic("pmap_emulate_reference: bad p_vmspace"); 3029 #endif 3030 PMAP_LOCK(pmap); 3031 didlock = true; 3032 pte = pmap_l3pte(pmap, v, NULL); 3033 /* 3034 * We'll unlock below where we're done with the PTE. 3035 */ 3036 } 3037 exec = pmap_pte_exec(pte); 3038 if (!exec && type == ALPHA_MMCSR_FOE) { 3039 if (didlock) 3040 PMAP_UNLOCK(pmap); 3041 return (1); 3042 } 3043 #ifdef DEBUG 3044 if (pmapdebug & PDB_FOLLOW) { 3045 printf("\tpte = %p, ", pte); 3046 printf("*pte = 0x%lx\n", *pte); 3047 } 3048 #endif 3049 3050 pa = pmap_pte_pa(pte); 3051 3052 /* 3053 * We're now done with the PTE. If it was a user pmap, unlock 3054 * it now. 3055 */ 3056 if (didlock) 3057 PMAP_UNLOCK(pmap); 3058 3059 #ifdef DEBUG 3060 if (pmapdebug & PDB_FOLLOW) 3061 printf("\tpa = 0x%lx\n", pa); 3062 #endif 3063 #ifdef DIAGNOSTIC 3064 if (!uvm_pageismanaged(pa)) 3065 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): " 3066 "pa 0x%lx not managed", l, v, user, type, pa); 3067 #endif 3068 3069 /* 3070 * Twiddle the appropriate bits to reflect the reference 3071 * and/or modification.. 3072 * 3073 * The rules: 3074 * (1) always mark page as used, and 3075 * (2) if it was a write fault, mark page as modified. 3076 */ 3077 pg = PHYS_TO_VM_PAGE(pa); 3078 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3079 struct pmap_tlb_context tlbctx; 3080 3081 pmap_tlb_context_init(&tlbctx); 3082 3083 PMAP_HEAD_TO_MAP_LOCK(); 3084 lock = pmap_pvh_lock(pg); 3085 mutex_enter(lock); 3086 3087 if (type == ALPHA_MMCSR_FOW) { 3088 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 3089 faultoff = PG_FOR | PG_FOW; 3090 } else { 3091 md->pvh_attrs |= PGA_REFERENCED; 3092 faultoff = PG_FOR; 3093 if (exec) { 3094 faultoff |= PG_FOE; 3095 } 3096 } 3097 pmap_changebit(pg, 0, ~faultoff, &tlbctx); 3098 3099 mutex_exit(lock); 3100 PMAP_HEAD_TO_MAP_UNLOCK(); 3101 3102 pmap_tlb_shootnow(&tlbctx); 3103 TLB_COUNT(reason_emulate_reference); 3104 3105 return (0); 3106 } 3107 3108 #ifdef DEBUG 3109 /* 3110 * pmap_pv_dump: 3111 * 3112 * Dump the physical->virtual data for the specified page. 
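 *
 *	Debug-only helper; prints the page's attribute bits followed by
 *	one "pmap ..., va ..." line for each mapping on its PV list.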
3113 */ 3114 void 3115 pmap_pv_dump(paddr_t pa) 3116 { 3117 struct vm_page *pg; 3118 struct vm_page_md *md; 3119 pv_entry_t pv; 3120 kmutex_t *lock; 3121 3122 pg = PHYS_TO_VM_PAGE(pa); 3123 md = VM_PAGE_TO_MD(pg); 3124 3125 lock = pmap_pvh_lock(pg); 3126 mutex_enter(lock); 3127 3128 printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs); 3129 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) 3130 printf(" pmap %p, va 0x%lx\n", 3131 pv->pv_pmap, pv->pv_va); 3132 printf("\n"); 3133 3134 mutex_exit(lock); 3135 } 3136 #endif 3137 3138 /* 3139 * vtophys: 3140 * 3141 * Return the physical address corresponding to the K0SEG or 3142 * K1SEG address provided. 3143 * 3144 * Note: no locking is necessary in this function. 3145 */ 3146 static bool 3147 vtophys_internal(vaddr_t const vaddr, paddr_t * const pap) 3148 { 3149 paddr_t pa; 3150 3151 KASSERT(vaddr >= ALPHA_K0SEG_BASE); 3152 3153 if (vaddr <= ALPHA_K0SEG_END) { 3154 pa = ALPHA_K0SEG_TO_PHYS(vaddr); 3155 } else { 3156 pt_entry_t * const pte = PMAP_KERNEL_PTE(vaddr); 3157 if (__predict_false(! pmap_pte_v(pte))) { 3158 return false; 3159 } 3160 pa = pmap_pte_pa(pte) | (vaddr & PGOFSET); 3161 } 3162 3163 if (pap != NULL) { 3164 *pap = pa; 3165 } 3166 3167 return true; 3168 } 3169 3170 paddr_t 3171 vtophys(vaddr_t const vaddr) 3172 { 3173 paddr_t pa; 3174 3175 if (__predict_false(! vtophys_internal(vaddr, &pa))) 3176 pa = 0; 3177 return pa; 3178 } 3179 3180 /******************** pv_entry management ********************/ 3181 3182 /* 3183 * pmap_pv_enter: 3184 * 3185 * Add a physical->virtual entry to the pv_table. 3186 */ 3187 static int 3188 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, 3189 bool dolock, pv_entry_t newpv) 3190 { 3191 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3192 kmutex_t *lock; 3193 3194 /* 3195 * Allocate and fill in the new pv_entry. 3196 */ 3197 if (newpv == NULL) { 3198 newpv = pmap_pv_alloc(); 3199 if (newpv == NULL) 3200 return ENOMEM; 3201 } 3202 newpv->pv_va = va; 3203 newpv->pv_pmap = pmap; 3204 newpv->pv_pte = pte; 3205 3206 if (dolock) { 3207 lock = pmap_pvh_lock(pg); 3208 mutex_enter(lock); 3209 } 3210 3211 #ifdef DEBUG 3212 { 3213 pv_entry_t pv; 3214 /* 3215 * Make sure the entry doesn't already exist. 3216 */ 3217 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 3218 if (pmap == pv->pv_pmap && va == pv->pv_va) { 3219 printf("pmap = %p, va = 0x%lx\n", pmap, va); 3220 panic("pmap_pv_enter: already in pv table"); 3221 } 3222 } 3223 } 3224 #endif 3225 3226 /* 3227 * ...and put it in the list. 3228 */ 3229 newpv->pv_next = md->pvh_list; 3230 md->pvh_list = newpv; 3231 3232 if (dolock) { 3233 mutex_exit(lock); 3234 } 3235 3236 return 0; 3237 } 3238 3239 /* 3240 * pmap_pv_remove: 3241 * 3242 * Remove a physical->virtual entry from the pv_table. 3243 */ 3244 static void 3245 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock, 3246 pv_entry_t *opvp) 3247 { 3248 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3249 pv_entry_t pv, *pvp; 3250 kmutex_t *lock; 3251 3252 if (dolock) { 3253 lock = pmap_pvh_lock(pg); 3254 mutex_enter(lock); 3255 } else { 3256 lock = NULL; /* XXX stupid gcc */ 3257 } 3258 3259 /* 3260 * Find the entry to remove. 
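 * The walk keeps a pointer to the previous pv_next field (pvp), so
 * the matching entry can be unlinked below with a single
 * "*pvp = pv->pv_next", with no special case for the list head.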
3261 */ 3262 for (pvp = &md->pvh_list, pv = *pvp; 3263 pv != NULL; pvp = &pv->pv_next, pv = *pvp) 3264 if (pmap == pv->pv_pmap && va == pv->pv_va) 3265 break; 3266 3267 #ifdef DEBUG 3268 if (pv == NULL) 3269 panic("pmap_pv_remove: not in pv table"); 3270 #endif 3271 3272 *pvp = pv->pv_next; 3273 3274 if (dolock) { 3275 mutex_exit(lock); 3276 } 3277 3278 if (opvp != NULL) 3279 *opvp = pv; 3280 else 3281 pmap_pv_free(pv); 3282 } 3283 3284 /* 3285 * pmap_pv_page_alloc: 3286 * 3287 * Allocate a page for the pv_entry pool. 3288 */ 3289 static void * 3290 pmap_pv_page_alloc(struct pool *pp, int flags) 3291 { 3292 paddr_t pg; 3293 3294 if (pmap_physpage_alloc(PGU_PVENT, &pg)) 3295 return ((void *)ALPHA_PHYS_TO_K0SEG(pg)); 3296 return (NULL); 3297 } 3298 3299 /* 3300 * pmap_pv_page_free: 3301 * 3302 * Free a pv_entry pool page. 3303 */ 3304 static void 3305 pmap_pv_page_free(struct pool *pp, void *v) 3306 { 3307 3308 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v)); 3309 } 3310 3311 /******************** misc. functions ********************/ 3312 3313 /* 3314 * pmap_physpage_alloc: 3315 * 3316 * Allocate a single page from the VM system and return the 3317 * physical address for that page. 3318 */ 3319 static bool 3320 pmap_physpage_alloc(int usage, paddr_t *pap) 3321 { 3322 struct vm_page *pg; 3323 paddr_t pa; 3324 3325 /* 3326 * Don't ask for a zero'd page in the L1PT case -- we will 3327 * properly initialize it in the constructor. 3328 */ 3329 3330 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ? 3331 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO); 3332 if (pg != NULL) { 3333 pa = VM_PAGE_TO_PHYS(pg); 3334 #ifdef DEBUG 3335 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3336 if (md->pvh_refcnt != 0) { 3337 printf("pmap_physpage_alloc: page 0x%lx has " 3338 "%d references\n", pa, md->pvh_refcnt); 3339 panic("pmap_physpage_alloc"); 3340 } 3341 #endif 3342 *pap = pa; 3343 return (true); 3344 } 3345 return (false); 3346 } 3347 3348 /* 3349 * pmap_physpage_free: 3350 * 3351 * Free the single page table page at the specified physical address. 3352 */ 3353 static void 3354 pmap_physpage_free(paddr_t pa) 3355 { 3356 struct vm_page *pg; 3357 3358 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) 3359 panic("pmap_physpage_free: bogus physical page address"); 3360 3361 #ifdef DEBUG 3362 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3363 if (md->pvh_refcnt != 0) 3364 panic("pmap_physpage_free: page still has references"); 3365 #endif 3366 3367 uvm_pagefree(pg); 3368 } 3369 3370 /* 3371 * pmap_physpage_addref: 3372 * 3373 * Add a reference to the specified special use page. 3374 */ 3375 static int 3376 pmap_physpage_addref(void *kva) 3377 { 3378 struct vm_page *pg; 3379 struct vm_page_md *md; 3380 paddr_t pa; 3381 3382 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 3383 pg = PHYS_TO_VM_PAGE(pa); 3384 md = VM_PAGE_TO_MD(pg); 3385 3386 KASSERT((int)md->pvh_refcnt >= 0); 3387 3388 return atomic_inc_uint_nv(&md->pvh_refcnt); 3389 } 3390 3391 /* 3392 * pmap_physpage_delref: 3393 * 3394 * Delete a reference to the specified special use page. 
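 *
 *	"kva" is the K0SEG address of the page; the reference count is
 *	kept in the page's vm_page_md.  Returns the new count.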
3395 */ 3396 static int 3397 pmap_physpage_delref(void *kva) 3398 { 3399 struct vm_page *pg; 3400 struct vm_page_md *md; 3401 paddr_t pa; 3402 3403 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 3404 pg = PHYS_TO_VM_PAGE(pa); 3405 md = VM_PAGE_TO_MD(pg); 3406 3407 KASSERT((int)md->pvh_refcnt > 0); 3408 3409 return atomic_dec_uint_nv(&md->pvh_refcnt); 3410 } 3411 3412 /******************** page table page management ********************/ 3413 3414 static bool 3415 pmap_kptpage_alloc(paddr_t *pap) 3416 { 3417 if (uvm.page_init_done == false) { 3418 /* 3419 * We're growing the kernel pmap early (from 3420 * uvm_pageboot_alloc()). This case must 3421 * be handled a little differently. 3422 */ 3423 *pap = ALPHA_K0SEG_TO_PHYS( 3424 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3425 return true; 3426 } 3427 3428 return pmap_physpage_alloc(PGU_NORMAL, pap); 3429 } 3430 3431 /* 3432 * pmap_growkernel: [ INTERFACE ] 3433 * 3434 * Grow the kernel address space. This is a hint from the 3435 * upper layer to pre-allocate more kernel PT pages. 3436 */ 3437 vaddr_t 3438 pmap_growkernel(vaddr_t maxkvaddr) 3439 { 3440 struct pmap *kpm = pmap_kernel(), *pm; 3441 paddr_t ptaddr; 3442 pt_entry_t *l1pte, *l2pte, pte; 3443 vaddr_t va; 3444 int l1idx; 3445 3446 rw_enter(&pmap_growkernel_lock, RW_WRITER); 3447 3448 if (maxkvaddr <= virtual_end) 3449 goto out; /* we are OK */ 3450 3451 va = virtual_end; 3452 3453 while (va < maxkvaddr) { 3454 /* 3455 * If there is no valid L1 PTE (i.e. no L2 PT page), 3456 * allocate a new L2 PT page and insert it into the 3457 * L1 map. 3458 */ 3459 l1pte = pmap_l1pte(kpm, va); 3460 if (pmap_pte_v(l1pte) == 0) { 3461 if (!pmap_kptpage_alloc(&ptaddr)) 3462 goto die; 3463 pte = (atop(ptaddr) << PG_SHIFT) | 3464 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3465 *l1pte = pte; 3466 3467 l1idx = l1pte_index(va); 3468 3469 /* Update all the user pmaps. */ 3470 mutex_enter(&pmap_all_pmaps_lock); 3471 for (pm = TAILQ_FIRST(&pmap_all_pmaps); 3472 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { 3473 /* Skip the kernel pmap. */ 3474 if (pm == pmap_kernel()) 3475 continue; 3476 3477 /* 3478 * Any pmaps published on the global list 3479 * should never be referencing kernel_lev1map. 3480 */ 3481 KASSERT(pm->pm_lev1map != kernel_lev1map); 3482 3483 PMAP_LOCK(pm); 3484 pm->pm_lev1map[l1idx] = pte; 3485 PMAP_UNLOCK(pm); 3486 } 3487 mutex_exit(&pmap_all_pmaps_lock); 3488 } 3489 3490 /* 3491 * Have an L2 PT page now, add the L3 PT page. 3492 */ 3493 l2pte = pmap_l2pte(kpm, va, l1pte); 3494 KASSERT(pmap_pte_v(l2pte) == 0); 3495 if (!pmap_kptpage_alloc(&ptaddr)) 3496 goto die; 3497 *l2pte = (atop(ptaddr) << PG_SHIFT) | 3498 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3499 va += ALPHA_L2SEG_SIZE; 3500 } 3501 3502 /* Invalidate the L1 PT cache. */ 3503 pool_cache_invalidate(&pmap_l1pt_cache); 3504 3505 virtual_end = va; 3506 3507 out: 3508 rw_exit(&pmap_growkernel_lock); 3509 3510 return (virtual_end); 3511 3512 die: 3513 panic("pmap_growkernel: out of memory"); 3514 } 3515 3516 /* 3517 * pmap_l1pt_ctor: 3518 * 3519 * Pool cache constructor for L1 PT pages. 3520 * 3521 * Note: The growkernel lock is held across allocations 3522 * from our pool_cache, so we don't need to acquire it 3523 * ourselves. 3524 */ 3525 static int 3526 pmap_l1pt_ctor(void *arg, void *object, int flags) 3527 { 3528 pt_entry_t *l1pt = object, pte; 3529 int i; 3530 3531 /* 3532 * Initialize the new level 1 table by zeroing the 3533 * user portion and copying the kernel mappings into 3534 * the kernel portion. 
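 * The resulting layout is:
 *
 *	[0 .. l1pte_index(VM_MIN_KERNEL_ADDRESS))	zeroed (user space)
 *	[l1pte_index(VM_MIN_KERNEL_ADDRESS) ..
 *	 l1pte_index(VM_MAX_KERNEL_ADDRESS)]		copied from kernel_lev1map
 *	l1pte_index(VPTBASE)				PTE pointing back at this
 *							table (the VPT), without PG_ASM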
3535 */
3536 for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3537 l1pt[i] = 0;
3538
3539 for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3540 i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3541 l1pt[i] = kernel_lev1map[i];
3542
3543 /*
3544 * Now, map the new virtual page table. NOTE: NO ASM!
3545 */
3546 pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3547 PG_V | PG_KRE | PG_KWE;
3548 l1pt[l1pte_index(VPTBASE)] = pte;
3549
3550 return (0);
3551 }
3552
3553 /*
3554 * pmap_l1pt_alloc:
3555 *
3556 * Page allocator for L1 PT pages.
3557 */
3558 static void *
3559 pmap_l1pt_alloc(struct pool *pp, int flags)
3560 {
3561 paddr_t ptpa;
3562
3563 /*
3564 * Attempt to allocate a free page.
3565 */
3566 if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
3567 return (NULL);
3568
3569 return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3570 }
3571
3572 /*
3573 * pmap_l1pt_free:
3574 *
3575 * Page freer for L1 PT pages.
3576 */
3577 static void
3578 pmap_l1pt_free(struct pool *pp, void *v)
3579 {
3580
3581 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3582 }
3583
3584 /*
3585 * pmap_ptpage_alloc:
3586 *
3587 * Allocate a level 2 or level 3 page table page for a user
3588 * pmap, and initialize the PTE that references it.
3589 *
3590 * Note: the pmap must already be locked.
3591 */
3592 static int
3593 pmap_ptpage_alloc(pt_entry_t * const pte, int const usage)
3594 {
3595 paddr_t ptpa;
3596
3597 /*
3598 * Allocate the page table page.
3599 */
3600 if (pmap_physpage_alloc(usage, &ptpa) == false)
3601 return (ENOMEM);
3602
3603 /*
3604 * Initialize the referencing PTE.
3605 */
3606 const pt_entry_t npte = ((ptpa >> PGSHIFT) << PG_SHIFT) |
3607 PG_V | PG_KRE | PG_KWE | PG_WIRED;
3608
3609 atomic_store_relaxed(pte, npte);
3610
3611 return (0);
3612 }
3613
3614 /*
3615 * pmap_ptpage_free:
3616 *
3617 * Free the level 2 or level 3 page table page referenced
3618 * by the provided PTE.
3619 *
3620 * Note: the pmap must already be locked.
3621 */
3622 static void
3623 pmap_ptpage_free(pt_entry_t * const pte, struct pmap_tlb_context * const tlbctx)
3624 {
3625
3626 /*
3627 * Extract the physical address of the page from the PTE
3628 * and clear the entry.
3629 */
3630 const paddr_t ptpa = pmap_pte_pa(pte);
3631 atomic_store_relaxed(pte, PG_NV);
3632
3633 #ifdef DEBUG
3634 pmap_zero_page(ptpa);
3635 #endif
3636 pmap_tlb_physpage_free(ptpa, tlbctx);
3637 }
3638
3639 /*
3640 * pmap_l3pt_delref:
3641 *
3642 * Delete a reference on a level 3 PT page. If the reference drops
3643 * to zero, free it.
3644 *
3645 * Note: the pmap must already be locked.
3646 */
3647 static void
3648 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte,
3649 struct pmap_tlb_context * const tlbctx)
3650 {
3651 pt_entry_t *l1pte, *l2pte;
3652
3653 l1pte = pmap_l1pte(pmap, va);
3654 l2pte = pmap_l2pte(pmap, va, l1pte);
3655
3656 #ifdef DIAGNOSTIC
3657 if (pmap == pmap_kernel())
3658 panic("pmap_l3pt_delref: kernel pmap");
3659 #endif
3660
3661 if (pmap_physpage_delref(l3pte) == 0) {
3662 /*
3663 * No more mappings; we can free the level 3 table.
3664 */
3665 #ifdef DEBUG
3666 if (pmapdebug & PDB_PTPAGE)
3667 printf("pmap_l3pt_delref: freeing level 3 table at "
3668 "0x%lx\n", pmap_pte_pa(l2pte));
3669 #endif
3670 /*
3671 * You can pass NULL if you know the last reference won't
3672 * be dropped.
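 * (pmap_enter() does exactly that: it takes an extra reference on
 * the L3 PT page before calling pmap_remove_mapping() with a NULL
 * tlbctx, so this branch cannot be reached in that case.)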
3673 */
3674 KASSERT(tlbctx != NULL);
3675 pmap_ptpage_free(l2pte, tlbctx);
3676
3677 /*
3678 * We've freed a level 3 table, so we must invalidate
3679 * any now-stale TLB entries for the corresponding VPT
3680 * VA range. Easiest way to guarantee this is to hit
3681 * all of the user TLB entries.
3682 */
3683 pmap_tlb_shootdown_all_user(pmap, PG_V, tlbctx);
3684
3685 /*
3686 * We've freed a level 3 table, so delete the reference
3687 * on the level 2 table.
3688 */
3689 pmap_l2pt_delref(pmap, l1pte, l2pte, tlbctx);
3690 }
3691 }
3692
3693 /*
3694 * pmap_l2pt_delref:
3695 *
3696 * Delete a reference on a level 2 PT page. If the reference drops
3697 * to zero, free it.
3698 *
3699 * Note: the pmap must already be locked.
3700 */
3701 static void
3702 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3703 struct pmap_tlb_context * const tlbctx)
3704 {
3705
3706 #ifdef DIAGNOSTIC
3707 if (pmap == pmap_kernel())
3708 panic("pmap_l2pt_delref: kernel pmap");
3709 #endif
3710
3711 if (pmap_physpage_delref(l2pte) == 0) {
3712 /*
3713 * No more mappings in this segment; we can free the
3714 * level 2 table.
3715 */
3716 #ifdef DEBUG
3717 if (pmapdebug & PDB_PTPAGE)
3718 printf("pmap_l2pt_delref: freeing level 2 table at "
3719 "0x%lx\n", pmap_pte_pa(l1pte));
3720 #endif
3721 /*
3722 * You can pass NULL if you know the last reference won't
3723 * be dropped.
3724 */
3725 KASSERT(tlbctx != NULL);
3726 pmap_ptpage_free(l1pte, tlbctx);
3727
3728 /*
3729 * We've freed a level 2 table, so we must invalidate
3730 * any now-stale TLB entries for the corresponding VPT
3731 * VA range. Easiest way to guarantee this is to hit
3732 * all of the user TLB entries.
3733 */
3734 pmap_tlb_shootdown_all_user(pmap, PG_V, tlbctx);
3735
3736 /*
3737 * We've freed a level 2 table, so delete the reference
3738 * on the level 1 table.
3739 */
3740 pmap_l1pt_delref(pmap, l1pte);
3741 }
3742 }
3743
3744 /*
3745 * pmap_l1pt_delref:
3746 *
3747 * Delete a reference on a level 1 PT page.
3748 */
3749 static void
3750 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte)
3751 {
3752
3753 KASSERT(pmap != pmap_kernel());
3754
3755 (void)pmap_physpage_delref(l1pte);
3756 }
3757
3758 /******************** Address Space Number management ********************/
3759
3760 /*
3761 * pmap_asn_alloc:
3762 *
3763 * Allocate and assign an ASN to the specified pmap.
3764 *
3765 * Note: the pmap must already be locked. This may be called from
3766 * an interprocessor interrupt, and in that case, the sender of
3767 * the IPI has the pmap lock.
3768 */
3769 static u_int
3770 pmap_asn_alloc(pmap_t const pmap, struct cpu_info * const ci)
3771 {
3772
3773 #ifdef DEBUG
3774 if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3775 printf("pmap_asn_alloc(%p)\n", pmap);
3776 #endif
3777
3778 KASSERT(pmap != pmap_kernel());
3779 KASSERT(pmap->pm_lev1map != kernel_lev1map);
3780 KASSERT(kpreempt_disabled());
3781
3782 /* No work to do if the CPU does not implement ASNs. */
3783 if (pmap_max_asn == 0)
3784 return 0;
3785
3786 struct pmap_asn_info * const pma = &pmap->pm_asni[ci->ci_cpuid];
3787
3788 /*
3789 * Hopefully, we can continue using the one we have...
3790 *
3791 * N.B. the generation check will fail the first time
3792 * any pmap is activated on a given CPU, because we start
3793 * the generation counter at 1, but initialize pmaps with
3794 * 0; this forces the first ASN allocation to occur.
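 *
 * For example, right after pmap_create() every pma_asngen is
 * PMAP_ASNGEN_INVALID, which never matches the CPU's current
 * ci_asn_gen, so the comparison below fails and a fresh ASN is
 * assigned.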
3795 */
3796 if (pma->pma_asngen == ci->ci_asn_gen) {
3797 #ifdef DEBUG
3798 if (pmapdebug & PDB_ASN)
3799 printf("pmap_asn_alloc: same generation, keeping %u\n",
3800 pma->pma_asn);
3801 #endif
3802 TLB_COUNT(asn_reuse);
3803 return pma->pma_asn;
3804 }
3805
3806 /*
3807 * Need to assign a new ASN. Grab the next one, incrementing
3808 * the generation number if we have to.
3809 */
3810 if (ci->ci_next_asn > pmap_max_asn) {
3811 /*
3812 * Invalidate all non-PG_ASM TLB entries and the
3813 * I-cache, and bump the generation number.
3814 */
3815 ALPHA_TBIAP();
3816 alpha_pal_imb();
3817
3818 ci->ci_next_asn = PMAP_ASN_FIRST_USER;
3819 ci->ci_asn_gen++;
3820 TLB_COUNT(asn_newgen);
3821
3822 /*
3823 * Make sure the generation number doesn't wrap. We could
3824 * handle this scenario by traversing all of the pmaps,
3825 * and invalidating the generation number on those which
3826 * are not currently in use by this processor.
3827 *
3828 * However... considering that we're using an unsigned 64-bit
3829 * integer for generation numbers, on non-ASN CPUs, we won't
3830 * wrap for approximately 75 billion years on a 128-ASN CPU
3831 * (assuming 1000 switch operations per second).
3832 *
3833 * So, we don't bother.
3834 */
3835 KASSERT(ci->ci_asn_gen != PMAP_ASNGEN_INVALID);
3836 #ifdef DEBUG
3837 if (pmapdebug & PDB_ASN)
3838 printf("pmap_asn_alloc: generation bumped to %lu\n",
3839 ci->ci_asn_gen);
3840 #endif
3841 }
3842
3843 /*
3844 * Assign the new ASN and validate the generation number.
3845 */
3846 pma->pma_asn = ci->ci_next_asn++;
3847 pma->pma_asngen = ci->ci_asn_gen;
3848 TLB_COUNT(asn_assign);
3849
3850 /*
3851 * We have a new ASN, so we can skip any pending I-stream sync
3852 * on the way back out to user space.
3853 */
3854 atomic_and_ulong(&pmap->pm_needisync, ~(1UL << ci->ci_cpuid));
3855
3856 #ifdef DEBUG
3857 if (pmapdebug & PDB_ASN)
3858 printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3859 pma->pma_asn, pmap);
3860 #endif
3861 return pma->pma_asn;
3862 }
3863