1 /* $NetBSD: pmap.c,v 1.260 2015/11/05 06:26:15 pgoyette Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center and by Chris G. Demetriou. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)pmap.c 8.6 (Berkeley) 5/27/94 66 */ 67 68 /* 69 * DEC Alpha physical map management code. 70 * 71 * History: 72 * 73 * This pmap started life as a Motorola 68851/68030 pmap, 74 * written by Mike Hibler at the University of Utah. 75 * 76 * It was modified for the DEC Alpha by Chris Demetriou 77 * at Carnegie Mellon University. 78 * 79 * Support for non-contiguous physical memory was added by 80 * Jason R. Thorpe of the Numerical Aerospace Simulation 81 * Facility, NASA Ames Research Center and Chris Demetriou. 82 * 83 * Page table management and a major cleanup were undertaken 84 * by Jason R. Thorpe, with lots of help from Ross Harvey of 85 * Avalon Computer Systems and from Chris Demetriou. 86 * 87 * Support for the new UVM pmap interface was written by 88 * Jason R. Thorpe. 89 * 90 * Support for ASNs was written by Jason R. Thorpe, again 91 * with help from Chris Demetriou and Ross Harvey. 92 * 93 * The locking protocol was written by Jason R. Thorpe, 94 * using Chuck Cranor's i386 pmap for UVM as a model. 95 * 96 * TLB shootdown code was written by Jason R. Thorpe. 97 * 98 * Multiprocessor modifications by Andrew Doran. 99 * 100 * Notes: 101 * 102 * All page table access is done via K0SEG. The one exception 103 * to this is for kernel mappings. Since all kernel page 104 * tables are pre-allocated, we can use the Virtual Page Table 105 * to access PTEs that map K1SEG addresses. 106 * 107 * Kernel page table pages are statically allocated in 108 * pmap_bootstrap(), and are never freed. In the future, 109 * support for dynamically adding additional kernel page 110 * table pages may be added. User page table pages are 111 * dynamically allocated and freed. 112 * 113 * Bugs/misfeatures: 114 * 115 * - Some things could be optimized. 116 */ 117 118 /* 119 * Manages physical address maps. 120 * 121 * Since the information managed by this module is 122 * also stored by the logical address mapping module, 123 * this module may throw away valid virtual-to-physical 124 * mappings at almost any time. However, invalidations 125 * of virtual-to-physical mappings must be done as 126 * requested. 127 * 128 * In order to cope with hardware architectures which 129 * make virtual-to-physical map invalidates expensive, 130 * this module may delay invalidate or reduced protection 131 * operations until such time as they are actually 132 * necessary. This module is given full information as 133 * to which processors are currently using which maps, 134 * and to when physical maps must be made correct. 
135 */ 136 137 #include "opt_lockdebug.h" 138 #include "opt_sysv.h" 139 #include "opt_multiprocessor.h" 140 141 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 142 143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.260 2015/11/05 06:26:15 pgoyette Exp $"); 144 145 #include <sys/param.h> 146 #include <sys/systm.h> 147 #include <sys/kernel.h> 148 #include <sys/proc.h> 149 #include <sys/malloc.h> 150 #include <sys/pool.h> 151 #include <sys/buf.h> 152 #include <sys/atomic.h> 153 #include <sys/cpu.h> 154 155 #include <uvm/uvm.h> 156 157 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR) 158 #include <machine/rpb.h> 159 #endif 160 161 #ifdef DEBUG 162 #define PDB_FOLLOW 0x0001 163 #define PDB_INIT 0x0002 164 #define PDB_ENTER 0x0004 165 #define PDB_REMOVE 0x0008 166 #define PDB_CREATE 0x0010 167 #define PDB_PTPAGE 0x0020 168 #define PDB_ASN 0x0040 169 #define PDB_BITS 0x0080 170 #define PDB_COLLECT 0x0100 171 #define PDB_PROTECT 0x0200 172 #define PDB_BOOTSTRAP 0x1000 173 #define PDB_PARANOIA 0x2000 174 #define PDB_WIRING 0x4000 175 #define PDB_PVDUMP 0x8000 176 177 int debugmap = 0; 178 int pmapdebug = PDB_PARANOIA; 179 #endif 180 181 /* 182 * Given a map and a machine independent protection code, 183 * convert to an alpha protection code. 184 */ 185 #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p]) 186 static int protection_codes[2][8]; 187 188 /* 189 * kernel_lev1map: 190 * 191 * Kernel level 1 page table. This maps all kernel level 2 192 * page table pages, and is used as a template for all user 193 * pmap level 1 page tables. When a new user level 1 page 194 * table is allocated, all kernel_lev1map PTEs for kernel 195 * addresses are copied to the new map. 196 * 197 * The kernel also has an initial set of kernel level 2 page 198 * table pages. These map the kernel level 3 page table pages. 199 * As kernel level 3 page table pages are added, more level 2 200 * page table pages may be added to map them. These pages are 201 * never freed. 202 * 203 * Finally, the kernel also has an initial set of kernel level 204 * 3 page table pages. These map pages in K1SEG. More level 205 * 3 page table pages may be added at run-time if additional 206 * K1SEG address space is required. These pages are never freed. 207 * 208 * NOTE: When mappings are inserted into the kernel pmap, all 209 * level 2 and level 3 page table pages must already be allocated 210 * and mapped into the parent page table. 211 */ 212 pt_entry_t *kernel_lev1map; 213 214 /* 215 * Virtual Page Table. 216 */ 217 static pt_entry_t *VPT; 218 219 static struct { 220 struct pmap k_pmap; 221 struct pmap_asn_info k_asni[ALPHA_MAXPROCS]; 222 } kernel_pmap_store; 223 224 struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap; 225 226 paddr_t avail_start; /* PA of first available physical page */ 227 paddr_t avail_end; /* PA of last available physical page */ 228 static vaddr_t virtual_end; /* VA of last avail page (end of kernel AS) */ 229 230 static bool pmap_initialized; /* Has pmap_init completed? */ 231 232 u_long pmap_pages_stolen; /* instrumentation */ 233 234 /* 235 * This variable contains the number of CPU IDs we need to allocate 236 * space for when allocating the pmap structure. It is used to 237 * size a per-CPU array of ASN and ASN Generation number. 238 */ 239 static u_long pmap_ncpuids; 240 241 #ifndef PMAP_PV_LOWAT 242 #define PMAP_PV_LOWAT 16 243 #endif 244 int pmap_pv_lowat = PMAP_PV_LOWAT; 245 246 /* 247 * List of all pmaps, used to update them when e.g. 
additional kernel 248 * page tables are allocated. This list is kept LRU-ordered by 249 * pmap_activate(). 250 */ 251 static TAILQ_HEAD(, pmap) pmap_all_pmaps; 252 253 /* 254 * The pools from which pmap structures and sub-structures are allocated. 255 */ 256 static struct pool_cache pmap_pmap_cache; 257 static struct pool_cache pmap_l1pt_cache; 258 static struct pool_cache pmap_pv_cache; 259 260 /* 261 * Address Space Numbers. 262 * 263 * On many implementations of the Alpha architecture, the TLB entries and 264 * I-cache blocks are tagged with a unique number within an implementation- 265 * specified range. When a process context becomes active, the ASN is used 266 * to match TLB entries; if a TLB entry for a particular VA does not match 267 * the current ASN, it is ignored (one could think of the processor as 268 * having a collection of <max ASN> separate TLBs). This allows operating 269 * system software to skip the TLB flush that would otherwise be necessary 270 * at context switch time. 271 * 272 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that 273 * causes TLB entries to match any ASN. The PALcode also provides 274 * a TBI (Translation Buffer Invalidate) operation that flushes all 275 * TLB entries that _do not_ have PG_ASM. We use this bit for kernel 276 * mappings, so that invalidation of all user mappings does not invalidate 277 * kernel mappings (which are consistent across all processes). 278 * 279 * pmap_next_asn always indicates to the next ASN to use. When 280 * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation. 281 * 282 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM) 283 * TLB entries and the I-cache are flushed, the generation number is bumped, 284 * and pmap_next_asn is changed to indicate the first non-reserved ASN. 285 * 286 * We reserve ASN #0 for pmaps that use the global kernel_lev1map. This 287 * prevents the following scenario: 288 * 289 * * New ASN generation starts, and process A is given ASN #0. 290 * 291 * * A new process B (and thus new pmap) is created. The ASN, 292 * for lack of a better value, is initialized to 0. 293 * 294 * * Process B runs. It is now using the TLB entries tagged 295 * by process A. *poof* 296 * 297 * In the scenario above, in addition to the processor using using incorrect 298 * TLB entires, the PALcode might use incorrect information to service a 299 * TLB miss. (The PALcode uses the recursively mapped Virtual Page Table 300 * to locate the PTE for a faulting address, and tagged TLB entires exist 301 * for the Virtual Page Table addresses in order to speed up this procedure, 302 * as well.) 303 * 304 * By reserving an ASN for kernel_lev1map users, we are guaranteeing that 305 * new pmaps will initially run with no TLB entries for user addresses 306 * or VPT mappings that map user page tables. Since kernel_lev1map only 307 * contains mappings for kernel addresses, and since those mappings 308 * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is 309 * safe (since PG_ASM mappings match any ASN). 310 * 311 * On processors that do not support ASNs, the PALcode invalidates 312 * the TLB and I-cache automatically on swpctx. We still still go 313 * through the motions of assigning an ASN (really, just refreshing 314 * the ASN generation in this particular case) to keep the logic sane 315 * in other parts of the code. 
316 */ 317 static u_int pmap_max_asn; /* max ASN supported by the system */ 318 /* next ASN and cur ASN generation */ 319 static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; 320 321 /* 322 * Locking: 323 * 324 * READ/WRITE LOCKS 325 * ---------------- 326 * 327 * * pmap_main_lock - This lock is used to prevent deadlock and/or 328 * provide mutex access to the pmap module. Most operations lock 329 * the pmap first, then PV lists as needed. However, some operations, 330 * such as pmap_page_protect(), lock the PV lists before locking 331 * the pmaps. To prevent deadlock, we require a mutex lock on the 332 * pmap module if locking in the PV->pmap direction. This is 333 * implemented by acquiring a (shared) read lock on pmap_main_lock 334 * if locking pmap->PV and a (exclusive) write lock if locking in 335 * the PV->pmap direction. Since only one thread can hold a write 336 * lock at a time, this provides the mutex. 337 * 338 * MUTEXES 339 * ------- 340 * 341 * * pm_lock (per-pmap) - This lock protects all of the members 342 * of the pmap structure itself. This lock will be asserted 343 * in pmap_activate() and pmap_deactivate() from a critical 344 * section of mi_switch(), and must never sleep. Note that 345 * in the case of the kernel pmap, interrupts which cause 346 * memory allocation *must* be blocked while this lock is 347 * asserted. 348 * 349 * * pvh_lock (global hash) - These locks protects the PV lists 350 * for managed pages. 351 * 352 * * pmap_all_pmaps_lock - This lock protects the global list of 353 * all pmaps. Note that a pm_lock must never be held while this 354 * lock is held. 355 * 356 * * pmap_growkernel_lock - This lock protects pmap_growkernel() 357 * and the virtual_end variable. 358 * 359 * There is a lock ordering constraint for pmap_growkernel_lock. 360 * pmap_growkernel() acquires the locks in the following order: 361 * 362 * pmap_growkernel_lock (write) -> pmap_all_pmaps_lock -> 363 * pmap->pm_lock 364 * 365 * We need to ensure consistency between user pmaps and the 366 * kernel_lev1map. For this reason, pmap_growkernel_lock must 367 * be held to prevent kernel_lev1map changing across pmaps 368 * being added to / removed from the global pmaps list. 369 * 370 * Address space number management (global ASN counters and per-pmap 371 * ASN state) are not locked; they use arrays of values indexed 372 * per-processor. 373 * 374 * All internal functions which operate on a pmap are called 375 * with the pmap already locked by the caller (which will be 376 * an interface function). 377 */ 378 static krwlock_t pmap_main_lock; 379 static kmutex_t pmap_all_pmaps_lock; 380 static krwlock_t pmap_growkernel_lock; 381 382 #define PMAP_MAP_TO_HEAD_LOCK() rw_enter(&pmap_main_lock, RW_READER) 383 #define PMAP_MAP_TO_HEAD_UNLOCK() rw_exit(&pmap_main_lock) 384 #define PMAP_HEAD_TO_MAP_LOCK() rw_enter(&pmap_main_lock, RW_WRITER) 385 #define PMAP_HEAD_TO_MAP_UNLOCK() rw_exit(&pmap_main_lock) 386 387 struct { 388 kmutex_t lock; 389 } __aligned(64) static pmap_pvh_locks[64] __aligned(64); 390 391 static inline kmutex_t * 392 pmap_pvh_lock(struct vm_page *pg) 393 { 394 395 /* Cut bits 11-6 out of page address and use directly as offset. */ 396 return (kmutex_t *)((uintptr_t)&pmap_pvh_locks + 397 ((uintptr_t)pg & (63 << 6))); 398 } 399 400 #if defined(MULTIPROCESSOR) 401 /* 402 * TLB Shootdown: 403 * 404 * When a mapping is changed in a pmap, the TLB entry corresponding to 405 * the virtual address must be invalidated on all processors. 
In order 406 * to accomplish this on systems with multiple processors, messages are 407 * sent from the processor which performs the mapping change to all 408 * processors on which the pmap is active. For other processors, the 409 * ASN generation numbers for that processor is invalidated, so that 410 * the next time the pmap is activated on that processor, a new ASN 411 * will be allocated (which implicitly invalidates all TLB entries). 412 * 413 * Note, we can use the pool allocator to allocate job entries 414 * since pool pages are mapped with K0SEG, not with the TLB. 415 */ 416 struct pmap_tlb_shootdown_job { 417 TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; 418 vaddr_t pj_va; /* virtual address */ 419 pmap_t pj_pmap; /* the pmap which maps the address */ 420 pt_entry_t pj_pte; /* the PTE bits */ 421 }; 422 423 static struct pmap_tlb_shootdown_q { 424 TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; /* queue 16b */ 425 kmutex_t pq_lock; /* spin lock on queue 16b */ 426 int pq_pte; /* aggregate PTE bits 4b */ 427 int pq_count; /* number of pending requests 4b */ 428 int pq_tbia; /* pending global flush 4b */ 429 uint8_t pq_pad[64-16-16-4-4-4]; /* pad to 64 bytes */ 430 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE); 431 432 /* If we have more pending jobs than this, we just nail the whole TLB. */ 433 #define PMAP_TLB_SHOOTDOWN_MAXJOBS 6 434 435 static struct pool_cache pmap_tlb_shootdown_job_cache; 436 #endif /* MULTIPROCESSOR */ 437 438 /* 439 * Internal routines 440 */ 441 static void alpha_protection_init(void); 442 static bool pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long); 443 static void pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long); 444 445 /* 446 * PT page management functions. 447 */ 448 static int pmap_lev1map_create(pmap_t, long); 449 static void pmap_lev1map_destroy(pmap_t, long); 450 static int pmap_ptpage_alloc(pmap_t, pt_entry_t *, int); 451 static void pmap_ptpage_free(pmap_t, pt_entry_t *); 452 static void pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long); 453 static void pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long); 454 static void pmap_l1pt_delref(pmap_t, pt_entry_t *, long); 455 456 static void *pmap_l1pt_alloc(struct pool *, int); 457 static void pmap_l1pt_free(struct pool *, void *); 458 459 static struct pool_allocator pmap_l1pt_allocator = { 460 pmap_l1pt_alloc, pmap_l1pt_free, 0, 461 }; 462 463 static int pmap_l1pt_ctor(void *, void *, int); 464 465 /* 466 * PV table management functions. 467 */ 468 static int pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *, 469 bool); 470 static void pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool); 471 static void *pmap_pv_page_alloc(struct pool *, int); 472 static void pmap_pv_page_free(struct pool *, void *); 473 474 static struct pool_allocator pmap_pv_page_allocator = { 475 pmap_pv_page_alloc, pmap_pv_page_free, 0, 476 }; 477 478 #ifdef DEBUG 479 void pmap_pv_dump(paddr_t); 480 #endif 481 482 #define pmap_pv_alloc() pool_cache_get(&pmap_pv_cache, PR_NOWAIT) 483 #define pmap_pv_free(pv) pool_cache_put(&pmap_pv_cache, (pv)) 484 485 /* 486 * ASN management functions. 487 */ 488 static void pmap_asn_alloc(pmap_t, long); 489 490 /* 491 * Misc. functions. 
492 */ 493 static bool pmap_physpage_alloc(int, paddr_t *); 494 static void pmap_physpage_free(paddr_t); 495 static int pmap_physpage_addref(void *); 496 static int pmap_physpage_delref(void *); 497 498 /* 499 * PMAP_ISACTIVE{,_TEST}: 500 * 501 * Check to see if a pmap is active on the current processor. 502 */ 503 #define PMAP_ISACTIVE_TEST(pm, cpu_id) \ 504 (((pm)->pm_cpus & (1UL << (cpu_id))) != 0) 505 506 #if defined(DEBUG) && !defined(MULTIPROCESSOR) 507 #define PMAP_ISACTIVE(pm, cpu_id) \ 508 ({ \ 509 /* \ 510 * XXX This test is not MP-safe. \ 511 */ \ 512 int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id); \ 513 \ 514 if ((curlwp->l_flag & LW_IDLE) != 0 && \ 515 curproc->p_vmspace != NULL && \ 516 ((curproc->p_sflag & PS_WEXIT) == 0) && \ 517 (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap))) \ 518 panic("PMAP_ISACTIVE"); \ 519 (isactive_); \ 520 }) 521 #else 522 #define PMAP_ISACTIVE(pm, cpu_id) PMAP_ISACTIVE_TEST(pm, cpu_id) 523 #endif /* DEBUG && !MULTIPROCESSOR */ 524 525 /* 526 * PMAP_ACTIVATE_ASN_SANITY: 527 * 528 * DEBUG sanity checks for ASNs within PMAP_ACTIVATE. 529 */ 530 #ifdef DEBUG 531 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) \ 532 do { \ 533 struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)]; \ 534 struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)]; \ 535 \ 536 if ((pmap)->pm_lev1map == kernel_lev1map) { \ 537 /* \ 538 * This pmap implementation also ensures that pmaps \ 539 * referencing kernel_lev1map use a reserved ASN \ 540 * ASN to prevent the PALcode from servicing a TLB \ 541 * miss with the wrong PTE. \ 542 */ \ 543 if (__pma->pma_asn != PMAP_ASN_RESERVED) { \ 544 printf("kernel_lev1map with non-reserved ASN " \ 545 "(line %d)\n", __LINE__); \ 546 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 547 } \ 548 } else { \ 549 if (__pma->pma_asngen != __cpma->pma_asngen) { \ 550 /* \ 551 * ASN generation number isn't valid! \ 552 */ \ 553 printf("pmap asngen %lu, current %lu " \ 554 "(line %d)\n", \ 555 __pma->pma_asngen, \ 556 __cpma->pma_asngen, \ 557 __LINE__); \ 558 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 559 } \ 560 if (__pma->pma_asn == PMAP_ASN_RESERVED) { \ 561 /* \ 562 * DANGER WILL ROBINSON! We're going to \ 563 * pollute the VPT TLB entries! \ 564 */ \ 565 printf("Using reserved ASN! (line %d)\n", \ 566 __LINE__); \ 567 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 568 } \ 569 } \ 570 } while (/*CONSTCOND*/0) 571 #else 572 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) /* nothing */ 573 #endif 574 575 /* 576 * PMAP_ACTIVATE: 577 * 578 * This is essentially the guts of pmap_activate(), without 579 * ASN allocation. This is used by pmap_activate(), 580 * pmap_lev1map_create(), and pmap_lev1map_destroy(). 581 * 582 * This is called only when it is known that a pmap is "active" 583 * on the current processor; the ASN must already be valid. 584 */ 585 #define PMAP_ACTIVATE(pmap, l, cpu_id) \ 586 do { \ 587 struct pcb *pcb = lwp_getpcb(l); \ 588 PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id); \ 589 \ 590 pcb->pcb_hw.apcb_ptbr = \ 591 ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \ 592 pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn; \ 593 \ 594 if ((l) == curlwp) { \ 595 /* \ 596 * Page table base register has changed; switch to \ 597 * our own context again so that it will take effect. \ 598 */ \ 599 (void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr); \ 600 } \ 601 } while (/*CONSTCOND*/0) 602 603 /* 604 * PMAP_SET_NEEDISYNC: 605 * 606 * Mark that a user pmap needs an I-stream synch on its 607 * way back out to userspace. 
608 */ 609 #define PMAP_SET_NEEDISYNC(pmap) (pmap)->pm_needisync = ~0UL 610 611 /* 612 * PMAP_SYNC_ISTREAM: 613 * 614 * Synchronize the I-stream for the specified pmap. For user 615 * pmaps, this is deferred until a process using the pmap returns 616 * to userspace. 617 */ 618 #if defined(MULTIPROCESSOR) 619 #define PMAP_SYNC_ISTREAM_KERNEL() \ 620 do { \ 621 alpha_pal_imb(); \ 622 alpha_broadcast_ipi(ALPHA_IPI_IMB); \ 623 } while (/*CONSTCOND*/0) 624 625 #define PMAP_SYNC_ISTREAM_USER(pmap) \ 626 do { \ 627 alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST); \ 628 /* for curcpu, will happen in userret() */ \ 629 } while (/*CONSTCOND*/0) 630 #else 631 #define PMAP_SYNC_ISTREAM_KERNEL() alpha_pal_imb() 632 #define PMAP_SYNC_ISTREAM_USER(pmap) /* will happen in userret() */ 633 #endif /* MULTIPROCESSOR */ 634 635 #define PMAP_SYNC_ISTREAM(pmap) \ 636 do { \ 637 if ((pmap) == pmap_kernel()) \ 638 PMAP_SYNC_ISTREAM_KERNEL(); \ 639 else \ 640 PMAP_SYNC_ISTREAM_USER(pmap); \ 641 } while (/*CONSTCOND*/0) 642 643 /* 644 * PMAP_INVALIDATE_ASN: 645 * 646 * Invalidate the specified pmap's ASN, so as to force allocation 647 * of a new one the next time pmap_asn_alloc() is called. 648 * 649 * NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING 650 * CONDITIONS ARE true: 651 * 652 * (1) The pmap references the global kernel_lev1map. 653 * 654 * (2) The pmap is not active on the current processor. 655 */ 656 #define PMAP_INVALIDATE_ASN(pmap, cpu_id) \ 657 do { \ 658 (pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED; \ 659 } while (/*CONSTCOND*/0) 660 661 /* 662 * PMAP_INVALIDATE_TLB: 663 * 664 * Invalidate the TLB entry for the pmap/va pair. 665 */ 666 #define PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id) \ 667 do { \ 668 if ((hadasm) || (isactive)) { \ 669 /* \ 670 * Simply invalidating the TLB entry and I-cache \ 671 * works in this case. \ 672 */ \ 673 ALPHA_TBIS((va)); \ 674 } else if ((pmap)->pm_asni[(cpu_id)].pma_asngen == \ 675 pmap_asn_info[(cpu_id)].pma_asngen) { \ 676 /* \ 677 * We can't directly invalidate the TLB entry \ 678 * in this case, so we have to force allocation \ 679 * of a new ASN the next time this pmap becomes \ 680 * active. \ 681 */ \ 682 PMAP_INVALIDATE_ASN((pmap), (cpu_id)); \ 683 } \ 684 /* \ 685 * Nothing to do in this case; the next time the \ 686 * pmap becomes active on this processor, a new \ 687 * ASN will be allocated anyway. \ 688 */ \ 689 } while (/*CONSTCOND*/0) 690 691 /* 692 * PMAP_KERNEL_PTE: 693 * 694 * Get a kernel PTE. 695 * 696 * If debugging, do a table walk. If not debugging, just use 697 * the Virtual Page Table, since all kernel page tables are 698 * pre-allocated and mapped in. 699 */ 700 #ifdef DEBUG 701 #define PMAP_KERNEL_PTE(va) \ 702 ({ \ 703 pt_entry_t *l1pte_, *l2pte_; \ 704 \ 705 l1pte_ = pmap_l1pte(pmap_kernel(), va); \ 706 if (pmap_pte_v(l1pte_) == 0) { \ 707 printf("kernel level 1 PTE not valid, va 0x%lx " \ 708 "(line %d)\n", (va), __LINE__); \ 709 panic("PMAP_KERNEL_PTE"); \ 710 } \ 711 l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_); \ 712 if (pmap_pte_v(l2pte_) == 0) { \ 713 printf("kernel level 2 PTE not valid, va 0x%lx " \ 714 "(line %d)\n", (va), __LINE__); \ 715 panic("PMAP_KERNEL_PTE"); \ 716 } \ 717 pmap_l3pte(pmap_kernel(), va, l2pte_); \ 718 }) 719 #else 720 #define PMAP_KERNEL_PTE(va) (&VPT[VPT_INDEX((va))]) 721 #endif 722 723 /* 724 * PMAP_SET_PTE: 725 * 726 * Set a PTE to a specified value. 
727 */ 728 #define PMAP_SET_PTE(ptep, val) *(ptep) = (val) 729 730 /* 731 * PMAP_STAT_{INCR,DECR}: 732 * 733 * Increment or decrement a pmap statistic. 734 */ 735 #define PMAP_STAT_INCR(s, v) atomic_add_long((unsigned long *)(&(s)), (v)) 736 #define PMAP_STAT_DECR(s, v) atomic_add_long((unsigned long *)(&(s)), -(v)) 737 738 /* 739 * pmap_bootstrap: 740 * 741 * Bootstrap the system to run with virtual memory. 742 * 743 * Note: no locking is necessary in this function. 744 */ 745 void 746 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) 747 { 748 vsize_t lev2mapsize, lev3mapsize; 749 pt_entry_t *lev2map, *lev3map; 750 pt_entry_t pte; 751 vsize_t bufsz; 752 struct pcb *pcb; 753 int i; 754 755 #ifdef DEBUG 756 if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP)) 757 printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn); 758 #endif 759 760 /* 761 * Compute the number of pages kmem_arena will have. 762 */ 763 kmeminit_nkmempages(); 764 765 /* 766 * Figure out how many initial PTE's are necessary to map the 767 * kernel. We also reserve space for kmem_alloc_pageable() 768 * for vm_fork(). 769 */ 770 771 /* Get size of buffer cache and set an upper limit */ 772 bufsz = buf_memcalc(); 773 buf_setvalimit(bufsz); 774 775 lev3mapsize = 776 (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) + 777 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE + 778 (maxproc * UPAGES) + nkmempages; 779 780 lev3mapsize = roundup(lev3mapsize, NPTEPG); 781 782 /* 783 * Initialize `FYI' variables. Note we're relying on 784 * the fact that BSEARCH sorts the vm_physmem[] array 785 * for us. 786 */ 787 avail_start = ptoa(VM_PHYSMEM_PTR(0)->start); 788 avail_end = ptoa(VM_PHYSMEM_PTR(vm_nphysseg - 1)->end); 789 virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE; 790 791 #if 0 792 printf("avail_start = 0x%lx\n", avail_start); 793 printf("avail_end = 0x%lx\n", avail_end); 794 printf("virtual_end = 0x%lx\n", virtual_end); 795 #endif 796 797 /* 798 * Allocate a level 1 PTE table for the kernel. 799 * This is always one page long. 800 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 801 */ 802 kernel_lev1map = (pt_entry_t *) 803 uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG); 804 805 /* 806 * Allocate a level 2 PTE table for the kernel. 807 * These must map all of the level3 PTEs. 808 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 809 */ 810 lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG); 811 lev2map = (pt_entry_t *) 812 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize); 813 814 /* 815 * Allocate a level 3 PTE table for the kernel. 816 * Contains lev3mapsize PTEs. 
817 */ 818 lev3map = (pt_entry_t *) 819 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize); 820 821 /* 822 * Set up level 1 page table 823 */ 824 825 /* Map all of the level 2 pte pages */ 826 for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) { 827 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) + 828 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 829 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 830 kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS + 831 (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte; 832 } 833 834 /* Map the virtual page table */ 835 pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT) 836 << PG_SHIFT; 837 pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */ 838 kernel_lev1map[l1pte_index(VPTBASE)] = pte; 839 VPT = (pt_entry_t *)VPTBASE; 840 841 #ifdef _PMAP_MAY_USE_PROM_CONSOLE 842 { 843 extern pt_entry_t prom_pte; /* XXX */ 844 extern int prom_mapped; /* XXX */ 845 846 if (pmap_uses_prom_console()) { 847 /* 848 * XXX Save old PTE so we can remap the PROM, if 849 * XXX necessary. 850 */ 851 prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM; 852 } 853 prom_mapped = 0; 854 855 /* 856 * Actually, this code lies. The prom is still mapped, and will 857 * remain so until the context switch after alpha_init() returns. 858 */ 859 } 860 #endif 861 862 /* 863 * Set up level 2 page table. 864 */ 865 /* Map all of the level 3 pte pages */ 866 for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) { 867 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) + 868 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 869 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 870 lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+ 871 (i*PAGE_SIZE*NPTEPG))] = pte; 872 } 873 874 /* Initialize the pmap_growkernel_lock. */ 875 rw_init(&pmap_growkernel_lock); 876 877 /* 878 * Set up level three page table (lev3map) 879 */ 880 /* Nothing to do; it's already zero'd */ 881 882 /* 883 * Initialize the pmap pools and list. 884 */ 885 pmap_ncpuids = ncpuids; 886 pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0, 887 0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL); 888 pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt", 889 &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL); 890 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 891 PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL, 892 NULL, NULL); 893 894 TAILQ_INIT(&pmap_all_pmaps); 895 896 /* 897 * Initialize the ASN logic. 898 */ 899 pmap_max_asn = maxasn; 900 for (i = 0; i < ALPHA_MAXPROCS; i++) { 901 pmap_asn_info[i].pma_asn = 1; 902 pmap_asn_info[i].pma_asngen = 0; 903 } 904 905 /* 906 * Initialize the locks. 907 */ 908 rw_init(&pmap_main_lock); 909 mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 910 for (i = 0; i < __arraycount(pmap_pvh_locks); i++) { 911 mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE); 912 } 913 914 /* 915 * Initialize kernel pmap. Note that all kernel mappings 916 * have PG_ASM set, so the ASN doesn't really matter for 917 * the kernel pmap. Also, since the kernel pmap always 918 * references kernel_lev1map, it always has an invalid ASN 919 * generation. 
920 */ 921 memset(pmap_kernel(), 0, sizeof(struct pmap)); 922 pmap_kernel()->pm_lev1map = kernel_lev1map; 923 pmap_kernel()->pm_count = 1; 924 for (i = 0; i < ALPHA_MAXPROCS; i++) { 925 pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 926 pmap_kernel()->pm_asni[i].pma_asngen = 927 pmap_asn_info[i].pma_asngen; 928 } 929 mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE); 930 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); 931 932 #if defined(MULTIPROCESSOR) 933 /* 934 * Initialize the TLB shootdown queues. 935 */ 936 pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache, 937 sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE, 938 0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL); 939 for (i = 0; i < ALPHA_MAXPROCS; i++) { 940 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); 941 mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT, 942 IPL_SCHED); 943 } 944 #endif 945 946 /* 947 * Set up lwp0's PCB such that the ptbr points to the right place 948 * and has the kernel pmap's (really unused) ASN. 949 */ 950 pcb = lwp_getpcb(&lwp0); 951 pcb->pcb_hw.apcb_ptbr = 952 ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT; 953 pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn; 954 955 /* 956 * Mark the kernel pmap `active' on this processor. 957 */ 958 atomic_or_ulong(&pmap_kernel()->pm_cpus, 959 (1UL << cpu_number())); 960 } 961 962 #ifdef _PMAP_MAY_USE_PROM_CONSOLE 963 int 964 pmap_uses_prom_console(void) 965 { 966 967 return (cputype == ST_DEC_21000); 968 } 969 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */ 970 971 /* 972 * pmap_virtual_space: [ INTERFACE ] 973 * 974 * Define the initial bounds of the kernel virtual address space. 975 */ 976 void 977 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) 978 { 979 980 *vstartp = VM_MIN_KERNEL_ADDRESS; /* kernel is in K0SEG */ 981 *vendp = VM_MAX_KERNEL_ADDRESS; /* we use pmap_growkernel */ 982 } 983 984 /* 985 * pmap_steal_memory: [ INTERFACE ] 986 * 987 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()). 988 * This function allows for early dynamic memory allocation until the 989 * virtual memory system has been bootstrapped. After that point, either 990 * kmem_alloc or malloc should be used. This function works by stealing 991 * pages from the (to be) managed page pool, then implicitly mapping the 992 * pages (by using their k0seg addresses) and zeroing them. 993 * 994 * It may be used once the physical memory segments have been pre-loaded 995 * into the vm_physmem[] array. Early memory allocation MUST use this 996 * interface! This cannot be used after vm_page_startup(), and will 997 * generate a panic if tried. 998 * 999 * Note that this memory will never be freed, and in essence it is wired 1000 * down. 1001 * 1002 * We must adjust *vstartp and/or *vendp iff we use address space 1003 * from the kernel virtual address range defined by pmap_virtual_space(). 1004 * 1005 * Note: no locking is necessary in this function. 
1006 */ 1007 vaddr_t 1008 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp) 1009 { 1010 int bank, npgs, x; 1011 vaddr_t va; 1012 paddr_t pa; 1013 1014 size = round_page(size); 1015 npgs = atop(size); 1016 1017 #if 0 1018 printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs); 1019 #endif 1020 1021 for (bank = 0; bank < vm_nphysseg; bank++) { 1022 if (uvm.page_init_done == true) 1023 panic("pmap_steal_memory: called _after_ bootstrap"); 1024 1025 #if 0 1026 printf(" bank %d: avail_start 0x%lx, start 0x%lx, " 1027 "avail_end 0x%lx\n", bank, VM_PHYSMEM_PTR(bank)->avail_start, 1028 VM_PHYSMEM_PTR(bank)->start, VM_PHYSMEM_PTR(bank)->avail_end); 1029 #endif 1030 1031 if (VM_PHYSMEM_PTR(bank)->avail_start != VM_PHYSMEM_PTR(bank)->start || 1032 VM_PHYSMEM_PTR(bank)->avail_start >= VM_PHYSMEM_PTR(bank)->avail_end) 1033 continue; 1034 1035 #if 0 1036 printf(" avail_end - avail_start = 0x%lx\n", 1037 VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start); 1038 #endif 1039 1040 if ((VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start) 1041 < npgs) 1042 continue; 1043 1044 /* 1045 * There are enough pages here; steal them! 1046 */ 1047 pa = ptoa(VM_PHYSMEM_PTR(bank)->avail_start); 1048 VM_PHYSMEM_PTR(bank)->avail_start += npgs; 1049 VM_PHYSMEM_PTR(bank)->start += npgs; 1050 1051 /* 1052 * Have we used up this segment? 1053 */ 1054 if (VM_PHYSMEM_PTR(bank)->avail_start == VM_PHYSMEM_PTR(bank)->end) { 1055 if (vm_nphysseg == 1) 1056 panic("pmap_steal_memory: out of memory!"); 1057 1058 /* Remove this segment from the list. */ 1059 vm_nphysseg--; 1060 for (x = bank; x < vm_nphysseg; x++) { 1061 /* structure copy */ 1062 VM_PHYSMEM_PTR_SWAP(x, x + 1); 1063 } 1064 } 1065 1066 va = ALPHA_PHYS_TO_K0SEG(pa); 1067 memset((void *)va, 0, size); 1068 pmap_pages_stolen += npgs; 1069 return (va); 1070 } 1071 1072 /* 1073 * If we got here, this was no memory left. 1074 */ 1075 panic("pmap_steal_memory: no memory to steal"); 1076 } 1077 1078 /* 1079 * pmap_init: [ INTERFACE ] 1080 * 1081 * Initialize the pmap module. Called by vm_init(), to initialize any 1082 * structures that the pmap system needs to map virtual memory. 1083 * 1084 * Note: no locking is necessary in this function. 1085 */ 1086 void 1087 pmap_init(void) 1088 { 1089 1090 #ifdef DEBUG 1091 if (pmapdebug & PDB_FOLLOW) 1092 printf("pmap_init()\n"); 1093 #endif 1094 1095 /* initialize protection array */ 1096 alpha_protection_init(); 1097 1098 /* 1099 * Set a low water mark on the pv_entry pool, so that we are 1100 * more likely to have these around even in extreme memory 1101 * starvation. 1102 */ 1103 pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat); 1104 1105 /* 1106 * Now it is safe to enable pv entry recording. 1107 */ 1108 pmap_initialized = true; 1109 1110 #if 0 1111 for (bank = 0; bank < vm_nphysseg; bank++) { 1112 printf("bank %d\n", bank); 1113 printf("\tstart = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->start)); 1114 printf("\tend = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->end)); 1115 printf("\tavail_start = 0x%x\n", 1116 ptoa(VM_PHYSMEM_PTR(bank)->avail_start)); 1117 printf("\tavail_end = 0x%x\n", 1118 ptoa(VM_PHYSMEM_PTR(bank)->avail_end)); 1119 } 1120 #endif 1121 } 1122 1123 /* 1124 * pmap_create: [ INTERFACE ] 1125 * 1126 * Create and return a physical map. 1127 * 1128 * Note: no locking is necessary in this function. 
1129 */ 1130 pmap_t 1131 pmap_create(void) 1132 { 1133 pmap_t pmap; 1134 int i; 1135 1136 #ifdef DEBUG 1137 if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) 1138 printf("pmap_create()\n"); 1139 #endif 1140 1141 pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK); 1142 memset(pmap, 0, sizeof(*pmap)); 1143 1144 /* 1145 * Defer allocation of a new level 1 page table until 1146 * the first new mapping is entered; just take a reference 1147 * to the kernel kernel_lev1map. 1148 */ 1149 pmap->pm_lev1map = kernel_lev1map; 1150 1151 pmap->pm_count = 1; 1152 for (i = 0; i < pmap_ncpuids; i++) { 1153 pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 1154 /* XXX Locking? */ 1155 pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; 1156 } 1157 mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); 1158 1159 try_again: 1160 rw_enter(&pmap_growkernel_lock, RW_READER); 1161 1162 if (pmap_lev1map_create(pmap, cpu_number()) != 0) { 1163 rw_exit(&pmap_growkernel_lock); 1164 (void) kpause("pmap_create", false, hz >> 2, NULL); 1165 goto try_again; 1166 } 1167 1168 mutex_enter(&pmap_all_pmaps_lock); 1169 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); 1170 mutex_exit(&pmap_all_pmaps_lock); 1171 1172 rw_exit(&pmap_growkernel_lock); 1173 1174 return (pmap); 1175 } 1176 1177 /* 1178 * pmap_destroy: [ INTERFACE ] 1179 * 1180 * Drop the reference count on the specified pmap, releasing 1181 * all resources if the reference count drops to zero. 1182 */ 1183 void 1184 pmap_destroy(pmap_t pmap) 1185 { 1186 1187 #ifdef DEBUG 1188 if (pmapdebug & PDB_FOLLOW) 1189 printf("pmap_destroy(%p)\n", pmap); 1190 #endif 1191 1192 if (atomic_dec_uint_nv(&pmap->pm_count) > 0) 1193 return; 1194 1195 rw_enter(&pmap_growkernel_lock, RW_READER); 1196 1197 /* 1198 * Remove it from the global list of all pmaps. 1199 */ 1200 mutex_enter(&pmap_all_pmaps_lock); 1201 TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); 1202 mutex_exit(&pmap_all_pmaps_lock); 1203 1204 pmap_lev1map_destroy(pmap, cpu_number()); 1205 1206 rw_exit(&pmap_growkernel_lock); 1207 1208 /* 1209 * Since the pmap is supposed to contain no valid 1210 * mappings at this point, we should always see 1211 * kernel_lev1map here. 1212 */ 1213 KASSERT(pmap->pm_lev1map == kernel_lev1map); 1214 1215 mutex_destroy(&pmap->pm_lock); 1216 pool_cache_put(&pmap_pmap_cache, pmap); 1217 } 1218 1219 /* 1220 * pmap_reference: [ INTERFACE ] 1221 * 1222 * Add a reference to the specified pmap. 1223 */ 1224 void 1225 pmap_reference(pmap_t pmap) 1226 { 1227 1228 #ifdef DEBUG 1229 if (pmapdebug & PDB_FOLLOW) 1230 printf("pmap_reference(%p)\n", pmap); 1231 #endif 1232 1233 atomic_inc_uint(&pmap->pm_count); 1234 } 1235 1236 /* 1237 * pmap_remove: [ INTERFACE ] 1238 * 1239 * Remove the given range of addresses from the specified map. 1240 * 1241 * It is assumed that the start and end are properly 1242 * rounded to the page size. 1243 */ 1244 void 1245 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 1246 { 1247 pt_entry_t *l1pte, *l2pte, *l3pte; 1248 pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte; 1249 vaddr_t l1eva, l2eva, vptva; 1250 bool needisync = false; 1251 long cpu_id = cpu_number(); 1252 1253 #ifdef DEBUG 1254 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1255 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1256 #endif 1257 1258 /* 1259 * If this is the kernel pmap, we can use a faster method 1260 * for accessing the PTEs (since the PT pages are always 1261 * resident). 
1262 * 1263 * Note that this routine should NEVER be called from an 1264 * interrupt context; pmap_kremove() is used for that. 1265 */ 1266 if (pmap == pmap_kernel()) { 1267 PMAP_MAP_TO_HEAD_LOCK(); 1268 PMAP_LOCK(pmap); 1269 1270 while (sva < eva) { 1271 l3pte = PMAP_KERNEL_PTE(sva); 1272 if (pmap_pte_v(l3pte)) { 1273 #ifdef DIAGNOSTIC 1274 if (uvm_pageismanaged(pmap_pte_pa(l3pte)) && 1275 pmap_pte_pv(l3pte) == 0) 1276 panic("pmap_remove: managed page " 1277 "without PG_PVLIST for 0x%lx", 1278 sva); 1279 #endif 1280 needisync |= pmap_remove_mapping(pmap, sva, 1281 l3pte, true, cpu_id); 1282 } 1283 sva += PAGE_SIZE; 1284 } 1285 1286 PMAP_UNLOCK(pmap); 1287 PMAP_MAP_TO_HEAD_UNLOCK(); 1288 1289 if (needisync) 1290 PMAP_SYNC_ISTREAM_KERNEL(); 1291 return; 1292 } 1293 1294 #ifdef DIAGNOSTIC 1295 if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS) 1296 panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel " 1297 "address range", sva, eva); 1298 #endif 1299 1300 PMAP_MAP_TO_HEAD_LOCK(); 1301 PMAP_LOCK(pmap); 1302 1303 /* 1304 * If we're already referencing the kernel_lev1map, there 1305 * is no work for us to do. 1306 */ 1307 if (pmap->pm_lev1map == kernel_lev1map) 1308 goto out; 1309 1310 saved_l1pte = l1pte = pmap_l1pte(pmap, sva); 1311 1312 /* 1313 * Add a reference to the L1 table to it won't get 1314 * removed from under us. 1315 */ 1316 pmap_physpage_addref(saved_l1pte); 1317 1318 for (; sva < eva; sva = l1eva, l1pte++) { 1319 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1320 if (pmap_pte_v(l1pte)) { 1321 saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte); 1322 1323 /* 1324 * Add a reference to the L2 table so it won't 1325 * get removed from under us. 1326 */ 1327 pmap_physpage_addref(saved_l2pte); 1328 1329 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1330 l2eva = 1331 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1332 if (pmap_pte_v(l2pte)) { 1333 saved_l3pte = l3pte = 1334 pmap_l3pte(pmap, sva, l2pte); 1335 1336 /* 1337 * Add a reference to the L3 table so 1338 * it won't get removed from under us. 1339 */ 1340 pmap_physpage_addref(saved_l3pte); 1341 1342 /* 1343 * Remember this sva; if the L3 table 1344 * gets removed, we need to invalidate 1345 * the VPT TLB entry for it. 1346 */ 1347 vptva = sva; 1348 1349 for (; sva < l2eva && sva < eva; 1350 sva += PAGE_SIZE, l3pte++) { 1351 if (!pmap_pte_v(l3pte)) { 1352 continue; 1353 } 1354 needisync |= 1355 pmap_remove_mapping( 1356 pmap, sva, 1357 l3pte, true, 1358 cpu_id); 1359 } 1360 1361 /* 1362 * Remove the reference to the L3 1363 * table that we added above. This 1364 * may free the L3 table. 1365 */ 1366 pmap_l3pt_delref(pmap, vptva, 1367 saved_l3pte, cpu_id); 1368 } 1369 } 1370 1371 /* 1372 * Remove the reference to the L2 table that we 1373 * added above. This may free the L2 table. 1374 */ 1375 pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id); 1376 } 1377 } 1378 1379 /* 1380 * Remove the reference to the L1 table that we added above. 1381 * This may free the L1 table. 1382 */ 1383 pmap_l1pt_delref(pmap, saved_l1pte, cpu_id); 1384 1385 if (needisync) 1386 PMAP_SYNC_ISTREAM_USER(pmap); 1387 1388 out: 1389 PMAP_UNLOCK(pmap); 1390 PMAP_MAP_TO_HEAD_UNLOCK(); 1391 } 1392 1393 /* 1394 * pmap_page_protect: [ INTERFACE ] 1395 * 1396 * Lower the permission for all mappings to a given page to 1397 * the permissions specified. 
1398 */ 1399 void 1400 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 1401 { 1402 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1403 pmap_t pmap; 1404 pv_entry_t pv, nextpv; 1405 bool needkisync = false; 1406 long cpu_id = cpu_number(); 1407 kmutex_t *lock; 1408 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1409 #ifdef DEBUG 1410 paddr_t pa = VM_PAGE_TO_PHYS(pg); 1411 1412 1413 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) || 1414 (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE))) 1415 printf("pmap_page_protect(%p, %x)\n", pg, prot); 1416 #endif 1417 1418 switch (prot) { 1419 case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE: 1420 case VM_PROT_READ|VM_PROT_WRITE: 1421 return; 1422 1423 /* copy_on_write */ 1424 case VM_PROT_READ|VM_PROT_EXECUTE: 1425 case VM_PROT_READ: 1426 PMAP_HEAD_TO_MAP_LOCK(); 1427 lock = pmap_pvh_lock(pg); 1428 mutex_enter(lock); 1429 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 1430 PMAP_LOCK(pv->pv_pmap); 1431 if (*pv->pv_pte & (PG_KWE | PG_UWE)) { 1432 *pv->pv_pte &= ~(PG_KWE | PG_UWE); 1433 PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va, 1434 pmap_pte_asm(pv->pv_pte), 1435 PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id); 1436 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va, 1437 pmap_pte_asm(pv->pv_pte)); 1438 } 1439 PMAP_UNLOCK(pv->pv_pmap); 1440 } 1441 mutex_exit(lock); 1442 PMAP_HEAD_TO_MAP_UNLOCK(); 1443 PMAP_TLB_SHOOTNOW(); 1444 return; 1445 1446 /* remove_all */ 1447 default: 1448 break; 1449 } 1450 1451 PMAP_HEAD_TO_MAP_LOCK(); 1452 lock = pmap_pvh_lock(pg); 1453 mutex_enter(lock); 1454 for (pv = md->pvh_list; pv != NULL; pv = nextpv) { 1455 nextpv = pv->pv_next; 1456 pmap = pv->pv_pmap; 1457 1458 PMAP_LOCK(pmap); 1459 #ifdef DEBUG 1460 if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 || 1461 pmap_pte_pa(pv->pv_pte) != pa) 1462 panic("pmap_page_protect: bad mapping"); 1463 #endif 1464 if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte, 1465 false, cpu_id) == true) { 1466 if (pmap == pmap_kernel()) 1467 needkisync |= true; 1468 else 1469 PMAP_SYNC_ISTREAM_USER(pmap); 1470 } 1471 PMAP_UNLOCK(pmap); 1472 } 1473 1474 if (needkisync) 1475 PMAP_SYNC_ISTREAM_KERNEL(); 1476 1477 mutex_exit(lock); 1478 PMAP_HEAD_TO_MAP_UNLOCK(); 1479 } 1480 1481 /* 1482 * pmap_protect: [ INTERFACE ] 1483 * 1484 * Set the physical protection on the specified range of this map 1485 * as requested. 
1486 */ 1487 void 1488 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1489 { 1490 pt_entry_t *l1pte, *l2pte, *l3pte, bits; 1491 bool isactive; 1492 bool hadasm; 1493 vaddr_t l1eva, l2eva; 1494 long cpu_id = cpu_number(); 1495 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1496 1497 #ifdef DEBUG 1498 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) 1499 printf("pmap_protect(%p, %lx, %lx, %x)\n", 1500 pmap, sva, eva, prot); 1501 #endif 1502 1503 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1504 pmap_remove(pmap, sva, eva); 1505 return; 1506 } 1507 1508 PMAP_LOCK(pmap); 1509 1510 bits = pte_prot(pmap, prot); 1511 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1512 1513 l1pte = pmap_l1pte(pmap, sva); 1514 for (; sva < eva; sva = l1eva, l1pte++) { 1515 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1516 if (pmap_pte_v(l1pte)) { 1517 l2pte = pmap_l2pte(pmap, sva, l1pte); 1518 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1519 l2eva = 1520 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1521 if (pmap_pte_v(l2pte)) { 1522 l3pte = pmap_l3pte(pmap, sva, l2pte); 1523 for (; sva < l2eva && sva < eva; 1524 sva += PAGE_SIZE, l3pte++) { 1525 if (pmap_pte_v(l3pte) && 1526 pmap_pte_prot_chg(l3pte, 1527 bits)) { 1528 hadasm = 1529 (pmap_pte_asm(l3pte) 1530 != 0); 1531 pmap_pte_set_prot(l3pte, 1532 bits); 1533 PMAP_INVALIDATE_TLB( 1534 pmap, sva, hadasm, 1535 isactive, cpu_id); 1536 PMAP_TLB_SHOOTDOWN( 1537 pmap, sva, 1538 hadasm ? PG_ASM : 0); 1539 } 1540 } 1541 } 1542 } 1543 } 1544 } 1545 1546 PMAP_TLB_SHOOTNOW(); 1547 1548 if (prot & VM_PROT_EXECUTE) 1549 PMAP_SYNC_ISTREAM(pmap); 1550 1551 PMAP_UNLOCK(pmap); 1552 } 1553 1554 /* 1555 * pmap_enter: [ INTERFACE ] 1556 * 1557 * Insert the given physical page (p) at 1558 * the specified virtual address (v) in the 1559 * target physical map with the protection requested. 1560 * 1561 * If specified, the page will be wired down, meaning 1562 * that the related pte can not be reclaimed. 1563 * 1564 * Note: This is the only routine which MAY NOT lazy-evaluate 1565 * or lose information. That is, this routine must actually 1566 * insert this page into the given map NOW. 1567 */ 1568 int 1569 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1570 { 1571 struct vm_page *pg; /* if != NULL, managed page */ 1572 pt_entry_t *pte, npte, opte; 1573 paddr_t opa; 1574 bool tflush = true; 1575 bool hadasm = false; /* XXX gcc -Wuninitialized */ 1576 bool needisync = false; 1577 bool setisync = false; 1578 bool isactive; 1579 bool wired; 1580 long cpu_id = cpu_number(); 1581 int error = 0; 1582 kmutex_t *lock; 1583 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1584 1585 #ifdef DEBUG 1586 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1587 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n", 1588 pmap, va, pa, prot, flags); 1589 #endif 1590 pg = PHYS_TO_VM_PAGE(pa); 1591 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1592 wired = (flags & PMAP_WIRED) != 0; 1593 1594 /* 1595 * Determine what we need to do about the I-stream. If 1596 * VM_PROT_EXECUTE is set, we mark a user pmap as needing 1597 * an I-sync on the way back out to userspace. We always 1598 * need an immediate I-sync for the kernel pmap. 1599 */ 1600 if (prot & VM_PROT_EXECUTE) { 1601 if (pmap == pmap_kernel()) 1602 needisync = true; 1603 else { 1604 setisync = true; 1605 needisync = (pmap->pm_cpus != 0); 1606 } 1607 } 1608 1609 PMAP_MAP_TO_HEAD_LOCK(); 1610 PMAP_LOCK(pmap); 1611 1612 if (pmap == pmap_kernel()) { 1613 #ifdef DIAGNOSTIC 1614 /* 1615 * Sanity check the virtual address. 
1616 */ 1617 if (va < VM_MIN_KERNEL_ADDRESS) 1618 panic("pmap_enter: kernel pmap, invalid va 0x%lx", va); 1619 #endif 1620 pte = PMAP_KERNEL_PTE(va); 1621 } else { 1622 pt_entry_t *l1pte, *l2pte; 1623 1624 #ifdef DIAGNOSTIC 1625 /* 1626 * Sanity check the virtual address. 1627 */ 1628 if (va >= VM_MAXUSER_ADDRESS) 1629 panic("pmap_enter: user pmap, invalid va 0x%lx", va); 1630 #endif 1631 1632 KASSERT(pmap->pm_lev1map != kernel_lev1map); 1633 1634 /* 1635 * Check to see if the level 1 PTE is valid, and 1636 * allocate a new level 2 page table page if it's not. 1637 * A reference will be added to the level 2 table when 1638 * the level 3 table is created. 1639 */ 1640 l1pte = pmap_l1pte(pmap, va); 1641 if (pmap_pte_v(l1pte) == 0) { 1642 pmap_physpage_addref(l1pte); 1643 error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT); 1644 if (error) { 1645 pmap_l1pt_delref(pmap, l1pte, cpu_id); 1646 if (flags & PMAP_CANFAIL) 1647 goto out; 1648 panic("pmap_enter: unable to create L2 PT " 1649 "page"); 1650 } 1651 #ifdef DEBUG 1652 if (pmapdebug & PDB_PTPAGE) 1653 printf("pmap_enter: new level 2 table at " 1654 "0x%lx\n", pmap_pte_pa(l1pte)); 1655 #endif 1656 } 1657 1658 /* 1659 * Check to see if the level 2 PTE is valid, and 1660 * allocate a new level 3 page table page if it's not. 1661 * A reference will be added to the level 3 table when 1662 * the mapping is validated. 1663 */ 1664 l2pte = pmap_l2pte(pmap, va, l1pte); 1665 if (pmap_pte_v(l2pte) == 0) { 1666 pmap_physpage_addref(l2pte); 1667 error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT); 1668 if (error) { 1669 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id); 1670 if (flags & PMAP_CANFAIL) 1671 goto out; 1672 panic("pmap_enter: unable to create L3 PT " 1673 "page"); 1674 } 1675 #ifdef DEBUG 1676 if (pmapdebug & PDB_PTPAGE) 1677 printf("pmap_enter: new level 3 table at " 1678 "0x%lx\n", pmap_pte_pa(l2pte)); 1679 #endif 1680 } 1681 1682 /* 1683 * Get the PTE that will map the page. 1684 */ 1685 pte = pmap_l3pte(pmap, va, l2pte); 1686 } 1687 1688 /* Remember all of the old PTE; used for TBI check later. */ 1689 opte = *pte; 1690 1691 /* 1692 * Check to see if the old mapping is valid. If not, validate the 1693 * new one immediately. 1694 */ 1695 if (pmap_pte_v(pte) == 0) { 1696 /* 1697 * No need to invalidate the TLB in this case; an invalid 1698 * mapping won't be in the TLB, and a previously valid 1699 * mapping would have been flushed when it was invalidated. 1700 */ 1701 tflush = false; 1702 1703 /* 1704 * No need to synchronize the I-stream, either, for basically 1705 * the same reason. 1706 */ 1707 setisync = needisync = false; 1708 1709 if (pmap != pmap_kernel()) { 1710 /* 1711 * New mappings gain a reference on the level 3 1712 * table. 1713 */ 1714 pmap_physpage_addref(pte); 1715 } 1716 goto validate_enterpv; 1717 } 1718 1719 opa = pmap_pte_pa(pte); 1720 hadasm = (pmap_pte_asm(pte) != 0); 1721 1722 if (opa == pa) { 1723 /* 1724 * Mapping has not changed; must be a protection or 1725 * wiring change. 1726 */ 1727 if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) { 1728 #ifdef DEBUG 1729 if (pmapdebug & PDB_ENTER) 1730 printf("pmap_enter: wiring change -> %d\n", 1731 wired); 1732 #endif 1733 /* 1734 * Adjust the wiring count. 1735 */ 1736 if (wired) 1737 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1738 else 1739 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1740 } 1741 1742 /* 1743 * Set the PTE. 1744 */ 1745 goto validate; 1746 } 1747 1748 /* 1749 * The mapping has changed. We need to invalidate the 1750 * old mapping before creating the new one. 
1751 */ 1752 #ifdef DEBUG 1753 if (pmapdebug & PDB_ENTER) 1754 printf("pmap_enter: removing old mapping 0x%lx\n", va); 1755 #endif 1756 if (pmap != pmap_kernel()) { 1757 /* 1758 * Gain an extra reference on the level 3 table. 1759 * pmap_remove_mapping() will delete a reference, 1760 * and we don't want the table to be erroneously 1761 * freed. 1762 */ 1763 pmap_physpage_addref(pte); 1764 } 1765 needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id); 1766 1767 validate_enterpv: 1768 /* 1769 * Enter the mapping into the pv_table if appropriate. 1770 */ 1771 if (pg != NULL) { 1772 error = pmap_pv_enter(pmap, pg, va, pte, true); 1773 if (error) { 1774 pmap_l3pt_delref(pmap, va, pte, cpu_id); 1775 if (flags & PMAP_CANFAIL) 1776 goto out; 1777 panic("pmap_enter: unable to enter mapping in PV " 1778 "table"); 1779 } 1780 } 1781 1782 /* 1783 * Increment counters. 1784 */ 1785 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1786 if (wired) 1787 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1788 1789 validate: 1790 /* 1791 * Build the new PTE. 1792 */ 1793 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V; 1794 if (pg != NULL) { 1795 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1796 int attrs; 1797 1798 #ifdef DIAGNOSTIC 1799 if ((flags & VM_PROT_ALL) & ~prot) 1800 panic("pmap_enter: access type exceeds prot"); 1801 #endif 1802 lock = pmap_pvh_lock(pg); 1803 mutex_enter(lock); 1804 if (flags & VM_PROT_WRITE) 1805 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 1806 else if (flags & VM_PROT_ALL) 1807 md->pvh_attrs |= PGA_REFERENCED; 1808 attrs = md->pvh_attrs; 1809 mutex_exit(lock); 1810 1811 /* 1812 * Set up referenced/modified emulation for new mapping. 1813 */ 1814 if ((attrs & PGA_REFERENCED) == 0) 1815 npte |= PG_FOR | PG_FOW | PG_FOE; 1816 else if ((attrs & PGA_MODIFIED) == 0) 1817 npte |= PG_FOW; 1818 1819 /* 1820 * Mapping was entered on PV list. 1821 */ 1822 npte |= PG_PVLIST; 1823 } 1824 if (wired) 1825 npte |= PG_WIRED; 1826 #ifdef DEBUG 1827 if (pmapdebug & PDB_ENTER) 1828 printf("pmap_enter: new pte = 0x%lx\n", npte); 1829 #endif 1830 1831 /* 1832 * If the PALcode portion of the new PTE is the same as the 1833 * old PTE, no TBI is necessary. 1834 */ 1835 if (PG_PALCODE(opte) == PG_PALCODE(npte)) 1836 tflush = false; 1837 1838 /* 1839 * Set the new PTE. 1840 */ 1841 PMAP_SET_PTE(pte, npte); 1842 1843 /* 1844 * Invalidate the TLB entry for this VA and any appropriate 1845 * caches. 1846 */ 1847 if (tflush) { 1848 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 1849 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 1850 PMAP_TLB_SHOOTNOW(); 1851 } 1852 if (setisync) 1853 PMAP_SET_NEEDISYNC(pmap); 1854 if (needisync) 1855 PMAP_SYNC_ISTREAM(pmap); 1856 1857 out: 1858 PMAP_UNLOCK(pmap); 1859 PMAP_MAP_TO_HEAD_UNLOCK(); 1860 1861 return error; 1862 } 1863 1864 /* 1865 * pmap_kenter_pa: [ INTERFACE ] 1866 * 1867 * Enter a va -> pa mapping into the kernel pmap without any 1868 * physical->virtual tracking. 1869 * 1870 * Note: no locking is necessary in this function. 1871 */ 1872 void 1873 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1874 { 1875 pt_entry_t *pte, npte; 1876 long cpu_id = cpu_number(); 1877 bool needisync = false; 1878 pmap_t pmap = pmap_kernel(); 1879 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1880 1881 #ifdef DEBUG 1882 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1883 printf("pmap_kenter_pa(%lx, %lx, %x)\n", 1884 va, pa, prot); 1885 #endif 1886 1887 #ifdef DIAGNOSTIC 1888 /* 1889 * Sanity check the virtual address. 
1890 */ 1891 if (va < VM_MIN_KERNEL_ADDRESS) 1892 panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va); 1893 #endif 1894 1895 pte = PMAP_KERNEL_PTE(va); 1896 1897 if (pmap_pte_v(pte) == 0) 1898 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1899 if (pmap_pte_w(pte) == 0) 1900 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1901 1902 if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte)) 1903 needisync = true; 1904 1905 /* 1906 * Build the new PTE. 1907 */ 1908 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) | 1909 PG_V | PG_WIRED; 1910 1911 /* 1912 * Set the new PTE. 1913 */ 1914 PMAP_SET_PTE(pte, npte); 1915 #if defined(MULTIPROCESSOR) 1916 alpha_mb(); /* XXX alpha_wmb()? */ 1917 #endif 1918 1919 /* 1920 * Invalidate the TLB entry for this VA and any appropriate 1921 * caches. 1922 */ 1923 PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id); 1924 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM); 1925 PMAP_TLB_SHOOTNOW(); 1926 1927 if (needisync) 1928 PMAP_SYNC_ISTREAM_KERNEL(); 1929 } 1930 1931 /* 1932 * pmap_kremove: [ INTERFACE ] 1933 * 1934 * Remove a mapping entered with pmap_kenter_pa() starting at va, 1935 * for size bytes (assumed to be page rounded). 1936 */ 1937 void 1938 pmap_kremove(vaddr_t va, vsize_t size) 1939 { 1940 pt_entry_t *pte; 1941 bool needisync = false; 1942 long cpu_id = cpu_number(); 1943 pmap_t pmap = pmap_kernel(); 1944 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1945 1946 #ifdef DEBUG 1947 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1948 printf("pmap_kremove(%lx, %lx)\n", 1949 va, size); 1950 #endif 1951 1952 #ifdef DIAGNOSTIC 1953 if (va < VM_MIN_KERNEL_ADDRESS) 1954 panic("pmap_kremove: user address"); 1955 #endif 1956 1957 for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) { 1958 pte = PMAP_KERNEL_PTE(va); 1959 if (pmap_pte_v(pte)) { 1960 #ifdef DIAGNOSTIC 1961 if (pmap_pte_pv(pte)) 1962 panic("pmap_kremove: PG_PVLIST mapping for " 1963 "0x%lx", va); 1964 #endif 1965 if (pmap_pte_exec(pte)) 1966 needisync = true; 1967 1968 /* Zap the mapping. */ 1969 PMAP_SET_PTE(pte, PG_NV); 1970 #if defined(MULTIPROCESSOR) 1971 alpha_mb(); /* XXX alpha_wmb()? */ 1972 #endif 1973 PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id); 1974 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM); 1975 1976 /* Update stats. */ 1977 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 1978 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1979 } 1980 } 1981 1982 PMAP_TLB_SHOOTNOW(); 1983 1984 if (needisync) 1985 PMAP_SYNC_ISTREAM_KERNEL(); 1986 } 1987 1988 /* 1989 * pmap_unwire: [ INTERFACE ] 1990 * 1991 * Clear the wired attribute for a map/virtual-address pair. 1992 * 1993 * The mapping must already exist in the pmap. 1994 */ 1995 void 1996 pmap_unwire(pmap_t pmap, vaddr_t va) 1997 { 1998 pt_entry_t *pte; 1999 2000 #ifdef DEBUG 2001 if (pmapdebug & PDB_FOLLOW) 2002 printf("pmap_unwire(%p, %lx)\n", pmap, va); 2003 #endif 2004 2005 PMAP_LOCK(pmap); 2006 2007 pte = pmap_l3pte(pmap, va, NULL); 2008 #ifdef DIAGNOSTIC 2009 if (pte == NULL || pmap_pte_v(pte) == 0) 2010 panic("pmap_unwire"); 2011 #endif 2012 2013 /* 2014 * If wiring actually changed (always?) clear the wire bit and 2015 * update the wire count. Note that wiring is not a hardware 2016 * characteristic so there is no need to invalidate the TLB. 
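 *
 * (Since the wired attribute is purely software state, the
 *  pmap_pte_set_w() below is -- unlike the hardware-visible PTE updates
 *  elsewhere in this file -- not paired with a PMAP_INVALIDATE_TLB or
 *  TLB shootdown sequence.)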
2017 */ 2018 if (pmap_pte_w_chg(pte, 0)) { 2019 pmap_pte_set_w(pte, false); 2020 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2021 } 2022 #ifdef DIAGNOSTIC 2023 else { 2024 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 2025 "didn't change!\n", pmap, va); 2026 } 2027 #endif 2028 2029 PMAP_UNLOCK(pmap); 2030 } 2031 2032 /* 2033 * pmap_extract: [ INTERFACE ] 2034 * 2035 * Extract the physical address associated with the given 2036 * pmap/virtual address pair. 2037 */ 2038 bool 2039 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 2040 { 2041 pt_entry_t *l1pte, *l2pte, *l3pte; 2042 paddr_t pa; 2043 2044 #ifdef DEBUG 2045 if (pmapdebug & PDB_FOLLOW) 2046 printf("pmap_extract(%p, %lx) -> ", pmap, va); 2047 #endif 2048 2049 /* 2050 * Take a faster path for the kernel pmap. Avoids locking, 2051 * handles K0SEG. 2052 */ 2053 if (pmap == pmap_kernel()) { 2054 pa = vtophys(va); 2055 if (pap != NULL) 2056 *pap = pa; 2057 #ifdef DEBUG 2058 if (pmapdebug & PDB_FOLLOW) 2059 printf("0x%lx (kernel vtophys)\n", pa); 2060 #endif 2061 return (pa != 0); /* XXX */ 2062 } 2063 2064 PMAP_LOCK(pmap); 2065 2066 l1pte = pmap_l1pte(pmap, va); 2067 if (pmap_pte_v(l1pte) == 0) 2068 goto out; 2069 2070 l2pte = pmap_l2pte(pmap, va, l1pte); 2071 if (pmap_pte_v(l2pte) == 0) 2072 goto out; 2073 2074 l3pte = pmap_l3pte(pmap, va, l2pte); 2075 if (pmap_pte_v(l3pte) == 0) 2076 goto out; 2077 2078 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 2079 PMAP_UNLOCK(pmap); 2080 if (pap != NULL) 2081 *pap = pa; 2082 #ifdef DEBUG 2083 if (pmapdebug & PDB_FOLLOW) 2084 printf("0x%lx\n", pa); 2085 #endif 2086 return (true); 2087 2088 out: 2089 PMAP_UNLOCK(pmap); 2090 #ifdef DEBUG 2091 if (pmapdebug & PDB_FOLLOW) 2092 printf("failed\n"); 2093 #endif 2094 return (false); 2095 } 2096 2097 /* 2098 * pmap_copy: [ INTERFACE ] 2099 * 2100 * Copy the mapping range specified by src_addr/len 2101 * from the source map to the range dst_addr/len 2102 * in the destination map. 2103 * 2104 * This routine is only advisory and need not do anything. 2105 */ 2106 /* call deleted in <machine/pmap.h> */ 2107 2108 /* 2109 * pmap_update: [ INTERFACE ] 2110 * 2111 * Require that all active physical maps contain no 2112 * incorrect entries NOW, by processing any deferred 2113 * pmap operations. 2114 */ 2115 /* call deleted in <machine/pmap.h> */ 2116 2117 /* 2118 * pmap_activate: [ INTERFACE ] 2119 * 2120 * Activate the pmap used by the specified process. This includes 2121 * reloading the MMU context if the current process, and marking 2122 * the pmap in use by the processor. 2123 */ 2124 void 2125 pmap_activate(struct lwp *l) 2126 { 2127 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2128 long cpu_id = cpu_number(); 2129 2130 #ifdef DEBUG 2131 if (pmapdebug & PDB_FOLLOW) 2132 printf("pmap_activate(%p)\n", l); 2133 #endif 2134 2135 /* Mark the pmap in use by this processor. */ 2136 atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id)); 2137 2138 /* Allocate an ASN. */ 2139 pmap_asn_alloc(pmap, cpu_id); 2140 2141 PMAP_ACTIVATE(pmap, l, cpu_id); 2142 } 2143 2144 /* 2145 * pmap_deactivate: [ INTERFACE ] 2146 * 2147 * Mark that the pmap used by the specified process is no longer 2148 * in use by the processor. 2149 * 2150 * The comment above pmap_activate() wrt. locking applies here, 2151 * as well. Note that we use only a single `atomic' operation, 2152 * so no locking is necessary. 
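 *
 *	An illustrative pairing (an editor's sketch of how an MD context
 *	switch path might use these hooks; not a statement of the actual
 *	switch code):
 *
 *		pmap_deactivate(oldlwp);	clear the old pmap's CPU bit
 *		pmap_activate(newlwp);		set the CPU bit, allocate an
 *						ASN and activate the new map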
2153 */
2154 void
2155 pmap_deactivate(struct lwp *l)
2156 {
2157 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2158
2159 #ifdef DEBUG
2160 if (pmapdebug & PDB_FOLLOW)
2161 printf("pmap_deactivate(%p)\n", l);
2162 #endif
2163
2164 /*
2165 * Mark the pmap no longer in use by this processor.
2166 */
2167 atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
2168 }
2169
2170 /*
2171 * pmap_zero_page: [ INTERFACE ]
2172 *
2173 * Zero the specified (machine independent) page by mapping the page
2174 * into virtual memory and clearing its contents, one machine dependent
2175 * page at a time.
2176 *
2177 * Note: no locking is necessary in this function.
2178 */
2179 void
2180 pmap_zero_page(paddr_t phys)
2181 {
2182 u_long *p0, *p1, *pend;
2183
2184 #ifdef DEBUG
2185 if (pmapdebug & PDB_FOLLOW)
2186 printf("pmap_zero_page(%lx)\n", phys);
2187 #endif
2188
2189 p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2190 p1 = NULL;
2191 pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2192
2193 /*
2194 * Unroll the loop a bit, doing 16 quadwords per iteration.
2195 * Do only 8 back-to-back stores, and alternate registers.
2196 */
2197 do {
2198 __asm volatile(
2199 "# BEGIN loop body\n"
2200 " addq %2, (8 * 8), %1 \n"
2201 " stq $31, (0 * 8)(%0) \n"
2202 " stq $31, (1 * 8)(%0) \n"
2203 " stq $31, (2 * 8)(%0) \n"
2204 " stq $31, (3 * 8)(%0) \n"
2205 " stq $31, (4 * 8)(%0) \n"
2206 " stq $31, (5 * 8)(%0) \n"
2207 " stq $31, (6 * 8)(%0) \n"
2208 " stq $31, (7 * 8)(%0) \n"
2209 " \n"
2210 " addq %3, (8 * 8), %0 \n"
2211 " stq $31, (0 * 8)(%1) \n"
2212 " stq $31, (1 * 8)(%1) \n"
2213 " stq $31, (2 * 8)(%1) \n"
2214 " stq $31, (3 * 8)(%1) \n"
2215 " stq $31, (4 * 8)(%1) \n"
2216 " stq $31, (5 * 8)(%1) \n"
2217 " stq $31, (6 * 8)(%1) \n"
2218 " stq $31, (7 * 8)(%1) \n"
2219 " # END loop body"
2220 : "=r" (p0), "=r" (p1)
2221 : "0" (p0), "1" (p1)
2222 : "memory");
2223 } while (p0 < pend);
2224 }
2225
2226 /*
2227 * pmap_copy_page: [ INTERFACE ]
2228 *
2229 * Copy the specified (machine independent) page by mapping the page
2230 * into virtual memory and using memcpy to copy the page, one machine
2231 * dependent page at a time.
2232 *
2233 * Note: no locking is necessary in this function.
2234 */
2235 void
2236 pmap_copy_page(paddr_t src, paddr_t dst)
2237 {
2238 const void *s;
2239 void *d;
2240
2241 #ifdef DEBUG
2242 if (pmapdebug & PDB_FOLLOW)
2243 printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2244 #endif
2245 s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
2246 d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
2247 memcpy(d, s, PAGE_SIZE);
2248 }
2249
2250 /*
2251 * pmap_pageidlezero: [ INTERFACE ]
2252 *
2253 * Page zero'er for the idle loop. Returns true if the
2254 * page was zero'd, false if we aborted for some reason.
2255 */
2256 bool
2257 pmap_pageidlezero(paddr_t pa)
2258 {
2259 u_long *ptr;
2260 int i, cnt = PAGE_SIZE / sizeof(u_long);
2261
2262 for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2263 if (sched_curcpu_runnable_p()) {
2264 /*
2265 * An LWP has become ready. Abort now,
2266 * so we don't keep it waiting while we
2267 * finish zeroing the page.
2268 */
2269 return (false);
2270 }
2271 *ptr++ = 0;
2272 }
2273
2274 return (true);
2275 }
2276
2277 /*
2278 * pmap_clear_modify: [ INTERFACE ]
2279 *
2280 * Clear the modify bits on the specified physical page.
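 *
 *	Clearing PGA_MODIFIED goes hand in hand with setting PG_FOW again
 *	on every mapping of the page (via pmap_changebit() below), so the
 *	next write takes a fault-on-write that re-enters
 *	pmap_emulate_reference() and re-marks the page modified.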
2281 */ 2282 bool 2283 pmap_clear_modify(struct vm_page *pg) 2284 { 2285 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2286 bool rv = false; 2287 long cpu_id = cpu_number(); 2288 kmutex_t *lock; 2289 2290 #ifdef DEBUG 2291 if (pmapdebug & PDB_FOLLOW) 2292 printf("pmap_clear_modify(%p)\n", pg); 2293 #endif 2294 2295 PMAP_HEAD_TO_MAP_LOCK(); 2296 lock = pmap_pvh_lock(pg); 2297 mutex_enter(lock); 2298 2299 if (md->pvh_attrs & PGA_MODIFIED) { 2300 rv = true; 2301 pmap_changebit(pg, PG_FOW, ~0, cpu_id); 2302 md->pvh_attrs &= ~PGA_MODIFIED; 2303 } 2304 2305 mutex_exit(lock); 2306 PMAP_HEAD_TO_MAP_UNLOCK(); 2307 2308 return (rv); 2309 } 2310 2311 /* 2312 * pmap_clear_reference: [ INTERFACE ] 2313 * 2314 * Clear the reference bit on the specified physical page. 2315 */ 2316 bool 2317 pmap_clear_reference(struct vm_page *pg) 2318 { 2319 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2320 bool rv = false; 2321 long cpu_id = cpu_number(); 2322 kmutex_t *lock; 2323 2324 #ifdef DEBUG 2325 if (pmapdebug & PDB_FOLLOW) 2326 printf("pmap_clear_reference(%p)\n", pg); 2327 #endif 2328 2329 PMAP_HEAD_TO_MAP_LOCK(); 2330 lock = pmap_pvh_lock(pg); 2331 mutex_enter(lock); 2332 2333 if (md->pvh_attrs & PGA_REFERENCED) { 2334 rv = true; 2335 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id); 2336 md->pvh_attrs &= ~PGA_REFERENCED; 2337 } 2338 2339 mutex_exit(lock); 2340 PMAP_HEAD_TO_MAP_UNLOCK(); 2341 2342 return (rv); 2343 } 2344 2345 /* 2346 * pmap_is_referenced: [ INTERFACE ] 2347 * 2348 * Return whether or not the specified physical page is referenced 2349 * by any physical maps. 2350 */ 2351 /* See <machine/pmap.h> */ 2352 2353 /* 2354 * pmap_is_modified: [ INTERFACE ] 2355 * 2356 * Return whether or not the specified physical page is modified 2357 * by any physical maps. 2358 */ 2359 /* See <machine/pmap.h> */ 2360 2361 /* 2362 * pmap_phys_address: [ INTERFACE ] 2363 * 2364 * Return the physical address corresponding to the specified 2365 * cookie. Used by the device pager to decode a device driver's 2366 * mmap entry point return value. 2367 * 2368 * Note: no locking is necessary in this function. 2369 */ 2370 paddr_t 2371 pmap_phys_address(paddr_t ppn) 2372 { 2373 2374 return (alpha_ptob(ppn)); 2375 } 2376 2377 /* 2378 * Miscellaneous support routines follow 2379 */ 2380 2381 /* 2382 * alpha_protection_init: 2383 * 2384 * Initialize Alpha protection code array. 2385 * 2386 * Note: no locking is necessary in this function. 2387 */ 2388 static void 2389 alpha_protection_init(void) 2390 { 2391 int prot, *kp, *up; 2392 2393 kp = protection_codes[0]; 2394 up = protection_codes[1]; 2395 2396 for (prot = 0; prot < 8; prot++) { 2397 kp[prot] = PG_ASM; 2398 up[prot] = 0; 2399 2400 if (prot & VM_PROT_READ) { 2401 kp[prot] |= PG_KRE; 2402 up[prot] |= PG_KRE | PG_URE; 2403 } 2404 if (prot & VM_PROT_WRITE) { 2405 kp[prot] |= PG_KWE; 2406 up[prot] |= PG_KWE | PG_UWE; 2407 } 2408 if (prot & VM_PROT_EXECUTE) { 2409 kp[prot] |= PG_EXEC | PG_KRE; 2410 up[prot] |= PG_EXEC | PG_KRE | PG_URE; 2411 } else { 2412 kp[prot] |= PG_FOE; 2413 up[prot] |= PG_FOE; 2414 } 2415 } 2416 } 2417 2418 /* 2419 * pmap_remove_mapping: 2420 * 2421 * Invalidate a single page denoted by pmap/va. 2422 * 2423 * If (pte != NULL), it is the already computed PTE for the page. 2424 * 2425 * Note: locking in this function is complicated by the fact 2426 * that we can be called when the PV list is already locked. 2427 * (pmap_page_protect()). 
In this case, the caller must be 2428 * careful to get the next PV entry while we remove this entry 2429 * from beneath it. We assume that the pmap itself is already 2430 * locked; dolock applies only to the PV list. 2431 * 2432 * Returns true or false, indicating if an I-stream sync needs 2433 * to be initiated (for this CPU or for other CPUs). 2434 */ 2435 static bool 2436 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte, 2437 bool dolock, long cpu_id) 2438 { 2439 paddr_t pa; 2440 struct vm_page *pg; /* if != NULL, page is managed */ 2441 bool onpv; 2442 bool hadasm; 2443 bool isactive; 2444 bool needisync = false; 2445 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2446 2447 #ifdef DEBUG 2448 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 2449 printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n", 2450 pmap, va, pte, dolock, cpu_id); 2451 #endif 2452 2453 /* 2454 * PTE not provided, compute it from pmap and va. 2455 */ 2456 if (pte == NULL) { 2457 pte = pmap_l3pte(pmap, va, NULL); 2458 if (pmap_pte_v(pte) == 0) 2459 return (false); 2460 } 2461 2462 pa = pmap_pte_pa(pte); 2463 onpv = (pmap_pte_pv(pte) != 0); 2464 hadasm = (pmap_pte_asm(pte) != 0); 2465 isactive = PMAP_ISACTIVE(pmap, cpu_id); 2466 2467 /* 2468 * Determine what we need to do about the I-stream. If 2469 * PG_EXEC was set, we mark a user pmap as needing an 2470 * I-sync on the way out to userspace. We always need 2471 * an immediate I-sync for the kernel pmap. 2472 */ 2473 if (pmap_pte_exec(pte)) { 2474 if (pmap == pmap_kernel()) 2475 needisync = true; 2476 else { 2477 PMAP_SET_NEEDISYNC(pmap); 2478 needisync = (pmap->pm_cpus != 0); 2479 } 2480 } 2481 2482 /* 2483 * Update statistics 2484 */ 2485 if (pmap_pte_w(pte)) 2486 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2487 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 2488 2489 /* 2490 * Invalidate the PTE after saving the reference modify info. 2491 */ 2492 #ifdef DEBUG 2493 if (pmapdebug & PDB_REMOVE) 2494 printf("remove: invalidating pte at %p\n", pte); 2495 #endif 2496 PMAP_SET_PTE(pte, PG_NV); 2497 2498 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 2499 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 2500 PMAP_TLB_SHOOTNOW(); 2501 2502 /* 2503 * If we're removing a user mapping, check to see if we 2504 * can free page table pages. 2505 */ 2506 if (pmap != pmap_kernel()) { 2507 /* 2508 * Delete the reference on the level 3 table. It will 2509 * delete references on the level 2 and 1 tables as 2510 * appropriate. 2511 */ 2512 pmap_l3pt_delref(pmap, va, pte, cpu_id); 2513 } 2514 2515 /* 2516 * If the mapping wasn't entered on the PV list, we're all done. 2517 */ 2518 if (onpv == false) 2519 return (needisync); 2520 2521 /* 2522 * Remove it from the PV table. 2523 */ 2524 pg = PHYS_TO_VM_PAGE(pa); 2525 KASSERT(pg != NULL); 2526 pmap_pv_remove(pmap, pg, va, dolock); 2527 2528 return (needisync); 2529 } 2530 2531 /* 2532 * pmap_changebit: 2533 * 2534 * Set or clear the specified PTE bits for all mappings on the 2535 * specified page. 2536 * 2537 * Note: we assume that the pv_head is already locked, and that 2538 * the caller has acquired a PV->pmap mutex so that we can lock 2539 * the pmaps as we encounter them. 
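 *
 *	The new PTE value for each mapping is computed as
 *	((*pte | set) & mask); for example, elsewhere in this file:
 *
 *		pmap_clear_modify():	  set = PG_FOW, mask = ~0
 *		pmap_clear_reference():	  set = PG_FOR|PG_FOW|PG_FOE, mask = ~0
 *		pmap_emulate_reference(): set = 0, mask = ~faultoff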
2540 */ 2541 static void 2542 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id) 2543 { 2544 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2545 pv_entry_t pv; 2546 pt_entry_t *pte, npte; 2547 vaddr_t va; 2548 bool hadasm, isactive; 2549 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2550 2551 #ifdef DEBUG 2552 if (pmapdebug & PDB_BITS) 2553 printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n", 2554 pg, set, mask); 2555 #endif 2556 2557 /* 2558 * Loop over all current mappings setting/clearing as apropos. 2559 */ 2560 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2561 va = pv->pv_va; 2562 2563 PMAP_LOCK(pv->pv_pmap); 2564 2565 pte = pv->pv_pte; 2566 npte = (*pte | set) & mask; 2567 if (*pte != npte) { 2568 hadasm = (pmap_pte_asm(pte) != 0); 2569 isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id); 2570 PMAP_SET_PTE(pte, npte); 2571 PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive, 2572 cpu_id); 2573 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va, 2574 hadasm ? PG_ASM : 0); 2575 } 2576 PMAP_UNLOCK(pv->pv_pmap); 2577 } 2578 2579 PMAP_TLB_SHOOTNOW(); 2580 } 2581 2582 /* 2583 * pmap_emulate_reference: 2584 * 2585 * Emulate reference and/or modified bit hits. 2586 * Return 1 if this was an execute fault on a non-exec mapping, 2587 * otherwise return 0. 2588 */ 2589 int 2590 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) 2591 { 2592 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2593 pt_entry_t faultoff, *pte; 2594 struct vm_page *pg; 2595 paddr_t pa; 2596 bool didlock = false; 2597 bool exec = false; 2598 long cpu_id = cpu_number(); 2599 kmutex_t *lock; 2600 2601 #ifdef DEBUG 2602 if (pmapdebug & PDB_FOLLOW) 2603 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n", 2604 l, v, user, type); 2605 #endif 2606 2607 /* 2608 * Convert process and virtual address to physical address. 2609 */ 2610 if (v >= VM_MIN_KERNEL_ADDRESS) { 2611 if (user) 2612 panic("pmap_emulate_reference: user ref to kernel"); 2613 /* 2614 * No need to lock here; kernel PT pages never go away. 2615 */ 2616 pte = PMAP_KERNEL_PTE(v); 2617 } else { 2618 #ifdef DIAGNOSTIC 2619 if (l == NULL) 2620 panic("pmap_emulate_reference: bad proc"); 2621 if (l->l_proc->p_vmspace == NULL) 2622 panic("pmap_emulate_reference: bad p_vmspace"); 2623 #endif 2624 PMAP_LOCK(pmap); 2625 didlock = true; 2626 pte = pmap_l3pte(pmap, v, NULL); 2627 /* 2628 * We'll unlock below where we're done with the PTE. 2629 */ 2630 } 2631 exec = pmap_pte_exec(pte); 2632 if (!exec && type == ALPHA_MMCSR_FOE) { 2633 if (didlock) 2634 PMAP_UNLOCK(pmap); 2635 return (1); 2636 } 2637 #ifdef DEBUG 2638 if (pmapdebug & PDB_FOLLOW) { 2639 printf("\tpte = %p, ", pte); 2640 printf("*pte = 0x%lx\n", *pte); 2641 } 2642 #endif 2643 #ifdef DEBUG /* These checks are more expensive */ 2644 if (!pmap_pte_v(pte)) 2645 panic("pmap_emulate_reference: invalid pte"); 2646 if (type == ALPHA_MMCSR_FOW) { 2647 if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE))) 2648 panic("pmap_emulate_reference: write but unwritable"); 2649 if (!(*pte & PG_FOW)) 2650 panic("pmap_emulate_reference: write but not FOW"); 2651 } else { 2652 if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE))) 2653 panic("pmap_emulate_reference: !write but unreadable"); 2654 if (!(*pte & (PG_FOR | PG_FOE))) 2655 panic("pmap_emulate_reference: !write but not FOR|FOE"); 2656 } 2657 /* Other diagnostics? */ 2658 #endif 2659 pa = pmap_pte_pa(pte); 2660 2661 /* 2662 * We're now done with the PTE. If it was a user pmap, unlock 2663 * it now. 
2664 */ 2665 if (didlock) 2666 PMAP_UNLOCK(pmap); 2667 2668 #ifdef DEBUG 2669 if (pmapdebug & PDB_FOLLOW) 2670 printf("\tpa = 0x%lx\n", pa); 2671 #endif 2672 #ifdef DIAGNOSTIC 2673 if (!uvm_pageismanaged(pa)) 2674 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): " 2675 "pa 0x%lx not managed", l, v, user, type, pa); 2676 #endif 2677 2678 /* 2679 * Twiddle the appropriate bits to reflect the reference 2680 * and/or modification.. 2681 * 2682 * The rules: 2683 * (1) always mark page as used, and 2684 * (2) if it was a write fault, mark page as modified. 2685 */ 2686 pg = PHYS_TO_VM_PAGE(pa); 2687 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2688 2689 PMAP_HEAD_TO_MAP_LOCK(); 2690 lock = pmap_pvh_lock(pg); 2691 mutex_enter(lock); 2692 2693 if (type == ALPHA_MMCSR_FOW) { 2694 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 2695 faultoff = PG_FOR | PG_FOW; 2696 } else { 2697 md->pvh_attrs |= PGA_REFERENCED; 2698 faultoff = PG_FOR; 2699 if (exec) { 2700 faultoff |= PG_FOE; 2701 } 2702 } 2703 pmap_changebit(pg, 0, ~faultoff, cpu_id); 2704 2705 mutex_exit(lock); 2706 PMAP_HEAD_TO_MAP_UNLOCK(); 2707 return (0); 2708 } 2709 2710 #ifdef DEBUG 2711 /* 2712 * pmap_pv_dump: 2713 * 2714 * Dump the physical->virtual data for the specified page. 2715 */ 2716 void 2717 pmap_pv_dump(paddr_t pa) 2718 { 2719 struct vm_page *pg; 2720 struct vm_page_md *md; 2721 pv_entry_t pv; 2722 kmutex_t *lock; 2723 2724 pg = PHYS_TO_VM_PAGE(pa); 2725 md = VM_PAGE_TO_MD(pg); 2726 2727 lock = pmap_pvh_lock(pg); 2728 mutex_enter(lock); 2729 2730 printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs); 2731 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) 2732 printf(" pmap %p, va 0x%lx\n", 2733 pv->pv_pmap, pv->pv_va); 2734 printf("\n"); 2735 2736 mutex_exit(lock); 2737 } 2738 #endif 2739 2740 /* 2741 * vtophys: 2742 * 2743 * Return the physical address corresponding to the K0SEG or 2744 * K1SEG address provided. 2745 * 2746 * Note: no locking is necessary in this function. 2747 */ 2748 paddr_t 2749 vtophys(vaddr_t vaddr) 2750 { 2751 pt_entry_t *pte; 2752 paddr_t paddr = 0; 2753 2754 if (vaddr < ALPHA_K0SEG_BASE) 2755 printf("vtophys: invalid vaddr 0x%lx", vaddr); 2756 else if (vaddr <= ALPHA_K0SEG_END) 2757 paddr = ALPHA_K0SEG_TO_PHYS(vaddr); 2758 else { 2759 pte = PMAP_KERNEL_PTE(vaddr); 2760 if (pmap_pte_v(pte)) 2761 paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET); 2762 } 2763 2764 #if 0 2765 printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr); 2766 #endif 2767 2768 return (paddr); 2769 } 2770 2771 /******************** pv_entry management ********************/ 2772 2773 /* 2774 * pmap_pv_enter: 2775 * 2776 * Add a physical->virtual entry to the pv_table. 2777 */ 2778 static int 2779 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, 2780 bool dolock) 2781 { 2782 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2783 pv_entry_t newpv; 2784 kmutex_t *lock; 2785 2786 /* 2787 * Allocate and fill in the new pv_entry. 2788 */ 2789 newpv = pmap_pv_alloc(); 2790 if (newpv == NULL) 2791 return ENOMEM; 2792 newpv->pv_va = va; 2793 newpv->pv_pmap = pmap; 2794 newpv->pv_pte = pte; 2795 2796 if (dolock) { 2797 lock = pmap_pvh_lock(pg); 2798 mutex_enter(lock); 2799 } 2800 2801 #ifdef DEBUG 2802 { 2803 pv_entry_t pv; 2804 /* 2805 * Make sure the entry doesn't already exist. 
2806 */ 2807 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2808 if (pmap == pv->pv_pmap && va == pv->pv_va) { 2809 printf("pmap = %p, va = 0x%lx\n", pmap, va); 2810 panic("pmap_pv_enter: already in pv table"); 2811 } 2812 } 2813 } 2814 #endif 2815 2816 /* 2817 * ...and put it in the list. 2818 */ 2819 newpv->pv_next = md->pvh_list; 2820 md->pvh_list = newpv; 2821 2822 if (dolock) { 2823 mutex_exit(lock); 2824 } 2825 2826 return 0; 2827 } 2828 2829 /* 2830 * pmap_pv_remove: 2831 * 2832 * Remove a physical->virtual entry from the pv_table. 2833 */ 2834 static void 2835 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) 2836 { 2837 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2838 pv_entry_t pv, *pvp; 2839 kmutex_t *lock; 2840 2841 if (dolock) { 2842 lock = pmap_pvh_lock(pg); 2843 mutex_enter(lock); 2844 } else { 2845 lock = NULL; /* XXX stupid gcc */ 2846 } 2847 2848 /* 2849 * Find the entry to remove. 2850 */ 2851 for (pvp = &md->pvh_list, pv = *pvp; 2852 pv != NULL; pvp = &pv->pv_next, pv = *pvp) 2853 if (pmap == pv->pv_pmap && va == pv->pv_va) 2854 break; 2855 2856 #ifdef DEBUG 2857 if (pv == NULL) 2858 panic("pmap_pv_remove: not in pv table"); 2859 #endif 2860 2861 *pvp = pv->pv_next; 2862 2863 if (dolock) { 2864 mutex_exit(lock); 2865 } 2866 2867 pmap_pv_free(pv); 2868 } 2869 2870 /* 2871 * pmap_pv_page_alloc: 2872 * 2873 * Allocate a page for the pv_entry pool. 2874 */ 2875 static void * 2876 pmap_pv_page_alloc(struct pool *pp, int flags) 2877 { 2878 paddr_t pg; 2879 2880 if (pmap_physpage_alloc(PGU_PVENT, &pg)) 2881 return ((void *)ALPHA_PHYS_TO_K0SEG(pg)); 2882 return (NULL); 2883 } 2884 2885 /* 2886 * pmap_pv_page_free: 2887 * 2888 * Free a pv_entry pool page. 2889 */ 2890 static void 2891 pmap_pv_page_free(struct pool *pp, void *v) 2892 { 2893 2894 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v)); 2895 } 2896 2897 /******************** misc. functions ********************/ 2898 2899 /* 2900 * pmap_physpage_alloc: 2901 * 2902 * Allocate a single page from the VM system and return the 2903 * physical address for that page. 2904 */ 2905 static bool 2906 pmap_physpage_alloc(int usage, paddr_t *pap) 2907 { 2908 struct vm_page *pg; 2909 paddr_t pa; 2910 2911 /* 2912 * Don't ask for a zero'd page in the L1PT case -- we will 2913 * properly initialize it in the constructor. 2914 */ 2915 2916 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ? 2917 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2918 if (pg != NULL) { 2919 pa = VM_PAGE_TO_PHYS(pg); 2920 #ifdef DEBUG 2921 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2922 if (md->pvh_refcnt != 0) { 2923 printf("pmap_physpage_alloc: page 0x%lx has " 2924 "%d references\n", pa, md->pvh_refcnt); 2925 panic("pmap_physpage_alloc"); 2926 } 2927 #endif 2928 *pap = pa; 2929 return (true); 2930 } 2931 return (false); 2932 } 2933 2934 /* 2935 * pmap_physpage_free: 2936 * 2937 * Free the single page table page at the specified physical address. 2938 */ 2939 static void 2940 pmap_physpage_free(paddr_t pa) 2941 { 2942 struct vm_page *pg; 2943 2944 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) 2945 panic("pmap_physpage_free: bogus physical page address"); 2946 2947 #ifdef DEBUG 2948 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2949 if (md->pvh_refcnt != 0) 2950 panic("pmap_physpage_free: page still has references"); 2951 #endif 2952 2953 uvm_pagefree(pg); 2954 } 2955 2956 /* 2957 * pmap_physpage_addref: 2958 * 2959 * Add a reference to the specified special use page. 
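 *
 *	The count is kept in the page's vm_page_md (pvh_refcnt) and is
 *	paired with pmap_physpage_delref(): a page table page gains a
 *	reference for each valid entry placed in it (see pmap_enter()),
 *	and is freed through the pmap_l3pt_delref()/pmap_l2pt_delref()
 *	cascade once its count drops back to zero.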
2960 */ 2961 static int 2962 pmap_physpage_addref(void *kva) 2963 { 2964 struct vm_page *pg; 2965 struct vm_page_md *md; 2966 paddr_t pa; 2967 2968 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2969 pg = PHYS_TO_VM_PAGE(pa); 2970 md = VM_PAGE_TO_MD(pg); 2971 2972 KASSERT((int)md->pvh_refcnt >= 0); 2973 2974 return atomic_inc_uint_nv(&md->pvh_refcnt); 2975 } 2976 2977 /* 2978 * pmap_physpage_delref: 2979 * 2980 * Delete a reference to the specified special use page. 2981 */ 2982 static int 2983 pmap_physpage_delref(void *kva) 2984 { 2985 struct vm_page *pg; 2986 struct vm_page_md *md; 2987 paddr_t pa; 2988 2989 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2990 pg = PHYS_TO_VM_PAGE(pa); 2991 md = VM_PAGE_TO_MD(pg); 2992 2993 KASSERT((int)md->pvh_refcnt > 0); 2994 2995 return atomic_dec_uint_nv(&md->pvh_refcnt); 2996 } 2997 2998 /******************** page table page management ********************/ 2999 3000 /* 3001 * pmap_growkernel: [ INTERFACE ] 3002 * 3003 * Grow the kernel address space. This is a hint from the 3004 * upper layer to pre-allocate more kernel PT pages. 3005 */ 3006 vaddr_t 3007 pmap_growkernel(vaddr_t maxkvaddr) 3008 { 3009 struct pmap *kpm = pmap_kernel(), *pm; 3010 paddr_t ptaddr; 3011 pt_entry_t *l1pte, *l2pte, pte; 3012 vaddr_t va; 3013 int l1idx; 3014 3015 rw_enter(&pmap_growkernel_lock, RW_WRITER); 3016 3017 if (maxkvaddr <= virtual_end) 3018 goto out; /* we are OK */ 3019 3020 va = virtual_end; 3021 3022 while (va < maxkvaddr) { 3023 /* 3024 * If there is no valid L1 PTE (i.e. no L2 PT page), 3025 * allocate a new L2 PT page and insert it into the 3026 * L1 map. 3027 */ 3028 l1pte = pmap_l1pte(kpm, va); 3029 if (pmap_pte_v(l1pte) == 0) { 3030 /* 3031 * XXX PGU_NORMAL? It's not a "traditional" PT page. 3032 */ 3033 if (uvm.page_init_done == false) { 3034 /* 3035 * We're growing the kernel pmap early (from 3036 * uvm_pageboot_alloc()). This case must 3037 * be handled a little differently. 3038 */ 3039 ptaddr = ALPHA_K0SEG_TO_PHYS( 3040 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3041 } else if (pmap_physpage_alloc(PGU_NORMAL, 3042 &ptaddr) == false) 3043 goto die; 3044 pte = (atop(ptaddr) << PG_SHIFT) | 3045 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3046 *l1pte = pte; 3047 3048 l1idx = l1pte_index(va); 3049 3050 /* Update all the user pmaps. */ 3051 mutex_enter(&pmap_all_pmaps_lock); 3052 for (pm = TAILQ_FIRST(&pmap_all_pmaps); 3053 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { 3054 /* Skip the kernel pmap. */ 3055 if (pm == pmap_kernel()) 3056 continue; 3057 3058 PMAP_LOCK(pm); 3059 if (pm->pm_lev1map == kernel_lev1map) { 3060 PMAP_UNLOCK(pm); 3061 continue; 3062 } 3063 pm->pm_lev1map[l1idx] = pte; 3064 PMAP_UNLOCK(pm); 3065 } 3066 mutex_exit(&pmap_all_pmaps_lock); 3067 } 3068 3069 /* 3070 * Have an L2 PT page now, add the L3 PT page. 3071 */ 3072 l2pte = pmap_l2pte(kpm, va, l1pte); 3073 KASSERT(pmap_pte_v(l2pte) == 0); 3074 if (uvm.page_init_done == false) { 3075 /* 3076 * See above. 3077 */ 3078 ptaddr = ALPHA_K0SEG_TO_PHYS( 3079 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3080 } else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false) 3081 goto die; 3082 *l2pte = (atop(ptaddr) << PG_SHIFT) | 3083 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3084 va += ALPHA_L2SEG_SIZE; 3085 } 3086 3087 /* Invalidate the L1 PT cache. 
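 * Constructed L1 tables sitting in pmap_l1pt_cache hold the copy of the
 * kernel L1 entries made by pmap_l1pt_ctor(); after growing the kernel
 * map those cached copies are stale, so the cached objects are
 * destructed here.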
*/
3088 pool_cache_invalidate(&pmap_l1pt_cache);
3089
3090 virtual_end = va;
3091
3092 out:
3093 rw_exit(&pmap_growkernel_lock);
3094
3095 return (virtual_end);
3096
3097 die:
3098 panic("pmap_growkernel: out of memory");
3099 }
3100
3101 /*
3102 * pmap_lev1map_create:
3103 *
3104 * Create a new level 1 page table for the specified pmap.
3105 *
3106 * Note: the growkernel lock must already be held and the pmap either
3107 * already locked or unreferenced globally.
3108 */
3109 static int
3110 pmap_lev1map_create(pmap_t pmap, long cpu_id)
3111 {
3112 pt_entry_t *l1pt;
3113
3114 KASSERT(pmap != pmap_kernel());
3115
3116 KASSERT(pmap->pm_lev1map == kernel_lev1map);
3117 KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3118
3119 /* Don't sleep -- we're called with locks held. */
3120 l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
3121 if (l1pt == NULL)
3122 return (ENOMEM);
3123
3124 pmap->pm_lev1map = l1pt;
3125 return (0);
3126 }
3127
3128 /*
3129 * pmap_lev1map_destroy:
3130 *
3131 * Destroy the level 1 page table for the specified pmap.
3132 *
3133 * Note: the growkernel lock must be held and the pmap must already be
3134 * locked or not globally referenced.
3135 */
3136 static void
3137 pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
3138 {
3139 pt_entry_t *l1pt = pmap->pm_lev1map;
3140
3141 KASSERT(pmap != pmap_kernel());
3142
3143 /*
3144 * Go back to referencing the global kernel_lev1map.
3145 */
3146 pmap->pm_lev1map = kernel_lev1map;
3147
3148 /*
3149 * Free the old level 1 page table page.
3150 */
3151 pool_cache_put(&pmap_l1pt_cache, l1pt);
3152 }
3153
3154 /*
3155 * pmap_l1pt_ctor:
3156 *
3157 * Pool cache constructor for L1 PT pages.
3158 *
3159 * Note: The growkernel lock is held across allocations
3160 * from our pool_cache, so we don't need to acquire it
3161 * ourselves.
3162 */
3163 static int
3164 pmap_l1pt_ctor(void *arg, void *object, int flags)
3165 {
3166 pt_entry_t *l1pt = object, pte;
3167 int i;
3168
3169 /*
3170 * Initialize the new level 1 table by zeroing the
3171 * user portion and copying the kernel mappings into
3172 * the kernel portion.
3173 */
3174 for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3175 l1pt[i] = 0;
3176
3177 for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3178 i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3179 l1pt[i] = kernel_lev1map[i];
3180
3181 /*
3182 * Now, map the new virtual page table. NOTE: NO ASM!
3183 */
3184 pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3185 PG_V | PG_KRE | PG_KWE;
3186 l1pt[l1pte_index(VPTBASE)] = pte;
3187
3188 return (0);
3189 }
3190
3191 /*
3192 * pmap_l1pt_alloc:
3193 *
3194 * Page allocator for L1 PT pages.
3195 */
3196 static void *
3197 pmap_l1pt_alloc(struct pool *pp, int flags)
3198 {
3199 paddr_t ptpa;
3200
3201 /*
3202 * Attempt to allocate a free page.
3203 */
3204 if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
3205 return (NULL);
3206
3207 return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3208 }
3209
3210 /*
3211 * pmap_l1pt_free:
3212 *
3213 * Page freer for L1 PT pages.
3214 */
3215 static void
3216 pmap_l1pt_free(struct pool *pp, void *v)
3217 {
3218
3219 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3220 }
3221
3222 /*
3223 * pmap_ptpage_alloc:
3224 *
3225 * Allocate a level 2 or level 3 page table page, and
3226 * initialize the PTE that references it.
3227 *
3228 * Note: the pmap must already be locked.
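 *
 *	Returns 0 on success, or ENOMEM if no physical page was available.
 *	For reference, pmap_enter() grows a user pmap's tables on demand
 *	with
 *
 *		error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
 *		error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);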
3229 */
3230 static int
3231 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3232 {
3233 paddr_t ptpa;
3234
3235 /*
3236 * Allocate the page table page.
3237 */
3238 if (pmap_physpage_alloc(usage, &ptpa) == false)
3239 return (ENOMEM);
3240
3241 /*
3242 * Initialize the referencing PTE.
3243 */
3244 PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3245 PG_V | PG_KRE | PG_KWE | PG_WIRED |
3246 (pmap == pmap_kernel() ? PG_ASM : 0));
3247
3248 return (0);
3249 }
3250
3251 /*
3252 * pmap_ptpage_free:
3253 *
3254 * Free the level 2 or level 3 page table page referenced
3255 * by the provided PTE.
3256 *
3257 * Note: the pmap must already be locked.
3258 */
3259 static void
3260 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3261 {
3262 paddr_t ptpa;
3263
3264 /*
3265 * Extract the physical address of the page from the PTE
3266 * and clear the entry.
3267 */
3268 ptpa = pmap_pte_pa(pte);
3269 PMAP_SET_PTE(pte, PG_NV);
3270
3271 #ifdef DEBUG
3272 pmap_zero_page(ptpa);
3273 #endif
3274 pmap_physpage_free(ptpa);
3275 }
3276
3277 /*
3278 * pmap_l3pt_delref:
3279 *
3280 * Delete a reference on a level 3 PT page. If the reference drops
3281 * to zero, free it.
3282 *
3283 * Note: the pmap must already be locked.
3284 */
3285 static void
3286 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id)
3287 {
3288 pt_entry_t *l1pte, *l2pte;
3289 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3290
3291 l1pte = pmap_l1pte(pmap, va);
3292 l2pte = pmap_l2pte(pmap, va, l1pte);
3293
3294 #ifdef DIAGNOSTIC
3295 if (pmap == pmap_kernel())
3296 panic("pmap_l3pt_delref: kernel pmap");
3297 #endif
3298
3299 if (pmap_physpage_delref(l3pte) == 0) {
3300 /*
3301 * No more mappings; we can free the level 3 table.
3302 */
3303 #ifdef DEBUG
3304 if (pmapdebug & PDB_PTPAGE)
3305 printf("pmap_l3pt_delref: freeing level 3 table at "
3306 "0x%lx\n", pmap_pte_pa(l2pte));
3307 #endif
3308 pmap_ptpage_free(pmap, l2pte);
3309
3310 /*
3311 * We've freed a level 3 table, so we must
3312 * invalidate the TLB entry for that PT page
3313 * in the Virtual Page Table VA range, because
3314 * otherwise the PALcode will service a TLB
3315 * miss using the stale VPT TLB entry it entered
3316 * behind our back to shortcut to the VA's PTE.
3317 */
3318 PMAP_INVALIDATE_TLB(pmap,
3319 (vaddr_t)(&VPT[VPT_INDEX(va)]), false,
3320 PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3321 PMAP_TLB_SHOOTDOWN(pmap,
3322 (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3323 PMAP_TLB_SHOOTNOW();
3324
3325 /*
3326 * We've freed a level 3 table, so delete the reference
3327 * on the level 2 table.
3328 */
3329 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
3330 }
3331 }
3332
3333 /*
3334 * pmap_l2pt_delref:
3335 *
3336 * Delete a reference on a level 2 PT page. If the reference drops
3337 * to zero, free it.
3338 *
3339 * Note: the pmap must already be locked.
3340 */
3341 static void
3342 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3343 long cpu_id)
3344 {
3345
3346 #ifdef DIAGNOSTIC
3347 if (pmap == pmap_kernel())
3348 panic("pmap_l2pt_delref: kernel pmap");
3349 #endif
3350
3351 if (pmap_physpage_delref(l2pte) == 0) {
3352 /*
3353 * No more mappings in this segment; we can free the
3354 * level 2 table.
3355 */
3356 #ifdef DEBUG
3357 if (pmapdebug & PDB_PTPAGE)
3358 printf("pmap_l2pt_delref: freeing level 2 table at "
3359 "0x%lx\n", pmap_pte_pa(l1pte));
3360 #endif
3361 pmap_ptpage_free(pmap, l1pte);
3362
3363 /*
3364 * We've freed a level 2 table, so delete the reference
3365 * on the level 1 table.
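 *	 (This completes the L3 -> L2 -> L1 cascade begun in
 *	  pmap_l3pt_delref(); pmap_l1pt_delref() only drops the count --
 *	  the level 1 table itself is torn down by pmap_lev1map_destroy().)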
3366 */ 3367 pmap_l1pt_delref(pmap, l1pte, cpu_id); 3368 } 3369 } 3370 3371 /* 3372 * pmap_l1pt_delref: 3373 * 3374 * Delete a reference on a level 1 PT page. If the reference drops 3375 * to zero, free it. 3376 * 3377 * Note: the pmap must already be locked. 3378 */ 3379 static void 3380 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id) 3381 { 3382 3383 #ifdef DIAGNOSTIC 3384 if (pmap == pmap_kernel()) 3385 panic("pmap_l1pt_delref: kernel pmap"); 3386 #endif 3387 3388 (void)pmap_physpage_delref(l1pte); 3389 } 3390 3391 /******************** Address Space Number management ********************/ 3392 3393 /* 3394 * pmap_asn_alloc: 3395 * 3396 * Allocate and assign an ASN to the specified pmap. 3397 * 3398 * Note: the pmap must already be locked. This may be called from 3399 * an interprocessor interrupt, and in that case, the sender of 3400 * the IPI has the pmap lock. 3401 */ 3402 static void 3403 pmap_asn_alloc(pmap_t pmap, long cpu_id) 3404 { 3405 struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id]; 3406 struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id]; 3407 3408 #ifdef DEBUG 3409 if (pmapdebug & (PDB_FOLLOW|PDB_ASN)) 3410 printf("pmap_asn_alloc(%p)\n", pmap); 3411 #endif 3412 3413 /* 3414 * If the pmap is still using the global kernel_lev1map, there 3415 * is no need to assign an ASN at this time, because only 3416 * kernel mappings exist in that map, and all kernel mappings 3417 * have PG_ASM set. If the pmap eventually gets its own 3418 * lev1map, an ASN will be allocated at that time. 3419 * 3420 * Only the kernel pmap will reference kernel_lev1map. Do the 3421 * same old fixups, but note that we no longer need the pmap 3422 * to be locked if we're in this mode, since pm_lev1map will 3423 * never change. 3424 * #endif 3425 */ 3426 if (pmap->pm_lev1map == kernel_lev1map) { 3427 #ifdef DEBUG 3428 if (pmapdebug & PDB_ASN) 3429 printf("pmap_asn_alloc: still references " 3430 "kernel_lev1map\n"); 3431 #endif 3432 #if defined(MULTIPROCESSOR) 3433 /* 3434 * In a multiprocessor system, it's possible to 3435 * get here without having PMAP_ASN_RESERVED in 3436 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy(). 3437 * 3438 * So, what we do here, is simply assign the reserved 3439 * ASN for kernel_lev1map users and let things 3440 * continue on. We do, however, let uniprocessor 3441 * configurations continue to make its assertion. 3442 */ 3443 pma->pma_asn = PMAP_ASN_RESERVED; 3444 #else 3445 KASSERT(pma->pma_asn == PMAP_ASN_RESERVED); 3446 #endif /* MULTIPROCESSOR */ 3447 return; 3448 } 3449 3450 /* 3451 * On processors which do not implement ASNs, the swpctx PALcode 3452 * operation will automatically invalidate the TLB and I-cache, 3453 * so we don't need to do that here. 3454 */ 3455 if (pmap_max_asn == 0) { 3456 /* 3457 * Refresh the pmap's generation number, to 3458 * simplify logic elsewhere. 3459 */ 3460 pma->pma_asngen = cpma->pma_asngen; 3461 #ifdef DEBUG 3462 if (pmapdebug & PDB_ASN) 3463 printf("pmap_asn_alloc: no ASNs, using asngen %lu\n", 3464 pma->pma_asngen); 3465 #endif 3466 return; 3467 } 3468 3469 /* 3470 * Hopefully, we can continue using the one we have... 3471 */ 3472 if (pma->pma_asn != PMAP_ASN_RESERVED && 3473 pma->pma_asngen == cpma->pma_asngen) { 3474 /* 3475 * ASN is still in the current generation; keep on using it. 3476 */ 3477 #ifdef DEBUG 3478 if (pmapdebug & PDB_ASN) 3479 printf("pmap_asn_alloc: same generation, keeping %u\n", 3480 pma->pma_asn); 3481 #endif 3482 return; 3483 } 3484 3485 /* 3486 * Need to assign a new ASN. 
Grab the next one, incrementing 3487 * the generation number if we have to. 3488 */ 3489 if (cpma->pma_asn > pmap_max_asn) { 3490 /* 3491 * Invalidate all non-PG_ASM TLB entries and the 3492 * I-cache, and bump the generation number. 3493 */ 3494 ALPHA_TBIAP(); 3495 alpha_pal_imb(); 3496 3497 cpma->pma_asn = 1; 3498 cpma->pma_asngen++; 3499 #ifdef DIAGNOSTIC 3500 if (cpma->pma_asngen == 0) { 3501 /* 3502 * The generation number has wrapped. We could 3503 * handle this scenario by traversing all of 3504 * the pmaps, and invalidating the generation 3505 * number on those which are not currently 3506 * in use by this processor. 3507 * 3508 * However... considering that we're using 3509 * an unsigned 64-bit integer for generation 3510 * numbers, on non-ASN CPUs, we won't wrap 3511 * for approx. 585 million years, or 75 billion 3512 * years on a 128-ASN CPU (assuming 1000 switch 3513 * operations per second). 3514 * 3515 * So, we don't bother. 3516 */ 3517 panic("pmap_asn_alloc: too much uptime"); 3518 } 3519 #endif 3520 #ifdef DEBUG 3521 if (pmapdebug & PDB_ASN) 3522 printf("pmap_asn_alloc: generation bumped to %lu\n", 3523 cpma->pma_asngen); 3524 #endif 3525 } 3526 3527 /* 3528 * Assign the new ASN and validate the generation number. 3529 */ 3530 pma->pma_asn = cpma->pma_asn++; 3531 pma->pma_asngen = cpma->pma_asngen; 3532 3533 #ifdef DEBUG 3534 if (pmapdebug & PDB_ASN) 3535 printf("pmap_asn_alloc: assigning %u to pmap %p\n", 3536 pma->pma_asn, pmap); 3537 #endif 3538 3539 /* 3540 * Have a new ASN, so there's no need to sync the I-stream 3541 * on the way back out to userspace. 3542 */ 3543 atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id)); 3544 } 3545 3546 #if defined(MULTIPROCESSOR) 3547 /******************** TLB shootdown code ********************/ 3548 3549 /* 3550 * pmap_tlb_shootdown: 3551 * 3552 * Cause the TLB entry for pmap/va to be shot down. 3553 * 3554 * NOTE: The pmap must be locked here. 3555 */ 3556 void 3557 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) 3558 { 3559 struct pmap_tlb_shootdown_q *pq; 3560 struct pmap_tlb_shootdown_job *pj; 3561 struct cpu_info *ci, *self = curcpu(); 3562 u_long cpumask; 3563 CPU_INFO_ITERATOR cii; 3564 3565 KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock)); 3566 3567 cpumask = 0; 3568 3569 for (CPU_INFO_FOREACH(cii, ci)) { 3570 if (ci == self) 3571 continue; 3572 3573 /* 3574 * The pmap must be locked (unless its the kernel 3575 * pmap, in which case it is okay for it to be 3576 * unlocked), which prevents it from becoming 3577 * active on any additional processors. This makes 3578 * it safe to check for activeness. If it's not 3579 * active on the processor in question, then just 3580 * mark it as needing a new ASN the next time it 3581 * does, saving the IPI. We always have to send 3582 * the IPI for the kernel pmap. 3583 * 3584 * Note if it's marked active now, and it becomes 3585 * inactive by the time the processor receives 3586 * the IPI, that's okay, because it does the right 3587 * thing with it later. 3588 */ 3589 if (pmap != pmap_kernel() && 3590 PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) { 3591 PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid); 3592 continue; 3593 } 3594 3595 cpumask |= 1UL << ci->ci_cpuid; 3596 3597 pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; 3598 mutex_spin_enter(&pq->pq_lock); 3599 3600 /* 3601 * Allocate a job. 
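 *	 (PR_NOWAIT, since we hold the spin mutex pq_lock and cannot
 *	  sleep; if the queue is already at PMAP_TLB_SHOOTDOWN_MAXJOBS or
 *	  the allocation fails, the code below falls back to requesting a
 *	  full TLB invalidation on the target CPU via pq_tbia.)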
3602 */ 3603 if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) { 3604 pj = pool_cache_get(&pmap_tlb_shootdown_job_cache, 3605 PR_NOWAIT); 3606 } else { 3607 pj = NULL; 3608 } 3609 3610 /* 3611 * If a global flush is already pending, we 3612 * don't really have to do anything else. 3613 */ 3614 pq->pq_pte |= pte; 3615 if (pq->pq_tbia) { 3616 mutex_spin_exit(&pq->pq_lock); 3617 if (pj != NULL) { 3618 pool_cache_put(&pmap_tlb_shootdown_job_cache, 3619 pj); 3620 } 3621 continue; 3622 } 3623 if (pj == NULL) { 3624 /* 3625 * Couldn't allocate a job entry. Just 3626 * tell the processor to kill everything. 3627 */ 3628 pq->pq_tbia = 1; 3629 } else { 3630 pj->pj_pmap = pmap; 3631 pj->pj_va = va; 3632 pj->pj_pte = pte; 3633 pq->pq_count++; 3634 TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list); 3635 } 3636 mutex_spin_exit(&pq->pq_lock); 3637 } 3638 3639 *cpumaskp |= cpumask; 3640 } 3641 3642 /* 3643 * pmap_tlb_shootnow: 3644 * 3645 * Process the TLB shootdowns that we have been accumulating 3646 * for the specified processor set. 3647 */ 3648 void 3649 pmap_tlb_shootnow(u_long cpumask) 3650 { 3651 3652 alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN); 3653 } 3654 3655 /* 3656 * pmap_do_tlb_shootdown: 3657 * 3658 * Process pending TLB shootdown operations for this processor. 3659 */ 3660 void 3661 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) 3662 { 3663 u_long cpu_id = ci->ci_cpuid; 3664 u_long cpu_mask = (1UL << cpu_id); 3665 struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; 3666 struct pmap_tlb_shootdown_job *pj, *next; 3667 TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs; 3668 3669 TAILQ_INIT(&jobs); 3670 3671 mutex_spin_enter(&pq->pq_lock); 3672 TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list); 3673 if (pq->pq_tbia) { 3674 if (pq->pq_pte & PG_ASM) 3675 ALPHA_TBIA(); 3676 else 3677 ALPHA_TBIAP(); 3678 pq->pq_tbia = 0; 3679 pq->pq_pte = 0; 3680 } else { 3681 TAILQ_FOREACH(pj, &jobs, pj_list) { 3682 PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va, 3683 pj->pj_pte & PG_ASM, 3684 pj->pj_pmap->pm_cpus & cpu_mask, cpu_id); 3685 } 3686 pq->pq_pte = 0; 3687 } 3688 pq->pq_count = 0; 3689 mutex_spin_exit(&pq->pq_lock); 3690 3691 /* Free jobs back to the cache. */ 3692 for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) { 3693 next = TAILQ_NEXT(pj, pj_list); 3694 pool_cache_put(&pmap_tlb_shootdown_job_cache, pj); 3695 } 3696 } 3697 #endif /* MULTIPROCESSOR */ 3698
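#if 0
/*
 * Illustrative sketch only (an editor's addition; not compiled and not
 * part of the pmap interface): the per-mapping PTE update protocol used
 * by the callers above, e.g. pmap_kremove(), collected in one place.
 * The function name is hypothetical; the macros are the ones this file
 * already uses.
 */
static void
pmap_update_pte_sketch(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
    pt_entry_t npte, bool hadasm, bool isactive, long cpu_id)
{
	PMAP_TLB_SHOOTDOWN_CPUSET_DECL

	/* Install the new PTE value. */
	PMAP_SET_PTE(pte, npte);

	/* Invalidate the local TLB entry for this VA. */
	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);

	/* Queue shootdowns for the other CPUs and send the IPIs. */
	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
	PMAP_TLB_SHOOTNOW();
}
#endif /* 0 */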