1 /* $NetBSD: pmap.c,v 1.252 2009/11/26 00:19:11 matt Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center and by Chris G. Demetriou. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)pmap.c 8.6 (Berkeley) 5/27/94 66 */ 67 68 /* 69 * DEC Alpha physical map management code. 70 * 71 * History: 72 * 73 * This pmap started life as a Motorola 68851/68030 pmap, 74 * written by Mike Hibler at the University of Utah. 75 * 76 * It was modified for the DEC Alpha by Chris Demetriou 77 * at Carnegie Mellon University. 78 * 79 * Support for non-contiguous physical memory was added by 80 * Jason R. Thorpe of the Numerical Aerospace Simulation 81 * Facility, NASA Ames Research Center and Chris Demetriou. 82 * 83 * Page table management and a major cleanup were undertaken 84 * by Jason R. Thorpe, with lots of help from Ross Harvey of 85 * Avalon Computer Systems and from Chris Demetriou. 86 * 87 * Support for the new UVM pmap interface was written by 88 * Jason R. Thorpe. 89 * 90 * Support for ASNs was written by Jason R. Thorpe, again 91 * with help from Chris Demetriou and Ross Harvey. 92 * 93 * The locking protocol was written by Jason R. Thorpe, 94 * using Chuck Cranor's i386 pmap for UVM as a model. 95 * 96 * TLB shootdown code was written by Jason R. Thorpe. 97 * 98 * Multiprocessor modifications by Andrew Doran. 99 * 100 * Notes: 101 * 102 * All page table access is done via K0SEG. The one exception 103 * to this is for kernel mappings. Since all kernel page 104 * tables are pre-allocated, we can use the Virtual Page Table 105 * to access PTEs that map K1SEG addresses. 106 * 107 * Kernel page table pages are statically allocated in 108 * pmap_bootstrap(), and are never freed. In the future, 109 * support for dynamically adding additional kernel page 110 * table pages may be added. User page table pages are 111 * dynamically allocated and freed. 112 * 113 * Bugs/misfeatures: 114 * 115 * - Some things could be optimized. 116 */ 117 118 /* 119 * Manages physical address maps. 120 * 121 * Since the information managed by this module is 122 * also stored by the logical address mapping module, 123 * this module may throw away valid virtual-to-physical 124 * mappings at almost any time. However, invalidations 125 * of virtual-to-physical mappings must be done as 126 * requested. 127 * 128 * In order to cope with hardware architectures which 129 * make virtual-to-physical map invalidates expensive, 130 * this module may delay invalidate or reduced protection 131 * operations until such time as they are actually 132 * necessary. This module is given full information as 133 * to which processors are currently using which maps, 134 * and to when physical maps must be made correct. 
135 */ 136 137 #include "opt_lockdebug.h" 138 #include "opt_sysv.h" 139 #include "opt_multiprocessor.h" 140 141 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 142 143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.252 2009/11/26 00:19:11 matt Exp $"); 144 145 #include <sys/param.h> 146 #include <sys/systm.h> 147 #include <sys/kernel.h> 148 #include <sys/proc.h> 149 #include <sys/malloc.h> 150 #include <sys/pool.h> 151 #include <sys/buf.h> 152 #include <sys/shm.h> 153 #include <sys/atomic.h> 154 #include <sys/cpu.h> 155 156 #include <uvm/uvm.h> 157 158 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR) 159 #include <machine/rpb.h> 160 #endif 161 162 #ifdef DEBUG 163 #define PDB_FOLLOW 0x0001 164 #define PDB_INIT 0x0002 165 #define PDB_ENTER 0x0004 166 #define PDB_REMOVE 0x0008 167 #define PDB_CREATE 0x0010 168 #define PDB_PTPAGE 0x0020 169 #define PDB_ASN 0x0040 170 #define PDB_BITS 0x0080 171 #define PDB_COLLECT 0x0100 172 #define PDB_PROTECT 0x0200 173 #define PDB_BOOTSTRAP 0x1000 174 #define PDB_PARANOIA 0x2000 175 #define PDB_WIRING 0x4000 176 #define PDB_PVDUMP 0x8000 177 178 int debugmap = 0; 179 int pmapdebug = PDB_PARANOIA; 180 #endif 181 182 /* 183 * Given a map and a machine independent protection code, 184 * convert to an alpha protection code. 185 */ 186 #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p]) 187 static int protection_codes[2][8]; 188 189 /* 190 * kernel_lev1map: 191 * 192 * Kernel level 1 page table. This maps all kernel level 2 193 * page table pages, and is used as a template for all user 194 * pmap level 1 page tables. When a new user level 1 page 195 * table is allocated, all kernel_lev1map PTEs for kernel 196 * addresses are copied to the new map. 197 * 198 * The kernel also has an initial set of kernel level 2 page 199 * table pages. These map the kernel level 3 page table pages. 200 * As kernel level 3 page table pages are added, more level 2 201 * page table pages may be added to map them. These pages are 202 * never freed. 203 * 204 * Finally, the kernel also has an initial set of kernel level 205 * 3 page table pages. These map pages in K1SEG. More level 206 * 3 page table pages may be added at run-time if additional 207 * K1SEG address space is required. These pages are never freed. 208 * 209 * NOTE: When mappings are inserted into the kernel pmap, all 210 * level 2 and level 3 page table pages must already be allocated 211 * and mapped into the parent page table. 212 */ 213 pt_entry_t *kernel_lev1map; 214 215 /* 216 * Virtual Page Table. 217 */ 218 static pt_entry_t *VPT; 219 220 static struct pmap kernel_pmap_store 221 [(PMAP_SIZEOF(ALPHA_MAXPROCS) + sizeof(struct pmap) - 1) 222 / sizeof(struct pmap)]; 223 struct pmap *const kernel_pmap_ptr = kernel_pmap_store; 224 225 paddr_t avail_start; /* PA of first available physical page */ 226 paddr_t avail_end; /* PA of last available physical page */ 227 static vaddr_t virtual_end; /* VA of last avail page (end of kernel AS) */ 228 229 static bool pmap_initialized; /* Has pmap_init completed? */ 230 231 u_long pmap_pages_stolen; /* instrumentation */ 232 233 /* 234 * This variable contains the number of CPU IDs we need to allocate 235 * space for when allocating the pmap structure. It is used to 236 * size a per-CPU array of ASN and ASN Generation number. 237 */ 238 static u_long pmap_ncpuids; 239 240 #ifndef PMAP_PV_LOWAT 241 #define PMAP_PV_LOWAT 16 242 #endif 243 int pmap_pv_lowat = PMAP_PV_LOWAT; 244 245 /* 246 * List of all pmaps, used to update them when e.g. 
additional kernel 247 * page tables are allocated. This list is kept LRU-ordered by 248 * pmap_activate(). 249 */ 250 static TAILQ_HEAD(, pmap) pmap_all_pmaps; 251 252 /* 253 * The pools from which pmap structures and sub-structures are allocated. 254 */ 255 static struct pool_cache pmap_pmap_cache; 256 static struct pool_cache pmap_l1pt_cache; 257 static struct pool_cache pmap_pv_cache; 258 259 /* 260 * Address Space Numbers. 261 * 262 * On many implementations of the Alpha architecture, the TLB entries and 263 * I-cache blocks are tagged with a unique number within an implementation- 264 * specified range. When a process context becomes active, the ASN is used 265 * to match TLB entries; if a TLB entry for a particular VA does not match 266 * the current ASN, it is ignored (one could think of the processor as 267 * having a collection of <max ASN> separate TLBs). This allows operating 268 * system software to skip the TLB flush that would otherwise be necessary 269 * at context switch time. 270 * 271 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that 272 * causes TLB entries to match any ASN. The PALcode also provides 273 * a TBI (Translation Buffer Invalidate) operation that flushes all 274 * TLB entries that _do not_ have PG_ASM. We use this bit for kernel 275 * mappings, so that invalidation of all user mappings does not invalidate 276 * kernel mappings (which are consistent across all processes). 277 * 278 * pmap_next_asn always indicates the next ASN to use. When 279 * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation. 280 * 281 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM) 282 * TLB entries and the I-cache are flushed, the generation number is bumped, 283 * and pmap_next_asn is changed to indicate the first non-reserved ASN. 284 * 285 * We reserve ASN #0 for pmaps that use the global kernel_lev1map. This 286 * prevents the following scenario: 287 * 288 * * New ASN generation starts, and process A is given ASN #0. 289 * 290 * * A new process B (and thus new pmap) is created. The ASN, 291 * for lack of a better value, is initialized to 0. 292 * 293 * * Process B runs. It is now using the TLB entries tagged 294 * by process A. *poof* 295 * 296 * In the scenario above, in addition to the processor using incorrect 297 * TLB entries, the PALcode might use incorrect information to service a 298 * TLB miss. (The PALcode uses the recursively mapped Virtual Page Table 299 * to locate the PTE for a faulting address, and tagged TLB entries exist 300 * for the Virtual Page Table addresses in order to speed up this procedure, 301 * as well.) 302 * 303 * By reserving an ASN for kernel_lev1map users, we are guaranteeing that 304 * new pmaps will initially run with no TLB entries for user addresses 305 * or VPT mappings that map user page tables. Since kernel_lev1map only 306 * contains mappings for kernel addresses, and since those mappings 307 * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is 308 * safe (since PG_ASM mappings match any ASN). 309 * 310 * On processors that do not support ASNs, the PALcode invalidates 311 * the TLB and I-cache automatically on swpctx. We still go 312 * through the motions of assigning an ASN (really, just refreshing 313 * the ASN generation in this particular case) to keep the logic sane 314 * in other parts of the code.
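 *
 * To make the generation rollover concrete, here is a rough sketch of
 * the allocation step.  The real logic lives in pmap_asn_alloc(), which
 * additionally handles the kernel_lev1map/reserved-ASN case and
 * processors without ASNs; "pma" is the pmap's per-CPU pmap_asn_info,
 * "cpma" is the processor's global pmap_asn_info entry:
 *
 *	if (pma->pma_asngen != cpma->pma_asngen) {
 *		if (cpma->pma_asn > pmap_max_asn) {
 *			ALPHA_TBIAP();		-- flush non-ASM TLB entries
 *			alpha_pal_imb();	-- flush the I-cache
 *			cpma->pma_asngen++;	-- start a new generation
 *			cpma->pma_asn = 1;	-- first non-reserved ASN
 *		}
 *		pma->pma_asn = cpma->pma_asn++;
 *		pma->pma_asngen = cpma->pma_asngen;
 *	}
 *
 * A pmap whose generation still matches the processor's simply keeps
 * the ASN it already has.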
315 */ 316 static u_int pmap_max_asn; /* max ASN supported by the system */ 317 /* next ASN and cur ASN generation */ 318 static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; 319 320 /* 321 * Locking: 322 * 323 * READ/WRITE LOCKS 324 * ---------------- 325 * 326 * * pmap_main_lock - This lock is used to prevent deadlock and/or 327 * provide mutex access to the pmap module. Most operations lock 328 * the pmap first, then PV lists as needed. However, some operations, 329 * such as pmap_page_protect(), lock the PV lists before locking 330 * the pmaps. To prevent deadlock, we require a mutex lock on the 331 * pmap module if locking in the PV->pmap direction. This is 332 * implemented by acquiring a (shared) read lock on pmap_main_lock 333 * if locking pmap->PV and an (exclusive) write lock if locking in 334 * the PV->pmap direction. Since only one thread can hold a write 335 * lock at a time, this provides the mutex. 336 * 337 * MUTEXES 338 * ------- 339 * 340 * * pm_lock (per-pmap) - This lock protects all of the members 341 * of the pmap structure itself. This lock will be asserted 342 * in pmap_activate() and pmap_deactivate() from a critical 343 * section of mi_switch(), and must never sleep. Note that 344 * in the case of the kernel pmap, interrupts which cause 345 * memory allocation *must* be blocked while this lock is 346 * asserted. 347 * 348 * * pvh_lock (global hash) - These locks protect the PV lists 349 * for managed pages. 350 * 351 * * pmap_all_pmaps_lock - This lock protects the global list of 352 * all pmaps. Note that a pm_lock must never be held while this 353 * lock is held. 354 * 355 * * pmap_growkernel_lock - This lock protects pmap_growkernel() 356 * and the virtual_end variable. 357 * 358 * There is a lock ordering constraint for pmap_growkernel_lock. 359 * pmap_growkernel() acquires the locks in the following order: 360 * 361 * pmap_growkernel_lock (write) -> pmap_all_pmaps_lock -> 362 * pmap->pm_lock 363 * 364 * We need to ensure consistency between user pmaps and the 365 * kernel_lev1map. For this reason, pmap_growkernel_lock must 366 * be held to prevent kernel_lev1map changing across pmaps 367 * being added to / removed from the global pmaps list. 368 * 369 * Address space number management (global ASN counters and per-pmap 370 * ASN state) is not locked; it uses arrays of values indexed 371 * per-processor. 372 * 373 * All internal functions which operate on a pmap are called 374 * with the pmap already locked by the caller (which will be 375 * an interface function). 376 */ 377 static krwlock_t pmap_main_lock; 378 static kmutex_t pmap_all_pmaps_lock; 379 static krwlock_t pmap_growkernel_lock; 380 381 #define PMAP_MAP_TO_HEAD_LOCK() rw_enter(&pmap_main_lock, RW_READER) 382 #define PMAP_MAP_TO_HEAD_UNLOCK() rw_exit(&pmap_main_lock) 383 #define PMAP_HEAD_TO_MAP_LOCK() rw_enter(&pmap_main_lock, RW_WRITER) 384 #define PMAP_HEAD_TO_MAP_UNLOCK() rw_exit(&pmap_main_lock) 385 386 struct { 387 kmutex_t lock; 388 } __aligned(64) static pmap_pvh_locks[64] __aligned(64); 389 390 static inline kmutex_t * 391 pmap_pvh_lock(struct vm_page *pg) 392 { 393 394 /* Cut bits 11-6 out of page address and use directly as offset. */ 395 return (kmutex_t *)((uintptr_t)&pmap_pvh_locks + 396 ((uintptr_t)pg & (63 << 6))); 397 } 398 399 #if defined(MULTIPROCESSOR) 400 /* 401 * TLB Shootdown: 402 * 403 * When a mapping is changed in a pmap, the TLB entry corresponding to 404 * the virtual address must be invalidated on all processors.
In order 405 * to accomplish this on systems with multiple processors, messages are 406 * sent from the processor which performs the mapping change to all 407 * processors on which the pmap is active. For other processors, the 408 * ASN generation numbers for that processor is invalidated, so that 409 * the next time the pmap is activated on that processor, a new ASN 410 * will be allocated (which implicitly invalidates all TLB entries). 411 * 412 * Note, we can use the pool allocator to allocate job entries 413 * since pool pages are mapped with K0SEG, not with the TLB. 414 */ 415 struct pmap_tlb_shootdown_job { 416 TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; 417 vaddr_t pj_va; /* virtual address */ 418 pmap_t pj_pmap; /* the pmap which maps the address */ 419 pt_entry_t pj_pte; /* the PTE bits */ 420 }; 421 422 static struct pmap_tlb_shootdown_q { 423 TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; /* queue 16b */ 424 kmutex_t pq_lock; /* spin lock on queue 16b */ 425 int pq_pte; /* aggregate PTE bits 4b */ 426 int pq_count; /* number of pending requests 4b */ 427 int pq_tbia; /* pending global flush 4b */ 428 uint8_t pq_pad[64-16-16-4-4-4]; /* pad to 64 bytes */ 429 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE); 430 431 /* If we have more pending jobs than this, we just nail the whole TLB. */ 432 #define PMAP_TLB_SHOOTDOWN_MAXJOBS 6 433 434 static struct pool_cache pmap_tlb_shootdown_job_cache; 435 #endif /* MULTIPROCESSOR */ 436 437 /* 438 * Internal routines 439 */ 440 static void alpha_protection_init(void); 441 static bool pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long); 442 static void pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long); 443 444 /* 445 * PT page management functions. 446 */ 447 static int pmap_lev1map_create(pmap_t, long); 448 static void pmap_lev1map_destroy(pmap_t, long); 449 static int pmap_ptpage_alloc(pmap_t, pt_entry_t *, int); 450 static void pmap_ptpage_free(pmap_t, pt_entry_t *); 451 static void pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long); 452 static void pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long); 453 static void pmap_l1pt_delref(pmap_t, pt_entry_t *, long); 454 455 static void *pmap_l1pt_alloc(struct pool *, int); 456 static void pmap_l1pt_free(struct pool *, void *); 457 458 static struct pool_allocator pmap_l1pt_allocator = { 459 pmap_l1pt_alloc, pmap_l1pt_free, 0, 460 }; 461 462 static int pmap_l1pt_ctor(void *, void *, int); 463 464 /* 465 * PV table management functions. 466 */ 467 static int pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *, 468 bool); 469 static void pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool); 470 static void *pmap_pv_page_alloc(struct pool *, int); 471 static void pmap_pv_page_free(struct pool *, void *); 472 473 static struct pool_allocator pmap_pv_page_allocator = { 474 pmap_pv_page_alloc, pmap_pv_page_free, 0, 475 }; 476 477 #ifdef DEBUG 478 void pmap_pv_dump(paddr_t); 479 #endif 480 481 #define pmap_pv_alloc() pool_cache_get(&pmap_pv_cache, PR_NOWAIT) 482 #define pmap_pv_free(pv) pool_cache_put(&pmap_pv_cache, (pv)) 483 484 /* 485 * ASN management functions. 486 */ 487 static void pmap_asn_alloc(pmap_t, long); 488 489 /* 490 * Misc. functions. 
491 */ 492 static bool pmap_physpage_alloc(int, paddr_t *); 493 static void pmap_physpage_free(paddr_t); 494 static int pmap_physpage_addref(void *); 495 static int pmap_physpage_delref(void *); 496 497 /* 498 * PMAP_ISACTIVE{,_TEST}: 499 * 500 * Check to see if a pmap is active on the current processor. 501 */ 502 #define PMAP_ISACTIVE_TEST(pm, cpu_id) \ 503 (((pm)->pm_cpus & (1UL << (cpu_id))) != 0) 504 505 #if defined(DEBUG) && !defined(MULTIPROCESSOR) 506 #define PMAP_ISACTIVE(pm, cpu_id) \ 507 ({ \ 508 /* \ 509 * XXX This test is not MP-safe. \ 510 */ \ 511 int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id); \ 512 \ 513 if ((curlwp->l_flag & LW_IDLE) != 0 && \ 514 curproc->p_vmspace != NULL && \ 515 ((curproc->p_sflag & PS_WEXIT) == 0) && \ 516 (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap))) \ 517 panic("PMAP_ISACTIVE"); \ 518 (isactive_); \ 519 }) 520 #else 521 #define PMAP_ISACTIVE(pm, cpu_id) PMAP_ISACTIVE_TEST(pm, cpu_id) 522 #endif /* DEBUG && !MULTIPROCESSOR */ 523 524 /* 525 * PMAP_ACTIVATE_ASN_SANITY: 526 * 527 * DEBUG sanity checks for ASNs within PMAP_ACTIVATE. 528 */ 529 #ifdef DEBUG 530 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) \ 531 do { \ 532 struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)]; \ 533 struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)]; \ 534 \ 535 if ((pmap)->pm_lev1map == kernel_lev1map) { \ 536 /* \ 537 * This pmap implementation also ensures that pmaps \ 538 * referencing kernel_lev1map use a reserved ASN \ 539 * ASN to prevent the PALcode from servicing a TLB \ 540 * miss with the wrong PTE. \ 541 */ \ 542 if (__pma->pma_asn != PMAP_ASN_RESERVED) { \ 543 printf("kernel_lev1map with non-reserved ASN " \ 544 "(line %d)\n", __LINE__); \ 545 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 546 } \ 547 } else { \ 548 if (__pma->pma_asngen != __cpma->pma_asngen) { \ 549 /* \ 550 * ASN generation number isn't valid! \ 551 */ \ 552 printf("pmap asngen %lu, current %lu " \ 553 "(line %d)\n", \ 554 __pma->pma_asngen, \ 555 __cpma->pma_asngen, \ 556 __LINE__); \ 557 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 558 } \ 559 if (__pma->pma_asn == PMAP_ASN_RESERVED) { \ 560 /* \ 561 * DANGER WILL ROBINSON! We're going to \ 562 * pollute the VPT TLB entries! \ 563 */ \ 564 printf("Using reserved ASN! (line %d)\n", \ 565 __LINE__); \ 566 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 567 } \ 568 } \ 569 } while (/*CONSTCOND*/0) 570 #else 571 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) /* nothing */ 572 #endif 573 574 /* 575 * PMAP_ACTIVATE: 576 * 577 * This is essentially the guts of pmap_activate(), without 578 * ASN allocation. This is used by pmap_activate(), 579 * pmap_lev1map_create(), and pmap_lev1map_destroy(). 580 * 581 * This is called only when it is known that a pmap is "active" 582 * on the current processor; the ASN must already be valid. 583 */ 584 #define PMAP_ACTIVATE(pmap, l, cpu_id) \ 585 do { \ 586 struct pcb *pcb = lwp_getpcb(l); \ 587 PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id); \ 588 \ 589 pcb->pcb_hw.apcb_ptbr = \ 590 ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \ 591 pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn; \ 592 \ 593 if ((l) == curlwp) { \ 594 /* \ 595 * Page table base register has changed; switch to \ 596 * our own context again so that it will take effect. \ 597 */ \ 598 (void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr); \ 599 } \ 600 } while (/*CONSTCOND*/0) 601 602 /* 603 * PMAP_SET_NEEDISYNC: 604 * 605 * Mark that a user pmap needs an I-stream synch on its 606 * way back out to userspace. 
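 *
 * pm_needisync is treated as a mask of CPU IDs (~0UL marks them all).
 * A rough sketch of how the flag is consumed on the way back out to
 * userspace (the exact code lives in the machine-dependent userret
 * path, not in this file):
 *
 *	if (pmap->pm_needisync & (1UL << cpu_id)) {
 *		atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id));
 *		alpha_pal_imb();	-- sync this CPU's I-stream
 *	}
 *
 * so the (expensive) IMB is only paid by processors that actually
 * return to user mode with the pmap active.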
607 */ 608 #define PMAP_SET_NEEDISYNC(pmap) (pmap)->pm_needisync = ~0UL 609 610 /* 611 * PMAP_SYNC_ISTREAM: 612 * 613 * Synchronize the I-stream for the specified pmap. For user 614 * pmaps, this is deferred until a process using the pmap returns 615 * to userspace. 616 */ 617 #if defined(MULTIPROCESSOR) 618 #define PMAP_SYNC_ISTREAM_KERNEL() \ 619 do { \ 620 alpha_pal_imb(); \ 621 alpha_broadcast_ipi(ALPHA_IPI_IMB); \ 622 } while (/*CONSTCOND*/0) 623 624 #define PMAP_SYNC_ISTREAM_USER(pmap) \ 625 do { \ 626 alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST); \ 627 /* for curcpu, will happen in userret() */ \ 628 } while (/*CONSTCOND*/0) 629 #else 630 #define PMAP_SYNC_ISTREAM_KERNEL() alpha_pal_imb() 631 #define PMAP_SYNC_ISTREAM_USER(pmap) /* will happen in userret() */ 632 #endif /* MULTIPROCESSOR */ 633 634 #define PMAP_SYNC_ISTREAM(pmap) \ 635 do { \ 636 if ((pmap) == pmap_kernel()) \ 637 PMAP_SYNC_ISTREAM_KERNEL(); \ 638 else \ 639 PMAP_SYNC_ISTREAM_USER(pmap); \ 640 } while (/*CONSTCOND*/0) 641 642 /* 643 * PMAP_INVALIDATE_ASN: 644 * 645 * Invalidate the specified pmap's ASN, so as to force allocation 646 * of a new one the next time pmap_asn_alloc() is called. 647 * 648 * NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING 649 * CONDITIONS ARE true: 650 * 651 * (1) The pmap references the global kernel_lev1map. 652 * 653 * (2) The pmap is not active on the current processor. 654 */ 655 #define PMAP_INVALIDATE_ASN(pmap, cpu_id) \ 656 do { \ 657 (pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED; \ 658 } while (/*CONSTCOND*/0) 659 660 /* 661 * PMAP_INVALIDATE_TLB: 662 * 663 * Invalidate the TLB entry for the pmap/va pair. 664 */ 665 #define PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id) \ 666 do { \ 667 if ((hadasm) || (isactive)) { \ 668 /* \ 669 * Simply invalidating the TLB entry and I-cache \ 670 * works in this case. \ 671 */ \ 672 ALPHA_TBIS((va)); \ 673 } else if ((pmap)->pm_asni[(cpu_id)].pma_asngen == \ 674 pmap_asn_info[(cpu_id)].pma_asngen) { \ 675 /* \ 676 * We can't directly invalidate the TLB entry \ 677 * in this case, so we have to force allocation \ 678 * of a new ASN the next time this pmap becomes \ 679 * active. \ 680 */ \ 681 PMAP_INVALIDATE_ASN((pmap), (cpu_id)); \ 682 } \ 683 /* \ 684 * Nothing to do in this case; the next time the \ 685 * pmap becomes active on this processor, a new \ 686 * ASN will be allocated anyway. \ 687 */ \ 688 } while (/*CONSTCOND*/0) 689 690 /* 691 * PMAP_KERNEL_PTE: 692 * 693 * Get a kernel PTE. 694 * 695 * If debugging, do a table walk. If not debugging, just use 696 * the Virtual Page Table, since all kernel page tables are 697 * pre-allocated and mapped in. 698 */ 699 #ifdef DEBUG 700 #define PMAP_KERNEL_PTE(va) \ 701 ({ \ 702 pt_entry_t *l1pte_, *l2pte_; \ 703 \ 704 l1pte_ = pmap_l1pte(pmap_kernel(), va); \ 705 if (pmap_pte_v(l1pte_) == 0) { \ 706 printf("kernel level 1 PTE not valid, va 0x%lx " \ 707 "(line %d)\n", (va), __LINE__); \ 708 panic("PMAP_KERNEL_PTE"); \ 709 } \ 710 l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_); \ 711 if (pmap_pte_v(l2pte_) == 0) { \ 712 printf("kernel level 2 PTE not valid, va 0x%lx " \ 713 "(line %d)\n", (va), __LINE__); \ 714 panic("PMAP_KERNEL_PTE"); \ 715 } \ 716 pmap_l3pte(pmap_kernel(), va, l2pte_); \ 717 }) 718 #else 719 #define PMAP_KERNEL_PTE(va) (&VPT[VPT_INDEX((va))]) 720 #endif 721 722 /* 723 * PMAP_SET_PTE: 724 * 725 * Set a PTE to a specified value. 
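 *
 * Typical uses later in this file:
 *
 *	PMAP_SET_PTE(pte, npte);	-- install a new mapping
 *	PMAP_SET_PTE(pte, PG_NV);	-- zap a mapping (pmap_kremove())
 *
 * On MULTIPROCESSOR kernels the kernel-pmap call sites follow the
 * store with alpha_mb() so that the updated PTE is globally visible
 * before the corresponding TLB shootdown is processed.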
726 */ 727 #define PMAP_SET_PTE(ptep, val) *(ptep) = (val) 728 729 /* 730 * PMAP_STAT_{INCR,DECR}: 731 * 732 * Increment or decrement a pmap statistic. 733 */ 734 #define PMAP_STAT_INCR(s, v) atomic_add_long((unsigned long *)(&(s)), (v)) 735 #define PMAP_STAT_DECR(s, v) atomic_add_long((unsigned long *)(&(s)), -(v)) 736 737 /* 738 * pmap_bootstrap: 739 * 740 * Bootstrap the system to run with virtual memory. 741 * 742 * Note: no locking is necessary in this function. 743 */ 744 void 745 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) 746 { 747 vsize_t lev2mapsize, lev3mapsize; 748 pt_entry_t *lev2map, *lev3map; 749 pt_entry_t pte; 750 vsize_t bufsz; 751 struct pcb *pcb; 752 int i; 753 754 #ifdef DEBUG 755 if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP)) 756 printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn); 757 #endif 758 759 /* 760 * Compute the number of pages kmem_map will have. 761 */ 762 kmeminit_nkmempages(); 763 764 /* 765 * Figure out how many initial PTE's are necessary to map the 766 * kernel. We also reserve space for kmem_alloc_pageable() 767 * for vm_fork(). 768 */ 769 770 /* Get size of buffer cache and set an upper limit */ 771 bufsz = buf_memcalc(); 772 buf_setvalimit(bufsz); 773 774 lev3mapsize = 775 (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) + 776 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE + 777 (maxproc * UPAGES) + nkmempages; 778 779 #ifdef SYSVSHM 780 lev3mapsize += shminfo.shmall; 781 #endif 782 lev3mapsize = roundup(lev3mapsize, NPTEPG); 783 784 /* 785 * Initialize `FYI' variables. Note we're relying on 786 * the fact that BSEARCH sorts the vm_physmem[] array 787 * for us. 788 */ 789 avail_start = ptoa(vm_physmem[0].start); 790 avail_end = ptoa(vm_physmem[vm_nphysseg - 1].end); 791 virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE; 792 793 #if 0 794 printf("avail_start = 0x%lx\n", avail_start); 795 printf("avail_end = 0x%lx\n", avail_end); 796 printf("virtual_end = 0x%lx\n", virtual_end); 797 #endif 798 799 /* 800 * Allocate a level 1 PTE table for the kernel. 801 * This is always one page long. 802 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 803 */ 804 kernel_lev1map = (pt_entry_t *) 805 uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG); 806 807 /* 808 * Allocate a level 2 PTE table for the kernel. 809 * These must map all of the level3 PTEs. 810 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 811 */ 812 lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG); 813 lev2map = (pt_entry_t *) 814 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize); 815 816 /* 817 * Allocate a level 3 PTE table for the kernel. 818 * Contains lev3mapsize PTEs. 
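 *
 * For a feel of the sizes involved (assuming the usual 8KB Alpha
 * page size, i.e. NPTEPG == 1024 eight-byte PTEs per page):
 *
 *	one L3 PT page maps 1024 * 8KB  =  8MB of address space
 *	one L2 PT page maps 1024 * 8MB  =  8GB of address space
 *	one L1 PT page maps 1024 * 8GB  =  8TB of address space
 *
 * So a lev3mapsize of, say, 32768 PTEs (256MB of managed KVA) needs
 * howmany(32768, NPTEPG) == 32 level 2 PTEs, which the roundup()
 * above pads out to one full page worth of level 2 PTEs.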
819 */ 820 lev3map = (pt_entry_t *) 821 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize); 822 823 /* 824 * Set up level 1 page table 825 */ 826 827 /* Map all of the level 2 pte pages */ 828 for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) { 829 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) + 830 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 831 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 832 kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS + 833 (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte; 834 } 835 836 /* Map the virtual page table */ 837 pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT) 838 << PG_SHIFT; 839 pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */ 840 kernel_lev1map[l1pte_index(VPTBASE)] = pte; 841 VPT = (pt_entry_t *)VPTBASE; 842 843 #ifdef _PMAP_MAY_USE_PROM_CONSOLE 844 { 845 extern pt_entry_t prom_pte; /* XXX */ 846 extern int prom_mapped; /* XXX */ 847 848 if (pmap_uses_prom_console()) { 849 /* 850 * XXX Save old PTE so we can remap the PROM, if 851 * XXX necessary. 852 */ 853 prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM; 854 } 855 prom_mapped = 0; 856 857 /* 858 * Actually, this code lies. The prom is still mapped, and will 859 * remain so until the context switch after alpha_init() returns. 860 */ 861 } 862 #endif 863 864 /* 865 * Set up level 2 page table. 866 */ 867 /* Map all of the level 3 pte pages */ 868 for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) { 869 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) + 870 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 871 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 872 lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+ 873 (i*PAGE_SIZE*NPTEPG))] = pte; 874 } 875 876 /* Initialize the pmap_growkernel_lock. */ 877 rw_init(&pmap_growkernel_lock); 878 879 /* 880 * Set up level three page table (lev3map) 881 */ 882 /* Nothing to do; it's already zero'd */ 883 884 /* 885 * Initialize the pmap pools and list. 886 */ 887 pmap_ncpuids = ncpuids; 888 pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0, 889 0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL); 890 pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt", 891 &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL); 892 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 893 PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL, 894 NULL, NULL); 895 896 TAILQ_INIT(&pmap_all_pmaps); 897 898 /* 899 * Initialize the ASN logic. 900 */ 901 pmap_max_asn = maxasn; 902 for (i = 0; i < ALPHA_MAXPROCS; i++) { 903 pmap_asn_info[i].pma_asn = 1; 904 pmap_asn_info[i].pma_asngen = 0; 905 } 906 907 /* 908 * Initialize the locks. 909 */ 910 rw_init(&pmap_main_lock); 911 mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 912 for (i = 0; i < __arraycount(pmap_pvh_locks); i++) { 913 mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE); 914 } 915 916 /* 917 * Initialize kernel pmap. Note that all kernel mappings 918 * have PG_ASM set, so the ASN doesn't really matter for 919 * the kernel pmap. Also, since the kernel pmap always 920 * references kernel_lev1map, it always has an invalid ASN 921 * generation. 
922 */ 923 memset(pmap_kernel(), 0, sizeof(struct pmap)); 924 pmap_kernel()->pm_lev1map = kernel_lev1map; 925 pmap_kernel()->pm_count = 1; 926 for (i = 0; i < ALPHA_MAXPROCS; i++) { 927 pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 928 pmap_kernel()->pm_asni[i].pma_asngen = 929 pmap_asn_info[i].pma_asngen; 930 } 931 mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE); 932 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); 933 934 #if defined(MULTIPROCESSOR) 935 /* 936 * Initialize the TLB shootdown queues. 937 */ 938 pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache, 939 sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE, 940 0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL); 941 for (i = 0; i < ALPHA_MAXPROCS; i++) { 942 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); 943 mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT, 944 IPL_SCHED); 945 } 946 #endif 947 948 /* 949 * Set up lwp0's PCB such that the ptbr points to the right place 950 * and has the kernel pmap's (really unused) ASN. 951 */ 952 pcb = lwp_getpcb(&lwp0); 953 pcb->pcb_hw.apcb_ptbr = 954 ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT; 955 pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn; 956 957 /* 958 * Mark the kernel pmap `active' on this processor. 959 */ 960 atomic_or_ulong(&pmap_kernel()->pm_cpus, 961 (1UL << cpu_number())); 962 } 963 964 #ifdef _PMAP_MAY_USE_PROM_CONSOLE 965 int 966 pmap_uses_prom_console(void) 967 { 968 969 return (cputype == ST_DEC_21000); 970 } 971 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */ 972 973 /* 974 * pmap_virtual_space: [ INTERFACE ] 975 * 976 * Define the initial bounds of the kernel virtual address space. 977 */ 978 void 979 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) 980 { 981 982 *vstartp = VM_MIN_KERNEL_ADDRESS; /* kernel is in K0SEG */ 983 *vendp = VM_MAX_KERNEL_ADDRESS; /* we use pmap_growkernel */ 984 } 985 986 /* 987 * pmap_steal_memory: [ INTERFACE ] 988 * 989 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()). 990 * This function allows for early dynamic memory allocation until the 991 * virtual memory system has been bootstrapped. After that point, either 992 * kmem_alloc or malloc should be used. This function works by stealing 993 * pages from the (to be) managed page pool, then implicitly mapping the 994 * pages (by using their k0seg addresses) and zeroing them. 995 * 996 * It may be used once the physical memory segments have been pre-loaded 997 * into the vm_physmem[] array. Early memory allocation MUST use this 998 * interface! This cannot be used after vm_page_startup(), and will 999 * generate a panic if tried. 1000 * 1001 * Note that this memory will never be freed, and in essence it is wired 1002 * down. 1003 * 1004 * We must adjust *vstartp and/or *vendp iff we use address space 1005 * from the kernel virtual address range defined by pmap_virtual_space(). 1006 * 1007 * Note: no locking is necessary in this function. 
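 *
 * A typical early-boot use looks something like this (hypothetical
 * caller shown only for illustration):
 *
 *	pt_entry_t *tbl;
 *
 *	tbl = (pt_entry_t *)pmap_steal_memory(PAGE_SIZE, NULL, NULL);
 *	-- tbl is now a zeroed, effectively wired K0SEG address
 *
 * Since this implementation hands back K0SEG addresses, it never
 * actually adjusts *vstartp or *vendp.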
1008 */ 1009 vaddr_t 1010 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp) 1011 { 1012 int bank, npgs, x; 1013 vaddr_t va; 1014 paddr_t pa; 1015 1016 size = round_page(size); 1017 npgs = atop(size); 1018 1019 #if 0 1020 printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs); 1021 #endif 1022 1023 for (bank = 0; bank < vm_nphysseg; bank++) { 1024 if (uvm.page_init_done == true) 1025 panic("pmap_steal_memory: called _after_ bootstrap"); 1026 1027 #if 0 1028 printf(" bank %d: avail_start 0x%lx, start 0x%lx, " 1029 "avail_end 0x%lx\n", bank, vm_physmem[bank].avail_start, 1030 vm_physmem[bank].start, vm_physmem[bank].avail_end); 1031 #endif 1032 1033 if (vm_physmem[bank].avail_start != vm_physmem[bank].start || 1034 vm_physmem[bank].avail_start >= vm_physmem[bank].avail_end) 1035 continue; 1036 1037 #if 0 1038 printf(" avail_end - avail_start = 0x%lx\n", 1039 vm_physmem[bank].avail_end - vm_physmem[bank].avail_start); 1040 #endif 1041 1042 if ((vm_physmem[bank].avail_end - vm_physmem[bank].avail_start) 1043 < npgs) 1044 continue; 1045 1046 /* 1047 * There are enough pages here; steal them! 1048 */ 1049 pa = ptoa(vm_physmem[bank].avail_start); 1050 vm_physmem[bank].avail_start += npgs; 1051 vm_physmem[bank].start += npgs; 1052 1053 /* 1054 * Have we used up this segment? 1055 */ 1056 if (vm_physmem[bank].avail_start == vm_physmem[bank].end) { 1057 if (vm_nphysseg == 1) 1058 panic("pmap_steal_memory: out of memory!"); 1059 1060 /* Remove this segment from the list. */ 1061 vm_nphysseg--; 1062 for (x = bank; x < vm_nphysseg; x++) { 1063 /* structure copy */ 1064 vm_physmem[x] = vm_physmem[x + 1]; 1065 } 1066 } 1067 1068 va = ALPHA_PHYS_TO_K0SEG(pa); 1069 memset((void *)va, 0, size); 1070 pmap_pages_stolen += npgs; 1071 return (va); 1072 } 1073 1074 /* 1075 * If we got here, this was no memory left. 1076 */ 1077 panic("pmap_steal_memory: no memory to steal"); 1078 } 1079 1080 /* 1081 * pmap_init: [ INTERFACE ] 1082 * 1083 * Initialize the pmap module. Called by vm_init(), to initialize any 1084 * structures that the pmap system needs to map virtual memory. 1085 * 1086 * Note: no locking is necessary in this function. 1087 */ 1088 void 1089 pmap_init(void) 1090 { 1091 1092 #ifdef DEBUG 1093 if (pmapdebug & PDB_FOLLOW) 1094 printf("pmap_init()\n"); 1095 #endif 1096 1097 /* initialize protection array */ 1098 alpha_protection_init(); 1099 1100 /* 1101 * Set a low water mark on the pv_entry pool, so that we are 1102 * more likely to have these around even in extreme memory 1103 * starvation. 1104 */ 1105 pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat); 1106 1107 /* 1108 * Now it is safe to enable pv entry recording. 1109 */ 1110 pmap_initialized = true; 1111 1112 #if 0 1113 for (bank = 0; bank < vm_nphysseg; bank++) { 1114 printf("bank %d\n", bank); 1115 printf("\tstart = 0x%x\n", ptoa(vm_physmem[bank].start)); 1116 printf("\tend = 0x%x\n", ptoa(vm_physmem[bank].end)); 1117 printf("\tavail_start = 0x%x\n", 1118 ptoa(vm_physmem[bank].avail_start)); 1119 printf("\tavail_end = 0x%x\n", 1120 ptoa(vm_physmem[bank].avail_end)); 1121 } 1122 #endif 1123 } 1124 1125 /* 1126 * pmap_create: [ INTERFACE ] 1127 * 1128 * Create and return a physical map. 1129 * 1130 * Note: no locking is necessary in this function. 
1131 */ 1132 pmap_t 1133 pmap_create(void) 1134 { 1135 pmap_t pmap; 1136 int i; 1137 1138 #ifdef DEBUG 1139 if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) 1140 printf("pmap_create()\n"); 1141 #endif 1142 1143 pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK); 1144 memset(pmap, 0, sizeof(*pmap)); 1145 1146 /* 1147 * Defer allocation of a new level 1 page table until 1148 * the first new mapping is entered; just take a reference 1149 * to the kernel kernel_lev1map. 1150 */ 1151 pmap->pm_lev1map = kernel_lev1map; 1152 1153 pmap->pm_count = 1; 1154 for (i = 0; i < pmap_ncpuids; i++) { 1155 pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 1156 /* XXX Locking? */ 1157 pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; 1158 } 1159 mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); 1160 1161 try_again: 1162 rw_enter(&pmap_growkernel_lock, RW_READER); 1163 1164 if (pmap_lev1map_create(pmap, cpu_number()) != 0) { 1165 rw_exit(&pmap_growkernel_lock); 1166 (void) kpause("pmap_create", false, hz >> 2, NULL); 1167 goto try_again; 1168 } 1169 1170 mutex_enter(&pmap_all_pmaps_lock); 1171 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); 1172 mutex_exit(&pmap_all_pmaps_lock); 1173 1174 rw_exit(&pmap_growkernel_lock); 1175 1176 return (pmap); 1177 } 1178 1179 /* 1180 * pmap_destroy: [ INTERFACE ] 1181 * 1182 * Drop the reference count on the specified pmap, releasing 1183 * all resources if the reference count drops to zero. 1184 */ 1185 void 1186 pmap_destroy(pmap_t pmap) 1187 { 1188 1189 #ifdef DEBUG 1190 if (pmapdebug & PDB_FOLLOW) 1191 printf("pmap_destroy(%p)\n", pmap); 1192 #endif 1193 1194 if (atomic_dec_uint_nv(&pmap->pm_count) > 0) 1195 return; 1196 1197 rw_enter(&pmap_growkernel_lock, RW_READER); 1198 1199 /* 1200 * Remove it from the global list of all pmaps. 1201 */ 1202 mutex_enter(&pmap_all_pmaps_lock); 1203 TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); 1204 mutex_exit(&pmap_all_pmaps_lock); 1205 1206 pmap_lev1map_destroy(pmap, cpu_number()); 1207 1208 rw_exit(&pmap_growkernel_lock); 1209 1210 /* 1211 * Since the pmap is supposed to contain no valid 1212 * mappings at this point, we should always see 1213 * kernel_lev1map here. 1214 */ 1215 KASSERT(pmap->pm_lev1map == kernel_lev1map); 1216 1217 mutex_destroy(&pmap->pm_lock); 1218 pool_cache_put(&pmap_pmap_cache, pmap); 1219 } 1220 1221 /* 1222 * pmap_reference: [ INTERFACE ] 1223 * 1224 * Add a reference to the specified pmap. 1225 */ 1226 void 1227 pmap_reference(pmap_t pmap) 1228 { 1229 1230 #ifdef DEBUG 1231 if (pmapdebug & PDB_FOLLOW) 1232 printf("pmap_reference(%p)\n", pmap); 1233 #endif 1234 1235 atomic_inc_uint(&pmap->pm_count); 1236 } 1237 1238 /* 1239 * pmap_remove: [ INTERFACE ] 1240 * 1241 * Remove the given range of addresses from the specified map. 1242 * 1243 * It is assumed that the start and end are properly 1244 * rounded to the page size. 1245 */ 1246 void 1247 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 1248 { 1249 pt_entry_t *l1pte, *l2pte, *l3pte; 1250 pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte; 1251 vaddr_t l1eva, l2eva, vptva; 1252 bool needisync = false; 1253 long cpu_id = cpu_number(); 1254 1255 #ifdef DEBUG 1256 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1257 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1258 #endif 1259 1260 /* 1261 * If this is the kernel pmap, we can use a faster method 1262 * for accessing the PTEs (since the PT pages are always 1263 * resident). 
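 *
 * (The faster method is the Virtual Page Table: for a kernel VA we go
 * straight to PMAP_KERNEL_PTE(sva), i.e. &VPT[VPT_INDEX(sva)], instead
 * of walking the L1/L2/L3 tables and reference-counting each PT page
 * along the way as the user-pmap loop below must do.)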
1264 * 1265 * Note that this routine should NEVER be called from an 1266 * interrupt context; pmap_kremove() is used for that. 1267 */ 1268 if (pmap == pmap_kernel()) { 1269 PMAP_MAP_TO_HEAD_LOCK(); 1270 PMAP_LOCK(pmap); 1271 1272 while (sva < eva) { 1273 l3pte = PMAP_KERNEL_PTE(sva); 1274 if (pmap_pte_v(l3pte)) { 1275 #ifdef DIAGNOSTIC 1276 if (uvm_pageismanaged(pmap_pte_pa(l3pte)) && 1277 pmap_pte_pv(l3pte) == 0) 1278 panic("pmap_remove: managed page " 1279 "without PG_PVLIST for 0x%lx", 1280 sva); 1281 #endif 1282 needisync |= pmap_remove_mapping(pmap, sva, 1283 l3pte, true, cpu_id); 1284 } 1285 sva += PAGE_SIZE; 1286 } 1287 1288 PMAP_UNLOCK(pmap); 1289 PMAP_MAP_TO_HEAD_UNLOCK(); 1290 1291 if (needisync) 1292 PMAP_SYNC_ISTREAM_KERNEL(); 1293 return; 1294 } 1295 1296 #ifdef DIAGNOSTIC 1297 if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS) 1298 panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel " 1299 "address range", sva, eva); 1300 #endif 1301 1302 PMAP_MAP_TO_HEAD_LOCK(); 1303 PMAP_LOCK(pmap); 1304 1305 /* 1306 * If we're already referencing the kernel_lev1map, there 1307 * is no work for us to do. 1308 */ 1309 if (pmap->pm_lev1map == kernel_lev1map) 1310 goto out; 1311 1312 saved_l1pte = l1pte = pmap_l1pte(pmap, sva); 1313 1314 /* 1315 * Add a reference to the L1 table to it won't get 1316 * removed from under us. 1317 */ 1318 pmap_physpage_addref(saved_l1pte); 1319 1320 for (; sva < eva; sva = l1eva, l1pte++) { 1321 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1322 if (pmap_pte_v(l1pte)) { 1323 saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte); 1324 1325 /* 1326 * Add a reference to the L2 table so it won't 1327 * get removed from under us. 1328 */ 1329 pmap_physpage_addref(saved_l2pte); 1330 1331 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1332 l2eva = 1333 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1334 if (pmap_pte_v(l2pte)) { 1335 saved_l3pte = l3pte = 1336 pmap_l3pte(pmap, sva, l2pte); 1337 1338 /* 1339 * Add a reference to the L3 table so 1340 * it won't get removed from under us. 1341 */ 1342 pmap_physpage_addref(saved_l3pte); 1343 1344 /* 1345 * Remember this sva; if the L3 table 1346 * gets removed, we need to invalidate 1347 * the VPT TLB entry for it. 1348 */ 1349 vptva = sva; 1350 1351 for (; sva < l2eva && sva < eva; 1352 sva += PAGE_SIZE, l3pte++) { 1353 if (!pmap_pte_v(l3pte)) { 1354 continue; 1355 } 1356 needisync |= 1357 pmap_remove_mapping( 1358 pmap, sva, 1359 l3pte, true, 1360 cpu_id); 1361 } 1362 1363 /* 1364 * Remove the reference to the L3 1365 * table that we added above. This 1366 * may free the L3 table. 1367 */ 1368 pmap_l3pt_delref(pmap, vptva, 1369 saved_l3pte, cpu_id); 1370 } 1371 } 1372 1373 /* 1374 * Remove the reference to the L2 table that we 1375 * added above. This may free the L2 table. 1376 */ 1377 pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id); 1378 } 1379 } 1380 1381 /* 1382 * Remove the reference to the L1 table that we added above. 1383 * This may free the L1 table. 1384 */ 1385 pmap_l1pt_delref(pmap, saved_l1pte, cpu_id); 1386 1387 if (needisync) 1388 PMAP_SYNC_ISTREAM_USER(pmap); 1389 1390 out: 1391 PMAP_UNLOCK(pmap); 1392 PMAP_MAP_TO_HEAD_UNLOCK(); 1393 } 1394 1395 /* 1396 * pmap_page_protect: [ INTERFACE ] 1397 * 1398 * Lower the permission for all mappings to a given page to 1399 * the permissions specified. 
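 *
 * Write permission is only ever taken away here, never granted, so
 * the interesting cases are:
 *
 *	pmap_page_protect(pg, VM_PROT_READ);	-- write-protect every
 *						   mapping of pg (e.g.
 *						   for copy-on-write)
 *	pmap_page_protect(pg, VM_PROT_NONE);	-- remove every mapping
 *						   of pg
 *
 * Requests that still include VM_PROT_WRITE are a no-op.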
1400 */ 1401 void 1402 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 1403 { 1404 pmap_t pmap; 1405 pv_entry_t pv, nextpv; 1406 bool needkisync = false; 1407 long cpu_id = cpu_number(); 1408 kmutex_t *lock; 1409 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1410 #ifdef DEBUG 1411 paddr_t pa = VM_PAGE_TO_PHYS(pg); 1412 1413 1414 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) || 1415 (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE))) 1416 printf("pmap_page_protect(%p, %x)\n", pg, prot); 1417 #endif 1418 1419 switch (prot) { 1420 case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE: 1421 case VM_PROT_READ|VM_PROT_WRITE: 1422 return; 1423 1424 /* copy_on_write */ 1425 case VM_PROT_READ|VM_PROT_EXECUTE: 1426 case VM_PROT_READ: 1427 PMAP_HEAD_TO_MAP_LOCK(); 1428 lock = pmap_pvh_lock(pg); 1429 mutex_enter(lock); 1430 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) { 1431 PMAP_LOCK(pv->pv_pmap); 1432 if (*pv->pv_pte & (PG_KWE | PG_UWE)) { 1433 *pv->pv_pte &= ~(PG_KWE | PG_UWE); 1434 PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va, 1435 pmap_pte_asm(pv->pv_pte), 1436 PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id); 1437 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va, 1438 pmap_pte_asm(pv->pv_pte)); 1439 } 1440 PMAP_UNLOCK(pv->pv_pmap); 1441 } 1442 mutex_exit(lock); 1443 PMAP_HEAD_TO_MAP_UNLOCK(); 1444 PMAP_TLB_SHOOTNOW(); 1445 return; 1446 1447 /* remove_all */ 1448 default: 1449 break; 1450 } 1451 1452 PMAP_HEAD_TO_MAP_LOCK(); 1453 lock = pmap_pvh_lock(pg); 1454 mutex_enter(lock); 1455 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = nextpv) { 1456 nextpv = pv->pv_next; 1457 pmap = pv->pv_pmap; 1458 1459 PMAP_LOCK(pmap); 1460 #ifdef DEBUG 1461 if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 || 1462 pmap_pte_pa(pv->pv_pte) != pa) 1463 panic("pmap_page_protect: bad mapping"); 1464 #endif 1465 if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte, 1466 false, cpu_id) == true) { 1467 if (pmap == pmap_kernel()) 1468 needkisync |= true; 1469 else 1470 PMAP_SYNC_ISTREAM_USER(pmap); 1471 } 1472 PMAP_UNLOCK(pmap); 1473 } 1474 1475 if (needkisync) 1476 PMAP_SYNC_ISTREAM_KERNEL(); 1477 1478 mutex_exit(lock); 1479 PMAP_HEAD_TO_MAP_UNLOCK(); 1480 } 1481 1482 /* 1483 * pmap_protect: [ INTERFACE ] 1484 * 1485 * Set the physical protection on the specified range of this map 1486 * as requested. 
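 *
 * For example, write-protecting a range of a map:
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);
 *
 * downgrades every valid PTE in [sva, eva) to read-only.  A request
 * that does not include VM_PROT_READ is treated as a removal and is
 * simply handed to pmap_remove().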
1487 */ 1488 void 1489 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1490 { 1491 pt_entry_t *l1pte, *l2pte, *l3pte, bits; 1492 bool isactive; 1493 bool hadasm; 1494 vaddr_t l1eva, l2eva; 1495 long cpu_id = cpu_number(); 1496 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1497 1498 #ifdef DEBUG 1499 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) 1500 printf("pmap_protect(%p, %lx, %lx, %x)\n", 1501 pmap, sva, eva, prot); 1502 #endif 1503 1504 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1505 pmap_remove(pmap, sva, eva); 1506 return; 1507 } 1508 1509 PMAP_LOCK(pmap); 1510 1511 bits = pte_prot(pmap, prot); 1512 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1513 1514 l1pte = pmap_l1pte(pmap, sva); 1515 for (; sva < eva; sva = l1eva, l1pte++) { 1516 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1517 if (pmap_pte_v(l1pte)) { 1518 l2pte = pmap_l2pte(pmap, sva, l1pte); 1519 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1520 l2eva = 1521 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1522 if (pmap_pte_v(l2pte)) { 1523 l3pte = pmap_l3pte(pmap, sva, l2pte); 1524 for (; sva < l2eva && sva < eva; 1525 sva += PAGE_SIZE, l3pte++) { 1526 if (pmap_pte_v(l3pte) && 1527 pmap_pte_prot_chg(l3pte, 1528 bits)) { 1529 hadasm = 1530 (pmap_pte_asm(l3pte) 1531 != 0); 1532 pmap_pte_set_prot(l3pte, 1533 bits); 1534 PMAP_INVALIDATE_TLB( 1535 pmap, sva, hadasm, 1536 isactive, cpu_id); 1537 PMAP_TLB_SHOOTDOWN( 1538 pmap, sva, 1539 hadasm ? PG_ASM : 0); 1540 } 1541 } 1542 } 1543 } 1544 } 1545 } 1546 1547 PMAP_TLB_SHOOTNOW(); 1548 1549 if (prot & VM_PROT_EXECUTE) 1550 PMAP_SYNC_ISTREAM(pmap); 1551 1552 PMAP_UNLOCK(pmap); 1553 } 1554 1555 /* 1556 * pmap_enter: [ INTERFACE ] 1557 * 1558 * Insert the given physical page (p) at 1559 * the specified virtual address (v) in the 1560 * target physical map with the protection requested. 1561 * 1562 * If specified, the page will be wired down, meaning 1563 * that the related pte can not be reclaimed. 1564 * 1565 * Note: This is the only routine which MAY NOT lazy-evaluate 1566 * or lose information. That is, this routine must actually 1567 * insert this page into the given map NOW. 1568 */ 1569 int 1570 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1571 { 1572 struct vm_page *pg; /* if != NULL, managed page */ 1573 pt_entry_t *pte, npte, opte; 1574 paddr_t opa; 1575 bool tflush = true; 1576 bool hadasm = false; /* XXX gcc -Wuninitialized */ 1577 bool needisync = false; 1578 bool setisync = false; 1579 bool isactive; 1580 bool wired; 1581 long cpu_id = cpu_number(); 1582 int error = 0; 1583 kmutex_t *lock; 1584 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1585 1586 #ifdef DEBUG 1587 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1588 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n", 1589 pmap, va, pa, prot, flags); 1590 #endif 1591 pg = PHYS_TO_VM_PAGE(pa); 1592 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1593 wired = (flags & PMAP_WIRED) != 0; 1594 1595 /* 1596 * Determine what we need to do about the I-stream. If 1597 * VM_PROT_EXECUTE is set, we mark a user pmap as needing 1598 * an I-sync on the way back out to userspace. We always 1599 * need an immediate I-sync for the kernel pmap. 1600 */ 1601 if (prot & VM_PROT_EXECUTE) { 1602 if (pmap == pmap_kernel()) 1603 needisync = true; 1604 else { 1605 setisync = true; 1606 needisync = (pmap->pm_cpus != 0); 1607 } 1608 } 1609 1610 PMAP_MAP_TO_HEAD_LOCK(); 1611 PMAP_LOCK(pmap); 1612 1613 if (pmap == pmap_kernel()) { 1614 #ifdef DIAGNOSTIC 1615 /* 1616 * Sanity check the virtual address. 
1617 */ 1618 if (va < VM_MIN_KERNEL_ADDRESS) 1619 panic("pmap_enter: kernel pmap, invalid va 0x%lx", va); 1620 #endif 1621 pte = PMAP_KERNEL_PTE(va); 1622 } else { 1623 pt_entry_t *l1pte, *l2pte; 1624 1625 #ifdef DIAGNOSTIC 1626 /* 1627 * Sanity check the virtual address. 1628 */ 1629 if (va >= VM_MAXUSER_ADDRESS) 1630 panic("pmap_enter: user pmap, invalid va 0x%lx", va); 1631 #endif 1632 1633 KASSERT(pmap->pm_lev1map != kernel_lev1map); 1634 1635 /* 1636 * Check to see if the level 1 PTE is valid, and 1637 * allocate a new level 2 page table page if it's not. 1638 * A reference will be added to the level 2 table when 1639 * the level 3 table is created. 1640 */ 1641 l1pte = pmap_l1pte(pmap, va); 1642 if (pmap_pte_v(l1pte) == 0) { 1643 pmap_physpage_addref(l1pte); 1644 error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT); 1645 if (error) { 1646 pmap_l1pt_delref(pmap, l1pte, cpu_id); 1647 if (flags & PMAP_CANFAIL) 1648 goto out; 1649 panic("pmap_enter: unable to create L2 PT " 1650 "page"); 1651 } 1652 #ifdef DEBUG 1653 if (pmapdebug & PDB_PTPAGE) 1654 printf("pmap_enter: new level 2 table at " 1655 "0x%lx\n", pmap_pte_pa(l1pte)); 1656 #endif 1657 } 1658 1659 /* 1660 * Check to see if the level 2 PTE is valid, and 1661 * allocate a new level 3 page table page if it's not. 1662 * A reference will be added to the level 3 table when 1663 * the mapping is validated. 1664 */ 1665 l2pte = pmap_l2pte(pmap, va, l1pte); 1666 if (pmap_pte_v(l2pte) == 0) { 1667 pmap_physpage_addref(l2pte); 1668 error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT); 1669 if (error) { 1670 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id); 1671 if (flags & PMAP_CANFAIL) 1672 goto out; 1673 panic("pmap_enter: unable to create L3 PT " 1674 "page"); 1675 } 1676 #ifdef DEBUG 1677 if (pmapdebug & PDB_PTPAGE) 1678 printf("pmap_enter: new level 3 table at " 1679 "0x%lx\n", pmap_pte_pa(l2pte)); 1680 #endif 1681 } 1682 1683 /* 1684 * Get the PTE that will map the page. 1685 */ 1686 pte = pmap_l3pte(pmap, va, l2pte); 1687 } 1688 1689 /* Remember all of the old PTE; used for TBI check later. */ 1690 opte = *pte; 1691 1692 /* 1693 * Check to see if the old mapping is valid. If not, validate the 1694 * new one immediately. 1695 */ 1696 if (pmap_pte_v(pte) == 0) { 1697 /* 1698 * No need to invalidate the TLB in this case; an invalid 1699 * mapping won't be in the TLB, and a previously valid 1700 * mapping would have been flushed when it was invalidated. 1701 */ 1702 tflush = false; 1703 1704 /* 1705 * No need to synchronize the I-stream, either, for basically 1706 * the same reason. 1707 */ 1708 setisync = needisync = false; 1709 1710 if (pmap != pmap_kernel()) { 1711 /* 1712 * New mappings gain a reference on the level 3 1713 * table. 1714 */ 1715 pmap_physpage_addref(pte); 1716 } 1717 goto validate_enterpv; 1718 } 1719 1720 opa = pmap_pte_pa(pte); 1721 hadasm = (pmap_pte_asm(pte) != 0); 1722 1723 if (opa == pa) { 1724 /* 1725 * Mapping has not changed; must be a protection or 1726 * wiring change. 1727 */ 1728 if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) { 1729 #ifdef DEBUG 1730 if (pmapdebug & PDB_ENTER) 1731 printf("pmap_enter: wiring change -> %d\n", 1732 wired); 1733 #endif 1734 /* 1735 * Adjust the wiring count. 1736 */ 1737 if (wired) 1738 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1739 else 1740 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1741 } 1742 1743 /* 1744 * Set the PTE. 1745 */ 1746 goto validate; 1747 } 1748 1749 /* 1750 * The mapping has changed. We need to invalidate the 1751 * old mapping before creating the new one. 
1752 */ 1753 #ifdef DEBUG 1754 if (pmapdebug & PDB_ENTER) 1755 printf("pmap_enter: removing old mapping 0x%lx\n", va); 1756 #endif 1757 if (pmap != pmap_kernel()) { 1758 /* 1759 * Gain an extra reference on the level 3 table. 1760 * pmap_remove_mapping() will delete a reference, 1761 * and we don't want the table to be erroneously 1762 * freed. 1763 */ 1764 pmap_physpage_addref(pte); 1765 } 1766 needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id); 1767 1768 validate_enterpv: 1769 /* 1770 * Enter the mapping into the pv_table if appropriate. 1771 */ 1772 if (pg != NULL) { 1773 error = pmap_pv_enter(pmap, pg, va, pte, true); 1774 if (error) { 1775 pmap_l3pt_delref(pmap, va, pte, cpu_id); 1776 if (flags & PMAP_CANFAIL) 1777 goto out; 1778 panic("pmap_enter: unable to enter mapping in PV " 1779 "table"); 1780 } 1781 } 1782 1783 /* 1784 * Increment counters. 1785 */ 1786 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1787 if (wired) 1788 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1789 1790 validate: 1791 /* 1792 * Build the new PTE. 1793 */ 1794 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V; 1795 if (pg != NULL) { 1796 int attrs; 1797 1798 #ifdef DIAGNOSTIC 1799 if ((flags & VM_PROT_ALL) & ~prot) 1800 panic("pmap_enter: access type exceeds prot"); 1801 #endif 1802 lock = pmap_pvh_lock(pg); 1803 mutex_enter(lock); 1804 if (flags & VM_PROT_WRITE) 1805 pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 1806 else if (flags & VM_PROT_ALL) 1807 pg->mdpage.pvh_attrs |= PGA_REFERENCED; 1808 attrs = pg->mdpage.pvh_attrs; 1809 mutex_exit(lock); 1810 1811 /* 1812 * Set up referenced/modified emulation for new mapping. 1813 */ 1814 if ((attrs & PGA_REFERENCED) == 0) 1815 npte |= PG_FOR | PG_FOW | PG_FOE; 1816 else if ((attrs & PGA_MODIFIED) == 0) 1817 npte |= PG_FOW; 1818 1819 /* 1820 * Mapping was entered on PV list. 1821 */ 1822 npte |= PG_PVLIST; 1823 } 1824 if (wired) 1825 npte |= PG_WIRED; 1826 #ifdef DEBUG 1827 if (pmapdebug & PDB_ENTER) 1828 printf("pmap_enter: new pte = 0x%lx\n", npte); 1829 #endif 1830 1831 /* 1832 * If the PALcode portion of the new PTE is the same as the 1833 * old PTE, no TBI is necessary. 1834 */ 1835 if (PG_PALCODE(opte) == PG_PALCODE(npte)) 1836 tflush = false; 1837 1838 /* 1839 * Set the new PTE. 1840 */ 1841 PMAP_SET_PTE(pte, npte); 1842 1843 /* 1844 * Invalidate the TLB entry for this VA and any appropriate 1845 * caches. 1846 */ 1847 if (tflush) { 1848 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 1849 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 1850 PMAP_TLB_SHOOTNOW(); 1851 } 1852 if (setisync) 1853 PMAP_SET_NEEDISYNC(pmap); 1854 if (needisync) 1855 PMAP_SYNC_ISTREAM(pmap); 1856 1857 out: 1858 PMAP_UNLOCK(pmap); 1859 PMAP_MAP_TO_HEAD_UNLOCK(); 1860 1861 return error; 1862 } 1863 1864 /* 1865 * pmap_kenter_pa: [ INTERFACE ] 1866 * 1867 * Enter a va -> pa mapping into the kernel pmap without any 1868 * physical->virtual tracking. 1869 * 1870 * Note: no locking is necessary in this function. 1871 */ 1872 void 1873 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1874 { 1875 pt_entry_t *pte, npte; 1876 long cpu_id = cpu_number(); 1877 bool needisync = false; 1878 pmap_t pmap = pmap_kernel(); 1879 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1880 1881 #ifdef DEBUG 1882 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1883 printf("pmap_kenter_pa(%lx, %lx, %x)\n", 1884 va, pa, prot); 1885 #endif 1886 1887 #ifdef DIAGNOSTIC 1888 /* 1889 * Sanity check the virtual address. 
 */
	if (va < VM_MIN_KERNEL_ADDRESS)
		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
#endif

	pte = PMAP_KERNEL_PTE(va);

	if (pmap_pte_v(pte) == 0)
		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
	if (pmap_pte_w(pte) == 0)
		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);

	if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte))
		needisync = true;

	/*
	 * Build the new PTE.
	 */
	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
	    PG_V | PG_WIRED;

	/*
	 * Set the new PTE.
	 */
	PMAP_SET_PTE(pte, npte);
#if defined(MULTIPROCESSOR)
	alpha_mb();		/* XXX alpha_wmb()? */
#endif

	/*
	 * Invalidate the TLB entry for this VA and any appropriate
	 * caches.
	 */
	PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
	PMAP_TLB_SHOOTNOW();

	if (needisync)
		PMAP_SYNC_ISTREAM_KERNEL();
}

/*
 * pmap_kremove:		[ INTERFACE ]
 *
 *	Remove a mapping entered with pmap_kenter_pa() starting at va,
 *	for size bytes (assumed to be page rounded).
 */
void
pmap_kremove(vaddr_t va, vsize_t size)
{
	pt_entry_t *pte;
	bool needisync = false;
	long cpu_id = cpu_number();
	pmap_t pmap = pmap_kernel();
	PMAP_TLB_SHOOTDOWN_CPUSET_DECL

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
		printf("pmap_kremove(%lx, %lx)\n",
		    va, size);
#endif

#ifdef DIAGNOSTIC
	if (va < VM_MIN_KERNEL_ADDRESS)
		panic("pmap_kremove: user address");
#endif

	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
		pte = PMAP_KERNEL_PTE(va);
		if (pmap_pte_v(pte)) {
#ifdef DIAGNOSTIC
			if (pmap_pte_pv(pte))
				panic("pmap_kremove: PG_PVLIST mapping for "
				    "0x%lx", va);
#endif
			if (pmap_pte_exec(pte))
				needisync = true;

			/* Zap the mapping. */
			PMAP_SET_PTE(pte, PG_NV);
#if defined(MULTIPROCESSOR)
			alpha_mb();		/* XXX alpha_wmb()? */
#endif
			PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);

			/* Update stats. */
			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
		}
	}

	PMAP_TLB_SHOOTNOW();

	if (needisync)
		PMAP_SYNC_ISTREAM_KERNEL();
}

/*
 * pmap_unwire:			[ INTERFACE ]
 *
 *	Clear the wired attribute for a map/virtual-address pair.
 *
 *	The mapping must already exist in the pmap.
 */
void
pmap_unwire(pmap_t pmap, vaddr_t va)
{
	pt_entry_t *pte;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_unwire(%p, %lx)\n", pmap, va);
#endif

	PMAP_LOCK(pmap);

	pte = pmap_l3pte(pmap, va, NULL);
#ifdef DIAGNOSTIC
	if (pte == NULL || pmap_pte_v(pte) == 0)
		panic("pmap_unwire");
#endif

	/*
	 * If wiring actually changed (always?) clear the wire bit and
	 * update the wire count.  Note that wiring is not a hardware
	 * characteristic so there is no need to invalidate the TLB.
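 *
 *	(Illustrative note, an assumption about the usual MI call path
 *	rather than anything stated in this file: uvm_fault_unwire()
 *	typically walks a VA range and unwires it page by page, roughly
 *
 *		for (va = start; va < end; va += PAGE_SIZE)
 *			pmap_unwire(pmap, va);
 *	)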
2017 */ 2018 if (pmap_pte_w_chg(pte, 0)) { 2019 pmap_pte_set_w(pte, false); 2020 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2021 } 2022 #ifdef DIAGNOSTIC 2023 else { 2024 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 2025 "didn't change!\n", pmap, va); 2026 } 2027 #endif 2028 2029 PMAP_UNLOCK(pmap); 2030 } 2031 2032 /* 2033 * pmap_extract: [ INTERFACE ] 2034 * 2035 * Extract the physical address associated with the given 2036 * pmap/virtual address pair. 2037 */ 2038 bool 2039 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 2040 { 2041 pt_entry_t *l1pte, *l2pte, *l3pte; 2042 paddr_t pa; 2043 2044 #ifdef DEBUG 2045 if (pmapdebug & PDB_FOLLOW) 2046 printf("pmap_extract(%p, %lx) -> ", pmap, va); 2047 #endif 2048 2049 /* 2050 * Take a faster path for the kernel pmap. Avoids locking, 2051 * handles K0SEG. 2052 */ 2053 if (pmap == pmap_kernel()) { 2054 pa = vtophys(va); 2055 if (pap != NULL) 2056 *pap = pa; 2057 #ifdef DEBUG 2058 if (pmapdebug & PDB_FOLLOW) 2059 printf("0x%lx (kernel vtophys)\n", pa); 2060 #endif 2061 return (pa != 0); /* XXX */ 2062 } 2063 2064 PMAP_LOCK(pmap); 2065 2066 l1pte = pmap_l1pte(pmap, va); 2067 if (pmap_pte_v(l1pte) == 0) 2068 goto out; 2069 2070 l2pte = pmap_l2pte(pmap, va, l1pte); 2071 if (pmap_pte_v(l2pte) == 0) 2072 goto out; 2073 2074 l3pte = pmap_l3pte(pmap, va, l2pte); 2075 if (pmap_pte_v(l3pte) == 0) 2076 goto out; 2077 2078 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 2079 PMAP_UNLOCK(pmap); 2080 if (pap != NULL) 2081 *pap = pa; 2082 #ifdef DEBUG 2083 if (pmapdebug & PDB_FOLLOW) 2084 printf("0x%lx\n", pa); 2085 #endif 2086 return (true); 2087 2088 out: 2089 PMAP_UNLOCK(pmap); 2090 #ifdef DEBUG 2091 if (pmapdebug & PDB_FOLLOW) 2092 printf("failed\n"); 2093 #endif 2094 return (false); 2095 } 2096 2097 /* 2098 * pmap_copy: [ INTERFACE ] 2099 * 2100 * Copy the mapping range specified by src_addr/len 2101 * from the source map to the range dst_addr/len 2102 * in the destination map. 2103 * 2104 * This routine is only advisory and need not do anything. 2105 */ 2106 /* call deleted in <machine/pmap.h> */ 2107 2108 /* 2109 * pmap_update: [ INTERFACE ] 2110 * 2111 * Require that all active physical maps contain no 2112 * incorrect entries NOW, by processing any deferred 2113 * pmap operations. 2114 */ 2115 /* call deleted in <machine/pmap.h> */ 2116 2117 /* 2118 * pmap_activate: [ INTERFACE ] 2119 * 2120 * Activate the pmap used by the specified process. This includes 2121 * reloading the MMU context if the current process, and marking 2122 * the pmap in use by the processor. 2123 */ 2124 void 2125 pmap_activate(struct lwp *l) 2126 { 2127 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2128 long cpu_id = cpu_number(); 2129 2130 #ifdef DEBUG 2131 if (pmapdebug & PDB_FOLLOW) 2132 printf("pmap_activate(%p)\n", l); 2133 #endif 2134 2135 /* Mark the pmap in use by this processor. */ 2136 atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id)); 2137 2138 /* Allocate an ASN. */ 2139 pmap_asn_alloc(pmap, cpu_id); 2140 2141 PMAP_ACTIVATE(pmap, l, cpu_id); 2142 } 2143 2144 /* 2145 * pmap_deactivate: [ INTERFACE ] 2146 * 2147 * Mark that the pmap used by the specified process is no longer 2148 * in use by the processor. 2149 * 2150 * The comment above pmap_activate() wrt. locking applies here, 2151 * as well. Note that we use only a single `atomic' operation, 2152 * so no locking is necessary. 
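 *
 *	(Sketch of the bookkeeping, for illustration only:
 *
 *		activate:    atomic_or_ulong(&pmap->pm_cpus, 1UL << cpu_id);
 *		deactivate:  atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_id));
 *
 *	which is presumably what lets PMAP_ISACTIVE() test activeness
 *	without taking the pmap lock.)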
 */
void
pmap_deactivate(struct lwp *l)
{
	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_deactivate(%p)\n", l);
#endif

	/*
	 * Mark the pmap no longer in use by this processor.
	 */
	atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
}

/*
 * pmap_zero_page:		[ INTERFACE ]
 *
 *	Zero the specified (machine independent) page by mapping the page
 *	into virtual memory and clearing its contents, one machine dependent
 *	page at a time.
 *
 *	Note: no locking is necessary in this function.
 */
void
pmap_zero_page(paddr_t phys)
{
	u_long *p0, *p1, *pend;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_zero_page(%lx)\n", phys);
#endif

	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
	p1 = NULL;
	pend = (u_long *)((u_long)p0 + PAGE_SIZE);

	/*
	 * Unroll the loop a bit, doing 16 quadwords per iteration.
	 * Do only 8 back-to-back stores, and alternate registers.
	 */
	do {
		__asm volatile(
		"# BEGIN loop body\n"
		"	addq	%2, (8 * 8), %1		\n"
		"	stq	$31, (0 * 8)(%0)	\n"
		"	stq	$31, (1 * 8)(%0)	\n"
		"	stq	$31, (2 * 8)(%0)	\n"
		"	stq	$31, (3 * 8)(%0)	\n"
		"	stq	$31, (4 * 8)(%0)	\n"
		"	stq	$31, (5 * 8)(%0)	\n"
		"	stq	$31, (6 * 8)(%0)	\n"
		"	stq	$31, (7 * 8)(%0)	\n"
		"					\n"
		"	addq	%3, (8 * 8), %0		\n"
		"	stq	$31, (0 * 8)(%1)	\n"
		"	stq	$31, (1 * 8)(%1)	\n"
		"	stq	$31, (2 * 8)(%1)	\n"
		"	stq	$31, (3 * 8)(%1)	\n"
		"	stq	$31, (4 * 8)(%1)	\n"
		"	stq	$31, (5 * 8)(%1)	\n"
		"	stq	$31, (6 * 8)(%1)	\n"
		"	stq	$31, (7 * 8)(%1)	\n"
		"	# END loop body"
		: "=r" (p0), "=r" (p1)
		: "0" (p0), "1" (p1)
		: "memory");
	} while (p0 < pend);
}

/*
 * pmap_copy_page:		[ INTERFACE ]
 *
 *	Copy the specified (machine independent) page by mapping the page
 *	into virtual memory and using memcpy to copy the page, one machine
 *	dependent page at a time.
 *
 *	Note: no locking is necessary in this function.
 */
void
pmap_copy_page(paddr_t src, paddr_t dst)
{
	const void *s;
	void *d;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
#endif
	s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
	d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
	memcpy(d, s, PAGE_SIZE);
}

/*
 * pmap_pageidlezero:		[ INTERFACE ]
 *
 *	Page zero'er for the idle loop.  Returns true if the
 *	page was zero'd, false if we aborted for some reason.
 */
bool
pmap_pageidlezero(paddr_t pa)
{
	u_long *ptr;
	int i, cnt = PAGE_SIZE / sizeof(u_long);

	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
		if (sched_curcpu_runnable_p()) {
			/*
			 * An LWP has become ready.  Abort now,
			 * so we don't keep it waiting while we
			 * finish zeroing the page.
			 */
			return (false);
		}
		*ptr++ = 0;
	}

	return (true);
}

/*
 * pmap_clear_modify:		[ INTERFACE ]
 *
 *	Clear the modify bits on the specified physical page.
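 *
 *	(Restating the mechanism below for clarity, nothing new: "modified"
 *	is cleared by re-arming the fault-on-write bit in every mapping of
 *	the page,
 *
 *		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
 *		pg->mdpage.pvh_attrs &= ~PGA_MODIFIED;
 *
 *	so the next store faults and pmap_emulate_reference() sets
 *	PGA_MODIFIED again.)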
2281 */ 2282 bool 2283 pmap_clear_modify(struct vm_page *pg) 2284 { 2285 bool rv = false; 2286 long cpu_id = cpu_number(); 2287 kmutex_t *lock; 2288 2289 #ifdef DEBUG 2290 if (pmapdebug & PDB_FOLLOW) 2291 printf("pmap_clear_modify(%p)\n", pg); 2292 #endif 2293 2294 PMAP_HEAD_TO_MAP_LOCK(); 2295 lock = pmap_pvh_lock(pg); 2296 mutex_enter(lock); 2297 2298 if (pg->mdpage.pvh_attrs & PGA_MODIFIED) { 2299 rv = true; 2300 pmap_changebit(pg, PG_FOW, ~0, cpu_id); 2301 pg->mdpage.pvh_attrs &= ~PGA_MODIFIED; 2302 } 2303 2304 mutex_exit(lock); 2305 PMAP_HEAD_TO_MAP_UNLOCK(); 2306 2307 return (rv); 2308 } 2309 2310 /* 2311 * pmap_clear_reference: [ INTERFACE ] 2312 * 2313 * Clear the reference bit on the specified physical page. 2314 */ 2315 bool 2316 pmap_clear_reference(struct vm_page *pg) 2317 { 2318 bool rv = false; 2319 long cpu_id = cpu_number(); 2320 kmutex_t *lock; 2321 2322 #ifdef DEBUG 2323 if (pmapdebug & PDB_FOLLOW) 2324 printf("pmap_clear_reference(%p)\n", pg); 2325 #endif 2326 2327 PMAP_HEAD_TO_MAP_LOCK(); 2328 lock = pmap_pvh_lock(pg); 2329 mutex_enter(lock); 2330 2331 if (pg->mdpage.pvh_attrs & PGA_REFERENCED) { 2332 rv = true; 2333 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id); 2334 pg->mdpage.pvh_attrs &= ~PGA_REFERENCED; 2335 } 2336 2337 mutex_exit(lock); 2338 PMAP_HEAD_TO_MAP_UNLOCK(); 2339 2340 return (rv); 2341 } 2342 2343 /* 2344 * pmap_is_referenced: [ INTERFACE ] 2345 * 2346 * Return whether or not the specified physical page is referenced 2347 * by any physical maps. 2348 */ 2349 /* See <machine/pmap.h> */ 2350 2351 /* 2352 * pmap_is_modified: [ INTERFACE ] 2353 * 2354 * Return whether or not the specified physical page is modified 2355 * by any physical maps. 2356 */ 2357 /* See <machine/pmap.h> */ 2358 2359 /* 2360 * pmap_phys_address: [ INTERFACE ] 2361 * 2362 * Return the physical address corresponding to the specified 2363 * cookie. Used by the device pager to decode a device driver's 2364 * mmap entry point return value. 2365 * 2366 * Note: no locking is necessary in this function. 2367 */ 2368 paddr_t 2369 pmap_phys_address(paddr_t ppn) 2370 { 2371 2372 return (alpha_ptob(ppn)); 2373 } 2374 2375 /* 2376 * Miscellaneous support routines follow 2377 */ 2378 2379 /* 2380 * alpha_protection_init: 2381 * 2382 * Initialize Alpha protection code array. 2383 * 2384 * Note: no locking is necessary in this function. 2385 */ 2386 static void 2387 alpha_protection_init(void) 2388 { 2389 int prot, *kp, *up; 2390 2391 kp = protection_codes[0]; 2392 up = protection_codes[1]; 2393 2394 for (prot = 0; prot < 8; prot++) { 2395 kp[prot] = PG_ASM; 2396 up[prot] = 0; 2397 2398 if (prot & VM_PROT_READ) { 2399 kp[prot] |= PG_KRE; 2400 up[prot] |= PG_KRE | PG_URE; 2401 } 2402 if (prot & VM_PROT_WRITE) { 2403 kp[prot] |= PG_KWE; 2404 up[prot] |= PG_KWE | PG_UWE; 2405 } 2406 if (prot & VM_PROT_EXECUTE) { 2407 kp[prot] |= PG_EXEC | PG_KRE; 2408 up[prot] |= PG_EXEC | PG_KRE | PG_URE; 2409 } else { 2410 kp[prot] |= PG_FOE; 2411 up[prot] |= PG_FOE; 2412 } 2413 } 2414 } 2415 2416 /* 2417 * pmap_remove_mapping: 2418 * 2419 * Invalidate a single page denoted by pmap/va. 2420 * 2421 * If (pte != NULL), it is the already computed PTE for the page. 2422 * 2423 * Note: locking in this function is complicated by the fact 2424 * that we can be called when the PV list is already locked. 2425 * (pmap_page_protect()). In this case, the caller must be 2426 * careful to get the next PV entry while we remove this entry 2427 * from beneath it. 
We assume that the pmap itself is already 2428 * locked; dolock applies only to the PV list. 2429 * 2430 * Returns true or false, indicating if an I-stream sync needs 2431 * to be initiated (for this CPU or for other CPUs). 2432 */ 2433 static bool 2434 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte, 2435 bool dolock, long cpu_id) 2436 { 2437 paddr_t pa; 2438 struct vm_page *pg; /* if != NULL, page is managed */ 2439 bool onpv; 2440 bool hadasm; 2441 bool isactive; 2442 bool needisync = false; 2443 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2444 2445 #ifdef DEBUG 2446 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 2447 printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n", 2448 pmap, va, pte, dolock, cpu_id); 2449 #endif 2450 2451 /* 2452 * PTE not provided, compute it from pmap and va. 2453 */ 2454 if (pte == NULL) { 2455 pte = pmap_l3pte(pmap, va, NULL); 2456 if (pmap_pte_v(pte) == 0) 2457 return (false); 2458 } 2459 2460 pa = pmap_pte_pa(pte); 2461 onpv = (pmap_pte_pv(pte) != 0); 2462 hadasm = (pmap_pte_asm(pte) != 0); 2463 isactive = PMAP_ISACTIVE(pmap, cpu_id); 2464 2465 /* 2466 * Determine what we need to do about the I-stream. If 2467 * PG_EXEC was set, we mark a user pmap as needing an 2468 * I-sync on the way out to userspace. We always need 2469 * an immediate I-sync for the kernel pmap. 2470 */ 2471 if (pmap_pte_exec(pte)) { 2472 if (pmap == pmap_kernel()) 2473 needisync = true; 2474 else { 2475 PMAP_SET_NEEDISYNC(pmap); 2476 needisync = (pmap->pm_cpus != 0); 2477 } 2478 } 2479 2480 /* 2481 * Update statistics 2482 */ 2483 if (pmap_pte_w(pte)) 2484 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2485 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 2486 2487 /* 2488 * Invalidate the PTE after saving the reference modify info. 2489 */ 2490 #ifdef DEBUG 2491 if (pmapdebug & PDB_REMOVE) 2492 printf("remove: invalidating pte at %p\n", pte); 2493 #endif 2494 PMAP_SET_PTE(pte, PG_NV); 2495 2496 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 2497 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 2498 PMAP_TLB_SHOOTNOW(); 2499 2500 /* 2501 * If we're removing a user mapping, check to see if we 2502 * can free page table pages. 2503 */ 2504 if (pmap != pmap_kernel()) { 2505 /* 2506 * Delete the reference on the level 3 table. It will 2507 * delete references on the level 2 and 1 tables as 2508 * appropriate. 2509 */ 2510 pmap_l3pt_delref(pmap, va, pte, cpu_id); 2511 } 2512 2513 /* 2514 * If the mapping wasn't entered on the PV list, we're all done. 2515 */ 2516 if (onpv == false) 2517 return (needisync); 2518 2519 /* 2520 * Remove it from the PV table. 2521 */ 2522 pg = PHYS_TO_VM_PAGE(pa); 2523 KASSERT(pg != NULL); 2524 pmap_pv_remove(pmap, pg, va, dolock); 2525 2526 return (needisync); 2527 } 2528 2529 /* 2530 * pmap_changebit: 2531 * 2532 * Set or clear the specified PTE bits for all mappings on the 2533 * specified page. 2534 * 2535 * Note: we assume that the pv_head is already locked, and that 2536 * the caller has acquired a PV->pmap mutex so that we can lock 2537 * the pmaps as we encounter them. 2538 */ 2539 static void 2540 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id) 2541 { 2542 pv_entry_t pv; 2543 pt_entry_t *pte, npte; 2544 vaddr_t va; 2545 bool hadasm, isactive; 2546 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2547 2548 #ifdef DEBUG 2549 if (pmapdebug & PDB_BITS) 2550 printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n", 2551 pg, set, mask); 2552 #endif 2553 2554 /* 2555 * Loop over all current mappings setting/clearing as apropos. 
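 *
 *	(Calling convention, restated for clarity: each mapping's new PTE
 *	is computed as
 *
 *		npte = (*pte | set) & mask;
 *
 *	so callers pass e.g. (PG_FOW, ~0) to set fault-on-write, or
 *	(0, ~(PG_FOR|PG_FOW|PG_FOE)) to clear the fault bits.)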
2556 */ 2557 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) { 2558 va = pv->pv_va; 2559 2560 PMAP_LOCK(pv->pv_pmap); 2561 2562 pte = pv->pv_pte; 2563 npte = (*pte | set) & mask; 2564 if (*pte != npte) { 2565 hadasm = (pmap_pte_asm(pte) != 0); 2566 isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id); 2567 PMAP_SET_PTE(pte, npte); 2568 PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive, 2569 cpu_id); 2570 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va, 2571 hadasm ? PG_ASM : 0); 2572 } 2573 PMAP_UNLOCK(pv->pv_pmap); 2574 } 2575 2576 PMAP_TLB_SHOOTNOW(); 2577 } 2578 2579 /* 2580 * pmap_emulate_reference: 2581 * 2582 * Emulate reference and/or modified bit hits. 2583 * Return 1 if this was an execute fault on a non-exec mapping, 2584 * otherwise return 0. 2585 */ 2586 int 2587 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) 2588 { 2589 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2590 pt_entry_t faultoff, *pte; 2591 struct vm_page *pg; 2592 paddr_t pa; 2593 bool didlock = false; 2594 bool exec = false; 2595 long cpu_id = cpu_number(); 2596 kmutex_t *lock; 2597 2598 #ifdef DEBUG 2599 if (pmapdebug & PDB_FOLLOW) 2600 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n", 2601 l, v, user, type); 2602 #endif 2603 2604 /* 2605 * Convert process and virtual address to physical address. 2606 */ 2607 if (v >= VM_MIN_KERNEL_ADDRESS) { 2608 if (user) 2609 panic("pmap_emulate_reference: user ref to kernel"); 2610 /* 2611 * No need to lock here; kernel PT pages never go away. 2612 */ 2613 pte = PMAP_KERNEL_PTE(v); 2614 } else { 2615 #ifdef DIAGNOSTIC 2616 if (l == NULL) 2617 panic("pmap_emulate_reference: bad proc"); 2618 if (l->l_proc->p_vmspace == NULL) 2619 panic("pmap_emulate_reference: bad p_vmspace"); 2620 #endif 2621 PMAP_LOCK(pmap); 2622 didlock = true; 2623 pte = pmap_l3pte(pmap, v, NULL); 2624 /* 2625 * We'll unlock below where we're done with the PTE. 2626 */ 2627 } 2628 exec = pmap_pte_exec(pte); 2629 if (!exec && type == ALPHA_MMCSR_FOE) { 2630 if (didlock) 2631 PMAP_UNLOCK(pmap); 2632 return (1); 2633 } 2634 #ifdef DEBUG 2635 if (pmapdebug & PDB_FOLLOW) { 2636 printf("\tpte = %p, ", pte); 2637 printf("*pte = 0x%lx\n", *pte); 2638 } 2639 #endif 2640 #ifdef DEBUG /* These checks are more expensive */ 2641 if (!pmap_pte_v(pte)) 2642 panic("pmap_emulate_reference: invalid pte"); 2643 if (type == ALPHA_MMCSR_FOW) { 2644 if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE))) 2645 panic("pmap_emulate_reference: write but unwritable"); 2646 if (!(*pte & PG_FOW)) 2647 panic("pmap_emulate_reference: write but not FOW"); 2648 } else { 2649 if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE))) 2650 panic("pmap_emulate_reference: !write but unreadable"); 2651 if (!(*pte & (PG_FOR | PG_FOE))) 2652 panic("pmap_emulate_reference: !write but not FOR|FOE"); 2653 } 2654 /* Other diagnostics? */ 2655 #endif 2656 pa = pmap_pte_pa(pte); 2657 2658 /* 2659 * We're now done with the PTE. If it was a user pmap, unlock 2660 * it now. 2661 */ 2662 if (didlock) 2663 PMAP_UNLOCK(pmap); 2664 2665 #ifdef DEBUG 2666 if (pmapdebug & PDB_FOLLOW) 2667 printf("\tpa = 0x%lx\n", pa); 2668 #endif 2669 #ifdef DIAGNOSTIC 2670 if (!uvm_pageismanaged(pa)) 2671 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): " 2672 "pa 0x%lx not managed", l, v, user, type, pa); 2673 #endif 2674 2675 /* 2676 * Twiddle the appropriate bits to reflect the reference 2677 * and/or modification.. 2678 * 2679 * The rules: 2680 * (1) always mark page as used, and 2681 * (2) if it was a write fault, mark page as modified. 
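 *
 *	(The code below implements those rules as, roughly:
 *
 *		FOW fault:	pvh_attrs |= PGA_REFERENCED|PGA_MODIFIED;
 *				faultoff  = PG_FOR | PG_FOW;
 *		FOR/FOE fault:	pvh_attrs |= PGA_REFERENCED;
 *				faultoff  = PG_FOR (plus PG_FOE if executable);
 *
 *	and then clears the faultoff bits in every mapping of the page via
 *	pmap_changebit(pg, 0, ~faultoff, cpu_id).)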
2682 */ 2683 pg = PHYS_TO_VM_PAGE(pa); 2684 2685 PMAP_HEAD_TO_MAP_LOCK(); 2686 lock = pmap_pvh_lock(pg); 2687 mutex_enter(lock); 2688 2689 if (type == ALPHA_MMCSR_FOW) { 2690 pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 2691 faultoff = PG_FOR | PG_FOW; 2692 } else { 2693 pg->mdpage.pvh_attrs |= PGA_REFERENCED; 2694 faultoff = PG_FOR; 2695 if (exec) { 2696 faultoff |= PG_FOE; 2697 } 2698 } 2699 pmap_changebit(pg, 0, ~faultoff, cpu_id); 2700 2701 mutex_exit(lock); 2702 PMAP_HEAD_TO_MAP_UNLOCK(); 2703 return (0); 2704 } 2705 2706 #ifdef DEBUG 2707 /* 2708 * pmap_pv_dump: 2709 * 2710 * Dump the physical->virtual data for the specified page. 2711 */ 2712 void 2713 pmap_pv_dump(paddr_t pa) 2714 { 2715 struct vm_page *pg; 2716 pv_entry_t pv; 2717 kmutex_t *lock; 2718 2719 pg = PHYS_TO_VM_PAGE(pa); 2720 2721 lock = pmap_pvh_lock(pg); 2722 mutex_enter(lock); 2723 2724 printf("pa 0x%lx (attrs = 0x%x):\n", pa, pg->mdpage.pvh_attrs); 2725 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) 2726 printf(" pmap %p, va 0x%lx\n", 2727 pv->pv_pmap, pv->pv_va); 2728 printf("\n"); 2729 2730 mutex_exit(lock); 2731 } 2732 #endif 2733 2734 /* 2735 * vtophys: 2736 * 2737 * Return the physical address corresponding to the K0SEG or 2738 * K1SEG address provided. 2739 * 2740 * Note: no locking is necessary in this function. 2741 */ 2742 paddr_t 2743 vtophys(vaddr_t vaddr) 2744 { 2745 pt_entry_t *pte; 2746 paddr_t paddr = 0; 2747 2748 if (vaddr < ALPHA_K0SEG_BASE) 2749 printf("vtophys: invalid vaddr 0x%lx", vaddr); 2750 else if (vaddr <= ALPHA_K0SEG_END) 2751 paddr = ALPHA_K0SEG_TO_PHYS(vaddr); 2752 else { 2753 pte = PMAP_KERNEL_PTE(vaddr); 2754 if (pmap_pte_v(pte)) 2755 paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET); 2756 } 2757 2758 #if 0 2759 printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr); 2760 #endif 2761 2762 return (paddr); 2763 } 2764 2765 /******************** pv_entry management ********************/ 2766 2767 /* 2768 * pmap_pv_enter: 2769 * 2770 * Add a physical->virtual entry to the pv_table. 2771 */ 2772 static int 2773 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, 2774 bool dolock) 2775 { 2776 pv_entry_t newpv; 2777 kmutex_t *lock; 2778 2779 /* 2780 * Allocate and fill in the new pv_entry. 2781 */ 2782 newpv = pmap_pv_alloc(); 2783 if (newpv == NULL) 2784 return ENOMEM; 2785 newpv->pv_va = va; 2786 newpv->pv_pmap = pmap; 2787 newpv->pv_pte = pte; 2788 2789 if (dolock) { 2790 lock = pmap_pvh_lock(pg); 2791 mutex_enter(lock); 2792 } 2793 2794 #ifdef DEBUG 2795 { 2796 pv_entry_t pv; 2797 /* 2798 * Make sure the entry doesn't already exist. 2799 */ 2800 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) { 2801 if (pmap == pv->pv_pmap && va == pv->pv_va) { 2802 printf("pmap = %p, va = 0x%lx\n", pmap, va); 2803 panic("pmap_pv_enter: already in pv table"); 2804 } 2805 } 2806 } 2807 #endif 2808 2809 /* 2810 * ...and put it in the list. 2811 */ 2812 newpv->pv_next = pg->mdpage.pvh_list; 2813 pg->mdpage.pvh_list = newpv; 2814 2815 if (dolock) { 2816 mutex_exit(lock); 2817 } 2818 2819 return 0; 2820 } 2821 2822 /* 2823 * pmap_pv_remove: 2824 * 2825 * Remove a physical->virtual entry from the pv_table. 2826 */ 2827 static void 2828 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) 2829 { 2830 pv_entry_t pv, *pvp; 2831 kmutex_t *lock; 2832 2833 if (dolock) { 2834 lock = pmap_pvh_lock(pg); 2835 mutex_enter(lock); 2836 } else { 2837 lock = NULL; /* XXX stupid gcc */ 2838 } 2839 2840 /* 2841 * Find the entry to remove. 
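 *
 *	(The loop below uses the usual indirect-pointer unlink idiom:
 *	pvp always points at the link that refers to pv, so removal is
 *	just
 *
 *		*pvp = pv->pv_next;
 *
 *	with no special case for the head of the list.)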
2842 */ 2843 for (pvp = &pg->mdpage.pvh_list, pv = *pvp; 2844 pv != NULL; pvp = &pv->pv_next, pv = *pvp) 2845 if (pmap == pv->pv_pmap && va == pv->pv_va) 2846 break; 2847 2848 #ifdef DEBUG 2849 if (pv == NULL) 2850 panic("pmap_pv_remove: not in pv table"); 2851 #endif 2852 2853 *pvp = pv->pv_next; 2854 2855 if (dolock) { 2856 mutex_exit(lock); 2857 } 2858 2859 pmap_pv_free(pv); 2860 } 2861 2862 /* 2863 * pmap_pv_page_alloc: 2864 * 2865 * Allocate a page for the pv_entry pool. 2866 */ 2867 static void * 2868 pmap_pv_page_alloc(struct pool *pp, int flags) 2869 { 2870 paddr_t pg; 2871 2872 if (pmap_physpage_alloc(PGU_PVENT, &pg)) 2873 return ((void *)ALPHA_PHYS_TO_K0SEG(pg)); 2874 return (NULL); 2875 } 2876 2877 /* 2878 * pmap_pv_page_free: 2879 * 2880 * Free a pv_entry pool page. 2881 */ 2882 static void 2883 pmap_pv_page_free(struct pool *pp, void *v) 2884 { 2885 2886 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v)); 2887 } 2888 2889 /******************** misc. functions ********************/ 2890 2891 /* 2892 * pmap_physpage_alloc: 2893 * 2894 * Allocate a single page from the VM system and return the 2895 * physical address for that page. 2896 */ 2897 static bool 2898 pmap_physpage_alloc(int usage, paddr_t *pap) 2899 { 2900 struct vm_page *pg; 2901 paddr_t pa; 2902 2903 /* 2904 * Don't ask for a zero'd page in the L1PT case -- we will 2905 * properly initialize it in the constructor. 2906 */ 2907 2908 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ? 2909 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2910 if (pg != NULL) { 2911 pa = VM_PAGE_TO_PHYS(pg); 2912 #ifdef DEBUG 2913 if (pg->mdpage.pvh_refcnt != 0) { 2914 printf("pmap_physpage_alloc: page 0x%lx has " 2915 "%d references\n", pa, pg->mdpage.pvh_refcnt); 2916 panic("pmap_physpage_alloc"); 2917 } 2918 #endif 2919 *pap = pa; 2920 return (true); 2921 } 2922 return (false); 2923 } 2924 2925 /* 2926 * pmap_physpage_free: 2927 * 2928 * Free the single page table page at the specified physical address. 2929 */ 2930 static void 2931 pmap_physpage_free(paddr_t pa) 2932 { 2933 struct vm_page *pg; 2934 2935 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) 2936 panic("pmap_physpage_free: bogus physical page address"); 2937 2938 #ifdef DEBUG 2939 if (pg->mdpage.pvh_refcnt != 0) 2940 panic("pmap_physpage_free: page still has references"); 2941 #endif 2942 2943 uvm_pagefree(pg); 2944 } 2945 2946 /* 2947 * pmap_physpage_addref: 2948 * 2949 * Add a reference to the specified special use page. 2950 */ 2951 static int 2952 pmap_physpage_addref(void *kva) 2953 { 2954 struct vm_page *pg; 2955 paddr_t pa; 2956 2957 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2958 pg = PHYS_TO_VM_PAGE(pa); 2959 2960 KASSERT((int)pg->mdpage.pvh_refcnt >= 0); 2961 2962 return atomic_inc_uint_nv(&pg->mdpage.pvh_refcnt); 2963 } 2964 2965 /* 2966 * pmap_physpage_delref: 2967 * 2968 * Delete a reference to the specified special use page. 2969 */ 2970 static int 2971 pmap_physpage_delref(void *kva) 2972 { 2973 struct vm_page *pg; 2974 paddr_t pa; 2975 2976 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2977 pg = PHYS_TO_VM_PAGE(pa); 2978 2979 KASSERT((int)pg->mdpage.pvh_refcnt > 0); 2980 2981 return atomic_dec_uint_nv(&pg->mdpage.pvh_refcnt); 2982 } 2983 2984 /******************** page table page management ********************/ 2985 2986 /* 2987 * pmap_growkernel: [ INTERFACE ] 2988 * 2989 * Grow the kernel address space. This is a hint from the 2990 * upper layer to pre-allocate more kernel PT pages. 
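 *
 *	(Illustrative note on the contract, an assumption about the MI
 *	caller rather than something stated here: uvm_map's kernel-entry
 *	path typically does, roughly,
 *
 *		uvm_maxkaddr = pmap_growkernel(new_end);
 *
 *	i.e. the return value is the kernel VA limit actually reached.)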
2991 */ 2992 vaddr_t 2993 pmap_growkernel(vaddr_t maxkvaddr) 2994 { 2995 struct pmap *kpm = pmap_kernel(), *pm; 2996 paddr_t ptaddr; 2997 pt_entry_t *l1pte, *l2pte, pte; 2998 vaddr_t va; 2999 int l1idx; 3000 3001 rw_enter(&pmap_growkernel_lock, RW_WRITER); 3002 3003 if (maxkvaddr <= virtual_end) 3004 goto out; /* we are OK */ 3005 3006 va = virtual_end; 3007 3008 while (va < maxkvaddr) { 3009 /* 3010 * If there is no valid L1 PTE (i.e. no L2 PT page), 3011 * allocate a new L2 PT page and insert it into the 3012 * L1 map. 3013 */ 3014 l1pte = pmap_l1pte(kpm, va); 3015 if (pmap_pte_v(l1pte) == 0) { 3016 /* 3017 * XXX PGU_NORMAL? It's not a "traditional" PT page. 3018 */ 3019 if (uvm.page_init_done == false) { 3020 /* 3021 * We're growing the kernel pmap early (from 3022 * uvm_pageboot_alloc()). This case must 3023 * be handled a little differently. 3024 */ 3025 ptaddr = ALPHA_K0SEG_TO_PHYS( 3026 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3027 } else if (pmap_physpage_alloc(PGU_NORMAL, 3028 &ptaddr) == false) 3029 goto die; 3030 pte = (atop(ptaddr) << PG_SHIFT) | 3031 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3032 *l1pte = pte; 3033 3034 l1idx = l1pte_index(va); 3035 3036 /* Update all the user pmaps. */ 3037 mutex_enter(&pmap_all_pmaps_lock); 3038 for (pm = TAILQ_FIRST(&pmap_all_pmaps); 3039 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { 3040 /* Skip the kernel pmap. */ 3041 if (pm == pmap_kernel()) 3042 continue; 3043 3044 PMAP_LOCK(pm); 3045 if (pm->pm_lev1map == kernel_lev1map) { 3046 PMAP_UNLOCK(pm); 3047 continue; 3048 } 3049 pm->pm_lev1map[l1idx] = pte; 3050 PMAP_UNLOCK(pm); 3051 } 3052 mutex_exit(&pmap_all_pmaps_lock); 3053 } 3054 3055 /* 3056 * Have an L2 PT page now, add the L3 PT page. 3057 */ 3058 l2pte = pmap_l2pte(kpm, va, l1pte); 3059 KASSERT(pmap_pte_v(l2pte) == 0); 3060 if (uvm.page_init_done == false) { 3061 /* 3062 * See above. 3063 */ 3064 ptaddr = ALPHA_K0SEG_TO_PHYS( 3065 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3066 } else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false) 3067 goto die; 3068 *l2pte = (atop(ptaddr) << PG_SHIFT) | 3069 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3070 va += ALPHA_L2SEG_SIZE; 3071 } 3072 3073 /* Invalidate the L1 PT cache. */ 3074 pool_cache_invalidate(&pmap_l1pt_cache); 3075 3076 virtual_end = va; 3077 3078 out: 3079 rw_exit(&pmap_growkernel_lock); 3080 3081 return (virtual_end); 3082 3083 die: 3084 panic("pmap_growkernel: out of memory"); 3085 } 3086 3087 /* 3088 * pmap_lev1map_create: 3089 * 3090 * Create a new level 1 page table for the specified pmap. 3091 * 3092 * Note: growkernel must already be held and the pmap either 3093 * already locked or unreferenced globally. 3094 */ 3095 static int 3096 pmap_lev1map_create(pmap_t pmap, long cpu_id) 3097 { 3098 pt_entry_t *l1pt; 3099 3100 KASSERT(pmap != pmap_kernel()); 3101 3102 KASSERT(pmap->pm_lev1map == kernel_lev1map); 3103 KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED); 3104 3105 /* Don't sleep -- we're called with locks held. */ 3106 l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT); 3107 if (l1pt == NULL) 3108 return (ENOMEM); 3109 3110 pmap->pm_lev1map = l1pt; 3111 return (0); 3112 } 3113 3114 /* 3115 * pmap_lev1map_destroy: 3116 * 3117 * Destroy the level 1 page table for the specified pmap. 3118 * 3119 * Note: growkernel must be held and the pmap must already be 3120 * locked or not globally referenced. 
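 *
 *	(Related detail, restated from pmap_growkernel() and
 *	pmap_l1pt_ctor() rather than anything new: user level 1 tables
 *	mirror the kernel portion of kernel_lev1map, and pmap_growkernel()
 *	keeps them current by storing each new kernel L1 PTE into every
 *	user map:
 *
 *		pm->pm_lev1map[l1idx] = pte;
 *	)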
 */
static void
pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
{
	pt_entry_t *l1pt = pmap->pm_lev1map;

	KASSERT(pmap != pmap_kernel());

	/*
	 * Go back to referencing the global kernel_lev1map.
	 */
	pmap->pm_lev1map = kernel_lev1map;

	/*
	 * Free the old level 1 page table page.
	 */
	pool_cache_put(&pmap_l1pt_cache, l1pt);
}

/*
 * pmap_l1pt_ctor:
 *
 *	Pool cache constructor for L1 PT pages.
 *
 *	Note: The growkernel lock is held across allocations
 *	from our pool_cache, so we don't need to acquire it
 *	ourselves.
 */
static int
pmap_l1pt_ctor(void *arg, void *object, int flags)
{
	pt_entry_t *l1pt = object, pte;
	int i;

	/*
	 * Initialize the new level 1 table by zeroing the
	 * user portion and copying the kernel mappings into
	 * the kernel portion.
	 */
	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
		l1pt[i] = 0;

	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
		l1pt[i] = kernel_lev1map[i];

	/*
	 * Now, map the new virtual page table.  NOTE: NO ASM!
	 */
	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
	    PG_V | PG_KRE | PG_KWE;
	l1pt[l1pte_index(VPTBASE)] = pte;

	return (0);
}

/*
 * pmap_l1pt_alloc:
 *
 *	Page allocator for L1 PT pages.
 */
static void *
pmap_l1pt_alloc(struct pool *pp, int flags)
{
	paddr_t ptpa;

	/*
	 * Attempt to allocate a free page.
	 */
	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
		return (NULL);

	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
}

/*
 * pmap_l1pt_free:
 *
 *	Page freer for L1 PT pages.
 */
static void
pmap_l1pt_free(struct pool *pp, void *v)
{

	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
}

/*
 * pmap_ptpage_alloc:
 *
 *	Allocate a level 2 or level 3 page table page, and
 *	initialize the PTE that references it.
 *
 *	Note: the pmap must already be locked.
 */
static int
pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
{
	paddr_t ptpa;

	/*
	 * Allocate the page table page.
	 */
	if (pmap_physpage_alloc(usage, &ptpa) == false)
		return (ENOMEM);

	/*
	 * Initialize the referencing PTE.
	 */
	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
	    (pmap == pmap_kernel() ? PG_ASM : 0));

	return (0);
}

/*
 * pmap_ptpage_free:
 *
 *	Free the level 2 or level 3 page table page referenced
 *	by the provided PTE.
 *
 *	Note: the pmap must already be locked.
 */
static void
pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
{
	paddr_t ptpa;

	/*
	 * Extract the physical address of the page from the PTE
	 * and clear the entry.
	 */
	ptpa = pmap_pte_pa(pte);
	PMAP_SET_PTE(pte, PG_NV);

#ifdef DEBUG
	pmap_zero_page(ptpa);
#endif
	pmap_physpage_free(ptpa);
}

/*
 * pmap_l3pt_delref:
 *
 *	Delete a reference on a level 3 PT page.  If the reference drops
 *	to zero, free it.
 *
 *	Note: the pmap must already be locked.
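 *
 *	(Reference-counting summary, for illustration: each PT page's
 *	vm_page carries pvh_refcnt, bumped by pmap_physpage_addref() and
 *	dropped here; tables are torn down bottom-up,
 *
 *		pmap_l3pt_delref() -> pmap_l2pt_delref() -> pmap_l1pt_delref()
 *
 *	freeing each level's page when its count reaches zero.)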
3270 */ 3271 static void 3272 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id) 3273 { 3274 pt_entry_t *l1pte, *l2pte; 3275 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 3276 3277 l1pte = pmap_l1pte(pmap, va); 3278 l2pte = pmap_l2pte(pmap, va, l1pte); 3279 3280 #ifdef DIAGNOSTIC 3281 if (pmap == pmap_kernel()) 3282 panic("pmap_l3pt_delref: kernel pmap"); 3283 #endif 3284 3285 if (pmap_physpage_delref(l3pte) == 0) { 3286 /* 3287 * No more mappings; we can free the level 3 table. 3288 */ 3289 #ifdef DEBUG 3290 if (pmapdebug & PDB_PTPAGE) 3291 printf("pmap_l3pt_delref: freeing level 3 table at " 3292 "0x%lx\n", pmap_pte_pa(l2pte)); 3293 #endif 3294 pmap_ptpage_free(pmap, l2pte); 3295 3296 /* 3297 * We've freed a level 3 table, so we must 3298 * invalidate the TLB entry for that PT page 3299 * in the Virtual Page Table VA range, because 3300 * otherwise the PALcode will service a TLB 3301 * miss using the stale VPT TLB entry it entered 3302 * behind our back to shortcut to the VA's PTE. 3303 */ 3304 PMAP_INVALIDATE_TLB(pmap, 3305 (vaddr_t)(&VPT[VPT_INDEX(va)]), false, 3306 PMAP_ISACTIVE(pmap, cpu_id), cpu_id); 3307 PMAP_TLB_SHOOTDOWN(pmap, 3308 (vaddr_t)(&VPT[VPT_INDEX(va)]), 0); 3309 PMAP_TLB_SHOOTNOW(); 3310 3311 /* 3312 * We've freed a level 3 table, so delete the reference 3313 * on the level 2 table. 3314 */ 3315 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id); 3316 } 3317 } 3318 3319 /* 3320 * pmap_l2pt_delref: 3321 * 3322 * Delete a reference on a level 2 PT page. If the reference drops 3323 * to zero, free it. 3324 * 3325 * Note: the pmap must already be locked. 3326 */ 3327 static void 3328 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte, 3329 long cpu_id) 3330 { 3331 3332 #ifdef DIAGNOSTIC 3333 if (pmap == pmap_kernel()) 3334 panic("pmap_l2pt_delref: kernel pmap"); 3335 #endif 3336 3337 if (pmap_physpage_delref(l2pte) == 0) { 3338 /* 3339 * No more mappings in this segment; we can free the 3340 * level 2 table. 3341 */ 3342 #ifdef DEBUG 3343 if (pmapdebug & PDB_PTPAGE) 3344 printf("pmap_l2pt_delref: freeing level 2 table at " 3345 "0x%lx\n", pmap_pte_pa(l1pte)); 3346 #endif 3347 pmap_ptpage_free(pmap, l1pte); 3348 3349 /* 3350 * We've freed a level 2 table, so delete the reference 3351 * on the level 1 table. 3352 */ 3353 pmap_l1pt_delref(pmap, l1pte, cpu_id); 3354 } 3355 } 3356 3357 /* 3358 * pmap_l1pt_delref: 3359 * 3360 * Delete a reference on a level 1 PT page. If the reference drops 3361 * to zero, free it. 3362 * 3363 * Note: the pmap must already be locked. 3364 */ 3365 static void 3366 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id) 3367 { 3368 3369 #ifdef DIAGNOSTIC 3370 if (pmap == pmap_kernel()) 3371 panic("pmap_l1pt_delref: kernel pmap"); 3372 #endif 3373 3374 (void)pmap_physpage_delref(l1pte); 3375 } 3376 3377 /******************** Address Space Number management ********************/ 3378 3379 /* 3380 * pmap_asn_alloc: 3381 * 3382 * Allocate and assign an ASN to the specified pmap. 3383 * 3384 * Note: the pmap must already be locked. This may be called from 3385 * an interprocessor interrupt, and in that case, the sender of 3386 * the IPI has the pmap lock. 
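 *
 *	(ASN bookkeeping in a nutshell, restating the logic below: an ASN
 *	stays valid only while its generation matches the per-CPU
 *	generation, so a stale (pma_asn, pma_asngen) pair simply fails the
 *
 *		pma->pma_asngen == cpma->pma_asngen
 *
 *	test and a fresh ASN is handed out; when the CPU exhausts its ASNs
 *	it flushes the TLB and bumps the generation.)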
 */
static void
pmap_asn_alloc(pmap_t pmap, long cpu_id)
{
	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
		printf("pmap_asn_alloc(%p)\n", pmap);
#endif

	/*
	 * If the pmap is still using the global kernel_lev1map, there
	 * is no need to assign an ASN at this time, because only
	 * kernel mappings exist in that map, and all kernel mappings
	 * have PG_ASM set.  If the pmap eventually gets its own
	 * lev1map, an ASN will be allocated at that time.
	 *
	 * Only the kernel pmap will reference kernel_lev1map.  Do the
	 * same old fixups, but note that we no longer need the pmap
	 * to be locked if we're in this mode, since pm_lev1map will
	 * never change.
	 */
	if (pmap->pm_lev1map == kernel_lev1map) {
#ifdef DEBUG
		if (pmapdebug & PDB_ASN)
			printf("pmap_asn_alloc: still references "
			    "kernel_lev1map\n");
#endif
#if defined(MULTIPROCESSOR)
		/*
		 * In a multiprocessor system, it's possible to
		 * get here without having PMAP_ASN_RESERVED in
		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
		 *
		 * So, what we do here, is simply assign the reserved
		 * ASN for kernel_lev1map users and let things
		 * continue on.  We do, however, let uniprocessor
		 * configurations continue to make their assertion.
		 */
		pma->pma_asn = PMAP_ASN_RESERVED;
#else
		KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
#endif /* MULTIPROCESSOR */
		return;
	}

	/*
	 * On processors which do not implement ASNs, the swpctx PALcode
	 * operation will automatically invalidate the TLB and I-cache,
	 * so we don't need to do that here.
	 */
	if (pmap_max_asn == 0) {
		/*
		 * Refresh the pmap's generation number, to
		 * simplify logic elsewhere.
		 */
		pma->pma_asngen = cpma->pma_asngen;
#ifdef DEBUG
		if (pmapdebug & PDB_ASN)
			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
			    pma->pma_asngen);
#endif
		return;
	}

	/*
	 * Hopefully, we can continue using the one we have...
	 */
	if (pma->pma_asn != PMAP_ASN_RESERVED &&
	    pma->pma_asngen == cpma->pma_asngen) {
		/*
		 * ASN is still in the current generation; keep on using it.
		 */
#ifdef DEBUG
		if (pmapdebug & PDB_ASN)
			printf("pmap_asn_alloc: same generation, keeping %u\n",
			    pma->pma_asn);
#endif
		return;
	}

	/*
	 * Need to assign a new ASN.  Grab the next one, incrementing
	 * the generation number if we have to.
	 */
	if (cpma->pma_asn > pmap_max_asn) {
		/*
		 * Invalidate all non-PG_ASM TLB entries and the
		 * I-cache, and bump the generation number.
		 */
		ALPHA_TBIAP();
		alpha_pal_imb();

		cpma->pma_asn = 1;
		cpma->pma_asngen++;
#ifdef DIAGNOSTIC
		if (cpma->pma_asngen == 0) {
			/*
			 * The generation number has wrapped.  We could
			 * handle this scenario by traversing all of
			 * the pmaps, and invalidating the generation
			 * number on those which are not currently
			 * in use by this processor.
			 *
			 * However... considering that we're using
			 * an unsigned 64-bit integer for generation
			 * numbers, on non-ASN CPUs, we won't wrap
			 * for approx.
585 million years, or 75 billion 3498 * years on a 128-ASN CPU (assuming 1000 switch 3499 * operations per second). 3500 * 3501 * So, we don't bother. 3502 */ 3503 panic("pmap_asn_alloc: too much uptime"); 3504 } 3505 #endif 3506 #ifdef DEBUG 3507 if (pmapdebug & PDB_ASN) 3508 printf("pmap_asn_alloc: generation bumped to %lu\n", 3509 cpma->pma_asngen); 3510 #endif 3511 } 3512 3513 /* 3514 * Assign the new ASN and validate the generation number. 3515 */ 3516 pma->pma_asn = cpma->pma_asn++; 3517 pma->pma_asngen = cpma->pma_asngen; 3518 3519 #ifdef DEBUG 3520 if (pmapdebug & PDB_ASN) 3521 printf("pmap_asn_alloc: assigning %u to pmap %p\n", 3522 pma->pma_asn, pmap); 3523 #endif 3524 3525 /* 3526 * Have a new ASN, so there's no need to sync the I-stream 3527 * on the way back out to userspace. 3528 */ 3529 atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id)); 3530 } 3531 3532 #if defined(MULTIPROCESSOR) 3533 /******************** TLB shootdown code ********************/ 3534 3535 /* 3536 * pmap_tlb_shootdown: 3537 * 3538 * Cause the TLB entry for pmap/va to be shot down. 3539 * 3540 * NOTE: The pmap must be locked here. 3541 */ 3542 void 3543 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) 3544 { 3545 struct pmap_tlb_shootdown_q *pq; 3546 struct pmap_tlb_shootdown_job *pj; 3547 struct cpu_info *ci, *self = curcpu(); 3548 u_long cpumask; 3549 CPU_INFO_ITERATOR cii; 3550 3551 KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock)); 3552 3553 cpumask = 0; 3554 3555 for (CPU_INFO_FOREACH(cii, ci)) { 3556 if (ci == self) 3557 continue; 3558 3559 /* 3560 * The pmap must be locked (unless its the kernel 3561 * pmap, in which case it is okay for it to be 3562 * unlocked), which prevents it from becoming 3563 * active on any additional processors. This makes 3564 * it safe to check for activeness. If it's not 3565 * active on the processor in question, then just 3566 * mark it as needing a new ASN the next time it 3567 * does, saving the IPI. We always have to send 3568 * the IPI for the kernel pmap. 3569 * 3570 * Note if it's marked active now, and it becomes 3571 * inactive by the time the processor receives 3572 * the IPI, that's okay, because it does the right 3573 * thing with it later. 3574 */ 3575 if (pmap != pmap_kernel() && 3576 PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) { 3577 PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid); 3578 continue; 3579 } 3580 3581 cpumask |= 1UL << ci->ci_cpuid; 3582 3583 pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; 3584 mutex_spin_enter(&pq->pq_lock); 3585 3586 /* 3587 * Allocate a job. 3588 */ 3589 if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) { 3590 pj = pool_cache_get(&pmap_tlb_shootdown_job_cache, 3591 PR_NOWAIT); 3592 } else { 3593 pj = NULL; 3594 } 3595 3596 /* 3597 * If a global flush is already pending, we 3598 * don't really have to do anything else. 3599 */ 3600 pq->pq_pte |= pte; 3601 if (pq->pq_tbia) { 3602 mutex_spin_exit(&pq->pq_lock); 3603 if (pj != NULL) { 3604 pool_cache_put(&pmap_tlb_shootdown_job_cache, 3605 pj); 3606 } 3607 continue; 3608 } 3609 if (pj == NULL) { 3610 /* 3611 * Couldn't allocate a job entry. Just 3612 * tell the processor to kill everything. 
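 *
 *	(Degradation strategy, restated: with pq_tbia set, the target CPU's
 *	pmap_do_tlb_shootdown() falls back to
 *
 *		(pq->pq_pte & PG_ASM) ? ALPHA_TBIA() : ALPHA_TBIAP();
 *
 *	i.e. a full or per-ASN TLB flush instead of per-VA invalidates.)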
3613 */ 3614 pq->pq_tbia = 1; 3615 } else { 3616 pj->pj_pmap = pmap; 3617 pj->pj_va = va; 3618 pj->pj_pte = pte; 3619 pq->pq_count++; 3620 TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list); 3621 } 3622 mutex_spin_exit(&pq->pq_lock); 3623 } 3624 3625 *cpumaskp |= cpumask; 3626 } 3627 3628 /* 3629 * pmap_tlb_shootnow: 3630 * 3631 * Process the TLB shootdowns that we have been accumulating 3632 * for the specified processor set. 3633 */ 3634 void 3635 pmap_tlb_shootnow(u_long cpumask) 3636 { 3637 3638 alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN); 3639 } 3640 3641 /* 3642 * pmap_do_tlb_shootdown: 3643 * 3644 * Process pending TLB shootdown operations for this processor. 3645 */ 3646 void 3647 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) 3648 { 3649 u_long cpu_id = ci->ci_cpuid; 3650 u_long cpu_mask = (1UL << cpu_id); 3651 struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; 3652 struct pmap_tlb_shootdown_job *pj, *next; 3653 TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs; 3654 3655 TAILQ_INIT(&jobs); 3656 3657 mutex_spin_enter(&pq->pq_lock); 3658 TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list); 3659 if (pq->pq_tbia) { 3660 if (pq->pq_pte & PG_ASM) 3661 ALPHA_TBIA(); 3662 else 3663 ALPHA_TBIAP(); 3664 pq->pq_tbia = 0; 3665 pq->pq_pte = 0; 3666 } else { 3667 TAILQ_FOREACH(pj, &jobs, pj_list) { 3668 PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va, 3669 pj->pj_pte & PG_ASM, 3670 pj->pj_pmap->pm_cpus & cpu_mask, cpu_id); 3671 } 3672 pq->pq_pte = 0; 3673 } 3674 pq->pq_count = 0; 3675 mutex_spin_exit(&pq->pq_lock); 3676 3677 /* Free jobs back to the cache. */ 3678 for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) { 3679 next = TAILQ_NEXT(pj, pj_list); 3680 pool_cache_put(&pmap_tlb_shootdown_job_cache, pj); 3681 } 3682 } 3683 #endif /* MULTIPROCESSOR */ 3684
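/*
 * Illustrative end-to-end sketch of the MULTIPROCESSOR shootdown path,
 * assembled from the three functions above; the cpumask plumbing shown
 * explicitly here is normally hidden behind the PMAP_TLB_SHOOTDOWN and
 * PMAP_TLB_SHOOTNOW macros used throughout this file:
 *
 *	u_long cpumask = 0;
 *
 *	pmap_tlb_shootdown(pmap, va, pte, &cpumask);	queue per-CPU jobs
 *	pmap_tlb_shootnow(cpumask);			send ALPHA_IPI_SHOOTDOWN
 *
 * and each target CPU then runs pmap_do_tlb_shootdown() from its IPI
 * handler to drain the queued invalidations.
 */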