1 /* $NetBSD: uvm_page.c,v 1.253 2023/07/17 12:55:37 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1997 Charles D. Cranor and Washington University. 34 * Copyright (c) 1991, 1993, The Regents of the University of California. 35 * 36 * All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * The Mach Operating System project at Carnegie-Mellon University. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 66 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp 67 * 68 * 69 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 70 * All rights reserved. 71 * 72 * Permission to use, copy, modify and distribute this software and 73 * its documentation is hereby granted, provided that both the copyright 74 * notice and this permission notice appear in all copies of the 75 * software, derivative works or modified versions, and any portions 76 * thereof, and that both notices appear in supporting documentation. 77 * 78 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 79 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 80 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 81 * 82 * Carnegie Mellon requests users of this software to return to 83 * 84 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 85 * School of Computer Science 86 * Carnegie Mellon University 87 * Pittsburgh PA 15213-3890 88 * 89 * any improvements or extensions that they make and grant Carnegie the 90 * rights to redistribute these changes. 91 */ 92 93 /* 94 * uvm_page.c: page ops. 95 */ 96 97 #include <sys/cdefs.h> 98 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.253 2023/07/17 12:55:37 riastradh Exp $"); 99 100 #include "opt_ddb.h" 101 #include "opt_uvm.h" 102 #include "opt_uvmhist.h" 103 #include "opt_readahead.h" 104 105 #include <sys/param.h> 106 #include <sys/systm.h> 107 #include <sys/sched.h> 108 #include <sys/kernel.h> 109 #include <sys/vnode.h> 110 #include <sys/proc.h> 111 #include <sys/radixtree.h> 112 #include <sys/atomic.h> 113 #include <sys/cpu.h> 114 115 #include <ddb/db_active.h> 116 117 #include <uvm/uvm.h> 118 #include <uvm/uvm_ddb.h> 119 #include <uvm/uvm_pdpolicy.h> 120 #include <uvm/uvm_pgflcache.h> 121 122 /* 123 * number of pages per-CPU to reserve for the kernel. 124 */ 125 #ifndef UVM_RESERVED_PAGES_PER_CPU 126 #define UVM_RESERVED_PAGES_PER_CPU 5 127 #endif 128 int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; 129 130 /* 131 * physical memory size; 132 */ 133 psize_t physmem; 134 135 /* 136 * local variables 137 */ 138 139 /* 140 * these variables record the values returned by vm_page_bootstrap, 141 * for debugging purposes. The implementation of uvm_pageboot_alloc 142 * and pmap_startup here also uses them internally. 143 */ 144 145 static vaddr_t virtual_space_start; 146 static vaddr_t virtual_space_end; 147 148 /* 149 * we allocate an initial number of page colors in uvm_page_init(), 150 * and remember them. We may re-color pages as cache sizes are 151 * discovered during the autoconfiguration phase. But we can never 152 * free the initial set of buckets, since they are allocated using 153 * uvm_pageboot_alloc(). 154 */ 155 156 static size_t recolored_pages_memsize /* = 0 */; 157 static char *recolored_pages_mem; 158 159 /* 160 * freelist locks - one per bucket. 161 */ 162 163 union uvm_freelist_lock uvm_freelist_locks[PGFL_MAX_BUCKETS] 164 __cacheline_aligned; 165 166 /* 167 * basic NUMA information. 168 */ 169 170 static struct uvm_page_numa_region { 171 struct uvm_page_numa_region *next; 172 paddr_t start; 173 paddr_t size; 174 u_int numa_id; 175 } *uvm_page_numa_region; 176 177 #ifdef DEBUG 178 kmutex_t uvm_zerochecklock __cacheline_aligned; 179 vaddr_t uvm_zerocheckkva; 180 #endif /* DEBUG */ 181 182 /* 183 * These functions are reserved for uvm(9) internal use and are not 184 * exported in the header file uvm_physseg.h 185 * 186 * Thus they are redefined here. 
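 *
 * For orientation (a descriptive note, not a new interface):
 * uvm_page_init() below hands the freshly allocated vm_page array to
 * the physseg layer once with uvm_physseg_seg_chomp_slab(), then for
 * each segment carves out that segment's share with
 * uvm_physseg_seg_alloc_from_slab() and fills it in with
 * uvm_physseg_init_seg().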
187 */ 188 void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *); 189 void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t); 190 191 /* returns a pgs array */ 192 struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t); 193 194 /* 195 * inline functions 196 */ 197 198 /* 199 * uvm_pageinsert: insert a page in the object. 200 * 201 * => caller must lock object 202 * => call should have already set pg's object and offset pointers 203 * and bumped the version counter 204 */ 205 206 static inline void 207 uvm_pageinsert_object(struct uvm_object *uobj, struct vm_page *pg) 208 { 209 210 KASSERT(uobj == pg->uobject); 211 KASSERT(rw_write_held(uobj->vmobjlock)); 212 KASSERT((pg->flags & PG_TABLED) == 0); 213 214 if ((pg->flags & PG_STAT) != 0) { 215 /* Cannot use uvm_pagegetdirty(): not yet in radix tree. */ 216 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 217 218 if ((pg->flags & PG_FILE) != 0) { 219 if (uobj->uo_npages == 0) { 220 struct vnode *vp = (struct vnode *)uobj; 221 mutex_enter(vp->v_interlock); 222 KASSERT((vp->v_iflag & VI_PAGES) == 0); 223 vp->v_iflag |= VI_PAGES; 224 vholdl(vp); 225 mutex_exit(vp->v_interlock); 226 } 227 if (UVM_OBJ_IS_VTEXT(uobj)) { 228 cpu_count(CPU_COUNT_EXECPAGES, 1); 229 } 230 cpu_count(CPU_COUNT_FILEUNKNOWN + status, 1); 231 } else { 232 cpu_count(CPU_COUNT_ANONUNKNOWN + status, 1); 233 } 234 } 235 pg->flags |= PG_TABLED; 236 uobj->uo_npages++; 237 } 238 239 static inline int 240 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) 241 { 242 const uint64_t idx = pg->offset >> PAGE_SHIFT; 243 int error; 244 245 KASSERT(rw_write_held(uobj->vmobjlock)); 246 247 error = radix_tree_insert_node(&uobj->uo_pages, idx, pg); 248 if (error != 0) { 249 return error; 250 } 251 if ((pg->flags & PG_CLEAN) == 0) { 252 uvm_obj_page_set_dirty(pg); 253 } 254 KASSERT(((pg->flags & PG_CLEAN) == 0) == 255 uvm_obj_page_dirty_p(pg)); 256 return 0; 257 } 258 259 /* 260 * uvm_page_remove: remove page from object. 261 * 262 * => caller must lock object 263 */ 264 265 static inline void 266 uvm_pageremove_object(struct uvm_object *uobj, struct vm_page *pg) 267 { 268 269 KASSERT(uobj == pg->uobject); 270 KASSERT(rw_write_held(uobj->vmobjlock)); 271 KASSERT(pg->flags & PG_TABLED); 272 273 if ((pg->flags & PG_STAT) != 0) { 274 /* Cannot use uvm_pagegetdirty(): no longer in radix tree. 
*/ 275 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 276 277 if ((pg->flags & PG_FILE) != 0) { 278 if (uobj->uo_npages == 1) { 279 struct vnode *vp = (struct vnode *)uobj; 280 mutex_enter(vp->v_interlock); 281 KASSERT((vp->v_iflag & VI_PAGES) != 0); 282 vp->v_iflag &= ~VI_PAGES; 283 holdrelel(vp); 284 mutex_exit(vp->v_interlock); 285 } 286 if (UVM_OBJ_IS_VTEXT(uobj)) { 287 cpu_count(CPU_COUNT_EXECPAGES, -1); 288 } 289 cpu_count(CPU_COUNT_FILEUNKNOWN + status, -1); 290 } else { 291 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 292 } 293 } 294 uobj->uo_npages--; 295 pg->flags &= ~PG_TABLED; 296 pg->uobject = NULL; 297 } 298 299 static inline void 300 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg) 301 { 302 struct vm_page *opg __unused; 303 304 KASSERT(rw_write_held(uobj->vmobjlock)); 305 306 opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT); 307 KASSERT(pg == opg); 308 } 309 310 static void 311 uvm_page_init_bucket(struct pgfreelist *pgfl, struct pgflbucket *pgb, int num) 312 { 313 int i; 314 315 pgb->pgb_nfree = 0; 316 for (i = 0; i < uvmexp.ncolors; i++) { 317 LIST_INIT(&pgb->pgb_colors[i]); 318 } 319 pgfl->pgfl_buckets[num] = pgb; 320 } 321 322 /* 323 * uvm_page_init: init the page system. called from uvm_init(). 324 * 325 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 326 */ 327 328 void 329 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) 330 { 331 static struct uvm_cpu boot_cpu __cacheline_aligned; 332 psize_t freepages, pagecount, bucketsize, n; 333 struct pgflbucket *pgb; 334 struct vm_page *pagearray; 335 char *bucketarray; 336 uvm_physseg_t bank; 337 int fl, b; 338 339 KASSERT(ncpu <= 1); 340 341 /* 342 * init the page queues and free page queue locks, except the 343 * free list; we allocate that later (with the initial vm_page 344 * structures). 345 */ 346 347 curcpu()->ci_data.cpu_uvm = &boot_cpu; 348 uvmpdpol_init(); 349 for (b = 0; b < __arraycount(uvm_freelist_locks); b++) { 350 mutex_init(&uvm_freelist_locks[b].lock, MUTEX_DEFAULT, IPL_VM); 351 } 352 353 /* 354 * allocate vm_page structures. 355 */ 356 357 /* 358 * sanity check: 359 * before calling this function the MD code is expected to register 360 * some free RAM with the uvm_page_physload() function. our job 361 * now is to allocate vm_page structures for this memory. 362 */ 363 364 if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID) 365 panic("uvm_page_bootstrap: no memory pre-allocated"); 366 367 /* 368 * first calculate the number of free pages... 369 * 370 * note that we use start/end rather than avail_start/avail_end. 371 * this allows us to allocate extra vm_page structures in case we 372 * want to return some memory to the pool after booting. 373 */ 374 375 freepages = 0; 376 377 for (bank = uvm_physseg_get_first(); 378 uvm_physseg_valid_p(bank) ; 379 bank = uvm_physseg_get_next(bank)) { 380 freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank)); 381 } 382 383 /* 384 * Let MD code initialize the number of colors, or default 385 * to 1 color if MD code doesn't care. 386 */ 387 if (uvmexp.ncolors == 0) 388 uvmexp.ncolors = 1; 389 uvmexp.colormask = uvmexp.ncolors - 1; 390 KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); 391 392 /* We always start with only 1 bucket. */ 393 uvm.bucketcount = 1; 394 395 /* 396 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 397 * use. for each page of memory we use we need a vm_page structure. 
398 * thus, the total number of pages we can use is the total size of 399 * the memory divided by the PAGE_SIZE plus the size of the vm_page 400 * structure. we add one to freepages as a fudge factor to avoid 401 * truncation errors (since we can only allocate in terms of whole 402 * pages). 403 */ 404 pagecount = ((freepages + 1) << PAGE_SHIFT) / 405 (PAGE_SIZE + sizeof(struct vm_page)); 406 bucketsize = offsetof(struct pgflbucket, pgb_colors[uvmexp.ncolors]); 407 bucketsize = roundup2(bucketsize, coherency_unit); 408 bucketarray = (void *)uvm_pageboot_alloc( 409 bucketsize * VM_NFREELIST + 410 pagecount * sizeof(struct vm_page)); 411 pagearray = (struct vm_page *) 412 (bucketarray + bucketsize * VM_NFREELIST); 413 414 for (fl = 0; fl < VM_NFREELIST; fl++) { 415 pgb = (struct pgflbucket *)(bucketarray + bucketsize * fl); 416 uvm_page_init_bucket(&uvm.page_free[fl], pgb, 0); 417 } 418 memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 419 420 /* 421 * init the freelist cache in the disabled state. 422 */ 423 uvm_pgflcache_init(); 424 425 /* 426 * init the vm_page structures and put them in the correct place. 427 */ 428 /* First init the extent */ 429 430 for (bank = uvm_physseg_get_first(), 431 uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount); 432 uvm_physseg_valid_p(bank); 433 bank = uvm_physseg_get_next(bank)) { 434 435 n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank); 436 uvm_physseg_seg_alloc_from_slab(bank, n); 437 uvm_physseg_init_seg(bank, pagearray); 438 439 /* set up page array pointers */ 440 pagearray += n; 441 pagecount -= n; 442 } 443 444 /* 445 * pass up the values of virtual_space_start and 446 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 447 * layers of the VM. 448 */ 449 450 *kvm_startp = round_page(virtual_space_start); 451 *kvm_endp = trunc_page(virtual_space_end); 452 453 /* 454 * init various thresholds. 455 */ 456 457 uvmexp.reserve_pagedaemon = 1; 458 uvmexp.reserve_kernel = vm_page_reserve_kernel; 459 460 /* 461 * done! 462 */ 463 464 uvm.page_init_done = true; 465 } 466 467 /* 468 * uvm_pgfl_lock: lock all freelist buckets 469 */ 470 471 void 472 uvm_pgfl_lock(void) 473 { 474 int i; 475 476 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 477 mutex_spin_enter(&uvm_freelist_locks[i].lock); 478 } 479 } 480 481 /* 482 * uvm_pgfl_unlock: unlock all freelist buckets 483 */ 484 485 void 486 uvm_pgfl_unlock(void) 487 { 488 int i; 489 490 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 491 mutex_spin_exit(&uvm_freelist_locks[i].lock); 492 } 493 } 494 495 /* 496 * uvm_setpagesize: set the page size 497 * 498 * => sets page_shift and page_mask from uvmexp.pagesize. 499 */ 500 501 void 502 uvm_setpagesize(void) 503 { 504 505 /* 506 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE 507 * to be a constant (indicated by being a non-zero value). 
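 *
 * For example (illustrative numbers only): with uvmexp.pagesize == 4096
 * the code below computes uvmexp.pagemask == 0xfff and
 * uvmexp.pageshift == 12, and the power-of-two check passes since
 * 4096 & 0xfff == 0.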
508 */ 509 if (uvmexp.pagesize == 0) { 510 if (PAGE_SIZE == 0) 511 panic("uvm_setpagesize: uvmexp.pagesize not set"); 512 uvmexp.pagesize = PAGE_SIZE; 513 } 514 uvmexp.pagemask = uvmexp.pagesize - 1; 515 if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 516 panic("uvm_setpagesize: page size %u (%#x) not a power of two", 517 uvmexp.pagesize, uvmexp.pagesize); 518 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 519 if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 520 break; 521 } 522 523 /* 524 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 525 */ 526 527 vaddr_t 528 uvm_pageboot_alloc(vsize_t size) 529 { 530 static bool initialized = false; 531 vaddr_t addr; 532 #if !defined(PMAP_STEAL_MEMORY) 533 vaddr_t vaddr; 534 paddr_t paddr; 535 #endif 536 537 /* 538 * on first call to this function, initialize ourselves. 539 */ 540 if (initialized == false) { 541 pmap_virtual_space(&virtual_space_start, &virtual_space_end); 542 543 /* round it the way we like it */ 544 virtual_space_start = round_page(virtual_space_start); 545 virtual_space_end = trunc_page(virtual_space_end); 546 547 initialized = true; 548 } 549 550 /* round to page size */ 551 size = round_page(size); 552 uvmexp.bootpages += atop(size); 553 554 #if defined(PMAP_STEAL_MEMORY) 555 556 /* 557 * defer bootstrap allocation to MD code (it may want to allocate 558 * from a direct-mapped segment). pmap_steal_memory should adjust 559 * virtual_space_start/virtual_space_end if necessary. 560 */ 561 562 addr = pmap_steal_memory(size, &virtual_space_start, 563 &virtual_space_end); 564 565 return addr; 566 567 #else /* !PMAP_STEAL_MEMORY */ 568 569 /* 570 * allocate virtual memory for this request 571 */ 572 if (virtual_space_start == virtual_space_end || 573 (virtual_space_end - virtual_space_start) < size) 574 panic("uvm_pageboot_alloc: out of virtual space"); 575 576 addr = virtual_space_start; 577 578 #ifdef PMAP_GROWKERNEL 579 /* 580 * If the kernel pmap can't map the requested space, 581 * then allocate more resources for it. 582 */ 583 if (uvm_maxkaddr < (addr + size)) { 584 uvm_maxkaddr = pmap_growkernel(addr + size); 585 if (uvm_maxkaddr < (addr + size)) 586 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 587 } 588 #endif 589 590 virtual_space_start += size; 591 592 /* 593 * allocate and mapin physical pages to back new virtual pages 594 */ 595 596 for (vaddr = round_page(addr) ; vaddr < addr + size ; 597 vaddr += PAGE_SIZE) { 598 599 if (!uvm_page_physget(&paddr)) 600 panic("uvm_pageboot_alloc: out of memory"); 601 602 /* 603 * Note this memory is no longer managed, so using 604 * pmap_kenter is safe. 605 */ 606 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 607 } 608 pmap_update(pmap_kernel()); 609 return addr; 610 #endif /* PMAP_STEAL_MEMORY */ 611 } 612 613 #if !defined(PMAP_STEAL_MEMORY) 614 /* 615 * uvm_page_physget: "steal" one page from the vm_physmem structure. 616 * 617 * => attempt to allocate it off the end of a segment in which the "avail" 618 * values match the start/end values. if we can't do that, then we 619 * will advance both values (making them equal, and removing some 620 * vm_page structures from the non-avail area). 621 * => return false if out of memory. 
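 * => a hedged usage sketch, mirroring uvm_pageboot_alloc() above
 *	("vaddr" here stands for a bootstrap virtual address to back):
 *
 *		paddr_t paddr;
 *
 *		if (!uvm_page_physget(&paddr))
 *			panic("out of memory");
 *		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0);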
622 */ 623 624 /* subroutine: try to allocate from memory chunks on the specified freelist */ 625 static bool uvm_page_physget_freelist(paddr_t *, int); 626 627 static bool 628 uvm_page_physget_freelist(paddr_t *paddrp, int freelist) 629 { 630 uvm_physseg_t lcv; 631 632 /* pass 1: try allocating from a matching end */ 633 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 634 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 635 #else 636 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 637 #endif 638 { 639 if (uvm.page_init_done == true) 640 panic("uvm_page_physget: called _after_ bootstrap"); 641 642 /* Try to match at front or back on unused segment */ 643 if (uvm_page_physunload(lcv, freelist, paddrp)) 644 return true; 645 } 646 647 /* pass2: forget about matching ends, just allocate something */ 648 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 649 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 650 #else 651 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 652 #endif 653 { 654 /* Try the front regardless. */ 655 if (uvm_page_physunload_force(lcv, freelist, paddrp)) 656 return true; 657 } 658 return false; 659 } 660 661 bool 662 uvm_page_physget(paddr_t *paddrp) 663 { 664 int i; 665 666 /* try in the order of freelist preference */ 667 for (i = 0; i < VM_NFREELIST; i++) 668 if (uvm_page_physget_freelist(paddrp, i) == true) 669 return (true); 670 return (false); 671 } 672 #endif /* PMAP_STEAL_MEMORY */ 673 674 /* 675 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages 676 * back from an I/O mapping (ugh!). used in some MD code as well. 677 */ 678 struct vm_page * 679 uvm_phys_to_vm_page(paddr_t pa) 680 { 681 paddr_t pf = atop(pa); 682 paddr_t off; 683 uvm_physseg_t upm; 684 685 upm = uvm_physseg_find(pf, &off); 686 if (upm != UVM_PHYSSEG_TYPE_INVALID) 687 return uvm_physseg_get_pg(upm, off); 688 return(NULL); 689 } 690 691 paddr_t 692 uvm_vm_page_to_phys(const struct vm_page *pg) 693 { 694 695 return pg->phys_addr & ~(PAGE_SIZE - 1); 696 } 697 698 /* 699 * uvm_page_numa_load: load NUMA range description. 700 */ 701 void 702 uvm_page_numa_load(paddr_t start, paddr_t size, u_int numa_id) 703 { 704 struct uvm_page_numa_region *d; 705 706 KASSERT(numa_id < PGFL_MAX_BUCKETS); 707 708 d = kmem_alloc(sizeof(*d), KM_SLEEP); 709 d->start = start; 710 d->size = size; 711 d->numa_id = numa_id; 712 d->next = uvm_page_numa_region; 713 uvm_page_numa_region = d; 714 } 715 716 /* 717 * uvm_page_numa_lookup: lookup NUMA node for the given page. 718 */ 719 static u_int 720 uvm_page_numa_lookup(struct vm_page *pg) 721 { 722 struct uvm_page_numa_region *d; 723 static bool warned; 724 paddr_t pa; 725 726 KASSERT(uvm_page_numa_region != NULL); 727 728 pa = VM_PAGE_TO_PHYS(pg); 729 for (d = uvm_page_numa_region; d != NULL; d = d->next) { 730 if (pa >= d->start && pa < d->start + d->size) { 731 return d->numa_id; 732 } 733 } 734 735 if (!warned) { 736 printf("uvm_page_numa_lookup: failed, first pg=%p pa=%#" 737 PRIxPADDR "\n", pg, VM_PAGE_TO_PHYS(pg)); 738 warned = true; 739 } 740 741 return 0; 742 } 743 744 /* 745 * uvm_page_redim: adjust freelist dimensions if they have changed. 
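 *
 * Called from uvm_page_recolor() and uvm_page_rebucket() below.  As a
 * worked example of the non-NUMA bucket choice made while transferring
 * pages (illustrative numbers only): with uvmexp.ncolors == 4 and
 * newnbuckets == 2, page frame number 100 goes to bucket
 * (100 / 4 / 8) % 2 == 1.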
 */

static void
uvm_page_redim(int newncolors, int newnbuckets)
{
	struct pgfreelist npgfl;
	struct pgflbucket *opgb, *npgb;
	struct pgflist *ohead, *nhead;
	struct vm_page *pg;
	size_t bucketsize, bucketmemsize, oldbucketmemsize;
	int fl, ob, oc, nb, nc, obuckets, ocolors;
	char *bucketarray, *oldbucketmem, *bucketmem;

	KASSERT(((newncolors - 1) & newncolors) == 0);

	/* Anything to do? */
	if (newncolors <= uvmexp.ncolors &&
	    newnbuckets == uvm.bucketcount) {
		return;
	}
	if (uvm.page_init_done == false) {
		uvmexp.ncolors = newncolors;
		return;
	}

	bucketsize = offsetof(struct pgflbucket, pgb_colors[newncolors]);
	bucketsize = roundup2(bucketsize, coherency_unit);
	bucketmemsize = bucketsize * newnbuckets * VM_NFREELIST +
	    coherency_unit - 1;
	bucketmem = kmem_zalloc(bucketmemsize, KM_SLEEP);
	bucketarray = (char *)roundup2((uintptr_t)bucketmem, coherency_unit);

	ocolors = uvmexp.ncolors;
	obuckets = uvm.bucketcount;

	/* Freelist cache mustn't be enabled. */
	uvm_pgflcache_pause();

	/* Make sure we should still do this. */
	uvm_pgfl_lock();
	if (newncolors <= uvmexp.ncolors &&
	    newnbuckets == uvm.bucketcount) {
		uvm_pgfl_unlock();
		uvm_pgflcache_resume();
		kmem_free(bucketmem, bucketmemsize);
		return;
	}

	uvmexp.ncolors = newncolors;
	uvmexp.colormask = uvmexp.ncolors - 1;
	uvm.bucketcount = newnbuckets;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
		/* Init new buckets in new freelist. */
		memset(&npgfl, 0, sizeof(npgfl));
		for (nb = 0; nb < newnbuckets; nb++) {
			npgb = (struct pgflbucket *)bucketarray;
			uvm_page_init_bucket(&npgfl, npgb, nb);
			bucketarray += bucketsize;
		}
		/* Now transfer pages from the old freelist. */
		for (nb = ob = 0; ob < obuckets; ob++) {
			opgb = uvm.page_free[fl].pgfl_buckets[ob];
			for (oc = 0; oc < ocolors; oc++) {
				ohead = &opgb->pgb_colors[oc];
				while ((pg = LIST_FIRST(ohead)) != NULL) {
					LIST_REMOVE(pg, pageq.list);
					/*
					 * Here we decide on the NEW color &
					 * bucket for the page.  For NUMA
					 * we'll use the info that the
					 * hardware gave us.  For non-NUMA
					 * we take the physical page frame
					 * number and cache color into
					 * account.  We do this to try and
					 * avoid defeating any memory
					 * interleaving in the hardware.
					 */
					KASSERT(
					    uvm_page_get_bucket(pg) == ob);
					KASSERT(fl ==
					    uvm_page_get_freelist(pg));
					if (uvm_page_numa_region != NULL) {
						nb = uvm_page_numa_lookup(pg);
					} else {
						nb = atop(VM_PAGE_TO_PHYS(pg))
						    / uvmexp.ncolors / 8
						    % newnbuckets;
					}
					uvm_page_set_bucket(pg, nb);
					npgb = npgfl.pgfl_buckets[nb];
					npgb->pgb_nfree++;
					nc = VM_PGCOLOR(pg);
					nhead = &npgb->pgb_colors[nc];
					LIST_INSERT_HEAD(nhead, pg, pageq.list);
				}
			}
		}
		/* Install the new freelist. */
		memcpy(&uvm.page_free[fl], &npgfl, sizeof(npgfl));
	}

	/* Unlock and free the old memory. */
	oldbucketmemsize = recolored_pages_memsize;
	oldbucketmem = recolored_pages_mem;
	recolored_pages_memsize = bucketmemsize;
	recolored_pages_mem = bucketmem;

	uvm_pgfl_unlock();
	uvm_pgflcache_resume();

	if (oldbucketmemsize) {
		kmem_free(oldbucketmem, oldbucketmemsize);
	}

	/*
	 * this calls uvm_km_alloc() which may want to hold
	 * uvm_freelist_lock.
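	 *
	 * (A related ordering note: above, the per-CPU free list cache is
	 * paused before the freelist locks are taken, and the old bucket
	 * memory is only kmem_free()'d after those locks have been dropped
	 * again.)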
864 */ 865 uvm_pager_realloc_emerg(); 866 } 867 868 /* 869 * uvm_page_recolor: Recolor the pages if the new color count is 870 * larger than the old one. 871 */ 872 873 void 874 uvm_page_recolor(int newncolors) 875 { 876 877 uvm_page_redim(newncolors, uvm.bucketcount); 878 } 879 880 /* 881 * uvm_page_rebucket: Determine a bucket structure and redim the free 882 * lists to match. 883 */ 884 885 void 886 uvm_page_rebucket(void) 887 { 888 u_int min_numa, max_numa, npackage, shift; 889 struct cpu_info *ci, *ci2, *ci3; 890 CPU_INFO_ITERATOR cii; 891 892 /* 893 * If we have more than one NUMA node, and the maximum NUMA node ID 894 * is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution 895 * for free pages. 896 */ 897 min_numa = (u_int)-1; 898 max_numa = 0; 899 for (CPU_INFO_FOREACH(cii, ci)) { 900 if (ci->ci_numa_id < min_numa) { 901 min_numa = ci->ci_numa_id; 902 } 903 if (ci->ci_numa_id > max_numa) { 904 max_numa = ci->ci_numa_id; 905 } 906 } 907 if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) { 908 aprint_debug("UVM: using NUMA allocation scheme\n"); 909 for (CPU_INFO_FOREACH(cii, ci)) { 910 ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id; 911 } 912 uvm_page_redim(uvmexp.ncolors, max_numa + 1); 913 return; 914 } 915 916 /* 917 * Otherwise we'll go with a scheme to maximise L2/L3 cache locality 918 * and minimise lock contention. Count the total number of CPU 919 * packages, and then try to distribute the buckets among CPU 920 * packages evenly. 921 */ 922 npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE1ST]; 923 924 /* 925 * Figure out how to arrange the packages & buckets, and the total 926 * number of buckets we need. XXX 2 may not be the best factor. 927 */ 928 for (shift = 0; npackage > PGFL_MAX_BUCKETS; shift++) { 929 npackage >>= 1; 930 } 931 uvm_page_redim(uvmexp.ncolors, npackage); 932 933 /* 934 * Now tell each CPU which bucket to use. In the outer loop, scroll 935 * through all CPU packages. 936 */ 937 npackage = 0; 938 ci = curcpu(); 939 ci2 = ci->ci_sibling[CPUREL_PACKAGE1ST]; 940 do { 941 /* 942 * In the inner loop, scroll through all CPUs in the package 943 * and assign the same bucket ID. 944 */ 945 ci3 = ci2; 946 do { 947 ci3->ci_data.cpu_uvm->pgflbucket = npackage >> shift; 948 ci3 = ci3->ci_sibling[CPUREL_PACKAGE]; 949 } while (ci3 != ci2); 950 npackage++; 951 ci2 = ci2->ci_sibling[CPUREL_PACKAGE1ST]; 952 } while (ci2 != ci->ci_sibling[CPUREL_PACKAGE1ST]); 953 954 aprint_debug("UVM: using package allocation scheme, " 955 "%d package(s) per bucket\n", 1 << shift); 956 } 957 958 /* 959 * uvm_cpu_attach: initialize per-CPU data structures. 960 */ 961 962 void 963 uvm_cpu_attach(struct cpu_info *ci) 964 { 965 struct uvm_cpu *ucpu; 966 967 /* Already done in uvm_page_init(). */ 968 if (!CPU_IS_PRIMARY(ci)) { 969 /* Add more reserve pages for this CPU. */ 970 uvmexp.reserve_kernel += vm_page_reserve_kernel; 971 972 /* Allocate per-CPU data structures. */ 973 ucpu = kmem_zalloc(sizeof(struct uvm_cpu) + coherency_unit - 1, 974 KM_SLEEP); 975 ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu, 976 coherency_unit); 977 ci->ci_data.cpu_uvm = ucpu; 978 } else { 979 ucpu = ci->ci_data.cpu_uvm; 980 } 981 982 uvmpdpol_init_cpu(ucpu); 983 } 984 985 /* 986 * uvm_availmem: fetch the total amount of free memory in pages. this can 987 * have a detrimental effect on performance due to false sharing; don't call 988 * unless needed. 989 * 990 * some users can request the amount of free memory so often that it begins 991 * to impact upon performance. 
if calling frequently and an inexact value 992 * is okay, call with cached = true. 993 */ 994 995 int 996 uvm_availmem(bool cached) 997 { 998 int64_t fp; 999 1000 cpu_count_sync(cached); 1001 if ((fp = cpu_count_get(CPU_COUNT_FREEPAGES)) < 0) { 1002 /* 1003 * XXXAD could briefly go negative because it's impossible 1004 * to get a clean snapshot. address this for other counters 1005 * used as running totals before NetBSD 10 although less 1006 * important for those. 1007 */ 1008 fp = 0; 1009 } 1010 return (int)fp; 1011 } 1012 1013 /* 1014 * uvm_pagealloc_pgb: helper routine that tries to allocate any color from a 1015 * specific freelist and specific bucket only. 1016 * 1017 * => must be at IPL_VM or higher to protect per-CPU data structures. 1018 */ 1019 1020 static struct vm_page * 1021 uvm_pagealloc_pgb(struct uvm_cpu *ucpu, int f, int b, int *trycolorp, int flags) 1022 { 1023 int c, trycolor, colormask; 1024 struct pgflbucket *pgb; 1025 struct vm_page *pg; 1026 kmutex_t *lock; 1027 bool fill; 1028 1029 /* 1030 * Skip the bucket if empty, no lock needed. There could be many 1031 * empty freelists/buckets. 1032 */ 1033 pgb = uvm.page_free[f].pgfl_buckets[b]; 1034 if (pgb->pgb_nfree == 0) { 1035 return NULL; 1036 } 1037 1038 /* Skip bucket if low on memory. */ 1039 lock = &uvm_freelist_locks[b].lock; 1040 mutex_spin_enter(lock); 1041 if (__predict_false(pgb->pgb_nfree <= uvmexp.reserve_kernel)) { 1042 if ((flags & UVM_PGA_USERESERVE) == 0 || 1043 (pgb->pgb_nfree <= uvmexp.reserve_pagedaemon && 1044 curlwp != uvm.pagedaemon_lwp)) { 1045 mutex_spin_exit(lock); 1046 return NULL; 1047 } 1048 fill = false; 1049 } else { 1050 fill = true; 1051 } 1052 1053 /* Try all page colors as needed. */ 1054 c = trycolor = *trycolorp; 1055 colormask = uvmexp.colormask; 1056 do { 1057 pg = LIST_FIRST(&pgb->pgb_colors[c]); 1058 if (__predict_true(pg != NULL)) { 1059 /* 1060 * Got a free page! PG_FREE must be cleared under 1061 * lock because of uvm_pglistalloc(). 1062 */ 1063 LIST_REMOVE(pg, pageq.list); 1064 KASSERT(pg->flags == PG_FREE); 1065 pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE; 1066 pgb->pgb_nfree--; 1067 CPU_COUNT(CPU_COUNT_FREEPAGES, -1); 1068 1069 /* 1070 * While we have the bucket locked and our data 1071 * structures fresh in L1 cache, we have an ideal 1072 * opportunity to grab some pages for the freelist 1073 * cache without causing extra contention. Only do 1074 * so if we found pages in this CPU's preferred 1075 * bucket. 1076 */ 1077 if (__predict_true(b == ucpu->pgflbucket && fill)) { 1078 uvm_pgflcache_fill(ucpu, f, b, c); 1079 } 1080 mutex_spin_exit(lock); 1081 KASSERT(uvm_page_get_bucket(pg) == b); 1082 CPU_COUNT(c == trycolor ? 1083 CPU_COUNT_COLORHIT : CPU_COUNT_COLORMISS, 1); 1084 CPU_COUNT(CPU_COUNT_CPUMISS, 1); 1085 *trycolorp = c; 1086 return pg; 1087 } 1088 c = (c + 1) & colormask; 1089 } while (c != trycolor); 1090 mutex_spin_exit(lock); 1091 1092 return NULL; 1093 } 1094 1095 /* 1096 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat that allocates 1097 * any color from any bucket, in a specific freelist. 1098 * 1099 * => must be at IPL_VM or higher to protect per-CPU data structures. 1100 */ 1101 1102 static struct vm_page * 1103 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int f, int *trycolorp, int flags) 1104 { 1105 int b, trybucket, bucketcount; 1106 struct vm_page *pg; 1107 1108 /* Try for the exact thing in the per-CPU cache. 
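	 * If that misses, fall back below to walking every bucket in this
	 * freelist, starting with this CPU's preferred bucket.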
*/ 1109 if ((pg = uvm_pgflcache_alloc(ucpu, f, *trycolorp)) != NULL) { 1110 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1111 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1112 return pg; 1113 } 1114 1115 /* Walk through all buckets, trying our preferred bucket first. */ 1116 trybucket = ucpu->pgflbucket; 1117 b = trybucket; 1118 bucketcount = uvm.bucketcount; 1119 do { 1120 pg = uvm_pagealloc_pgb(ucpu, f, b, trycolorp, flags); 1121 if (pg != NULL) { 1122 return pg; 1123 } 1124 b = (b + 1 == bucketcount ? 0 : b + 1); 1125 } while (b != trybucket); 1126 1127 return NULL; 1128 } 1129 1130 /* 1131 * uvm_pagealloc_strat: allocate vm_page from a particular free list. 1132 * 1133 * => return null if no pages free 1134 * => wake up pagedaemon if number of free pages drops below low water mark 1135 * => if obj != NULL, obj must be locked (to put in obj's tree) 1136 * => if anon != NULL, anon must be locked (to put in anon) 1137 * => only one of obj or anon can be non-null 1138 * => caller must activate/deactivate page if it is not wired. 1139 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 1140 * => policy decision: it is more important to pull a page off of the 1141 * appropriate priority free list than it is to get a page from the 1142 * correct bucket or color bin. This is because we live with the 1143 * consequences of a bad free list decision for the entire 1144 * lifetime of the page, e.g. if the page comes from memory that 1145 * is slower to access. 1146 */ 1147 1148 struct vm_page * 1149 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, 1150 int flags, int strat, int free_list) 1151 { 1152 int color, lcv, error, s; 1153 struct uvm_cpu *ucpu; 1154 struct vm_page *pg; 1155 lwp_t *l; 1156 1157 KASSERT(obj == NULL || anon == NULL); 1158 KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); 1159 KASSERT(off == trunc_page(off)); 1160 KASSERT(obj == NULL || rw_write_held(obj->vmobjlock)); 1161 KASSERT(anon == NULL || anon->an_lock == NULL || 1162 rw_write_held(anon->an_lock)); 1163 1164 /* 1165 * This implements a global round-robin page coloring 1166 * algorithm. 1167 */ 1168 1169 s = splvm(); 1170 ucpu = curcpu()->ci_data.cpu_uvm; 1171 if (flags & UVM_FLAG_COLORMATCH) { 1172 color = atop(off) & uvmexp.colormask; 1173 } else { 1174 color = ucpu->pgflcolor; 1175 } 1176 1177 /* 1178 * fail if any of these conditions is true: 1179 * [1] there really are no free pages, or 1180 * [2] only kernel "reserved" pages remain and 1181 * reserved pages have not been requested. 1182 * [3] only pagedaemon "reserved" pages remain and 1183 * the requestor isn't the pagedaemon. 1184 * we make kernel reserve pages available if called by a 1185 * kernel thread. 1186 */ 1187 l = curlwp; 1188 if (__predict_true(l != NULL) && (l->l_flag & LW_SYSTEM) != 0) { 1189 flags |= UVM_PGA_USERESERVE; 1190 } 1191 1192 again: 1193 switch (strat) { 1194 case UVM_PGA_STRAT_NORMAL: 1195 /* Check freelists: descending priority (ascending id) order. */ 1196 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1197 pg = uvm_pagealloc_pgfl(ucpu, lcv, &color, flags); 1198 if (pg != NULL) { 1199 goto gotit; 1200 } 1201 } 1202 1203 /* No pages free! Have pagedaemon free some memory. */ 1204 splx(s); 1205 uvm_kick_pdaemon(); 1206 return NULL; 1207 1208 case UVM_PGA_STRAT_ONLY: 1209 case UVM_PGA_STRAT_FALLBACK: 1210 /* Attempt to allocate from the specified free list. 
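		 * With UVM_PGA_STRAT_ONLY a miss here fails the allocation;
		 * with UVM_PGA_STRAT_FALLBACK the normal strategy is retried
		 * below.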
*/ 1211 KASSERT(free_list >= 0); 1212 KASSERT(free_list < VM_NFREELIST); 1213 pg = uvm_pagealloc_pgfl(ucpu, free_list, &color, flags); 1214 if (pg != NULL) { 1215 goto gotit; 1216 } 1217 1218 /* Fall back, if possible. */ 1219 if (strat == UVM_PGA_STRAT_FALLBACK) { 1220 strat = UVM_PGA_STRAT_NORMAL; 1221 goto again; 1222 } 1223 1224 /* No pages free! Have pagedaemon free some memory. */ 1225 splx(s); 1226 uvm_kick_pdaemon(); 1227 return NULL; 1228 1229 case UVM_PGA_STRAT_NUMA: 1230 /* 1231 * NUMA strategy (experimental): allocating from the correct 1232 * bucket is more important than observing freelist 1233 * priority. Look only to the current NUMA node; if that 1234 * fails, we need to look to other NUMA nodes, so retry with 1235 * the normal strategy. 1236 */ 1237 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1238 pg = uvm_pgflcache_alloc(ucpu, lcv, color); 1239 if (pg != NULL) { 1240 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1241 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1242 goto gotit; 1243 } 1244 pg = uvm_pagealloc_pgb(ucpu, lcv, 1245 ucpu->pgflbucket, &color, flags); 1246 if (pg != NULL) { 1247 goto gotit; 1248 } 1249 } 1250 strat = UVM_PGA_STRAT_NORMAL; 1251 goto again; 1252 1253 default: 1254 panic("uvm_pagealloc_strat: bad strat %d", strat); 1255 /* NOTREACHED */ 1256 } 1257 1258 gotit: 1259 /* 1260 * We now know which color we actually allocated from; set 1261 * the next color accordingly. 1262 */ 1263 1264 ucpu->pgflcolor = (color + 1) & uvmexp.colormask; 1265 1266 /* 1267 * while still at IPL_VM, update allocation statistics. 1268 */ 1269 1270 if (anon) { 1271 CPU_COUNT(CPU_COUNT_ANONCLEAN, 1); 1272 } 1273 splx(s); 1274 KASSERT(pg->flags == (PG_BUSY|PG_CLEAN|PG_FAKE)); 1275 1276 /* 1277 * assign the page to the object. as the page was free, we know 1278 * that pg->uobject and pg->uanon are NULL. we only need to take 1279 * the page's interlock if we are changing the values. 1280 */ 1281 if (anon != NULL || obj != NULL) { 1282 mutex_enter(&pg->interlock); 1283 } 1284 pg->offset = off; 1285 pg->uobject = obj; 1286 pg->uanon = anon; 1287 KASSERT(uvm_page_owner_locked_p(pg, true)); 1288 if (anon) { 1289 anon->an_page = pg; 1290 pg->flags |= PG_ANON; 1291 mutex_exit(&pg->interlock); 1292 } else if (obj) { 1293 /* 1294 * set PG_FILE|PG_AOBJ before the first uvm_pageinsert. 1295 */ 1296 if (UVM_OBJ_IS_VNODE(obj)) { 1297 pg->flags |= PG_FILE; 1298 } else if (UVM_OBJ_IS_AOBJ(obj)) { 1299 pg->flags |= PG_AOBJ; 1300 } 1301 uvm_pageinsert_object(obj, pg); 1302 mutex_exit(&pg->interlock); 1303 error = uvm_pageinsert_tree(obj, pg); 1304 if (error != 0) { 1305 mutex_enter(&pg->interlock); 1306 uvm_pageremove_object(obj, pg); 1307 mutex_exit(&pg->interlock); 1308 uvm_pagefree(pg); 1309 return NULL; 1310 } 1311 } 1312 1313 #if defined(UVM_PAGE_TRKOWN) 1314 pg->owner_tag = NULL; 1315 #endif 1316 UVM_PAGE_OWN(pg, "new alloc"); 1317 1318 if (flags & UVM_PGA_ZERO) { 1319 /* A zero'd page is not clean. 
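		 * The zeroing is done here rather than coming from backing
		 * store, so the page's contents no longer match backing
		 * store and it is marked dirty below.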
*/ 1320 if (obj != NULL || anon != NULL) { 1321 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1322 } 1323 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1324 } 1325 1326 return(pg); 1327 } 1328 1329 /* 1330 * uvm_pagereplace: replace a page with another 1331 * 1332 * => object must be locked 1333 * => page interlocks must be held 1334 */ 1335 1336 void 1337 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) 1338 { 1339 struct uvm_object *uobj = oldpg->uobject; 1340 struct vm_page *pg __diagused; 1341 uint64_t idx; 1342 1343 KASSERT((oldpg->flags & PG_TABLED) != 0); 1344 KASSERT(uobj != NULL); 1345 KASSERT((newpg->flags & PG_TABLED) == 0); 1346 KASSERT(newpg->uobject == NULL); 1347 KASSERT(rw_write_held(uobj->vmobjlock)); 1348 KASSERT(mutex_owned(&oldpg->interlock)); 1349 KASSERT(mutex_owned(&newpg->interlock)); 1350 1351 newpg->uobject = uobj; 1352 newpg->offset = oldpg->offset; 1353 idx = newpg->offset >> PAGE_SHIFT; 1354 pg = radix_tree_replace_node(&uobj->uo_pages, idx, newpg); 1355 KASSERT(pg == oldpg); 1356 if (((oldpg->flags ^ newpg->flags) & PG_CLEAN) != 0) { 1357 if ((newpg->flags & PG_CLEAN) != 0) { 1358 uvm_obj_page_clear_dirty(newpg); 1359 } else { 1360 uvm_obj_page_set_dirty(newpg); 1361 } 1362 } 1363 /* 1364 * oldpg's PG_STAT is stable. newpg is not reachable by others yet. 1365 */ 1366 newpg->flags |= 1367 (newpg->flags & ~PG_STAT) | (oldpg->flags & PG_STAT); 1368 uvm_pageinsert_object(uobj, newpg); 1369 uvm_pageremove_object(uobj, oldpg); 1370 } 1371 1372 /* 1373 * uvm_pagerealloc: reallocate a page from one object to another 1374 * 1375 * => both objects must be locked 1376 */ 1377 1378 int 1379 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) 1380 { 1381 int error = 0; 1382 1383 /* 1384 * remove it from the old object 1385 */ 1386 1387 if (pg->uobject) { 1388 uvm_pageremove_tree(pg->uobject, pg); 1389 uvm_pageremove_object(pg->uobject, pg); 1390 } 1391 1392 /* 1393 * put it in the new object 1394 */ 1395 1396 if (newobj) { 1397 mutex_enter(&pg->interlock); 1398 pg->uobject = newobj; 1399 pg->offset = newoff; 1400 if (UVM_OBJ_IS_VNODE(newobj)) { 1401 pg->flags |= PG_FILE; 1402 } else if (UVM_OBJ_IS_AOBJ(newobj)) { 1403 pg->flags |= PG_AOBJ; 1404 } 1405 uvm_pageinsert_object(newobj, pg); 1406 mutex_exit(&pg->interlock); 1407 error = uvm_pageinsert_tree(newobj, pg); 1408 if (error != 0) { 1409 mutex_enter(&pg->interlock); 1410 uvm_pageremove_object(newobj, pg); 1411 mutex_exit(&pg->interlock); 1412 } 1413 } 1414 1415 return error; 1416 } 1417 1418 /* 1419 * uvm_pagefree: free page 1420 * 1421 * => erase page's identity (i.e. 
remove from object) 1422 * => put page on free list 1423 * => caller must lock owning object (either anon or uvm_object) 1424 * => assumes all valid mappings of pg are gone 1425 */ 1426 1427 void 1428 uvm_pagefree(struct vm_page *pg) 1429 { 1430 struct pgfreelist *pgfl; 1431 struct pgflbucket *pgb; 1432 struct uvm_cpu *ucpu; 1433 kmutex_t *lock; 1434 int bucket, s; 1435 bool locked; 1436 1437 #ifdef DEBUG 1438 if (pg->uobject == (void *)0xdeadbeef && 1439 pg->uanon == (void *)0xdeadbeef) { 1440 panic("uvm_pagefree: freeing free page %p", pg); 1441 } 1442 #endif /* DEBUG */ 1443 1444 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1445 KASSERT(!(pg->flags & PG_FREE)); 1446 KASSERT(pg->uobject == NULL || rw_write_held(pg->uobject->vmobjlock)); 1447 KASSERT(pg->uobject != NULL || pg->uanon == NULL || 1448 rw_write_held(pg->uanon->an_lock)); 1449 1450 /* 1451 * remove the page from the object's tree before acquiring any page 1452 * interlocks: this can acquire locks to free radixtree nodes. 1453 */ 1454 if (pg->uobject != NULL) { 1455 uvm_pageremove_tree(pg->uobject, pg); 1456 } 1457 1458 /* 1459 * if the page is loaned, resolve the loan instead of freeing. 1460 */ 1461 1462 if (pg->loan_count) { 1463 KASSERT(pg->wire_count == 0); 1464 1465 /* 1466 * if the page is owned by an anon then we just want to 1467 * drop anon ownership. the kernel will free the page when 1468 * it is done with it. if the page is owned by an object, 1469 * remove it from the object and mark it dirty for the benefit 1470 * of possible anon owners. 1471 * 1472 * regardless of previous ownership, wakeup any waiters, 1473 * unbusy the page, and we're done. 1474 */ 1475 1476 uvm_pagelock(pg); 1477 locked = true; 1478 if (pg->uobject != NULL) { 1479 uvm_pageremove_object(pg->uobject, pg); 1480 pg->flags &= ~(PG_FILE|PG_AOBJ); 1481 } else if (pg->uanon != NULL) { 1482 if ((pg->flags & PG_ANON) == 0) { 1483 pg->loan_count--; 1484 } else { 1485 const unsigned status = uvm_pagegetdirty(pg); 1486 pg->flags &= ~PG_ANON; 1487 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1488 } 1489 pg->uanon->an_page = NULL; 1490 pg->uanon = NULL; 1491 } 1492 if (pg->pqflags & PQ_WANTED) { 1493 wakeup(pg); 1494 } 1495 pg->pqflags &= ~PQ_WANTED; 1496 pg->flags &= ~(PG_BUSY|PG_RELEASED|PG_PAGER1); 1497 #ifdef UVM_PAGE_TRKOWN 1498 pg->owner_tag = NULL; 1499 #endif 1500 KASSERT((pg->flags & PG_STAT) == 0); 1501 if (pg->loan_count) { 1502 KASSERT(pg->uobject == NULL); 1503 if (pg->uanon == NULL) { 1504 uvm_pagedequeue(pg); 1505 } 1506 uvm_pageunlock(pg); 1507 return; 1508 } 1509 } else if (pg->uobject != NULL || pg->uanon != NULL || 1510 pg->wire_count != 0) { 1511 uvm_pagelock(pg); 1512 locked = true; 1513 } else { 1514 locked = false; 1515 } 1516 1517 /* 1518 * remove page from its object or anon. 1519 */ 1520 if (pg->uobject != NULL) { 1521 uvm_pageremove_object(pg->uobject, pg); 1522 } else if (pg->uanon != NULL) { 1523 const unsigned int status = uvm_pagegetdirty(pg); 1524 pg->uanon->an_page = NULL; 1525 pg->uanon = NULL; 1526 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1527 } 1528 1529 /* 1530 * if the page was wired, unwire it now. 1531 */ 1532 1533 if (pg->wire_count) { 1534 pg->wire_count = 0; 1535 atomic_dec_uint(&uvmexp.wired); 1536 } 1537 if (locked) { 1538 /* 1539 * wake anyone waiting on the page. 1540 */ 1541 if ((pg->pqflags & PQ_WANTED) != 0) { 1542 pg->pqflags &= ~PQ_WANTED; 1543 wakeup(pg); 1544 } 1545 1546 /* 1547 * now remove the page from the queues. 
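		 * (uvm_pagedequeue() below is a no-op if the page was never
		 * put on a paging queue.)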
1548 */ 1549 uvm_pagedequeue(pg); 1550 uvm_pageunlock(pg); 1551 } else { 1552 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1553 } 1554 1555 /* 1556 * and put on free queue 1557 */ 1558 1559 #ifdef DEBUG 1560 pg->uobject = (void *)0xdeadbeef; 1561 pg->uanon = (void *)0xdeadbeef; 1562 #endif /* DEBUG */ 1563 1564 /* Try to send the page to the per-CPU cache. */ 1565 s = splvm(); 1566 ucpu = curcpu()->ci_data.cpu_uvm; 1567 bucket = uvm_page_get_bucket(pg); 1568 if (bucket == ucpu->pgflbucket && uvm_pgflcache_free(ucpu, pg)) { 1569 splx(s); 1570 return; 1571 } 1572 1573 /* Didn't work. Never mind, send it to a global bucket. */ 1574 pgfl = &uvm.page_free[uvm_page_get_freelist(pg)]; 1575 pgb = pgfl->pgfl_buckets[bucket]; 1576 lock = &uvm_freelist_locks[bucket].lock; 1577 1578 mutex_spin_enter(lock); 1579 /* PG_FREE must be set under lock because of uvm_pglistalloc(). */ 1580 pg->flags = PG_FREE; 1581 LIST_INSERT_HEAD(&pgb->pgb_colors[VM_PGCOLOR(pg)], pg, pageq.list); 1582 pgb->pgb_nfree++; 1583 CPU_COUNT(CPU_COUNT_FREEPAGES, 1); 1584 mutex_spin_exit(lock); 1585 splx(s); 1586 } 1587 1588 /* 1589 * uvm_page_unbusy: unbusy an array of pages. 1590 * 1591 * => pages must either all belong to the same object, or all belong to anons. 1592 * => if pages are object-owned, object must be locked. 1593 * => if pages are anon-owned, anons must be locked. 1594 * => caller must make sure that anon-owned pages are not PG_RELEASED. 1595 */ 1596 1597 void 1598 uvm_page_unbusy(struct vm_page **pgs, int npgs) 1599 { 1600 struct vm_page *pg; 1601 int i, pageout_done; 1602 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1603 1604 pageout_done = 0; 1605 for (i = 0; i < npgs; i++) { 1606 pg = pgs[i]; 1607 if (pg == NULL || pg == PGO_DONTCARE) { 1608 continue; 1609 } 1610 1611 KASSERT(uvm_page_owner_locked_p(pg, true)); 1612 KASSERT(pg->flags & PG_BUSY); 1613 1614 if (pg->flags & PG_PAGEOUT) { 1615 pg->flags &= ~PG_PAGEOUT; 1616 pg->flags |= PG_RELEASED; 1617 pageout_done++; 1618 atomic_inc_uint(&uvmexp.pdfreed); 1619 } 1620 if (pg->flags & PG_RELEASED) { 1621 UVMHIST_LOG(ubchist, "releasing pg %#jx", 1622 (uintptr_t)pg, 0, 0, 0); 1623 KASSERT(pg->uobject != NULL || 1624 (pg->uanon != NULL && pg->uanon->an_ref > 0)); 1625 pg->flags &= ~PG_RELEASED; 1626 uvm_pagefree(pg); 1627 } else { 1628 UVMHIST_LOG(ubchist, "unbusying pg %#jx", 1629 (uintptr_t)pg, 0, 0, 0); 1630 KASSERT((pg->flags & PG_FAKE) == 0); 1631 pg->flags &= ~PG_BUSY; 1632 uvm_pagelock(pg); 1633 uvm_pagewakeup(pg); 1634 uvm_pageunlock(pg); 1635 UVM_PAGE_OWN(pg, NULL); 1636 } 1637 } 1638 if (pageout_done != 0) { 1639 uvm_pageout_done(pageout_done); 1640 } 1641 } 1642 1643 /* 1644 * uvm_pagewait: wait for a busy page 1645 * 1646 * => page must be known PG_BUSY 1647 * => object must be read or write locked 1648 * => object will be unlocked on return 1649 */ 1650 1651 void 1652 uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg) 1653 { 1654 1655 KASSERT(rw_lock_held(lock)); 1656 KASSERT((pg->flags & PG_BUSY) != 0); 1657 KASSERT(uvm_page_owner_locked_p(pg, false)); 1658 1659 mutex_enter(&pg->interlock); 1660 pg->pqflags |= PQ_WANTED; 1661 rw_exit(lock); 1662 UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0); 1663 } 1664 1665 /* 1666 * uvm_pagewakeup: wake anyone waiting on a page 1667 * 1668 * => page interlock must be held 1669 */ 1670 1671 void 1672 uvm_pagewakeup(struct vm_page *pg) 1673 { 1674 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1675 1676 KASSERT(mutex_owned(&pg->interlock)); 1677 1678 UVMHIST_LOG(ubchist, "waking pg %#jx", 
	    (uintptr_t)pg, 0, 0, 0);

	if ((pg->pqflags & PQ_WANTED) != 0) {
		wakeup(pg);
		pg->pqflags &= ~PQ_WANTED;
	}
}

/*
 * uvm_pagewanted_p: return true if someone is waiting on the page
 *
 * => object must be write locked (lock out all concurrent access)
 */

bool
uvm_pagewanted_p(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));

	return (atomic_load_relaxed(&pg->pqflags) & PQ_WANTED) != 0;
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.  it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(struct vm_page *pg, const char *tag)
{

	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
	KASSERT(uvm_page_owner_locked_p(pg, true));

	/* gain ownership? */
	if (tag) {
		KASSERT((pg->flags & PG_BUSY) != 0);
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d.%d [%s]\n", pg,
			    pg->owner, pg->lowner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = curproc->p_pid;
		pg->lowner = curlwp->l_lid;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	KASSERT((pg->flags & PG_BUSY) == 0);
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of a non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	pg->owner_tag = NULL;
}
#endif

/*
 * uvm_pagelookup: look up a page
 *
 * => caller should lock object to keep someone from pulling the page
 *	out from under it
 */

struct vm_page *
uvm_pagelookup(struct uvm_object *obj, voff_t off)
{
	struct vm_page *pg;

	KASSERT(db_active || rw_lock_held(obj->vmobjlock));

	pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT);

	KASSERT(pg == NULL || obj->uo_npages != 0);
	KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
	    (pg->flags & PG_BUSY) != 0);
	return pg;
}

/*
 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
 *
 * => caller must lock objects
 * => caller must hold pg->interlock
 */

void
uvm_pagewire(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));
	KASSERT(mutex_owned(&pg->interlock));
#if defined(READAHEAD_STATS)
	if ((pg->flags & PG_READAHEAD) != 0) {
		uvm_ra_hit.ev_count++;
		pg->flags &= ~PG_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count == 0) {
		uvm_pagedequeue(pg);
		atomic_inc_uint(&uvmexp.wired);
	}
	pg->wire_count++;
	KASSERT(pg->wire_count > 0);	/* detect wraparound */
}

/*
 * uvm_pageunwire: unwire the page.
 *
 * => activate if wire count goes to zero.
1797 * => caller must lock objects 1798 * => caller must hold pg->interlock 1799 */ 1800 1801 void 1802 uvm_pageunwire(struct vm_page *pg) 1803 { 1804 1805 KASSERT(uvm_page_owner_locked_p(pg, true)); 1806 KASSERT(pg->wire_count != 0); 1807 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1808 KASSERT(mutex_owned(&pg->interlock)); 1809 pg->wire_count--; 1810 if (pg->wire_count == 0) { 1811 uvm_pageactivate(pg); 1812 KASSERT(uvmexp.wired != 0); 1813 atomic_dec_uint(&uvmexp.wired); 1814 } 1815 } 1816 1817 /* 1818 * uvm_pagedeactivate: deactivate page 1819 * 1820 * => caller must lock objects 1821 * => caller must check to make sure page is not wired 1822 * => object that page belongs to must be locked (so we can adjust pg->flags) 1823 * => caller must clear the reference on the page before calling 1824 * => caller must hold pg->interlock 1825 */ 1826 1827 void 1828 uvm_pagedeactivate(struct vm_page *pg) 1829 { 1830 1831 KASSERT(uvm_page_owner_locked_p(pg, false)); 1832 KASSERT(mutex_owned(&pg->interlock)); 1833 if (pg->wire_count == 0) { 1834 KASSERT(uvmpdpol_pageisqueued_p(pg)); 1835 uvmpdpol_pagedeactivate(pg); 1836 } 1837 } 1838 1839 /* 1840 * uvm_pageactivate: activate page 1841 * 1842 * => caller must lock objects 1843 * => caller must hold pg->interlock 1844 */ 1845 1846 void 1847 uvm_pageactivate(struct vm_page *pg) 1848 { 1849 1850 KASSERT(uvm_page_owner_locked_p(pg, false)); 1851 KASSERT(mutex_owned(&pg->interlock)); 1852 #if defined(READAHEAD_STATS) 1853 if ((pg->flags & PG_READAHEAD) != 0) { 1854 uvm_ra_hit.ev_count++; 1855 pg->flags &= ~PG_READAHEAD; 1856 } 1857 #endif /* defined(READAHEAD_STATS) */ 1858 if (pg->wire_count == 0) { 1859 uvmpdpol_pageactivate(pg); 1860 } 1861 } 1862 1863 /* 1864 * uvm_pagedequeue: remove a page from any paging queue 1865 * 1866 * => caller must lock objects 1867 * => caller must hold pg->interlock 1868 */ 1869 void 1870 uvm_pagedequeue(struct vm_page *pg) 1871 { 1872 1873 KASSERT(uvm_page_owner_locked_p(pg, true)); 1874 KASSERT(mutex_owned(&pg->interlock)); 1875 if (uvmpdpol_pageisqueued_p(pg)) { 1876 uvmpdpol_pagedequeue(pg); 1877 } 1878 } 1879 1880 /* 1881 * uvm_pageenqueue: add a page to a paging queue without activating. 1882 * used where a page is not really demanded (yet). eg. read-ahead 1883 * 1884 * => caller must lock objects 1885 * => caller must hold pg->interlock 1886 */ 1887 void 1888 uvm_pageenqueue(struct vm_page *pg) 1889 { 1890 1891 KASSERT(uvm_page_owner_locked_p(pg, false)); 1892 KASSERT(mutex_owned(&pg->interlock)); 1893 if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) { 1894 uvmpdpol_pageenqueue(pg); 1895 } 1896 } 1897 1898 /* 1899 * uvm_pagelock: acquire page interlock 1900 */ 1901 void 1902 uvm_pagelock(struct vm_page *pg) 1903 { 1904 1905 mutex_enter(&pg->interlock); 1906 } 1907 1908 /* 1909 * uvm_pagelock2: acquire two page interlocks 1910 */ 1911 void 1912 uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2) 1913 { 1914 1915 if (pg1 < pg2) { 1916 mutex_enter(&pg1->interlock); 1917 mutex_enter(&pg2->interlock); 1918 } else { 1919 mutex_enter(&pg2->interlock); 1920 mutex_enter(&pg1->interlock); 1921 } 1922 } 1923 1924 /* 1925 * uvm_pageunlock: release page interlock, and if a page replacement intent 1926 * is set on the page, pass it to uvmpdpol to make real. 
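 *
 * A sketch of the typical pairing in this file (uvm_page_unbusy()
 * above, for instance, does exactly this):
 *
 *	uvm_pagelock(pg);
 *	uvm_pagewakeup(pg);	...or uvm_pagedequeue(), etc.
 *	uvm_pageunlock(pg);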
1927 * 1928 * => caller must hold pg->interlock 1929 */ 1930 void 1931 uvm_pageunlock(struct vm_page *pg) 1932 { 1933 1934 if ((pg->pqflags & PQ_INTENT_SET) == 0 || 1935 (pg->pqflags & PQ_INTENT_QUEUED) != 0) { 1936 mutex_exit(&pg->interlock); 1937 return; 1938 } 1939 pg->pqflags |= PQ_INTENT_QUEUED; 1940 mutex_exit(&pg->interlock); 1941 uvmpdpol_pagerealize(pg); 1942 } 1943 1944 /* 1945 * uvm_pageunlock2: release two page interlocks, and for both pages if a 1946 * page replacement intent is set on the page, pass it to uvmpdpol to make 1947 * real. 1948 * 1949 * => caller must hold pg->interlock 1950 */ 1951 void 1952 uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2) 1953 { 1954 1955 if ((pg1->pqflags & PQ_INTENT_SET) == 0 || 1956 (pg1->pqflags & PQ_INTENT_QUEUED) != 0) { 1957 mutex_exit(&pg1->interlock); 1958 pg1 = NULL; 1959 } else { 1960 pg1->pqflags |= PQ_INTENT_QUEUED; 1961 mutex_exit(&pg1->interlock); 1962 } 1963 1964 if ((pg2->pqflags & PQ_INTENT_SET) == 0 || 1965 (pg2->pqflags & PQ_INTENT_QUEUED) != 0) { 1966 mutex_exit(&pg2->interlock); 1967 pg2 = NULL; 1968 } else { 1969 pg2->pqflags |= PQ_INTENT_QUEUED; 1970 mutex_exit(&pg2->interlock); 1971 } 1972 1973 if (pg1 != NULL) { 1974 uvmpdpol_pagerealize(pg1); 1975 } 1976 if (pg2 != NULL) { 1977 uvmpdpol_pagerealize(pg2); 1978 } 1979 } 1980 1981 /* 1982 * uvm_pagezero: zero fill a page 1983 * 1984 * => if page is part of an object then the object should be locked 1985 * to protect pg->flags. 1986 */ 1987 1988 void 1989 uvm_pagezero(struct vm_page *pg) 1990 { 1991 1992 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1993 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1994 } 1995 1996 /* 1997 * uvm_pagecopy: copy a page 1998 * 1999 * => if page is part of an object then the object should be locked 2000 * to protect pg->flags. 2001 */ 2002 2003 void 2004 uvm_pagecopy(struct vm_page *src, struct vm_page *dst) 2005 { 2006 2007 uvm_pagemarkdirty(dst, UVM_PAGE_STATUS_DIRTY); 2008 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); 2009 } 2010 2011 /* 2012 * uvm_pageismanaged: test it see that a page (specified by PA) is managed. 2013 */ 2014 2015 bool 2016 uvm_pageismanaged(paddr_t pa) 2017 { 2018 2019 return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID); 2020 } 2021 2022 /* 2023 * uvm_page_lookup_freelist: look up the free list for the specified page 2024 */ 2025 2026 int 2027 uvm_page_lookup_freelist(struct vm_page *pg) 2028 { 2029 uvm_physseg_t upm; 2030 2031 upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL); 2032 KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID); 2033 return uvm_physseg_get_free_list(upm); 2034 } 2035 2036 /* 2037 * uvm_page_owner_locked_p: return true if object associated with page is 2038 * locked. this is a weak check for runtime assertions only. 2039 */ 2040 2041 bool 2042 uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive) 2043 { 2044 2045 if (pg->uobject != NULL) { 2046 return exclusive 2047 ? rw_write_held(pg->uobject->vmobjlock) 2048 : rw_lock_held(pg->uobject->vmobjlock); 2049 } 2050 if (pg->uanon != NULL) { 2051 return exclusive 2052 ? 

/*
 * uvm_pagezero: zero fill a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagezero(struct vm_page *pg)
{

	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
}

/*
 * uvm_pagecopy: copy a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
{

	uvm_pagemarkdirty(dst, UVM_PAGE_STATUS_DIRTY);
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}

/*
 * uvm_pageismanaged: test whether a page (specified by PA) is managed.
 */

bool
uvm_pageismanaged(paddr_t pa)
{

	return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID);
}

/*
 * uvm_page_lookup_freelist: look up the free list for the specified page
 */

int
uvm_page_lookup_freelist(struct vm_page *pg)
{
	uvm_physseg_t upm;

	upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
	KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID);
	return uvm_physseg_get_free_list(upm);
}

/*
 * uvm_page_owner_locked_p: return true if the object associated with the
 * page is locked.  this is a weak check for runtime assertions only.
 */

bool
uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive)
{

	if (pg->uobject != NULL) {
		return exclusive
		    ? rw_write_held(pg->uobject->vmobjlock)
		    : rw_lock_held(pg->uobject->vmobjlock);
	}
	if (pg->uanon != NULL) {
		return exclusive
		    ? rw_write_held(pg->uanon->an_lock)
		    : rw_lock_held(pg->uanon->an_lock);
	}
	return true;
}

/*
 * uvm_pagereadonly_p: return true if the page should be mapped read-only
 */

bool
uvm_pagereadonly_p(struct vm_page *pg)
{
	struct uvm_object * const uobj = pg->uobject;

	KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock));
	KASSERT(uobj != NULL || rw_lock_held(pg->uanon->an_lock));
	if ((pg->flags & PG_RDONLY) != 0) {
		return true;
	}
	if (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) {
		return true;
	}
	if (uobj == NULL) {
		return false;
	}
	return UVM_OBJ_NEEDS_WRITEFAULT(uobj);
}

#ifdef PMAP_DIRECT
/*
 * Call pmap to translate the physical address into a virtual address and
 * run a callback on it.  Used to avoid actually mapping the pages; the
 * pmap most likely uses a direct map or equivalent.
 */
int
uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len,
	    int (*process)(void *, size_t, void *), void *arg)
{
	int error = 0;
	paddr_t pa;
	size_t todo;
	voff_t pgoff = (off & PAGE_MASK);
	struct vm_page *pg;

	KASSERT(npages > 0);
	KASSERT(len > 0);

	for (int i = 0; i < npages; i++) {
		pg = pgs[i];

		KASSERT(len > 0);

		/*
		 * Caller is responsible for ensuring all the pages are
		 * available.
		 */
		KASSERT(pg != NULL);
		KASSERT(pg != PGO_DONTCARE);

		pa = VM_PAGE_TO_PHYS(pg);
		todo = MIN(len, PAGE_SIZE - pgoff);

		error = pmap_direct_process(pa, pgoff, todo, process, arg);
		if (error)
			break;

		pgoff = 0;
		len -= todo;
	}

	KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len);
	return error;
}
#endif /* PMAP_DIRECT */
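
/*
 * A minimal sketch of a uvm_direct_process() consumer, assuming the caller
 * already holds an array "pgs" of "npages" resident pages backing the range
 * described by "off" and "len"; the checksum callback and the "sum"
 * accumulator are purely illustrative.  The callback is invoked once per
 * page with a direct-map virtual address and the number of bytes of the
 * request that fall within that page:
 *
 *	static int
 *	checksum_cb(void *va, size_t todo, void *arg)
 *	{
 *		uint32_t *sum = arg;
 *		const uint8_t *p = va;
 *
 *		while (todo--)
 *			*sum += *p++;
 *		return 0;		// non-zero aborts the walk
 *	}
 *
 *	uint32_t sum = 0;
 *	int error = uvm_direct_process(pgs, npages, off, len,
 *	    checksum_cb, &sum);
 */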

#if defined(DDB) || defined(DEBUGPRINT)

/*
 * uvm_page_printit: actually print the page
 */

static const char page_flagbits[] = UVM_PGFLAGBITS;
static const char page_pqflagbits[] = UVM_PQFLAGBITS;

void
uvm_page_printit(struct vm_page *pg, bool full,
    void (*pr)(const char *, ...))
{
	struct vm_page *tpg;
	struct uvm_object *uobj;
	struct pgflbucket *pgb;
	struct pgflist *pgl;
	char pgbuf[128];

	(*pr)("PAGE %p:\n", pg);
	snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
	(*pr)("  flags=%s\n", pgbuf);
	snprintb(pgbuf, sizeof(pgbuf), page_pqflagbits, pg->pqflags);
	(*pr)("  pqflags=%s\n", pgbuf);
	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
	    pg->uobject, pg->uanon, (long long)pg->offset);
	(*pr)("  loan_count=%d wire_count=%d bucket=%d freelist=%d\n",
	    pg->loan_count, pg->wire_count, uvm_page_get_bucket(pg),
	    uvm_page_get_freelist(pg));
	(*pr)("  pa=0x%lx\n", (long)VM_PAGE_TO_PHYS(pg));
#if defined(UVM_PAGE_TRKOWN)
	if (pg->flags & PG_BUSY)
		(*pr)("  owning process = %d.%d, tag=%s\n",
		    pg->owner, pg->lowner, pg->owner_tag);
	else
		(*pr)("  page not busy, no owner\n");
#else
	(*pr)("  [page ownership tracking disabled]\n");
#endif

	if (!full)
		return;

	/* cross-verify object/anon */
	if ((pg->flags & PG_FREE) == 0) {
		if (pg->flags & PG_ANON) {
			if (pg->uanon == NULL || pg->uanon->an_page != pg)
				(*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
				    (pg->uanon) ? pg->uanon->an_page : NULL);
			else
				(*pr)("  anon backpointer is OK\n");
		} else {
			uobj = pg->uobject;
			if (uobj) {
				(*pr)("  checking object list\n");
				tpg = uvm_pagelookup(uobj, pg->offset);
				if (tpg)
					(*pr)("  page found on object list\n");
				else
					(*pr)("  >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
			}
		}
	}

	/* cross-verify page queue */
	if (pg->flags & PG_FREE) {
		int fl = uvm_page_get_freelist(pg);
		int b = uvm_page_get_bucket(pg);
		pgb = uvm.page_free[fl].pgfl_buckets[b];
		pgl = &pgb->pgb_colors[VM_PGCOLOR(pg)];
		(*pr)("  checking pageq list\n");
		LIST_FOREACH(tpg, pgl, pageq.list) {
			if (tpg == pg) {
				break;
			}
		}
		if (tpg)
			(*pr)("  page found on pageq list\n");
		else
			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
	}
}

/*
 * uvm_page_printall - print a summary of all managed pages
 */

void
uvm_page_printall(void (*pr)(const char *, ...))
{
	uvm_physseg_t i;
	paddr_t pfn;
	struct vm_page *pg;

	(*pr)("%18s %4s %4s %18s %18s"
#ifdef UVM_PAGE_TRKOWN
	    " OWNER"
#endif
	    "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
	for (i = uvm_physseg_get_first();
	     uvm_physseg_valid_p(i);
	     i = uvm_physseg_get_next(i)) {
		for (pfn = uvm_physseg_get_start(i);
		     pfn < uvm_physseg_get_end(i);
		     pfn++) {
			pg = PHYS_TO_VM_PAGE(ptoa(pfn));

			(*pr)("%18p %04x %08x %18p %18p",
			    pg, pg->flags, pg->pqflags, pg->uobject,
			    pg->uanon);
#ifdef UVM_PAGE_TRKOWN
			if (pg->flags & PG_BUSY)
				(*pr)(" %d [%s]", pg->owner, pg->owner_tag);
#endif
			(*pr)("\n");
		}
	}
}

/*
 * uvm_page_print_freelists - print a summary of the free lists
 */

void
uvm_page_print_freelists(void (*pr)(const char *, ...))
{
	struct pgfreelist *pgfl;
	struct pgflbucket *pgb;
	int fl, b, c;

	(*pr)("There are %d freelists with %d buckets of %d colors.\n\n",
	    VM_NFREELIST, uvm.bucketcount, uvmexp.ncolors);

	for (fl = 0; fl < VM_NFREELIST; fl++) {
		pgfl = &uvm.page_free[fl];
		(*pr)("freelist(%d) @ %p\n", fl, pgfl);
		for (b = 0; b < uvm.bucketcount; b++) {
			pgb = uvm.page_free[fl].pgfl_buckets[b];
			(*pr)("    bucket(%d) @ %p, nfree = %d, lock @ %p:\n",
			    b, pgb, pgb->pgb_nfree,
			    &uvm_freelist_locks[b].lock);
			for (c = 0; c < uvmexp.ncolors; c++) {
				(*pr)("        color(%d) @ %p, ", c,
				    &pgb->pgb_colors[c]);
				(*pr)("first page = %p\n",
				    LIST_FIRST(&pgb->pgb_colors[c]));
			}
		}
	}
}

#endif /* DDB || DEBUGPRINT */
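
/*
 * A minimal sketch of driving these diagnostic printers outside of ddb,
 * assuming DEBUGPRINT is configured and "pa" is the physical address of a
 * managed page; any kernel printf-compatible routine can serve as the
 * "pr" callback:
 *
 *	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
 *
 *	if (pg != NULL)
 *		uvm_page_printit(pg, true, printf);	// full cross-checks
 *	uvm_page_print_freelists(printf);		// bucket/color summary
 */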