1 /* $NetBSD: uvm_page.c,v 1.252 2023/04/09 09:00:56 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1997 Charles D. Cranor and Washington University. 34 * Copyright (c) 1991, 1993, The Regents of the University of California. 35 * 36 * All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * The Mach Operating System project at Carnegie-Mellon University. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 66 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp 67 * 68 * 69 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 70 * All rights reserved. 71 * 72 * Permission to use, copy, modify and distribute this software and 73 * its documentation is hereby granted, provided that both the copyright 74 * notice and this permission notice appear in all copies of the 75 * software, derivative works or modified versions, and any portions 76 * thereof, and that both notices appear in supporting documentation. 77 * 78 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 79 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 80 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 81 * 82 * Carnegie Mellon requests users of this software to return to 83 * 84 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 85 * School of Computer Science 86 * Carnegie Mellon University 87 * Pittsburgh PA 15213-3890 88 * 89 * any improvements or extensions that they make and grant Carnegie the 90 * rights to redistribute these changes. 91 */ 92 93 /* 94 * uvm_page.c: page ops. 95 */ 96 97 #include <sys/cdefs.h> 98 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.252 2023/04/09 09:00:56 riastradh Exp $"); 99 100 #include "opt_ddb.h" 101 #include "opt_uvm.h" 102 #include "opt_uvmhist.h" 103 #include "opt_readahead.h" 104 105 #include <sys/param.h> 106 #include <sys/systm.h> 107 #include <sys/sched.h> 108 #include <sys/kernel.h> 109 #include <sys/vnode.h> 110 #include <sys/proc.h> 111 #include <sys/radixtree.h> 112 #include <sys/atomic.h> 113 #include <sys/cpu.h> 114 115 #include <ddb/db_active.h> 116 117 #include <uvm/uvm.h> 118 #include <uvm/uvm_ddb.h> 119 #include <uvm/uvm_pdpolicy.h> 120 #include <uvm/uvm_pgflcache.h> 121 122 /* 123 * number of pages per-CPU to reserve for the kernel. 124 */ 125 #ifndef UVM_RESERVED_PAGES_PER_CPU 126 #define UVM_RESERVED_PAGES_PER_CPU 5 127 #endif 128 int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; 129 130 /* 131 * physical memory size; 132 */ 133 psize_t physmem; 134 135 /* 136 * local variables 137 */ 138 139 /* 140 * these variables record the values returned by vm_page_bootstrap, 141 * for debugging purposes. The implementation of uvm_pageboot_alloc 142 * and pmap_startup here also uses them internally. 143 */ 144 145 static vaddr_t virtual_space_start; 146 static vaddr_t virtual_space_end; 147 148 /* 149 * we allocate an initial number of page colors in uvm_page_init(), 150 * and remember them. We may re-color pages as cache sizes are 151 * discovered during the autoconfiguration phase. But we can never 152 * free the initial set of buckets, since they are allocated using 153 * uvm_pageboot_alloc(). 154 */ 155 156 static size_t recolored_pages_memsize /* = 0 */; 157 static char *recolored_pages_mem; 158 159 /* 160 * freelist locks - one per bucket. 161 */ 162 163 union uvm_freelist_lock uvm_freelist_locks[PGFL_MAX_BUCKETS] 164 __cacheline_aligned; 165 166 /* 167 * basic NUMA information. 168 */ 169 170 static struct uvm_page_numa_region { 171 struct uvm_page_numa_region *next; 172 paddr_t start; 173 paddr_t size; 174 u_int numa_id; 175 } *uvm_page_numa_region; 176 177 #ifdef DEBUG 178 kmutex_t uvm_zerochecklock __cacheline_aligned; 179 vaddr_t uvm_zerocheckkva; 180 #endif /* DEBUG */ 181 182 /* 183 * These functions are reserved for uvm(9) internal use and are not 184 * exported in the header file uvm_physseg.h 185 * 186 * Thus they are redefined here. 
187 */ 188 void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *); 189 void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t); 190 191 /* returns a pgs array */ 192 struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t); 193 194 /* 195 * inline functions 196 */ 197 198 /* 199 * uvm_pageinsert: insert a page in the object. 200 * 201 * => caller must lock object 202 * => call should have already set pg's object and offset pointers 203 * and bumped the version counter 204 */ 205 206 static inline void 207 uvm_pageinsert_object(struct uvm_object *uobj, struct vm_page *pg) 208 { 209 210 KASSERT(uobj == pg->uobject); 211 KASSERT(rw_write_held(uobj->vmobjlock)); 212 KASSERT((pg->flags & PG_TABLED) == 0); 213 214 if ((pg->flags & PG_STAT) != 0) { 215 /* Cannot use uvm_pagegetdirty(): not yet in radix tree. */ 216 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 217 218 if ((pg->flags & PG_FILE) != 0) { 219 if (uobj->uo_npages == 0) { 220 struct vnode *vp = (struct vnode *)uobj; 221 mutex_enter(vp->v_interlock); 222 KASSERT((vp->v_iflag & VI_PAGES) == 0); 223 vp->v_iflag |= VI_PAGES; 224 vholdl(vp); 225 mutex_exit(vp->v_interlock); 226 } 227 if (UVM_OBJ_IS_VTEXT(uobj)) { 228 cpu_count(CPU_COUNT_EXECPAGES, 1); 229 } 230 cpu_count(CPU_COUNT_FILEUNKNOWN + status, 1); 231 } else { 232 cpu_count(CPU_COUNT_ANONUNKNOWN + status, 1); 233 } 234 } 235 pg->flags |= PG_TABLED; 236 uobj->uo_npages++; 237 } 238 239 static inline int 240 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) 241 { 242 const uint64_t idx = pg->offset >> PAGE_SHIFT; 243 int error; 244 245 KASSERT(rw_write_held(uobj->vmobjlock)); 246 247 error = radix_tree_insert_node(&uobj->uo_pages, idx, pg); 248 if (error != 0) { 249 return error; 250 } 251 if ((pg->flags & PG_CLEAN) == 0) { 252 uvm_obj_page_set_dirty(pg); 253 } 254 KASSERT(((pg->flags & PG_CLEAN) == 0) == 255 uvm_obj_page_dirty_p(pg)); 256 return 0; 257 } 258 259 /* 260 * uvm_page_remove: remove page from object. 261 * 262 * => caller must lock object 263 */ 264 265 static inline void 266 uvm_pageremove_object(struct uvm_object *uobj, struct vm_page *pg) 267 { 268 269 KASSERT(uobj == pg->uobject); 270 KASSERT(rw_write_held(uobj->vmobjlock)); 271 KASSERT(pg->flags & PG_TABLED); 272 273 if ((pg->flags & PG_STAT) != 0) { 274 /* Cannot use uvm_pagegetdirty(): no longer in radix tree. 
*/ 275 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 276 277 if ((pg->flags & PG_FILE) != 0) { 278 if (uobj->uo_npages == 1) { 279 struct vnode *vp = (struct vnode *)uobj; 280 mutex_enter(vp->v_interlock); 281 KASSERT((vp->v_iflag & VI_PAGES) != 0); 282 vp->v_iflag &= ~VI_PAGES; 283 holdrelel(vp); 284 mutex_exit(vp->v_interlock); 285 } 286 if (UVM_OBJ_IS_VTEXT(uobj)) { 287 cpu_count(CPU_COUNT_EXECPAGES, -1); 288 } 289 cpu_count(CPU_COUNT_FILEUNKNOWN + status, -1); 290 } else { 291 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 292 } 293 } 294 uobj->uo_npages--; 295 pg->flags &= ~PG_TABLED; 296 pg->uobject = NULL; 297 } 298 299 static inline void 300 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg) 301 { 302 struct vm_page *opg __unused; 303 304 KASSERT(rw_write_held(uobj->vmobjlock)); 305 306 opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT); 307 KASSERT(pg == opg); 308 } 309 310 static void 311 uvm_page_init_bucket(struct pgfreelist *pgfl, struct pgflbucket *pgb, int num) 312 { 313 int i; 314 315 pgb->pgb_nfree = 0; 316 for (i = 0; i < uvmexp.ncolors; i++) { 317 LIST_INIT(&pgb->pgb_colors[i]); 318 } 319 pgfl->pgfl_buckets[num] = pgb; 320 } 321 322 /* 323 * uvm_page_init: init the page system. called from uvm_init(). 324 * 325 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 326 */ 327 328 void 329 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) 330 { 331 static struct uvm_cpu boot_cpu __cacheline_aligned; 332 psize_t freepages, pagecount, bucketsize, n; 333 struct pgflbucket *pgb; 334 struct vm_page *pagearray; 335 char *bucketarray; 336 uvm_physseg_t bank; 337 int fl, b; 338 339 KASSERT(ncpu <= 1); 340 341 /* 342 * init the page queues and free page queue locks, except the 343 * free list; we allocate that later (with the initial vm_page 344 * structures). 345 */ 346 347 curcpu()->ci_data.cpu_uvm = &boot_cpu; 348 uvmpdpol_init(); 349 for (b = 0; b < __arraycount(uvm_freelist_locks); b++) { 350 mutex_init(&uvm_freelist_locks[b].lock, MUTEX_DEFAULT, IPL_VM); 351 } 352 353 /* 354 * allocate vm_page structures. 355 */ 356 357 /* 358 * sanity check: 359 * before calling this function the MD code is expected to register 360 * some free RAM with the uvm_page_physload() function. our job 361 * now is to allocate vm_page structures for this memory. 362 */ 363 364 if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID) 365 panic("uvm_page_bootstrap: no memory pre-allocated"); 366 367 /* 368 * first calculate the number of free pages... 369 * 370 * note that we use start/end rather than avail_start/avail_end. 371 * this allows us to allocate extra vm_page structures in case we 372 * want to return some memory to the pool after booting. 373 */ 374 375 freepages = 0; 376 377 for (bank = uvm_physseg_get_first(); 378 uvm_physseg_valid_p(bank) ; 379 bank = uvm_physseg_get_next(bank)) { 380 freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank)); 381 } 382 383 /* 384 * Let MD code initialize the number of colors, or default 385 * to 1 color if MD code doesn't care. 386 */ 387 if (uvmexp.ncolors == 0) 388 uvmexp.ncolors = 1; 389 uvmexp.colormask = uvmexp.ncolors - 1; 390 KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); 391 392 /* We always start with only 1 bucket. */ 393 uvm.bucketcount = 1; 394 395 /* 396 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 397 * use. for each page of memory we use we need a vm_page structure. 
398 * thus, the total number of pages we can use is the total size of 399 * the memory divided by the PAGE_SIZE plus the size of the vm_page 400 * structure. we add one to freepages as a fudge factor to avoid 401 * truncation errors (since we can only allocate in terms of whole 402 * pages). 403 */ 404 pagecount = ((freepages + 1) << PAGE_SHIFT) / 405 (PAGE_SIZE + sizeof(struct vm_page)); 406 bucketsize = offsetof(struct pgflbucket, pgb_colors[uvmexp.ncolors]); 407 bucketsize = roundup2(bucketsize, coherency_unit); 408 bucketarray = (void *)uvm_pageboot_alloc( 409 bucketsize * VM_NFREELIST + 410 pagecount * sizeof(struct vm_page)); 411 pagearray = (struct vm_page *) 412 (bucketarray + bucketsize * VM_NFREELIST); 413 414 for (fl = 0; fl < VM_NFREELIST; fl++) { 415 pgb = (struct pgflbucket *)(bucketarray + bucketsize * fl); 416 uvm_page_init_bucket(&uvm.page_free[fl], pgb, 0); 417 } 418 memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 419 420 /* 421 * init the freelist cache in the disabled state. 422 */ 423 uvm_pgflcache_init(); 424 425 /* 426 * init the vm_page structures and put them in the correct place. 427 */ 428 /* First init the extent */ 429 430 for (bank = uvm_physseg_get_first(), 431 uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount); 432 uvm_physseg_valid_p(bank); 433 bank = uvm_physseg_get_next(bank)) { 434 435 n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank); 436 uvm_physseg_seg_alloc_from_slab(bank, n); 437 uvm_physseg_init_seg(bank, pagearray); 438 439 /* set up page array pointers */ 440 pagearray += n; 441 pagecount -= n; 442 } 443 444 /* 445 * pass up the values of virtual_space_start and 446 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 447 * layers of the VM. 448 */ 449 450 *kvm_startp = round_page(virtual_space_start); 451 *kvm_endp = trunc_page(virtual_space_end); 452 453 /* 454 * init various thresholds. 455 */ 456 457 uvmexp.reserve_pagedaemon = 1; 458 uvmexp.reserve_kernel = vm_page_reserve_kernel; 459 460 /* 461 * done! 462 */ 463 464 uvm.page_init_done = true; 465 } 466 467 /* 468 * uvm_pgfl_lock: lock all freelist buckets 469 */ 470 471 void 472 uvm_pgfl_lock(void) 473 { 474 int i; 475 476 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 477 mutex_spin_enter(&uvm_freelist_locks[i].lock); 478 } 479 } 480 481 /* 482 * uvm_pgfl_unlock: unlock all freelist buckets 483 */ 484 485 void 486 uvm_pgfl_unlock(void) 487 { 488 int i; 489 490 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 491 mutex_spin_exit(&uvm_freelist_locks[i].lock); 492 } 493 } 494 495 /* 496 * uvm_setpagesize: set the page size 497 * 498 * => sets page_shift and page_mask from uvmexp.pagesize. 499 */ 500 501 void 502 uvm_setpagesize(void) 503 { 504 505 /* 506 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE 507 * to be a constant (indicated by being a non-zero value). 
508 */ 509 if (uvmexp.pagesize == 0) { 510 if (PAGE_SIZE == 0) 511 panic("uvm_setpagesize: uvmexp.pagesize not set"); 512 uvmexp.pagesize = PAGE_SIZE; 513 } 514 uvmexp.pagemask = uvmexp.pagesize - 1; 515 if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 516 panic("uvm_setpagesize: page size %u (%#x) not a power of two", 517 uvmexp.pagesize, uvmexp.pagesize); 518 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 519 if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 520 break; 521 } 522 523 /* 524 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 525 */ 526 527 vaddr_t 528 uvm_pageboot_alloc(vsize_t size) 529 { 530 static bool initialized = false; 531 vaddr_t addr; 532 #if !defined(PMAP_STEAL_MEMORY) 533 vaddr_t vaddr; 534 paddr_t paddr; 535 #endif 536 537 /* 538 * on first call to this function, initialize ourselves. 539 */ 540 if (initialized == false) { 541 pmap_virtual_space(&virtual_space_start, &virtual_space_end); 542 543 /* round it the way we like it */ 544 virtual_space_start = round_page(virtual_space_start); 545 virtual_space_end = trunc_page(virtual_space_end); 546 547 initialized = true; 548 } 549 550 /* round to page size */ 551 size = round_page(size); 552 uvmexp.bootpages += atop(size); 553 554 #if defined(PMAP_STEAL_MEMORY) 555 556 /* 557 * defer bootstrap allocation to MD code (it may want to allocate 558 * from a direct-mapped segment). pmap_steal_memory should adjust 559 * virtual_space_start/virtual_space_end if necessary. 560 */ 561 562 addr = pmap_steal_memory(size, &virtual_space_start, 563 &virtual_space_end); 564 565 return addr; 566 567 #else /* !PMAP_STEAL_MEMORY */ 568 569 /* 570 * allocate virtual memory for this request 571 */ 572 if (virtual_space_start == virtual_space_end || 573 (virtual_space_end - virtual_space_start) < size) 574 panic("uvm_pageboot_alloc: out of virtual space"); 575 576 addr = virtual_space_start; 577 578 #ifdef PMAP_GROWKERNEL 579 /* 580 * If the kernel pmap can't map the requested space, 581 * then allocate more resources for it. 582 */ 583 if (uvm_maxkaddr < (addr + size)) { 584 uvm_maxkaddr = pmap_growkernel(addr + size); 585 if (uvm_maxkaddr < (addr + size)) 586 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 587 } 588 #endif 589 590 virtual_space_start += size; 591 592 /* 593 * allocate and mapin physical pages to back new virtual pages 594 */ 595 596 for (vaddr = round_page(addr) ; vaddr < addr + size ; 597 vaddr += PAGE_SIZE) { 598 599 if (!uvm_page_physget(&paddr)) 600 panic("uvm_pageboot_alloc: out of memory"); 601 602 /* 603 * Note this memory is no longer managed, so using 604 * pmap_kenter is safe. 605 */ 606 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 607 } 608 pmap_update(pmap_kernel()); 609 return addr; 610 #endif /* PMAP_STEAL_MEMORY */ 611 } 612 613 #if !defined(PMAP_STEAL_MEMORY) 614 /* 615 * uvm_page_physget: "steal" one page from the vm_physmem structure. 616 * 617 * => attempt to allocate it off the end of a segment in which the "avail" 618 * values match the start/end values. if we can't do that, then we 619 * will advance both values (making them equal, and removing some 620 * vm_page structures from the non-avail area). 621 * => return false if out of memory. 
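 * => may only be used during bootstrap: the code panics if it is
 *    called after uvm_page_init() has completed.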
622 */ 623 624 /* subroutine: try to allocate from memory chunks on the specified freelist */ 625 static bool uvm_page_physget_freelist(paddr_t *, int); 626 627 static bool 628 uvm_page_physget_freelist(paddr_t *paddrp, int freelist) 629 { 630 uvm_physseg_t lcv; 631 632 /* pass 1: try allocating from a matching end */ 633 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 634 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 635 #else 636 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 637 #endif 638 { 639 if (uvm.page_init_done == true) 640 panic("uvm_page_physget: called _after_ bootstrap"); 641 642 /* Try to match at front or back on unused segment */ 643 if (uvm_page_physunload(lcv, freelist, paddrp)) 644 return true; 645 } 646 647 /* pass2: forget about matching ends, just allocate something */ 648 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 649 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 650 #else 651 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 652 #endif 653 { 654 /* Try the front regardless. */ 655 if (uvm_page_physunload_force(lcv, freelist, paddrp)) 656 return true; 657 } 658 return false; 659 } 660 661 bool 662 uvm_page_physget(paddr_t *paddrp) 663 { 664 int i; 665 666 /* try in the order of freelist preference */ 667 for (i = 0; i < VM_NFREELIST; i++) 668 if (uvm_page_physget_freelist(paddrp, i) == true) 669 return (true); 670 return (false); 671 } 672 #endif /* PMAP_STEAL_MEMORY */ 673 674 /* 675 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages 676 * back from an I/O mapping (ugh!). used in some MD code as well. 677 */ 678 struct vm_page * 679 uvm_phys_to_vm_page(paddr_t pa) 680 { 681 paddr_t pf = atop(pa); 682 paddr_t off; 683 uvm_physseg_t upm; 684 685 upm = uvm_physseg_find(pf, &off); 686 if (upm != UVM_PHYSSEG_TYPE_INVALID) 687 return uvm_physseg_get_pg(upm, off); 688 return(NULL); 689 } 690 691 paddr_t 692 uvm_vm_page_to_phys(const struct vm_page *pg) 693 { 694 695 return pg->phys_addr & ~(PAGE_SIZE - 1); 696 } 697 698 /* 699 * uvm_page_numa_load: load NUMA range description. 700 */ 701 void 702 uvm_page_numa_load(paddr_t start, paddr_t size, u_int numa_id) 703 { 704 struct uvm_page_numa_region *d; 705 706 KASSERT(numa_id < PGFL_MAX_BUCKETS); 707 708 d = kmem_alloc(sizeof(*d), KM_SLEEP); 709 d->start = start; 710 d->size = size; 711 d->numa_id = numa_id; 712 d->next = uvm_page_numa_region; 713 uvm_page_numa_region = d; 714 } 715 716 /* 717 * uvm_page_numa_lookup: lookup NUMA node for the given page. 718 */ 719 static u_int 720 uvm_page_numa_lookup(struct vm_page *pg) 721 { 722 struct uvm_page_numa_region *d; 723 static bool warned; 724 paddr_t pa; 725 726 KASSERT(uvm_page_numa_region != NULL); 727 728 pa = VM_PAGE_TO_PHYS(pg); 729 for (d = uvm_page_numa_region; d != NULL; d = d->next) { 730 if (pa >= d->start && pa < d->start + d->size) { 731 return d->numa_id; 732 } 733 } 734 735 if (!warned) { 736 printf("uvm_page_numa_lookup: failed, first pg=%p pa=%#" 737 PRIxPADDR "\n", pg, VM_PAGE_TO_PHYS(pg)); 738 warned = true; 739 } 740 741 return 0; 742 } 743 744 /* 745 * uvm_page_redim: adjust freelist dimensions if they have changed. 
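 *
 * New bucket headers are allocated with kmem_zalloc(), every free page is
 * moved onto its new bucket/color list while all the freelist locks are
 * held, and the old bucket memory (except the initial bootstrap
 * allocation) is freed afterwards.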
746  */
747 
748 static void
749 uvm_page_redim(int newncolors, int newnbuckets)
750 {
751 	struct pgfreelist npgfl;
752 	struct pgflbucket *opgb, *npgb;
753 	struct pgflist *ohead, *nhead;
754 	struct vm_page *pg;
755 	size_t bucketsize, bucketmemsize, oldbucketmemsize;
756 	int fl, ob, oc, nb, nc, obuckets, ocolors;
757 	char *bucketarray, *oldbucketmem, *bucketmem;
758 
759 	KASSERT(((newncolors - 1) & newncolors) == 0);
760 
761 	/* Anything to do? */
762 	if (newncolors <= uvmexp.ncolors &&
763 	    newnbuckets == uvm.bucketcount) {
764 		return;
765 	}
766 	if (uvm.page_init_done == false) {
767 		uvmexp.ncolors = newncolors;
768 		return;
769 	}
770 
771 	bucketsize = offsetof(struct pgflbucket, pgb_colors[newncolors]);
772 	bucketsize = roundup2(bucketsize, coherency_unit);
773 	bucketmemsize = bucketsize * newnbuckets * VM_NFREELIST +
774 	    coherency_unit - 1;
775 	bucketmem = kmem_zalloc(bucketmemsize, KM_SLEEP);
776 	bucketarray = (char *)roundup2((uintptr_t)bucketmem, coherency_unit);
777 
778 	ocolors = uvmexp.ncolors;
779 	obuckets = uvm.bucketcount;
780 
781 	/* Freelist cache mustn't be enabled. */
782 	uvm_pgflcache_pause();
783 
784 	/* Make sure we should still do this. */
785 	uvm_pgfl_lock();
786 	if (newncolors <= uvmexp.ncolors &&
787 	    newnbuckets == uvm.bucketcount) {
788 		uvm_pgfl_unlock();
789 		uvm_pgflcache_resume();
790 		kmem_free(bucketmem, bucketmemsize);
791 		return;
792 	}
793 
794 	uvmexp.ncolors = newncolors;
795 	uvmexp.colormask = uvmexp.ncolors - 1;
796 	uvm.bucketcount = newnbuckets;
797 
798 	for (fl = 0; fl < VM_NFREELIST; fl++) {
799 		/* Init new buckets in new freelist. */
800 		memset(&npgfl, 0, sizeof(npgfl));
801 		for (nb = 0; nb < newnbuckets; nb++) {
802 			npgb = (struct pgflbucket *)bucketarray;
803 			uvm_page_init_bucket(&npgfl, npgb, nb);
804 			bucketarray += bucketsize;
805 		}
806 		/* Now transfer pages from the old freelist. */
807 		for (nb = ob = 0; ob < obuckets; ob++) {
808 			opgb = uvm.page_free[fl].pgfl_buckets[ob];
809 			for (oc = 0; oc < ocolors; oc++) {
810 				ohead = &opgb->pgb_colors[oc];
811 				while ((pg = LIST_FIRST(ohead)) != NULL) {
812 					LIST_REMOVE(pg, pageq.list);
813 					/*
814 					 * Here we decide on the NEW color &
815 					 * bucket for the page.  For NUMA
816 					 * we'll use the info that the
817 					 * hardware gave us.  For non-NUMA
818 					 * we take the physical page frame
819 					 * number and cache color into
820 					 * account.  We do this to try and
821 					 * avoid defeating any memory
822 					 * interleaving in the hardware.
823 					 */
824 					KASSERT(
825 					    uvm_page_get_bucket(pg) == ob);
826 					KASSERT(fl ==
827 					    uvm_page_get_freelist(pg));
828 					if (uvm_page_numa_region != NULL) {
829 						nb = uvm_page_numa_lookup(pg);
830 					} else {
831 						nb = atop(VM_PAGE_TO_PHYS(pg))
832 						    / uvmexp.ncolors / 8
833 						    % newnbuckets;
834 					}
835 					uvm_page_set_bucket(pg, nb);
836 					npgb = npgfl.pgfl_buckets[nb];
837 					npgb->pgb_nfree++;
838 					nc = VM_PGCOLOR(pg);
839 					nhead = &npgb->pgb_colors[nc];
840 					LIST_INSERT_HEAD(nhead, pg, pageq.list);
841 				}
842 			}
843 		}
844 		/* Install the new freelist. */
845 		memcpy(&uvm.page_free[fl], &npgfl, sizeof(npgfl));
846 	}
847 
848 	/* Unlock and free the old memory. */
849 	oldbucketmemsize = recolored_pages_memsize;
850 	oldbucketmem = recolored_pages_mem;
851 	recolored_pages_memsize = bucketmemsize;
852 	recolored_pages_mem = bucketmem;
853 
854 	uvm_pgfl_unlock();
855 	uvm_pgflcache_resume();
856 
857 	if (oldbucketmemsize) {
858 		kmem_free(oldbucketmem, oldbucketmemsize);
859 	}
860 
861 	/*
862 	 * uvm_pager_realloc_emerg() calls uvm_km_alloc(), which may want
863 	 * to hold a freelist lock, so it must run after the locks are dropped.
864 */ 865 uvm_pager_realloc_emerg(); 866 } 867 868 /* 869 * uvm_page_recolor: Recolor the pages if the new color count is 870 * larger than the old one. 871 */ 872 873 void 874 uvm_page_recolor(int newncolors) 875 { 876 877 uvm_page_redim(newncolors, uvm.bucketcount); 878 } 879 880 /* 881 * uvm_page_rebucket: Determine a bucket structure and redim the free 882 * lists to match. 883 */ 884 885 void 886 uvm_page_rebucket(void) 887 { 888 u_int min_numa, max_numa, npackage, shift; 889 struct cpu_info *ci, *ci2, *ci3; 890 CPU_INFO_ITERATOR cii; 891 892 /* 893 * If we have more than one NUMA node, and the maximum NUMA node ID 894 * is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution 895 * for free pages. 896 */ 897 min_numa = (u_int)-1; 898 max_numa = 0; 899 for (CPU_INFO_FOREACH(cii, ci)) { 900 if (ci->ci_numa_id < min_numa) { 901 min_numa = ci->ci_numa_id; 902 } 903 if (ci->ci_numa_id > max_numa) { 904 max_numa = ci->ci_numa_id; 905 } 906 } 907 if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) { 908 aprint_debug("UVM: using NUMA allocation scheme\n"); 909 for (CPU_INFO_FOREACH(cii, ci)) { 910 ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id; 911 } 912 uvm_page_redim(uvmexp.ncolors, max_numa + 1); 913 return; 914 } 915 916 /* 917 * Otherwise we'll go with a scheme to maximise L2/L3 cache locality 918 * and minimise lock contention. Count the total number of CPU 919 * packages, and then try to distribute the buckets among CPU 920 * packages evenly. 921 */ 922 npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE1ST]; 923 924 /* 925 * Figure out how to arrange the packages & buckets, and the total 926 * number of buckets we need. XXX 2 may not be the best factor. 927 */ 928 for (shift = 0; npackage > PGFL_MAX_BUCKETS; shift++) { 929 npackage >>= 1; 930 } 931 uvm_page_redim(uvmexp.ncolors, npackage); 932 933 /* 934 * Now tell each CPU which bucket to use. In the outer loop, scroll 935 * through all CPU packages. 936 */ 937 npackage = 0; 938 ci = curcpu(); 939 ci2 = ci->ci_sibling[CPUREL_PACKAGE1ST]; 940 do { 941 /* 942 * In the inner loop, scroll through all CPUs in the package 943 * and assign the same bucket ID. 944 */ 945 ci3 = ci2; 946 do { 947 ci3->ci_data.cpu_uvm->pgflbucket = npackage >> shift; 948 ci3 = ci3->ci_sibling[CPUREL_PACKAGE]; 949 } while (ci3 != ci2); 950 npackage++; 951 ci2 = ci2->ci_sibling[CPUREL_PACKAGE1ST]; 952 } while (ci2 != ci->ci_sibling[CPUREL_PACKAGE1ST]); 953 954 aprint_debug("UVM: using package allocation scheme, " 955 "%d package(s) per bucket\n", 1 << shift); 956 } 957 958 /* 959 * uvm_cpu_attach: initialize per-CPU data structures. 960 */ 961 962 void 963 uvm_cpu_attach(struct cpu_info *ci) 964 { 965 struct uvm_cpu *ucpu; 966 967 /* Already done in uvm_page_init(). */ 968 if (!CPU_IS_PRIMARY(ci)) { 969 /* Add more reserve pages for this CPU. */ 970 uvmexp.reserve_kernel += vm_page_reserve_kernel; 971 972 /* Allocate per-CPU data structures. */ 973 ucpu = kmem_zalloc(sizeof(struct uvm_cpu) + coherency_unit - 1, 974 KM_SLEEP); 975 ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu, 976 coherency_unit); 977 ci->ci_data.cpu_uvm = ucpu; 978 } else { 979 ucpu = ci->ci_data.cpu_uvm; 980 } 981 982 uvmpdpol_init_cpu(ucpu); 983 984 /* 985 * Attach RNG source for this CPU's VM events 986 */ 987 rnd_attach_source(&ucpu->rs, ci->ci_data.cpu_name, RND_TYPE_VM, 988 RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE| 989 RND_FLAG_ESTIMATE_VALUE); 990 } 991 992 /* 993 * uvm_availmem: fetch the total amount of free memory in pages. 
this can 994 * have a detrimental effect on performance due to false sharing; don't call 995 * unless needed. 996 * 997 * some users can request the amount of free memory so often that it begins 998 * to impact upon performance. if calling frequently and an inexact value 999 * is okay, call with cached = true. 1000 */ 1001 1002 int 1003 uvm_availmem(bool cached) 1004 { 1005 int64_t fp; 1006 1007 cpu_count_sync(cached); 1008 if ((fp = cpu_count_get(CPU_COUNT_FREEPAGES)) < 0) { 1009 /* 1010 * XXXAD could briefly go negative because it's impossible 1011 * to get a clean snapshot. address this for other counters 1012 * used as running totals before NetBSD 10 although less 1013 * important for those. 1014 */ 1015 fp = 0; 1016 } 1017 return (int)fp; 1018 } 1019 1020 /* 1021 * uvm_pagealloc_pgb: helper routine that tries to allocate any color from a 1022 * specific freelist and specific bucket only. 1023 * 1024 * => must be at IPL_VM or higher to protect per-CPU data structures. 1025 */ 1026 1027 static struct vm_page * 1028 uvm_pagealloc_pgb(struct uvm_cpu *ucpu, int f, int b, int *trycolorp, int flags) 1029 { 1030 int c, trycolor, colormask; 1031 struct pgflbucket *pgb; 1032 struct vm_page *pg; 1033 kmutex_t *lock; 1034 bool fill; 1035 1036 /* 1037 * Skip the bucket if empty, no lock needed. There could be many 1038 * empty freelists/buckets. 1039 */ 1040 pgb = uvm.page_free[f].pgfl_buckets[b]; 1041 if (pgb->pgb_nfree == 0) { 1042 return NULL; 1043 } 1044 1045 /* Skip bucket if low on memory. */ 1046 lock = &uvm_freelist_locks[b].lock; 1047 mutex_spin_enter(lock); 1048 if (__predict_false(pgb->pgb_nfree <= uvmexp.reserve_kernel)) { 1049 if ((flags & UVM_PGA_USERESERVE) == 0 || 1050 (pgb->pgb_nfree <= uvmexp.reserve_pagedaemon && 1051 curlwp != uvm.pagedaemon_lwp)) { 1052 mutex_spin_exit(lock); 1053 return NULL; 1054 } 1055 fill = false; 1056 } else { 1057 fill = true; 1058 } 1059 1060 /* Try all page colors as needed. */ 1061 c = trycolor = *trycolorp; 1062 colormask = uvmexp.colormask; 1063 do { 1064 pg = LIST_FIRST(&pgb->pgb_colors[c]); 1065 if (__predict_true(pg != NULL)) { 1066 /* 1067 * Got a free page! PG_FREE must be cleared under 1068 * lock because of uvm_pglistalloc(). 1069 */ 1070 LIST_REMOVE(pg, pageq.list); 1071 KASSERT(pg->flags == PG_FREE); 1072 pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE; 1073 pgb->pgb_nfree--; 1074 CPU_COUNT(CPU_COUNT_FREEPAGES, -1); 1075 1076 /* 1077 * While we have the bucket locked and our data 1078 * structures fresh in L1 cache, we have an ideal 1079 * opportunity to grab some pages for the freelist 1080 * cache without causing extra contention. Only do 1081 * so if we found pages in this CPU's preferred 1082 * bucket. 1083 */ 1084 if (__predict_true(b == ucpu->pgflbucket && fill)) { 1085 uvm_pgflcache_fill(ucpu, f, b, c); 1086 } 1087 mutex_spin_exit(lock); 1088 KASSERT(uvm_page_get_bucket(pg) == b); 1089 CPU_COUNT(c == trycolor ? 1090 CPU_COUNT_COLORHIT : CPU_COUNT_COLORMISS, 1); 1091 CPU_COUNT(CPU_COUNT_CPUMISS, 1); 1092 *trycolorp = c; 1093 return pg; 1094 } 1095 c = (c + 1) & colormask; 1096 } while (c != trycolor); 1097 mutex_spin_exit(lock); 1098 1099 return NULL; 1100 } 1101 1102 /* 1103 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat that allocates 1104 * any color from any bucket, in a specific freelist. 1105 * 1106 * => must be at IPL_VM or higher to protect per-CPU data structures. 
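 * => tries the per-CPU freelist cache first, then this CPU's preferred
 *    bucket, then the remaining buckets in order.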
1107 */ 1108 1109 static struct vm_page * 1110 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int f, int *trycolorp, int flags) 1111 { 1112 int b, trybucket, bucketcount; 1113 struct vm_page *pg; 1114 1115 /* Try for the exact thing in the per-CPU cache. */ 1116 if ((pg = uvm_pgflcache_alloc(ucpu, f, *trycolorp)) != NULL) { 1117 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1118 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1119 return pg; 1120 } 1121 1122 /* Walk through all buckets, trying our preferred bucket first. */ 1123 trybucket = ucpu->pgflbucket; 1124 b = trybucket; 1125 bucketcount = uvm.bucketcount; 1126 do { 1127 pg = uvm_pagealloc_pgb(ucpu, f, b, trycolorp, flags); 1128 if (pg != NULL) { 1129 return pg; 1130 } 1131 b = (b + 1 == bucketcount ? 0 : b + 1); 1132 } while (b != trybucket); 1133 1134 return NULL; 1135 } 1136 1137 /* 1138 * uvm_pagealloc_strat: allocate vm_page from a particular free list. 1139 * 1140 * => return null if no pages free 1141 * => wake up pagedaemon if number of free pages drops below low water mark 1142 * => if obj != NULL, obj must be locked (to put in obj's tree) 1143 * => if anon != NULL, anon must be locked (to put in anon) 1144 * => only one of obj or anon can be non-null 1145 * => caller must activate/deactivate page if it is not wired. 1146 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 1147 * => policy decision: it is more important to pull a page off of the 1148 * appropriate priority free list than it is to get a page from the 1149 * correct bucket or color bin. This is because we live with the 1150 * consequences of a bad free list decision for the entire 1151 * lifetime of the page, e.g. if the page comes from memory that 1152 * is slower to access. 1153 */ 1154 1155 struct vm_page * 1156 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, 1157 int flags, int strat, int free_list) 1158 { 1159 int color, lcv, error, s; 1160 struct uvm_cpu *ucpu; 1161 struct vm_page *pg; 1162 lwp_t *l; 1163 1164 KASSERT(obj == NULL || anon == NULL); 1165 KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); 1166 KASSERT(off == trunc_page(off)); 1167 KASSERT(obj == NULL || rw_write_held(obj->vmobjlock)); 1168 KASSERT(anon == NULL || anon->an_lock == NULL || 1169 rw_write_held(anon->an_lock)); 1170 1171 /* 1172 * This implements a global round-robin page coloring 1173 * algorithm. 1174 */ 1175 1176 s = splvm(); 1177 ucpu = curcpu()->ci_data.cpu_uvm; 1178 if (flags & UVM_FLAG_COLORMATCH) { 1179 color = atop(off) & uvmexp.colormask; 1180 } else { 1181 color = ucpu->pgflcolor; 1182 } 1183 1184 /* 1185 * fail if any of these conditions is true: 1186 * [1] there really are no free pages, or 1187 * [2] only kernel "reserved" pages remain and 1188 * reserved pages have not been requested. 1189 * [3] only pagedaemon "reserved" pages remain and 1190 * the requestor isn't the pagedaemon. 1191 * we make kernel reserve pages available if called by a 1192 * kernel thread. 1193 */ 1194 l = curlwp; 1195 if (__predict_true(l != NULL) && (l->l_flag & LW_SYSTEM) != 0) { 1196 flags |= UVM_PGA_USERESERVE; 1197 } 1198 1199 again: 1200 switch (strat) { 1201 case UVM_PGA_STRAT_NORMAL: 1202 /* Check freelists: descending priority (ascending id) order. */ 1203 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1204 pg = uvm_pagealloc_pgfl(ucpu, lcv, &color, flags); 1205 if (pg != NULL) { 1206 goto gotit; 1207 } 1208 } 1209 1210 /* No pages free! Have pagedaemon free some memory. 
*/ 1211 splx(s); 1212 uvm_kick_pdaemon(); 1213 return NULL; 1214 1215 case UVM_PGA_STRAT_ONLY: 1216 case UVM_PGA_STRAT_FALLBACK: 1217 /* Attempt to allocate from the specified free list. */ 1218 KASSERT(free_list >= 0); 1219 KASSERT(free_list < VM_NFREELIST); 1220 pg = uvm_pagealloc_pgfl(ucpu, free_list, &color, flags); 1221 if (pg != NULL) { 1222 goto gotit; 1223 } 1224 1225 /* Fall back, if possible. */ 1226 if (strat == UVM_PGA_STRAT_FALLBACK) { 1227 strat = UVM_PGA_STRAT_NORMAL; 1228 goto again; 1229 } 1230 1231 /* No pages free! Have pagedaemon free some memory. */ 1232 splx(s); 1233 uvm_kick_pdaemon(); 1234 return NULL; 1235 1236 case UVM_PGA_STRAT_NUMA: 1237 /* 1238 * NUMA strategy (experimental): allocating from the correct 1239 * bucket is more important than observing freelist 1240 * priority. Look only to the current NUMA node; if that 1241 * fails, we need to look to other NUMA nodes, so retry with 1242 * the normal strategy. 1243 */ 1244 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1245 pg = uvm_pgflcache_alloc(ucpu, lcv, color); 1246 if (pg != NULL) { 1247 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1248 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1249 goto gotit; 1250 } 1251 pg = uvm_pagealloc_pgb(ucpu, lcv, 1252 ucpu->pgflbucket, &color, flags); 1253 if (pg != NULL) { 1254 goto gotit; 1255 } 1256 } 1257 strat = UVM_PGA_STRAT_NORMAL; 1258 goto again; 1259 1260 default: 1261 panic("uvm_pagealloc_strat: bad strat %d", strat); 1262 /* NOTREACHED */ 1263 } 1264 1265 gotit: 1266 /* 1267 * We now know which color we actually allocated from; set 1268 * the next color accordingly. 1269 */ 1270 1271 ucpu->pgflcolor = (color + 1) & uvmexp.colormask; 1272 1273 /* 1274 * while still at IPL_VM, update allocation statistics. 1275 */ 1276 1277 if (anon) { 1278 CPU_COUNT(CPU_COUNT_ANONCLEAN, 1); 1279 } 1280 splx(s); 1281 KASSERT(pg->flags == (PG_BUSY|PG_CLEAN|PG_FAKE)); 1282 1283 /* 1284 * assign the page to the object. as the page was free, we know 1285 * that pg->uobject and pg->uanon are NULL. we only need to take 1286 * the page's interlock if we are changing the values. 1287 */ 1288 if (anon != NULL || obj != NULL) { 1289 mutex_enter(&pg->interlock); 1290 } 1291 pg->offset = off; 1292 pg->uobject = obj; 1293 pg->uanon = anon; 1294 KASSERT(uvm_page_owner_locked_p(pg, true)); 1295 if (anon) { 1296 anon->an_page = pg; 1297 pg->flags |= PG_ANON; 1298 mutex_exit(&pg->interlock); 1299 } else if (obj) { 1300 /* 1301 * set PG_FILE|PG_AOBJ before the first uvm_pageinsert. 1302 */ 1303 if (UVM_OBJ_IS_VNODE(obj)) { 1304 pg->flags |= PG_FILE; 1305 } else if (UVM_OBJ_IS_AOBJ(obj)) { 1306 pg->flags |= PG_AOBJ; 1307 } 1308 uvm_pageinsert_object(obj, pg); 1309 mutex_exit(&pg->interlock); 1310 error = uvm_pageinsert_tree(obj, pg); 1311 if (error != 0) { 1312 mutex_enter(&pg->interlock); 1313 uvm_pageremove_object(obj, pg); 1314 mutex_exit(&pg->interlock); 1315 uvm_pagefree(pg); 1316 return NULL; 1317 } 1318 } 1319 1320 #if defined(UVM_PAGE_TRKOWN) 1321 pg->owner_tag = NULL; 1322 #endif 1323 UVM_PAGE_OWN(pg, "new alloc"); 1324 1325 if (flags & UVM_PGA_ZERO) { 1326 /* A zero'd page is not clean. 
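Its zeroed contents exist only in memory, so an owned page must be marked
dirty rather than be treated as a valid copy of its backing store.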
*/ 1327 if (obj != NULL || anon != NULL) { 1328 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1329 } 1330 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1331 } 1332 1333 return(pg); 1334 } 1335 1336 /* 1337 * uvm_pagereplace: replace a page with another 1338 * 1339 * => object must be locked 1340 * => page interlocks must be held 1341 */ 1342 1343 void 1344 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) 1345 { 1346 struct uvm_object *uobj = oldpg->uobject; 1347 struct vm_page *pg __diagused; 1348 uint64_t idx; 1349 1350 KASSERT((oldpg->flags & PG_TABLED) != 0); 1351 KASSERT(uobj != NULL); 1352 KASSERT((newpg->flags & PG_TABLED) == 0); 1353 KASSERT(newpg->uobject == NULL); 1354 KASSERT(rw_write_held(uobj->vmobjlock)); 1355 KASSERT(mutex_owned(&oldpg->interlock)); 1356 KASSERT(mutex_owned(&newpg->interlock)); 1357 1358 newpg->uobject = uobj; 1359 newpg->offset = oldpg->offset; 1360 idx = newpg->offset >> PAGE_SHIFT; 1361 pg = radix_tree_replace_node(&uobj->uo_pages, idx, newpg); 1362 KASSERT(pg == oldpg); 1363 if (((oldpg->flags ^ newpg->flags) & PG_CLEAN) != 0) { 1364 if ((newpg->flags & PG_CLEAN) != 0) { 1365 uvm_obj_page_clear_dirty(newpg); 1366 } else { 1367 uvm_obj_page_set_dirty(newpg); 1368 } 1369 } 1370 /* 1371 * oldpg's PG_STAT is stable. newpg is not reachable by others yet. 1372 */ 1373 newpg->flags |= 1374 (newpg->flags & ~PG_STAT) | (oldpg->flags & PG_STAT); 1375 uvm_pageinsert_object(uobj, newpg); 1376 uvm_pageremove_object(uobj, oldpg); 1377 } 1378 1379 /* 1380 * uvm_pagerealloc: reallocate a page from one object to another 1381 * 1382 * => both objects must be locked 1383 */ 1384 1385 int 1386 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) 1387 { 1388 int error = 0; 1389 1390 /* 1391 * remove it from the old object 1392 */ 1393 1394 if (pg->uobject) { 1395 uvm_pageremove_tree(pg->uobject, pg); 1396 uvm_pageremove_object(pg->uobject, pg); 1397 } 1398 1399 /* 1400 * put it in the new object 1401 */ 1402 1403 if (newobj) { 1404 mutex_enter(&pg->interlock); 1405 pg->uobject = newobj; 1406 pg->offset = newoff; 1407 if (UVM_OBJ_IS_VNODE(newobj)) { 1408 pg->flags |= PG_FILE; 1409 } else if (UVM_OBJ_IS_AOBJ(newobj)) { 1410 pg->flags |= PG_AOBJ; 1411 } 1412 uvm_pageinsert_object(newobj, pg); 1413 mutex_exit(&pg->interlock); 1414 error = uvm_pageinsert_tree(newobj, pg); 1415 if (error != 0) { 1416 mutex_enter(&pg->interlock); 1417 uvm_pageremove_object(newobj, pg); 1418 mutex_exit(&pg->interlock); 1419 } 1420 } 1421 1422 return error; 1423 } 1424 1425 /* 1426 * uvm_pagefree: free page 1427 * 1428 * => erase page's identity (i.e. 
remove from object) 1429 * => put page on free list 1430 * => caller must lock owning object (either anon or uvm_object) 1431 * => assumes all valid mappings of pg are gone 1432 */ 1433 1434 void 1435 uvm_pagefree(struct vm_page *pg) 1436 { 1437 struct pgfreelist *pgfl; 1438 struct pgflbucket *pgb; 1439 struct uvm_cpu *ucpu; 1440 kmutex_t *lock; 1441 int bucket, s; 1442 bool locked; 1443 1444 #ifdef DEBUG 1445 if (pg->uobject == (void *)0xdeadbeef && 1446 pg->uanon == (void *)0xdeadbeef) { 1447 panic("uvm_pagefree: freeing free page %p", pg); 1448 } 1449 #endif /* DEBUG */ 1450 1451 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1452 KASSERT(!(pg->flags & PG_FREE)); 1453 KASSERT(pg->uobject == NULL || rw_write_held(pg->uobject->vmobjlock)); 1454 KASSERT(pg->uobject != NULL || pg->uanon == NULL || 1455 rw_write_held(pg->uanon->an_lock)); 1456 1457 /* 1458 * remove the page from the object's tree before acquiring any page 1459 * interlocks: this can acquire locks to free radixtree nodes. 1460 */ 1461 if (pg->uobject != NULL) { 1462 uvm_pageremove_tree(pg->uobject, pg); 1463 } 1464 1465 /* 1466 * if the page is loaned, resolve the loan instead of freeing. 1467 */ 1468 1469 if (pg->loan_count) { 1470 KASSERT(pg->wire_count == 0); 1471 1472 /* 1473 * if the page is owned by an anon then we just want to 1474 * drop anon ownership. the kernel will free the page when 1475 * it is done with it. if the page is owned by an object, 1476 * remove it from the object and mark it dirty for the benefit 1477 * of possible anon owners. 1478 * 1479 * regardless of previous ownership, wakeup any waiters, 1480 * unbusy the page, and we're done. 1481 */ 1482 1483 uvm_pagelock(pg); 1484 locked = true; 1485 if (pg->uobject != NULL) { 1486 uvm_pageremove_object(pg->uobject, pg); 1487 pg->flags &= ~(PG_FILE|PG_AOBJ); 1488 } else if (pg->uanon != NULL) { 1489 if ((pg->flags & PG_ANON) == 0) { 1490 pg->loan_count--; 1491 } else { 1492 const unsigned status = uvm_pagegetdirty(pg); 1493 pg->flags &= ~PG_ANON; 1494 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1495 } 1496 pg->uanon->an_page = NULL; 1497 pg->uanon = NULL; 1498 } 1499 if (pg->pqflags & PQ_WANTED) { 1500 wakeup(pg); 1501 } 1502 pg->pqflags &= ~PQ_WANTED; 1503 pg->flags &= ~(PG_BUSY|PG_RELEASED|PG_PAGER1); 1504 #ifdef UVM_PAGE_TRKOWN 1505 pg->owner_tag = NULL; 1506 #endif 1507 KASSERT((pg->flags & PG_STAT) == 0); 1508 if (pg->loan_count) { 1509 KASSERT(pg->uobject == NULL); 1510 if (pg->uanon == NULL) { 1511 uvm_pagedequeue(pg); 1512 } 1513 uvm_pageunlock(pg); 1514 return; 1515 } 1516 } else if (pg->uobject != NULL || pg->uanon != NULL || 1517 pg->wire_count != 0) { 1518 uvm_pagelock(pg); 1519 locked = true; 1520 } else { 1521 locked = false; 1522 } 1523 1524 /* 1525 * remove page from its object or anon. 1526 */ 1527 if (pg->uobject != NULL) { 1528 uvm_pageremove_object(pg->uobject, pg); 1529 } else if (pg->uanon != NULL) { 1530 const unsigned int status = uvm_pagegetdirty(pg); 1531 pg->uanon->an_page = NULL; 1532 pg->uanon = NULL; 1533 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1534 } 1535 1536 /* 1537 * if the page was wired, unwire it now. 1538 */ 1539 1540 if (pg->wire_count) { 1541 pg->wire_count = 0; 1542 atomic_dec_uint(&uvmexp.wired); 1543 } 1544 if (locked) { 1545 /* 1546 * wake anyone waiting on the page. 1547 */ 1548 if ((pg->pqflags & PQ_WANTED) != 0) { 1549 pg->pqflags &= ~PQ_WANTED; 1550 wakeup(pg); 1551 } 1552 1553 /* 1554 * now remove the page from the queues. 
1555 */ 1556 uvm_pagedequeue(pg); 1557 uvm_pageunlock(pg); 1558 } else { 1559 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1560 } 1561 1562 /* 1563 * and put on free queue 1564 */ 1565 1566 #ifdef DEBUG 1567 pg->uobject = (void *)0xdeadbeef; 1568 pg->uanon = (void *)0xdeadbeef; 1569 #endif /* DEBUG */ 1570 1571 /* Try to send the page to the per-CPU cache. */ 1572 s = splvm(); 1573 ucpu = curcpu()->ci_data.cpu_uvm; 1574 bucket = uvm_page_get_bucket(pg); 1575 if (bucket == ucpu->pgflbucket && uvm_pgflcache_free(ucpu, pg)) { 1576 splx(s); 1577 return; 1578 } 1579 1580 /* Didn't work. Never mind, send it to a global bucket. */ 1581 pgfl = &uvm.page_free[uvm_page_get_freelist(pg)]; 1582 pgb = pgfl->pgfl_buckets[bucket]; 1583 lock = &uvm_freelist_locks[bucket].lock; 1584 1585 mutex_spin_enter(lock); 1586 /* PG_FREE must be set under lock because of uvm_pglistalloc(). */ 1587 pg->flags = PG_FREE; 1588 LIST_INSERT_HEAD(&pgb->pgb_colors[VM_PGCOLOR(pg)], pg, pageq.list); 1589 pgb->pgb_nfree++; 1590 CPU_COUNT(CPU_COUNT_FREEPAGES, 1); 1591 mutex_spin_exit(lock); 1592 splx(s); 1593 } 1594 1595 /* 1596 * uvm_page_unbusy: unbusy an array of pages. 1597 * 1598 * => pages must either all belong to the same object, or all belong to anons. 1599 * => if pages are object-owned, object must be locked. 1600 * => if pages are anon-owned, anons must be locked. 1601 * => caller must make sure that anon-owned pages are not PG_RELEASED. 1602 */ 1603 1604 void 1605 uvm_page_unbusy(struct vm_page **pgs, int npgs) 1606 { 1607 struct vm_page *pg; 1608 int i, pageout_done; 1609 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1610 1611 pageout_done = 0; 1612 for (i = 0; i < npgs; i++) { 1613 pg = pgs[i]; 1614 if (pg == NULL || pg == PGO_DONTCARE) { 1615 continue; 1616 } 1617 1618 KASSERT(uvm_page_owner_locked_p(pg, true)); 1619 KASSERT(pg->flags & PG_BUSY); 1620 1621 if (pg->flags & PG_PAGEOUT) { 1622 pg->flags &= ~PG_PAGEOUT; 1623 pg->flags |= PG_RELEASED; 1624 pageout_done++; 1625 atomic_inc_uint(&uvmexp.pdfreed); 1626 } 1627 if (pg->flags & PG_RELEASED) { 1628 UVMHIST_LOG(ubchist, "releasing pg %#jx", 1629 (uintptr_t)pg, 0, 0, 0); 1630 KASSERT(pg->uobject != NULL || 1631 (pg->uanon != NULL && pg->uanon->an_ref > 0)); 1632 pg->flags &= ~PG_RELEASED; 1633 uvm_pagefree(pg); 1634 } else { 1635 UVMHIST_LOG(ubchist, "unbusying pg %#jx", 1636 (uintptr_t)pg, 0, 0, 0); 1637 KASSERT((pg->flags & PG_FAKE) == 0); 1638 pg->flags &= ~PG_BUSY; 1639 uvm_pagelock(pg); 1640 uvm_pagewakeup(pg); 1641 uvm_pageunlock(pg); 1642 UVM_PAGE_OWN(pg, NULL); 1643 } 1644 } 1645 if (pageout_done != 0) { 1646 uvm_pageout_done(pageout_done); 1647 } 1648 } 1649 1650 /* 1651 * uvm_pagewait: wait for a busy page 1652 * 1653 * => page must be known PG_BUSY 1654 * => object must be read or write locked 1655 * => object will be unlocked on return 1656 */ 1657 1658 void 1659 uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg) 1660 { 1661 1662 KASSERT(rw_lock_held(lock)); 1663 KASSERT((pg->flags & PG_BUSY) != 0); 1664 KASSERT(uvm_page_owner_locked_p(pg, false)); 1665 1666 mutex_enter(&pg->interlock); 1667 pg->pqflags |= PQ_WANTED; 1668 rw_exit(lock); 1669 UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0); 1670 } 1671 1672 /* 1673 * uvm_pagewakeup: wake anyone waiting on a page 1674 * 1675 * => page interlock must be held 1676 */ 1677 1678 void 1679 uvm_pagewakeup(struct vm_page *pg) 1680 { 1681 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1682 1683 KASSERT(mutex_owned(&pg->interlock)); 1684 1685 UVMHIST_LOG(ubchist, "waking pg %#jx", 
(uintptr_t)pg, 0, 0, 0); 1686 1687 if ((pg->pqflags & PQ_WANTED) != 0) { 1688 wakeup(pg); 1689 pg->pqflags &= ~PQ_WANTED; 1690 } 1691 } 1692 1693 /* 1694 * uvm_pagewanted_p: return true if someone is waiting on the page 1695 * 1696 * => object must be write locked (lock out all concurrent access) 1697 */ 1698 1699 bool 1700 uvm_pagewanted_p(struct vm_page *pg) 1701 { 1702 1703 KASSERT(uvm_page_owner_locked_p(pg, true)); 1704 1705 return (atomic_load_relaxed(&pg->pqflags) & PQ_WANTED) != 0; 1706 } 1707 1708 #if defined(UVM_PAGE_TRKOWN) 1709 /* 1710 * uvm_page_own: set or release page ownership 1711 * 1712 * => this is a debugging function that keeps track of who sets PG_BUSY 1713 * and where they do it. it can be used to track down problems 1714 * such a process setting "PG_BUSY" and never releasing it. 1715 * => page's object [if any] must be locked 1716 * => if "tag" is NULL then we are releasing page ownership 1717 */ 1718 void 1719 uvm_page_own(struct vm_page *pg, const char *tag) 1720 { 1721 1722 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0); 1723 KASSERT(uvm_page_owner_locked_p(pg, true)); 1724 1725 /* gain ownership? */ 1726 if (tag) { 1727 KASSERT((pg->flags & PG_BUSY) != 0); 1728 if (pg->owner_tag) { 1729 printf("uvm_page_own: page %p already owned " 1730 "by proc %d.%d [%s]\n", pg, 1731 pg->owner, pg->lowner, pg->owner_tag); 1732 panic("uvm_page_own"); 1733 } 1734 pg->owner = curproc->p_pid; 1735 pg->lowner = curlwp->l_lid; 1736 pg->owner_tag = tag; 1737 return; 1738 } 1739 1740 /* drop ownership */ 1741 KASSERT((pg->flags & PG_BUSY) == 0); 1742 if (pg->owner_tag == NULL) { 1743 printf("uvm_page_own: dropping ownership of an non-owned " 1744 "page (%p)\n", pg); 1745 panic("uvm_page_own"); 1746 } 1747 pg->owner_tag = NULL; 1748 } 1749 #endif 1750 1751 /* 1752 * uvm_pagelookup: look up a page 1753 * 1754 * => caller should lock object to keep someone from pulling the page 1755 * out from under it 1756 */ 1757 1758 struct vm_page * 1759 uvm_pagelookup(struct uvm_object *obj, voff_t off) 1760 { 1761 struct vm_page *pg; 1762 1763 KASSERT(db_active || rw_lock_held(obj->vmobjlock)); 1764 1765 pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT); 1766 1767 KASSERT(pg == NULL || obj->uo_npages != 0); 1768 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 || 1769 (pg->flags & PG_BUSY) != 0); 1770 return pg; 1771 } 1772 1773 /* 1774 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp 1775 * 1776 * => caller must lock objects 1777 * => caller must hold pg->interlock 1778 */ 1779 1780 void 1781 uvm_pagewire(struct vm_page *pg) 1782 { 1783 1784 KASSERT(uvm_page_owner_locked_p(pg, true)); 1785 KASSERT(mutex_owned(&pg->interlock)); 1786 #if defined(READAHEAD_STATS) 1787 if ((pg->flags & PG_READAHEAD) != 0) { 1788 uvm_ra_hit.ev_count++; 1789 pg->flags &= ~PG_READAHEAD; 1790 } 1791 #endif /* defined(READAHEAD_STATS) */ 1792 if (pg->wire_count == 0) { 1793 uvm_pagedequeue(pg); 1794 atomic_inc_uint(&uvmexp.wired); 1795 } 1796 pg->wire_count++; 1797 KASSERT(pg->wire_count > 0); /* detect wraparound */ 1798 } 1799 1800 /* 1801 * uvm_pageunwire: unwire the page. 1802 * 1803 * => activate if wire count goes to zero. 
1804 * => caller must lock objects 1805 * => caller must hold pg->interlock 1806 */ 1807 1808 void 1809 uvm_pageunwire(struct vm_page *pg) 1810 { 1811 1812 KASSERT(uvm_page_owner_locked_p(pg, true)); 1813 KASSERT(pg->wire_count != 0); 1814 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1815 KASSERT(mutex_owned(&pg->interlock)); 1816 pg->wire_count--; 1817 if (pg->wire_count == 0) { 1818 uvm_pageactivate(pg); 1819 KASSERT(uvmexp.wired != 0); 1820 atomic_dec_uint(&uvmexp.wired); 1821 } 1822 } 1823 1824 /* 1825 * uvm_pagedeactivate: deactivate page 1826 * 1827 * => caller must lock objects 1828 * => caller must check to make sure page is not wired 1829 * => object that page belongs to must be locked (so we can adjust pg->flags) 1830 * => caller must clear the reference on the page before calling 1831 * => caller must hold pg->interlock 1832 */ 1833 1834 void 1835 uvm_pagedeactivate(struct vm_page *pg) 1836 { 1837 1838 KASSERT(uvm_page_owner_locked_p(pg, false)); 1839 KASSERT(mutex_owned(&pg->interlock)); 1840 if (pg->wire_count == 0) { 1841 KASSERT(uvmpdpol_pageisqueued_p(pg)); 1842 uvmpdpol_pagedeactivate(pg); 1843 } 1844 } 1845 1846 /* 1847 * uvm_pageactivate: activate page 1848 * 1849 * => caller must lock objects 1850 * => caller must hold pg->interlock 1851 */ 1852 1853 void 1854 uvm_pageactivate(struct vm_page *pg) 1855 { 1856 1857 KASSERT(uvm_page_owner_locked_p(pg, false)); 1858 KASSERT(mutex_owned(&pg->interlock)); 1859 #if defined(READAHEAD_STATS) 1860 if ((pg->flags & PG_READAHEAD) != 0) { 1861 uvm_ra_hit.ev_count++; 1862 pg->flags &= ~PG_READAHEAD; 1863 } 1864 #endif /* defined(READAHEAD_STATS) */ 1865 if (pg->wire_count == 0) { 1866 uvmpdpol_pageactivate(pg); 1867 } 1868 } 1869 1870 /* 1871 * uvm_pagedequeue: remove a page from any paging queue 1872 * 1873 * => caller must lock objects 1874 * => caller must hold pg->interlock 1875 */ 1876 void 1877 uvm_pagedequeue(struct vm_page *pg) 1878 { 1879 1880 KASSERT(uvm_page_owner_locked_p(pg, true)); 1881 KASSERT(mutex_owned(&pg->interlock)); 1882 if (uvmpdpol_pageisqueued_p(pg)) { 1883 uvmpdpol_pagedequeue(pg); 1884 } 1885 } 1886 1887 /* 1888 * uvm_pageenqueue: add a page to a paging queue without activating. 1889 * used where a page is not really demanded (yet). eg. read-ahead 1890 * 1891 * => caller must lock objects 1892 * => caller must hold pg->interlock 1893 */ 1894 void 1895 uvm_pageenqueue(struct vm_page *pg) 1896 { 1897 1898 KASSERT(uvm_page_owner_locked_p(pg, false)); 1899 KASSERT(mutex_owned(&pg->interlock)); 1900 if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) { 1901 uvmpdpol_pageenqueue(pg); 1902 } 1903 } 1904 1905 /* 1906 * uvm_pagelock: acquire page interlock 1907 */ 1908 void 1909 uvm_pagelock(struct vm_page *pg) 1910 { 1911 1912 mutex_enter(&pg->interlock); 1913 } 1914 1915 /* 1916 * uvm_pagelock2: acquire two page interlocks 1917 */ 1918 void 1919 uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2) 1920 { 1921 1922 if (pg1 < pg2) { 1923 mutex_enter(&pg1->interlock); 1924 mutex_enter(&pg2->interlock); 1925 } else { 1926 mutex_enter(&pg2->interlock); 1927 mutex_enter(&pg1->interlock); 1928 } 1929 } 1930 1931 /* 1932 * uvm_pageunlock: release page interlock, and if a page replacement intent 1933 * is set on the page, pass it to uvmpdpol to make real. 
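 * PQ_INTENT_QUEUED guards against handing the same intent to uvmpdpol
 * twice; uvmpdpol_pagerealize() is called only after the interlock has
 * been dropped.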
1934 * 1935 * => caller must hold pg->interlock 1936 */ 1937 void 1938 uvm_pageunlock(struct vm_page *pg) 1939 { 1940 1941 if ((pg->pqflags & PQ_INTENT_SET) == 0 || 1942 (pg->pqflags & PQ_INTENT_QUEUED) != 0) { 1943 mutex_exit(&pg->interlock); 1944 return; 1945 } 1946 pg->pqflags |= PQ_INTENT_QUEUED; 1947 mutex_exit(&pg->interlock); 1948 uvmpdpol_pagerealize(pg); 1949 } 1950 1951 /* 1952 * uvm_pageunlock2: release two page interlocks, and for both pages if a 1953 * page replacement intent is set on the page, pass it to uvmpdpol to make 1954 * real. 1955 * 1956 * => caller must hold pg->interlock 1957 */ 1958 void 1959 uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2) 1960 { 1961 1962 if ((pg1->pqflags & PQ_INTENT_SET) == 0 || 1963 (pg1->pqflags & PQ_INTENT_QUEUED) != 0) { 1964 mutex_exit(&pg1->interlock); 1965 pg1 = NULL; 1966 } else { 1967 pg1->pqflags |= PQ_INTENT_QUEUED; 1968 mutex_exit(&pg1->interlock); 1969 } 1970 1971 if ((pg2->pqflags & PQ_INTENT_SET) == 0 || 1972 (pg2->pqflags & PQ_INTENT_QUEUED) != 0) { 1973 mutex_exit(&pg2->interlock); 1974 pg2 = NULL; 1975 } else { 1976 pg2->pqflags |= PQ_INTENT_QUEUED; 1977 mutex_exit(&pg2->interlock); 1978 } 1979 1980 if (pg1 != NULL) { 1981 uvmpdpol_pagerealize(pg1); 1982 } 1983 if (pg2 != NULL) { 1984 uvmpdpol_pagerealize(pg2); 1985 } 1986 } 1987 1988 /* 1989 * uvm_pagezero: zero fill a page 1990 * 1991 * => if page is part of an object then the object should be locked 1992 * to protect pg->flags. 1993 */ 1994 1995 void 1996 uvm_pagezero(struct vm_page *pg) 1997 { 1998 1999 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 2000 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 2001 } 2002 2003 /* 2004 * uvm_pagecopy: copy a page 2005 * 2006 * => if page is part of an object then the object should be locked 2007 * to protect pg->flags. 2008 */ 2009 2010 void 2011 uvm_pagecopy(struct vm_page *src, struct vm_page *dst) 2012 { 2013 2014 uvm_pagemarkdirty(dst, UVM_PAGE_STATUS_DIRTY); 2015 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); 2016 } 2017 2018 /* 2019 * uvm_pageismanaged: test it see that a page (specified by PA) is managed. 2020 */ 2021 2022 bool 2023 uvm_pageismanaged(paddr_t pa) 2024 { 2025 2026 return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID); 2027 } 2028 2029 /* 2030 * uvm_page_lookup_freelist: look up the free list for the specified page 2031 */ 2032 2033 int 2034 uvm_page_lookup_freelist(struct vm_page *pg) 2035 { 2036 uvm_physseg_t upm; 2037 2038 upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL); 2039 KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID); 2040 return uvm_physseg_get_free_list(upm); 2041 } 2042 2043 /* 2044 * uvm_page_owner_locked_p: return true if object associated with page is 2045 * locked. this is a weak check for runtime assertions only. 2046 */ 2047 2048 bool 2049 uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive) 2050 { 2051 2052 if (pg->uobject != NULL) { 2053 return exclusive 2054 ? rw_write_held(pg->uobject->vmobjlock) 2055 : rw_lock_held(pg->uobject->vmobjlock); 2056 } 2057 if (pg->uanon != NULL) { 2058 return exclusive 2059 ? 

/*
 * uvm_pagereadonly_p: return true if the page should be mapped read-only
 */

bool
uvm_pagereadonly_p(struct vm_page *pg)
{
	struct uvm_object * const uobj = pg->uobject;

	KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock));
	KASSERT(uobj != NULL || rw_lock_held(pg->uanon->an_lock));
	if ((pg->flags & PG_RDONLY) != 0) {
		return true;
	}
	if (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) {
		return true;
	}
	if (uobj == NULL) {
		return false;
	}
	return UVM_OBJ_NEEDS_WRITEFAULT(uobj);
}

#ifdef PMAP_DIRECT
/*
 * Have pmap translate each page's physical address into a virtual address
 * and run a callback on it.  This avoids actually mapping the pages; the
 * pmap will most likely use a direct map or equivalent.
 */
int
uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len,
    int (*process)(void *, size_t, void *), void *arg)
{
	int error = 0;
	paddr_t pa;
	size_t todo;
	voff_t pgoff = (off & PAGE_MASK);
	struct vm_page *pg;

	KASSERT(npages > 0);
	KASSERT(len > 0);

	for (int i = 0; i < npages; i++) {
		pg = pgs[i];

		KASSERT(len > 0);

		/*
		 * Caller is responsible for ensuring all the pages are
		 * available.
		 */
		KASSERT(pg != NULL);
		KASSERT(pg != PGO_DONTCARE);

		pa = VM_PAGE_TO_PHYS(pg);
		todo = MIN(len, PAGE_SIZE - pgoff);

		error = pmap_direct_process(pa, pgoff, todo, process, arg);
		if (error)
			break;

		pgoff = 0;
		len -= todo;
	}

	KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len);
	return error;
}
#endif /* PMAP_DIRECT */
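
#ifdef PMAP_DIRECT
/*
 * Illustrative sketch only, not part of the original file: a hypothetical
 * caller of uvm_direct_process() that copies the contents of a run of
 * resident pages into a kernel buffer without mapping them.  The callback
 * receives a pointer into the current page, the number of bytes usable at
 * that pointer, and the opaque argument.
 */
static __unused int
uvm_page_example_copy_cb(void *kva, size_t todo, void *arg)
{
	char **dstp = arg;

	memcpy(*dstp, kva, todo);
	*dstp += todo;
	return 0;
}

static __unused int
uvm_page_example_copy(struct vm_page **pgs, u_int npages, voff_t off,
    vsize_t len, char *dst)
{

	/* dst must have room for len bytes; pgs must cover [off, off+len) */
	return uvm_direct_process(pgs, npages, off, len,
	    uvm_page_example_copy_cb, &dst);
}
#endif /* PMAP_DIRECT */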

#if defined(DDB) || defined(DEBUGPRINT)

/*
 * uvm_page_printit: actually print the page
 */

static const char page_flagbits[] = UVM_PGFLAGBITS;
static const char page_pqflagbits[] = UVM_PQFLAGBITS;

void
uvm_page_printit(struct vm_page *pg, bool full,
    void (*pr)(const char *, ...))
{
	struct vm_page *tpg;
	struct uvm_object *uobj;
	struct pgflbucket *pgb;
	struct pgflist *pgl;
	char pgbuf[128];

	(*pr)("PAGE %p:\n", pg);
	snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
	(*pr)(" flags=%s\n", pgbuf);
	snprintb(pgbuf, sizeof(pgbuf), page_pqflagbits, pg->pqflags);
	(*pr)(" pqflags=%s\n", pgbuf);
	(*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n",
	    pg->uobject, pg->uanon, (long long)pg->offset);
	(*pr)(" loan_count=%d wire_count=%d bucket=%d freelist=%d\n",
	    pg->loan_count, pg->wire_count, uvm_page_get_bucket(pg),
	    uvm_page_get_freelist(pg));
	(*pr)(" pa=0x%lx\n", (long)VM_PAGE_TO_PHYS(pg));
#if defined(UVM_PAGE_TRKOWN)
	if (pg->flags & PG_BUSY)
		(*pr)(" owning process = %d.%d, tag=%s\n",
		    pg->owner, pg->lowner, pg->owner_tag);
	else
		(*pr)(" page not busy, no owner\n");
#else
	(*pr)(" [page ownership tracking disabled]\n");
#endif

	if (!full)
		return;

	/* cross-verify object/anon */
	if ((pg->flags & PG_FREE) == 0) {
		if (pg->flags & PG_ANON) {
			if (pg->uanon == NULL || pg->uanon->an_page != pg)
				(*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
				    (pg->uanon) ? pg->uanon->an_page : NULL);
			else
				(*pr)(" anon backpointer is OK\n");
		} else {
			uobj = pg->uobject;
			if (uobj) {
				(*pr)(" checking object list\n");
				tpg = uvm_pagelookup(uobj, pg->offset);
				if (tpg)
					(*pr)(" page found on object list\n");
				else
					(*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
			}
		}
	}

	/* cross-verify page queue */
	if (pg->flags & PG_FREE) {
		int fl = uvm_page_get_freelist(pg);
		int b = uvm_page_get_bucket(pg);
		pgb = uvm.page_free[fl].pgfl_buckets[b];
		pgl = &pgb->pgb_colors[VM_PGCOLOR(pg)];
		(*pr)(" checking pageq list\n");
		LIST_FOREACH(tpg, pgl, pageq.list) {
			if (tpg == pg) {
				break;
			}
		}
		if (tpg)
			(*pr)(" page found on pageq list\n");
		else
			(*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
	}
}

/*
 * uvm_page_printall - print a summary of all managed pages
 */

void
uvm_page_printall(void (*pr)(const char *, ...))
{
	uvm_physseg_t i;
	paddr_t pfn;
	struct vm_page *pg;

	(*pr)("%18s %4s %4s %18s %18s"
#ifdef UVM_PAGE_TRKOWN
	    " OWNER"
#endif
	    "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
	for (i = uvm_physseg_get_first();
	     uvm_physseg_valid_p(i);
	     i = uvm_physseg_get_next(i)) {
		for (pfn = uvm_physseg_get_start(i);
		     pfn < uvm_physseg_get_end(i);
		     pfn++) {
			pg = PHYS_TO_VM_PAGE(ptoa(pfn));

			(*pr)("%18p %04x %08x %18p %18p",
			    pg, pg->flags, pg->pqflags, pg->uobject,
			    pg->uanon);
#ifdef UVM_PAGE_TRKOWN
			if (pg->flags & PG_BUSY)
				(*pr)(" %d [%s]", pg->owner, pg->owner_tag);
#endif
			(*pr)("\n");
		}
	}
}

/*
 * uvm_page_print_freelists - print a summary of the freelists
 */

void
uvm_page_print_freelists(void (*pr)(const char *, ...))
{
	struct pgfreelist *pgfl;
	struct pgflbucket *pgb;
	int fl, b, c;

	(*pr)("There are %d freelists with %d buckets of %d colors.\n\n",
	    VM_NFREELIST, uvm.bucketcount, uvmexp.ncolors);

	for (fl = 0; fl < VM_NFREELIST; fl++) {
		pgfl = &uvm.page_free[fl];
		(*pr)("freelist(%d) @ %p\n", fl, pgfl);
		for (b = 0; b < uvm.bucketcount; b++) {
			pgb = uvm.page_free[fl].pgfl_buckets[b];
			(*pr)(" bucket(%d) @ %p, nfree = %d, lock @ %p:\n",
			    b, pgb, pgb->pgb_nfree,
			    &uvm_freelist_locks[b].lock);
			for (c = 0; c < uvmexp.ncolors; c++) {
				(*pr)(" color(%d) @ %p, ", c,
				    &pgb->pgb_colors[c]);
				(*pr)("first page = %p\n",
				    LIST_FIRST(&pgb->pgb_colors[c]));
			}
		}
	}
}

#endif /* DDB || DEBUGPRINT */
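
#if defined(DDB) || defined(DEBUGPRINT)
/*
 * Illustrative sketch only, not part of the original file: ad-hoc debug
 * code can dump a single page with uvm_page_printit(), passing printf()
 * as the output routine; ddb itself would normally pass db_printf().
 */
static __unused void
uvm_page_example_dump(struct vm_page *pg)
{

	uvm_page_printit(pg, true /* full */, printf);
}
#endif /* DDB || DEBUGPRINT */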