1 /* $NetBSD: uvm_page.c,v 1.251 2022/10/26 23:38:09 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1997 Charles D. Cranor and Washington University. 34 * Copyright (c) 1991, 1993, The Regents of the University of California. 35 * 36 * All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * The Mach Operating System project at Carnegie-Mellon University. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 66 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp 67 * 68 * 69 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 70 * All rights reserved. 71 * 72 * Permission to use, copy, modify and distribute this software and 73 * its documentation is hereby granted, provided that both the copyright 74 * notice and this permission notice appear in all copies of the 75 * software, derivative works or modified versions, and any portions 76 * thereof, and that both notices appear in supporting documentation. 77 * 78 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 79 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 80 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 81 * 82 * Carnegie Mellon requests users of this software to return to 83 * 84 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 85 * School of Computer Science 86 * Carnegie Mellon University 87 * Pittsburgh PA 15213-3890 88 * 89 * any improvements or extensions that they make and grant Carnegie the 90 * rights to redistribute these changes. 91 */ 92 93 /* 94 * uvm_page.c: page ops. 95 */ 96 97 #include <sys/cdefs.h> 98 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.251 2022/10/26 23:38:09 riastradh Exp $"); 99 100 #include "opt_ddb.h" 101 #include "opt_uvm.h" 102 #include "opt_uvmhist.h" 103 #include "opt_readahead.h" 104 105 #include <sys/param.h> 106 #include <sys/systm.h> 107 #include <sys/sched.h> 108 #include <sys/kernel.h> 109 #include <sys/vnode.h> 110 #include <sys/proc.h> 111 #include <sys/radixtree.h> 112 #include <sys/atomic.h> 113 #include <sys/cpu.h> 114 115 #include <ddb/db_active.h> 116 117 #include <uvm/uvm.h> 118 #include <uvm/uvm_ddb.h> 119 #include <uvm/uvm_pdpolicy.h> 120 #include <uvm/uvm_pgflcache.h> 121 122 /* 123 * number of pages per-CPU to reserve for the kernel. 124 */ 125 #ifndef UVM_RESERVED_PAGES_PER_CPU 126 #define UVM_RESERVED_PAGES_PER_CPU 5 127 #endif 128 int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; 129 130 /* 131 * physical memory size; 132 */ 133 psize_t physmem; 134 135 /* 136 * local variables 137 */ 138 139 /* 140 * these variables record the values returned by vm_page_bootstrap, 141 * for debugging purposes. The implementation of uvm_pageboot_alloc 142 * and pmap_startup here also uses them internally. 143 */ 144 145 static vaddr_t virtual_space_start; 146 static vaddr_t virtual_space_end; 147 148 /* 149 * we allocate an initial number of page colors in uvm_page_init(), 150 * and remember them. We may re-color pages as cache sizes are 151 * discovered during the autoconfiguration phase. But we can never 152 * free the initial set of buckets, since they are allocated using 153 * uvm_pageboot_alloc(). 154 */ 155 156 static size_t recolored_pages_memsize /* = 0 */; 157 static char *recolored_pages_mem; 158 159 /* 160 * freelist locks - one per bucket. 161 */ 162 163 union uvm_freelist_lock uvm_freelist_locks[PGFL_MAX_BUCKETS] 164 __cacheline_aligned; 165 166 /* 167 * basic NUMA information. 168 */ 169 170 static struct uvm_page_numa_region { 171 struct uvm_page_numa_region *next; 172 paddr_t start; 173 paddr_t size; 174 u_int numa_id; 175 } *uvm_page_numa_region; 176 177 #ifdef DEBUG 178 kmutex_t uvm_zerochecklock __cacheline_aligned; 179 vaddr_t uvm_zerocheckkva; 180 #endif /* DEBUG */ 181 182 /* 183 * These functions are reserved for uvm(9) internal use and are not 184 * exported in the header file uvm_physseg.h 185 * 186 * Thus they are redefined here. 
187 */ 188 void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *); 189 void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t); 190 191 /* returns a pgs array */ 192 struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t); 193 194 /* 195 * inline functions 196 */ 197 198 /* 199 * uvm_pageinsert: insert a page in the object. 200 * 201 * => caller must lock object 202 * => call should have already set pg's object and offset pointers 203 * and bumped the version counter 204 */ 205 206 static inline void 207 uvm_pageinsert_object(struct uvm_object *uobj, struct vm_page *pg) 208 { 209 210 KASSERT(uobj == pg->uobject); 211 KASSERT(rw_write_held(uobj->vmobjlock)); 212 KASSERT((pg->flags & PG_TABLED) == 0); 213 214 if ((pg->flags & PG_STAT) != 0) { 215 /* Cannot use uvm_pagegetdirty(): not yet in radix tree. */ 216 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 217 218 if ((pg->flags & PG_FILE) != 0) { 219 if (uobj->uo_npages == 0) { 220 struct vnode *vp = (struct vnode *)uobj; 221 mutex_enter(vp->v_interlock); 222 KASSERT((vp->v_iflag & VI_PAGES) == 0); 223 vp->v_iflag |= VI_PAGES; 224 vholdl(vp); 225 mutex_exit(vp->v_interlock); 226 } 227 if (UVM_OBJ_IS_VTEXT(uobj)) { 228 cpu_count(CPU_COUNT_EXECPAGES, 1); 229 } 230 cpu_count(CPU_COUNT_FILEUNKNOWN + status, 1); 231 } else { 232 cpu_count(CPU_COUNT_ANONUNKNOWN + status, 1); 233 } 234 } 235 pg->flags |= PG_TABLED; 236 uobj->uo_npages++; 237 } 238 239 static inline int 240 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) 241 { 242 const uint64_t idx = pg->offset >> PAGE_SHIFT; 243 int error; 244 245 KASSERT(rw_write_held(uobj->vmobjlock)); 246 247 error = radix_tree_insert_node(&uobj->uo_pages, idx, pg); 248 if (error != 0) { 249 return error; 250 } 251 if ((pg->flags & PG_CLEAN) == 0) { 252 uvm_obj_page_set_dirty(pg); 253 } 254 KASSERT(((pg->flags & PG_CLEAN) == 0) == 255 uvm_obj_page_dirty_p(pg)); 256 return 0; 257 } 258 259 /* 260 * uvm_page_remove: remove page from object. 261 * 262 * => caller must lock object 263 */ 264 265 static inline void 266 uvm_pageremove_object(struct uvm_object *uobj, struct vm_page *pg) 267 { 268 269 KASSERT(uobj == pg->uobject); 270 KASSERT(rw_write_held(uobj->vmobjlock)); 271 KASSERT(pg->flags & PG_TABLED); 272 273 if ((pg->flags & PG_STAT) != 0) { 274 /* Cannot use uvm_pagegetdirty(): no longer in radix tree. 
*/ 275 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 276 277 if ((pg->flags & PG_FILE) != 0) { 278 if (uobj->uo_npages == 1) { 279 struct vnode *vp = (struct vnode *)uobj; 280 mutex_enter(vp->v_interlock); 281 KASSERT((vp->v_iflag & VI_PAGES) != 0); 282 vp->v_iflag &= ~VI_PAGES; 283 holdrelel(vp); 284 mutex_exit(vp->v_interlock); 285 } 286 if (UVM_OBJ_IS_VTEXT(uobj)) { 287 cpu_count(CPU_COUNT_EXECPAGES, -1); 288 } 289 cpu_count(CPU_COUNT_FILEUNKNOWN + status, -1); 290 } else { 291 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 292 } 293 } 294 uobj->uo_npages--; 295 pg->flags &= ~PG_TABLED; 296 pg->uobject = NULL; 297 } 298 299 static inline void 300 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg) 301 { 302 struct vm_page *opg __unused; 303 304 KASSERT(rw_write_held(uobj->vmobjlock)); 305 306 opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT); 307 KASSERT(pg == opg); 308 } 309 310 static void 311 uvm_page_init_bucket(struct pgfreelist *pgfl, struct pgflbucket *pgb, int num) 312 { 313 int i; 314 315 pgb->pgb_nfree = 0; 316 for (i = 0; i < uvmexp.ncolors; i++) { 317 LIST_INIT(&pgb->pgb_colors[i]); 318 } 319 pgfl->pgfl_buckets[num] = pgb; 320 } 321 322 /* 323 * uvm_page_init: init the page system. called from uvm_init(). 324 * 325 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 326 */ 327 328 void 329 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) 330 { 331 static struct uvm_cpu boot_cpu __cacheline_aligned; 332 psize_t freepages, pagecount, bucketsize, n; 333 struct pgflbucket *pgb; 334 struct vm_page *pagearray; 335 char *bucketarray; 336 uvm_physseg_t bank; 337 int fl, b; 338 339 KASSERT(ncpu <= 1); 340 341 /* 342 * init the page queues and free page queue locks, except the 343 * free list; we allocate that later (with the initial vm_page 344 * structures). 345 */ 346 347 curcpu()->ci_data.cpu_uvm = &boot_cpu; 348 uvmpdpol_init(); 349 for (b = 0; b < __arraycount(uvm_freelist_locks); b++) { 350 mutex_init(&uvm_freelist_locks[b].lock, MUTEX_DEFAULT, IPL_VM); 351 } 352 353 /* 354 * allocate vm_page structures. 355 */ 356 357 /* 358 * sanity check: 359 * before calling this function the MD code is expected to register 360 * some free RAM with the uvm_page_physload() function. our job 361 * now is to allocate vm_page structures for this memory. 362 */ 363 364 if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID) 365 panic("uvm_page_bootstrap: no memory pre-allocated"); 366 367 /* 368 * first calculate the number of free pages... 369 * 370 * note that we use start/end rather than avail_start/avail_end. 371 * this allows us to allocate extra vm_page structures in case we 372 * want to return some memory to the pool after booting. 373 */ 374 375 freepages = 0; 376 377 for (bank = uvm_physseg_get_first(); 378 uvm_physseg_valid_p(bank) ; 379 bank = uvm_physseg_get_next(bank)) { 380 freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank)); 381 } 382 383 /* 384 * Let MD code initialize the number of colors, or default 385 * to 1 color if MD code doesn't care. 386 */ 387 if (uvmexp.ncolors == 0) 388 uvmexp.ncolors = 1; 389 uvmexp.colormask = uvmexp.ncolors - 1; 390 KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); 391 392 /* We always start with only 1 bucket. */ 393 uvm.bucketcount = 1; 394 395 /* 396 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 397 * use. for each page of memory we use we need a vm_page structure. 
398 * thus, the total number of pages we can use is the total size of 399 * the memory divided by the PAGE_SIZE plus the size of the vm_page 400 * structure. we add one to freepages as a fudge factor to avoid 401 * truncation errors (since we can only allocate in terms of whole 402 * pages). 403 */ 404 pagecount = ((freepages + 1) << PAGE_SHIFT) / 405 (PAGE_SIZE + sizeof(struct vm_page)); 406 bucketsize = offsetof(struct pgflbucket, pgb_colors[uvmexp.ncolors]); 407 bucketsize = roundup2(bucketsize, coherency_unit); 408 bucketarray = (void *)uvm_pageboot_alloc( 409 bucketsize * VM_NFREELIST + 410 pagecount * sizeof(struct vm_page)); 411 pagearray = (struct vm_page *) 412 (bucketarray + bucketsize * VM_NFREELIST); 413 414 for (fl = 0; fl < VM_NFREELIST; fl++) { 415 pgb = (struct pgflbucket *)(bucketarray + bucketsize * fl); 416 uvm_page_init_bucket(&uvm.page_free[fl], pgb, 0); 417 } 418 memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 419 420 /* 421 * init the freelist cache in the disabled state. 422 */ 423 uvm_pgflcache_init(); 424 425 /* 426 * init the vm_page structures and put them in the correct place. 427 */ 428 /* First init the extent */ 429 430 for (bank = uvm_physseg_get_first(), 431 uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount); 432 uvm_physseg_valid_p(bank); 433 bank = uvm_physseg_get_next(bank)) { 434 435 n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank); 436 uvm_physseg_seg_alloc_from_slab(bank, n); 437 uvm_physseg_init_seg(bank, pagearray); 438 439 /* set up page array pointers */ 440 pagearray += n; 441 pagecount -= n; 442 } 443 444 /* 445 * pass up the values of virtual_space_start and 446 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 447 * layers of the VM. 448 */ 449 450 *kvm_startp = round_page(virtual_space_start); 451 *kvm_endp = trunc_page(virtual_space_end); 452 453 /* 454 * init various thresholds. 455 */ 456 457 uvmexp.reserve_pagedaemon = 1; 458 uvmexp.reserve_kernel = vm_page_reserve_kernel; 459 460 /* 461 * done! 462 */ 463 464 uvm.page_init_done = true; 465 } 466 467 /* 468 * uvm_pgfl_lock: lock all freelist buckets 469 */ 470 471 void 472 uvm_pgfl_lock(void) 473 { 474 int i; 475 476 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 477 mutex_spin_enter(&uvm_freelist_locks[i].lock); 478 } 479 } 480 481 /* 482 * uvm_pgfl_unlock: unlock all freelist buckets 483 */ 484 485 void 486 uvm_pgfl_unlock(void) 487 { 488 int i; 489 490 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 491 mutex_spin_exit(&uvm_freelist_locks[i].lock); 492 } 493 } 494 495 /* 496 * uvm_setpagesize: set the page size 497 * 498 * => sets page_shift and page_mask from uvmexp.pagesize. 499 */ 500 501 void 502 uvm_setpagesize(void) 503 { 504 505 /* 506 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE 507 * to be a constant (indicated by being a non-zero value). 
508 */ 509 if (uvmexp.pagesize == 0) { 510 if (PAGE_SIZE == 0) 511 panic("uvm_setpagesize: uvmexp.pagesize not set"); 512 uvmexp.pagesize = PAGE_SIZE; 513 } 514 uvmexp.pagemask = uvmexp.pagesize - 1; 515 if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 516 panic("uvm_setpagesize: page size %u (%#x) not a power of two", 517 uvmexp.pagesize, uvmexp.pagesize); 518 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 519 if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 520 break; 521 } 522 523 /* 524 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 525 */ 526 527 vaddr_t 528 uvm_pageboot_alloc(vsize_t size) 529 { 530 static bool initialized = false; 531 vaddr_t addr; 532 #if !defined(PMAP_STEAL_MEMORY) 533 vaddr_t vaddr; 534 paddr_t paddr; 535 #endif 536 537 /* 538 * on first call to this function, initialize ourselves. 539 */ 540 if (initialized == false) { 541 pmap_virtual_space(&virtual_space_start, &virtual_space_end); 542 543 /* round it the way we like it */ 544 virtual_space_start = round_page(virtual_space_start); 545 virtual_space_end = trunc_page(virtual_space_end); 546 547 initialized = true; 548 } 549 550 /* round to page size */ 551 size = round_page(size); 552 uvmexp.bootpages += atop(size); 553 554 #if defined(PMAP_STEAL_MEMORY) 555 556 /* 557 * defer bootstrap allocation to MD code (it may want to allocate 558 * from a direct-mapped segment). pmap_steal_memory should adjust 559 * virtual_space_start/virtual_space_end if necessary. 560 */ 561 562 addr = pmap_steal_memory(size, &virtual_space_start, 563 &virtual_space_end); 564 565 return addr; 566 567 #else /* !PMAP_STEAL_MEMORY */ 568 569 /* 570 * allocate virtual memory for this request 571 */ 572 if (virtual_space_start == virtual_space_end || 573 (virtual_space_end - virtual_space_start) < size) 574 panic("uvm_pageboot_alloc: out of virtual space"); 575 576 addr = virtual_space_start; 577 578 #ifdef PMAP_GROWKERNEL 579 /* 580 * If the kernel pmap can't map the requested space, 581 * then allocate more resources for it. 582 */ 583 if (uvm_maxkaddr < (addr + size)) { 584 uvm_maxkaddr = pmap_growkernel(addr + size); 585 if (uvm_maxkaddr < (addr + size)) 586 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 587 } 588 #endif 589 590 virtual_space_start += size; 591 592 /* 593 * allocate and mapin physical pages to back new virtual pages 594 */ 595 596 for (vaddr = round_page(addr) ; vaddr < addr + size ; 597 vaddr += PAGE_SIZE) { 598 599 if (!uvm_page_physget(&paddr)) 600 panic("uvm_pageboot_alloc: out of memory"); 601 602 /* 603 * Note this memory is no longer managed, so using 604 * pmap_kenter is safe. 605 */ 606 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 607 } 608 pmap_update(pmap_kernel()); 609 return addr; 610 #endif /* PMAP_STEAL_MEMORY */ 611 } 612 613 #if !defined(PMAP_STEAL_MEMORY) 614 /* 615 * uvm_page_physget: "steal" one page from the vm_physmem structure. 616 * 617 * => attempt to allocate it off the end of a segment in which the "avail" 618 * values match the start/end values. if we can't do that, then we 619 * will advance both values (making them equal, and removing some 620 * vm_page structures from the non-avail area). 621 * => return false if out of memory. 
622 */ 623 624 /* subroutine: try to allocate from memory chunks on the specified freelist */ 625 static bool uvm_page_physget_freelist(paddr_t *, int); 626 627 static bool 628 uvm_page_physget_freelist(paddr_t *paddrp, int freelist) 629 { 630 uvm_physseg_t lcv; 631 632 /* pass 1: try allocating from a matching end */ 633 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 634 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 635 #else 636 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 637 #endif 638 { 639 if (uvm.page_init_done == true) 640 panic("uvm_page_physget: called _after_ bootstrap"); 641 642 /* Try to match at front or back on unused segment */ 643 if (uvm_page_physunload(lcv, freelist, paddrp)) 644 return true; 645 } 646 647 /* pass2: forget about matching ends, just allocate something */ 648 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 649 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 650 #else 651 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 652 #endif 653 { 654 /* Try the front regardless. */ 655 if (uvm_page_physunload_force(lcv, freelist, paddrp)) 656 return true; 657 } 658 return false; 659 } 660 661 bool 662 uvm_page_physget(paddr_t *paddrp) 663 { 664 int i; 665 666 /* try in the order of freelist preference */ 667 for (i = 0; i < VM_NFREELIST; i++) 668 if (uvm_page_physget_freelist(paddrp, i) == true) 669 return (true); 670 return (false); 671 } 672 #endif /* PMAP_STEAL_MEMORY */ 673 674 /* 675 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages 676 * back from an I/O mapping (ugh!). used in some MD code as well. 677 */ 678 struct vm_page * 679 uvm_phys_to_vm_page(paddr_t pa) 680 { 681 paddr_t pf = atop(pa); 682 paddr_t off; 683 uvm_physseg_t upm; 684 685 upm = uvm_physseg_find(pf, &off); 686 if (upm != UVM_PHYSSEG_TYPE_INVALID) 687 return uvm_physseg_get_pg(upm, off); 688 return(NULL); 689 } 690 691 paddr_t 692 uvm_vm_page_to_phys(const struct vm_page *pg) 693 { 694 695 return pg->phys_addr & ~(PAGE_SIZE - 1); 696 } 697 698 /* 699 * uvm_page_numa_load: load NUMA range description. 700 */ 701 void 702 uvm_page_numa_load(paddr_t start, paddr_t size, u_int numa_id) 703 { 704 struct uvm_page_numa_region *d; 705 706 KASSERT(numa_id < PGFL_MAX_BUCKETS); 707 708 d = kmem_alloc(sizeof(*d), KM_SLEEP); 709 d->start = start; 710 d->size = size; 711 d->numa_id = numa_id; 712 d->next = uvm_page_numa_region; 713 uvm_page_numa_region = d; 714 } 715 716 /* 717 * uvm_page_numa_lookup: lookup NUMA node for the given page. 718 */ 719 static u_int 720 uvm_page_numa_lookup(struct vm_page *pg) 721 { 722 struct uvm_page_numa_region *d; 723 static bool warned; 724 paddr_t pa; 725 726 KASSERT(uvm_page_numa_region != NULL); 727 728 pa = VM_PAGE_TO_PHYS(pg); 729 for (d = uvm_page_numa_region; d != NULL; d = d->next) { 730 if (pa >= d->start && pa < d->start + d->size) { 731 return d->numa_id; 732 } 733 } 734 735 if (!warned) { 736 printf("uvm_page_numa_lookup: failed, first pg=%p pa=%#" 737 PRIxPADDR "\n", pg, VM_PAGE_TO_PHYS(pg)); 738 warned = true; 739 } 740 741 return 0; 742 } 743 744 /* 745 * uvm_page_redim: adjust freelist dimensions if they have changed. 
 */

static void
uvm_page_redim(int newncolors, int newnbuckets)
{
	struct pgfreelist npgfl;
	struct pgflbucket *opgb, *npgb;
	struct pgflist *ohead, *nhead;
	struct vm_page *pg;
	size_t bucketsize, bucketmemsize, oldbucketmemsize;
	int fl, ob, oc, nb, nc, obuckets, ocolors;
	char *bucketarray, *oldbucketmem, *bucketmem;

	KASSERT(((newncolors - 1) & newncolors) == 0);

	/* Anything to do? */
	if (newncolors <= uvmexp.ncolors &&
	    newnbuckets == uvm.bucketcount) {
		return;
	}
	if (uvm.page_init_done == false) {
		uvmexp.ncolors = newncolors;
		return;
	}

	bucketsize = offsetof(struct pgflbucket, pgb_colors[newncolors]);
	bucketsize = roundup2(bucketsize, coherency_unit);
	bucketmemsize = bucketsize * newnbuckets * VM_NFREELIST +
	    coherency_unit - 1;
	bucketmem = kmem_zalloc(bucketmemsize, KM_SLEEP);
	bucketarray = (char *)roundup2((uintptr_t)bucketmem, coherency_unit);

	ocolors = uvmexp.ncolors;
	obuckets = uvm.bucketcount;

	/* Freelist cache mustn't be enabled. */
	uvm_pgflcache_pause();

	/* Make sure we should still do this. */
	uvm_pgfl_lock();
	if (newncolors <= uvmexp.ncolors &&
	    newnbuckets == uvm.bucketcount) {
		uvm_pgfl_unlock();
		uvm_pgflcache_resume();
		kmem_free(bucketmem, bucketmemsize);
		return;
	}

	uvmexp.ncolors = newncolors;
	uvmexp.colormask = uvmexp.ncolors - 1;
	uvm.bucketcount = newnbuckets;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
		/* Init new buckets in new freelist. */
		memset(&npgfl, 0, sizeof(npgfl));
		for (nb = 0; nb < newnbuckets; nb++) {
			npgb = (struct pgflbucket *)bucketarray;
			uvm_page_init_bucket(&npgfl, npgb, nb);
			bucketarray += bucketsize;
		}
		/* Now transfer pages from the old freelist. */
		for (nb = ob = 0; ob < obuckets; ob++) {
			opgb = uvm.page_free[fl].pgfl_buckets[ob];
			for (oc = 0; oc < ocolors; oc++) {
				ohead = &opgb->pgb_colors[oc];
				while ((pg = LIST_FIRST(ohead)) != NULL) {
					LIST_REMOVE(pg, pageq.list);
					/*
					 * Here we decide on the NEW color &
					 * bucket for the page.  For NUMA
					 * we'll use the info that the
					 * hardware gave us.  For non-NUMA
					 * we take the physical page frame
					 * number and cache color into
					 * account.  We do this to try and
					 * avoid defeating any memory
					 * interleaving in the hardware.
					 */
					KASSERT(
					    uvm_page_get_bucket(pg) == ob);
					KASSERT(fl ==
					    uvm_page_get_freelist(pg));
					if (uvm_page_numa_region != NULL) {
						nb = uvm_page_numa_lookup(pg);
					} else {
						nb = atop(VM_PAGE_TO_PHYS(pg))
						    / uvmexp.ncolors / 8
						    % newnbuckets;
					}
					uvm_page_set_bucket(pg, nb);
					npgb = npgfl.pgfl_buckets[nb];
					npgb->pgb_nfree++;
					nc = VM_PGCOLOR(pg);
					nhead = &npgb->pgb_colors[nc];
					LIST_INSERT_HEAD(nhead, pg, pageq.list);
				}
			}
		}
		/* Install the new freelist. */
		memcpy(&uvm.page_free[fl], &npgfl, sizeof(npgfl));
	}

	/* Unlock and free the old memory. */
	oldbucketmemsize = recolored_pages_memsize;
	oldbucketmem = recolored_pages_mem;
	recolored_pages_memsize = bucketmemsize;
	recolored_pages_mem = bucketmem;

	uvm_pgfl_unlock();
	uvm_pgflcache_resume();

	if (oldbucketmemsize) {
		kmem_free(oldbucketmem, oldbucketmemsize);
	}

	/*
	 * this calls uvm_km_alloc() which may want to hold
	 * uvm_freelist_lock.
864 */ 865 uvm_pager_realloc_emerg(); 866 } 867 868 /* 869 * uvm_page_recolor: Recolor the pages if the new color count is 870 * larger than the old one. 871 */ 872 873 void 874 uvm_page_recolor(int newncolors) 875 { 876 877 uvm_page_redim(newncolors, uvm.bucketcount); 878 } 879 880 /* 881 * uvm_page_rebucket: Determine a bucket structure and redim the free 882 * lists to match. 883 */ 884 885 void 886 uvm_page_rebucket(void) 887 { 888 u_int min_numa, max_numa, npackage, shift; 889 struct cpu_info *ci, *ci2, *ci3; 890 CPU_INFO_ITERATOR cii; 891 892 /* 893 * If we have more than one NUMA node, and the maximum NUMA node ID 894 * is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution 895 * for free pages. 896 */ 897 min_numa = (u_int)-1; 898 max_numa = 0; 899 for (CPU_INFO_FOREACH(cii, ci)) { 900 if (ci->ci_numa_id < min_numa) { 901 min_numa = ci->ci_numa_id; 902 } 903 if (ci->ci_numa_id > max_numa) { 904 max_numa = ci->ci_numa_id; 905 } 906 } 907 if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) { 908 aprint_debug("UVM: using NUMA allocation scheme\n"); 909 for (CPU_INFO_FOREACH(cii, ci)) { 910 ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id; 911 } 912 uvm_page_redim(uvmexp.ncolors, max_numa + 1); 913 return; 914 } 915 916 /* 917 * Otherwise we'll go with a scheme to maximise L2/L3 cache locality 918 * and minimise lock contention. Count the total number of CPU 919 * packages, and then try to distribute the buckets among CPU 920 * packages evenly. 921 */ 922 npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE1ST]; 923 924 /* 925 * Figure out how to arrange the packages & buckets, and the total 926 * number of buckets we need. XXX 2 may not be the best factor. 927 */ 928 for (shift = 0; npackage > PGFL_MAX_BUCKETS; shift++) { 929 npackage >>= 1; 930 } 931 uvm_page_redim(uvmexp.ncolors, npackage); 932 933 /* 934 * Now tell each CPU which bucket to use. In the outer loop, scroll 935 * through all CPU packages. 936 */ 937 npackage = 0; 938 ci = curcpu(); 939 ci2 = ci->ci_sibling[CPUREL_PACKAGE1ST]; 940 do { 941 /* 942 * In the inner loop, scroll through all CPUs in the package 943 * and assign the same bucket ID. 944 */ 945 ci3 = ci2; 946 do { 947 ci3->ci_data.cpu_uvm->pgflbucket = npackage >> shift; 948 ci3 = ci3->ci_sibling[CPUREL_PACKAGE]; 949 } while (ci3 != ci2); 950 npackage++; 951 ci2 = ci2->ci_sibling[CPUREL_PACKAGE1ST]; 952 } while (ci2 != ci->ci_sibling[CPUREL_PACKAGE1ST]); 953 954 aprint_debug("UVM: using package allocation scheme, " 955 "%d package(s) per bucket\n", 1 << shift); 956 } 957 958 /* 959 * uvm_cpu_attach: initialize per-CPU data structures. 960 */ 961 962 void 963 uvm_cpu_attach(struct cpu_info *ci) 964 { 965 struct uvm_cpu *ucpu; 966 967 /* Already done in uvm_page_init(). */ 968 if (!CPU_IS_PRIMARY(ci)) { 969 /* Add more reserve pages for this CPU. */ 970 uvmexp.reserve_kernel += vm_page_reserve_kernel; 971 972 /* Allocate per-CPU data structures. */ 973 ucpu = kmem_zalloc(sizeof(struct uvm_cpu) + coherency_unit - 1, 974 KM_SLEEP); 975 ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu, 976 coherency_unit); 977 ci->ci_data.cpu_uvm = ucpu; 978 } else { 979 ucpu = ci->ci_data.cpu_uvm; 980 } 981 982 uvmpdpol_init_cpu(ucpu); 983 984 /* 985 * Attach RNG source for this CPU's VM events 986 */ 987 rnd_attach_source(&ucpu->rs, ci->ci_data.cpu_name, RND_TYPE_VM, 988 RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE| 989 RND_FLAG_ESTIMATE_VALUE); 990 } 991 992 /* 993 * uvm_availmem: fetch the total amount of free memory in pages. 
this can 994 * have a detrimental effect on performance due to false sharing; don't call 995 * unless needed. 996 * 997 * some users can request the amount of free memory so often that it begins 998 * to impact upon performance. if calling frequently and an inexact value 999 * is okay, call with cached = true. 1000 */ 1001 1002 int 1003 uvm_availmem(bool cached) 1004 { 1005 int64_t fp; 1006 1007 cpu_count_sync(cached); 1008 if ((fp = cpu_count_get(CPU_COUNT_FREEPAGES)) < 0) { 1009 /* 1010 * XXXAD could briefly go negative because it's impossible 1011 * to get a clean snapshot. address this for other counters 1012 * used as running totals before NetBSD 10 although less 1013 * important for those. 1014 */ 1015 fp = 0; 1016 } 1017 return (int)fp; 1018 } 1019 1020 /* 1021 * uvm_pagealloc_pgb: helper routine that tries to allocate any color from a 1022 * specific freelist and specific bucket only. 1023 * 1024 * => must be at IPL_VM or higher to protect per-CPU data structures. 1025 */ 1026 1027 static struct vm_page * 1028 uvm_pagealloc_pgb(struct uvm_cpu *ucpu, int f, int b, int *trycolorp, int flags) 1029 { 1030 int c, trycolor, colormask; 1031 struct pgflbucket *pgb; 1032 struct vm_page *pg; 1033 kmutex_t *lock; 1034 bool fill; 1035 1036 /* 1037 * Skip the bucket if empty, no lock needed. There could be many 1038 * empty freelists/buckets. 1039 */ 1040 pgb = uvm.page_free[f].pgfl_buckets[b]; 1041 if (pgb->pgb_nfree == 0) { 1042 return NULL; 1043 } 1044 1045 /* Skip bucket if low on memory. */ 1046 lock = &uvm_freelist_locks[b].lock; 1047 mutex_spin_enter(lock); 1048 if (__predict_false(pgb->pgb_nfree <= uvmexp.reserve_kernel)) { 1049 if ((flags & UVM_PGA_USERESERVE) == 0 || 1050 (pgb->pgb_nfree <= uvmexp.reserve_pagedaemon && 1051 curlwp != uvm.pagedaemon_lwp)) { 1052 mutex_spin_exit(lock); 1053 return NULL; 1054 } 1055 fill = false; 1056 } else { 1057 fill = true; 1058 } 1059 1060 /* Try all page colors as needed. */ 1061 c = trycolor = *trycolorp; 1062 colormask = uvmexp.colormask; 1063 do { 1064 pg = LIST_FIRST(&pgb->pgb_colors[c]); 1065 if (__predict_true(pg != NULL)) { 1066 /* 1067 * Got a free page! PG_FREE must be cleared under 1068 * lock because of uvm_pglistalloc(). 1069 */ 1070 LIST_REMOVE(pg, pageq.list); 1071 KASSERT(pg->flags == PG_FREE); 1072 pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE; 1073 pgb->pgb_nfree--; 1074 CPU_COUNT(CPU_COUNT_FREEPAGES, -1); 1075 1076 /* 1077 * While we have the bucket locked and our data 1078 * structures fresh in L1 cache, we have an ideal 1079 * opportunity to grab some pages for the freelist 1080 * cache without causing extra contention. Only do 1081 * so if we found pages in this CPU's preferred 1082 * bucket. 1083 */ 1084 if (__predict_true(b == ucpu->pgflbucket && fill)) { 1085 uvm_pgflcache_fill(ucpu, f, b, c); 1086 } 1087 mutex_spin_exit(lock); 1088 KASSERT(uvm_page_get_bucket(pg) == b); 1089 CPU_COUNT(c == trycolor ? 1090 CPU_COUNT_COLORHIT : CPU_COUNT_COLORMISS, 1); 1091 CPU_COUNT(CPU_COUNT_CPUMISS, 1); 1092 *trycolorp = c; 1093 return pg; 1094 } 1095 c = (c + 1) & colormask; 1096 } while (c != trycolor); 1097 mutex_spin_exit(lock); 1098 1099 return NULL; 1100 } 1101 1102 /* 1103 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat that allocates 1104 * any color from any bucket, in a specific freelist. 1105 * 1106 * => must be at IPL_VM or higher to protect per-CPU data structures. 
1107 */ 1108 1109 static struct vm_page * 1110 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int f, int *trycolorp, int flags) 1111 { 1112 int b, trybucket, bucketcount; 1113 struct vm_page *pg; 1114 1115 /* Try for the exact thing in the per-CPU cache. */ 1116 if ((pg = uvm_pgflcache_alloc(ucpu, f, *trycolorp)) != NULL) { 1117 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1118 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1119 return pg; 1120 } 1121 1122 /* Walk through all buckets, trying our preferred bucket first. */ 1123 trybucket = ucpu->pgflbucket; 1124 b = trybucket; 1125 bucketcount = uvm.bucketcount; 1126 do { 1127 pg = uvm_pagealloc_pgb(ucpu, f, b, trycolorp, flags); 1128 if (pg != NULL) { 1129 return pg; 1130 } 1131 b = (b + 1 == bucketcount ? 0 : b + 1); 1132 } while (b != trybucket); 1133 1134 return NULL; 1135 } 1136 1137 /* 1138 * uvm_pagealloc_strat: allocate vm_page from a particular free list. 1139 * 1140 * => return null if no pages free 1141 * => wake up pagedaemon if number of free pages drops below low water mark 1142 * => if obj != NULL, obj must be locked (to put in obj's tree) 1143 * => if anon != NULL, anon must be locked (to put in anon) 1144 * => only one of obj or anon can be non-null 1145 * => caller must activate/deactivate page if it is not wired. 1146 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 1147 * => policy decision: it is more important to pull a page off of the 1148 * appropriate priority free list than it is to get a page from the 1149 * correct bucket or color bin. This is because we live with the 1150 * consequences of a bad free list decision for the entire 1151 * lifetime of the page, e.g. if the page comes from memory that 1152 * is slower to access. 1153 */ 1154 1155 struct vm_page * 1156 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, 1157 int flags, int strat, int free_list) 1158 { 1159 int color, lcv, error, s; 1160 struct uvm_cpu *ucpu; 1161 struct vm_page *pg; 1162 lwp_t *l; 1163 1164 KASSERT(obj == NULL || anon == NULL); 1165 KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); 1166 KASSERT(off == trunc_page(off)); 1167 KASSERT(obj == NULL || rw_write_held(obj->vmobjlock)); 1168 KASSERT(anon == NULL || anon->an_lock == NULL || 1169 rw_write_held(anon->an_lock)); 1170 1171 /* 1172 * This implements a global round-robin page coloring 1173 * algorithm. 1174 */ 1175 1176 s = splvm(); 1177 ucpu = curcpu()->ci_data.cpu_uvm; 1178 if (flags & UVM_FLAG_COLORMATCH) { 1179 color = atop(off) & uvmexp.colormask; 1180 } else { 1181 color = ucpu->pgflcolor; 1182 } 1183 1184 /* 1185 * fail if any of these conditions is true: 1186 * [1] there really are no free pages, or 1187 * [2] only kernel "reserved" pages remain and 1188 * reserved pages have not been requested. 1189 * [3] only pagedaemon "reserved" pages remain and 1190 * the requestor isn't the pagedaemon. 1191 * we make kernel reserve pages available if called by a 1192 * kernel thread. 1193 */ 1194 l = curlwp; 1195 if (__predict_true(l != NULL) && (l->l_flag & LW_SYSTEM) != 0) { 1196 flags |= UVM_PGA_USERESERVE; 1197 } 1198 1199 again: 1200 switch (strat) { 1201 case UVM_PGA_STRAT_NORMAL: 1202 /* Check freelists: descending priority (ascending id) order. */ 1203 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1204 pg = uvm_pagealloc_pgfl(ucpu, lcv, &color, flags); 1205 if (pg != NULL) { 1206 goto gotit; 1207 } 1208 } 1209 1210 /* No pages free! Have pagedaemon free some memory. 
*/ 1211 splx(s); 1212 uvm_kick_pdaemon(); 1213 return NULL; 1214 1215 case UVM_PGA_STRAT_ONLY: 1216 case UVM_PGA_STRAT_FALLBACK: 1217 /* Attempt to allocate from the specified free list. */ 1218 KASSERT(free_list >= 0 && free_list < VM_NFREELIST); 1219 pg = uvm_pagealloc_pgfl(ucpu, free_list, &color, flags); 1220 if (pg != NULL) { 1221 goto gotit; 1222 } 1223 1224 /* Fall back, if possible. */ 1225 if (strat == UVM_PGA_STRAT_FALLBACK) { 1226 strat = UVM_PGA_STRAT_NORMAL; 1227 goto again; 1228 } 1229 1230 /* No pages free! Have pagedaemon free some memory. */ 1231 splx(s); 1232 uvm_kick_pdaemon(); 1233 return NULL; 1234 1235 case UVM_PGA_STRAT_NUMA: 1236 /* 1237 * NUMA strategy (experimental): allocating from the correct 1238 * bucket is more important than observing freelist 1239 * priority. Look only to the current NUMA node; if that 1240 * fails, we need to look to other NUMA nodes, so retry with 1241 * the normal strategy. 1242 */ 1243 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1244 pg = uvm_pgflcache_alloc(ucpu, lcv, color); 1245 if (pg != NULL) { 1246 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1247 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1248 goto gotit; 1249 } 1250 pg = uvm_pagealloc_pgb(ucpu, lcv, 1251 ucpu->pgflbucket, &color, flags); 1252 if (pg != NULL) { 1253 goto gotit; 1254 } 1255 } 1256 strat = UVM_PGA_STRAT_NORMAL; 1257 goto again; 1258 1259 default: 1260 panic("uvm_pagealloc_strat: bad strat %d", strat); 1261 /* NOTREACHED */ 1262 } 1263 1264 gotit: 1265 /* 1266 * We now know which color we actually allocated from; set 1267 * the next color accordingly. 1268 */ 1269 1270 ucpu->pgflcolor = (color + 1) & uvmexp.colormask; 1271 1272 /* 1273 * while still at IPL_VM, update allocation statistics. 1274 */ 1275 1276 if (anon) { 1277 CPU_COUNT(CPU_COUNT_ANONCLEAN, 1); 1278 } 1279 splx(s); 1280 KASSERT(pg->flags == (PG_BUSY|PG_CLEAN|PG_FAKE)); 1281 1282 /* 1283 * assign the page to the object. as the page was free, we know 1284 * that pg->uobject and pg->uanon are NULL. we only need to take 1285 * the page's interlock if we are changing the values. 1286 */ 1287 if (anon != NULL || obj != NULL) { 1288 mutex_enter(&pg->interlock); 1289 } 1290 pg->offset = off; 1291 pg->uobject = obj; 1292 pg->uanon = anon; 1293 KASSERT(uvm_page_owner_locked_p(pg, true)); 1294 if (anon) { 1295 anon->an_page = pg; 1296 pg->flags |= PG_ANON; 1297 mutex_exit(&pg->interlock); 1298 } else if (obj) { 1299 /* 1300 * set PG_FILE|PG_AOBJ before the first uvm_pageinsert. 1301 */ 1302 if (UVM_OBJ_IS_VNODE(obj)) { 1303 pg->flags |= PG_FILE; 1304 } else if (UVM_OBJ_IS_AOBJ(obj)) { 1305 pg->flags |= PG_AOBJ; 1306 } 1307 uvm_pageinsert_object(obj, pg); 1308 mutex_exit(&pg->interlock); 1309 error = uvm_pageinsert_tree(obj, pg); 1310 if (error != 0) { 1311 mutex_enter(&pg->interlock); 1312 uvm_pageremove_object(obj, pg); 1313 mutex_exit(&pg->interlock); 1314 uvm_pagefree(pg); 1315 return NULL; 1316 } 1317 } 1318 1319 #if defined(UVM_PAGE_TRKOWN) 1320 pg->owner_tag = NULL; 1321 #endif 1322 UVM_PAGE_OWN(pg, "new alloc"); 1323 1324 if (flags & UVM_PGA_ZERO) { 1325 /* A zero'd page is not clean. 
*/ 1326 if (obj != NULL || anon != NULL) { 1327 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1328 } 1329 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1330 } 1331 1332 return(pg); 1333 } 1334 1335 /* 1336 * uvm_pagereplace: replace a page with another 1337 * 1338 * => object must be locked 1339 * => page interlocks must be held 1340 */ 1341 1342 void 1343 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) 1344 { 1345 struct uvm_object *uobj = oldpg->uobject; 1346 struct vm_page *pg __diagused; 1347 uint64_t idx; 1348 1349 KASSERT((oldpg->flags & PG_TABLED) != 0); 1350 KASSERT(uobj != NULL); 1351 KASSERT((newpg->flags & PG_TABLED) == 0); 1352 KASSERT(newpg->uobject == NULL); 1353 KASSERT(rw_write_held(uobj->vmobjlock)); 1354 KASSERT(mutex_owned(&oldpg->interlock)); 1355 KASSERT(mutex_owned(&newpg->interlock)); 1356 1357 newpg->uobject = uobj; 1358 newpg->offset = oldpg->offset; 1359 idx = newpg->offset >> PAGE_SHIFT; 1360 pg = radix_tree_replace_node(&uobj->uo_pages, idx, newpg); 1361 KASSERT(pg == oldpg); 1362 if (((oldpg->flags ^ newpg->flags) & PG_CLEAN) != 0) { 1363 if ((newpg->flags & PG_CLEAN) != 0) { 1364 uvm_obj_page_clear_dirty(newpg); 1365 } else { 1366 uvm_obj_page_set_dirty(newpg); 1367 } 1368 } 1369 /* 1370 * oldpg's PG_STAT is stable. newpg is not reachable by others yet. 1371 */ 1372 newpg->flags |= 1373 (newpg->flags & ~PG_STAT) | (oldpg->flags & PG_STAT); 1374 uvm_pageinsert_object(uobj, newpg); 1375 uvm_pageremove_object(uobj, oldpg); 1376 } 1377 1378 /* 1379 * uvm_pagerealloc: reallocate a page from one object to another 1380 * 1381 * => both objects must be locked 1382 */ 1383 1384 int 1385 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) 1386 { 1387 int error = 0; 1388 1389 /* 1390 * remove it from the old object 1391 */ 1392 1393 if (pg->uobject) { 1394 uvm_pageremove_tree(pg->uobject, pg); 1395 uvm_pageremove_object(pg->uobject, pg); 1396 } 1397 1398 /* 1399 * put it in the new object 1400 */ 1401 1402 if (newobj) { 1403 mutex_enter(&pg->interlock); 1404 pg->uobject = newobj; 1405 pg->offset = newoff; 1406 if (UVM_OBJ_IS_VNODE(newobj)) { 1407 pg->flags |= PG_FILE; 1408 } else if (UVM_OBJ_IS_AOBJ(newobj)) { 1409 pg->flags |= PG_AOBJ; 1410 } 1411 uvm_pageinsert_object(newobj, pg); 1412 mutex_exit(&pg->interlock); 1413 error = uvm_pageinsert_tree(newobj, pg); 1414 if (error != 0) { 1415 mutex_enter(&pg->interlock); 1416 uvm_pageremove_object(newobj, pg); 1417 mutex_exit(&pg->interlock); 1418 } 1419 } 1420 1421 return error; 1422 } 1423 1424 /* 1425 * uvm_pagefree: free page 1426 * 1427 * => erase page's identity (i.e. 
remove from object) 1428 * => put page on free list 1429 * => caller must lock owning object (either anon or uvm_object) 1430 * => assumes all valid mappings of pg are gone 1431 */ 1432 1433 void 1434 uvm_pagefree(struct vm_page *pg) 1435 { 1436 struct pgfreelist *pgfl; 1437 struct pgflbucket *pgb; 1438 struct uvm_cpu *ucpu; 1439 kmutex_t *lock; 1440 int bucket, s; 1441 bool locked; 1442 1443 #ifdef DEBUG 1444 if (pg->uobject == (void *)0xdeadbeef && 1445 pg->uanon == (void *)0xdeadbeef) { 1446 panic("uvm_pagefree: freeing free page %p", pg); 1447 } 1448 #endif /* DEBUG */ 1449 1450 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1451 KASSERT(!(pg->flags & PG_FREE)); 1452 KASSERT(pg->uobject == NULL || rw_write_held(pg->uobject->vmobjlock)); 1453 KASSERT(pg->uobject != NULL || pg->uanon == NULL || 1454 rw_write_held(pg->uanon->an_lock)); 1455 1456 /* 1457 * remove the page from the object's tree before acquiring any page 1458 * interlocks: this can acquire locks to free radixtree nodes. 1459 */ 1460 if (pg->uobject != NULL) { 1461 uvm_pageremove_tree(pg->uobject, pg); 1462 } 1463 1464 /* 1465 * if the page is loaned, resolve the loan instead of freeing. 1466 */ 1467 1468 if (pg->loan_count) { 1469 KASSERT(pg->wire_count == 0); 1470 1471 /* 1472 * if the page is owned by an anon then we just want to 1473 * drop anon ownership. the kernel will free the page when 1474 * it is done with it. if the page is owned by an object, 1475 * remove it from the object and mark it dirty for the benefit 1476 * of possible anon owners. 1477 * 1478 * regardless of previous ownership, wakeup any waiters, 1479 * unbusy the page, and we're done. 1480 */ 1481 1482 uvm_pagelock(pg); 1483 locked = true; 1484 if (pg->uobject != NULL) { 1485 uvm_pageremove_object(pg->uobject, pg); 1486 pg->flags &= ~(PG_FILE|PG_AOBJ); 1487 } else if (pg->uanon != NULL) { 1488 if ((pg->flags & PG_ANON) == 0) { 1489 pg->loan_count--; 1490 } else { 1491 const unsigned status = uvm_pagegetdirty(pg); 1492 pg->flags &= ~PG_ANON; 1493 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1494 } 1495 pg->uanon->an_page = NULL; 1496 pg->uanon = NULL; 1497 } 1498 if (pg->pqflags & PQ_WANTED) { 1499 wakeup(pg); 1500 } 1501 pg->pqflags &= ~PQ_WANTED; 1502 pg->flags &= ~(PG_BUSY|PG_RELEASED|PG_PAGER1); 1503 #ifdef UVM_PAGE_TRKOWN 1504 pg->owner_tag = NULL; 1505 #endif 1506 KASSERT((pg->flags & PG_STAT) == 0); 1507 if (pg->loan_count) { 1508 KASSERT(pg->uobject == NULL); 1509 if (pg->uanon == NULL) { 1510 uvm_pagedequeue(pg); 1511 } 1512 uvm_pageunlock(pg); 1513 return; 1514 } 1515 } else if (pg->uobject != NULL || pg->uanon != NULL || 1516 pg->wire_count != 0) { 1517 uvm_pagelock(pg); 1518 locked = true; 1519 } else { 1520 locked = false; 1521 } 1522 1523 /* 1524 * remove page from its object or anon. 1525 */ 1526 if (pg->uobject != NULL) { 1527 uvm_pageremove_object(pg->uobject, pg); 1528 } else if (pg->uanon != NULL) { 1529 const unsigned int status = uvm_pagegetdirty(pg); 1530 pg->uanon->an_page = NULL; 1531 pg->uanon = NULL; 1532 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1533 } 1534 1535 /* 1536 * if the page was wired, unwire it now. 1537 */ 1538 1539 if (pg->wire_count) { 1540 pg->wire_count = 0; 1541 atomic_dec_uint(&uvmexp.wired); 1542 } 1543 if (locked) { 1544 /* 1545 * wake anyone waiting on the page. 1546 */ 1547 if ((pg->pqflags & PQ_WANTED) != 0) { 1548 pg->pqflags &= ~PQ_WANTED; 1549 wakeup(pg); 1550 } 1551 1552 /* 1553 * now remove the page from the queues. 
1554 */ 1555 uvm_pagedequeue(pg); 1556 uvm_pageunlock(pg); 1557 } else { 1558 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1559 } 1560 1561 /* 1562 * and put on free queue 1563 */ 1564 1565 #ifdef DEBUG 1566 pg->uobject = (void *)0xdeadbeef; 1567 pg->uanon = (void *)0xdeadbeef; 1568 #endif /* DEBUG */ 1569 1570 /* Try to send the page to the per-CPU cache. */ 1571 s = splvm(); 1572 ucpu = curcpu()->ci_data.cpu_uvm; 1573 bucket = uvm_page_get_bucket(pg); 1574 if (bucket == ucpu->pgflbucket && uvm_pgflcache_free(ucpu, pg)) { 1575 splx(s); 1576 return; 1577 } 1578 1579 /* Didn't work. Never mind, send it to a global bucket. */ 1580 pgfl = &uvm.page_free[uvm_page_get_freelist(pg)]; 1581 pgb = pgfl->pgfl_buckets[bucket]; 1582 lock = &uvm_freelist_locks[bucket].lock; 1583 1584 mutex_spin_enter(lock); 1585 /* PG_FREE must be set under lock because of uvm_pglistalloc(). */ 1586 pg->flags = PG_FREE; 1587 LIST_INSERT_HEAD(&pgb->pgb_colors[VM_PGCOLOR(pg)], pg, pageq.list); 1588 pgb->pgb_nfree++; 1589 CPU_COUNT(CPU_COUNT_FREEPAGES, 1); 1590 mutex_spin_exit(lock); 1591 splx(s); 1592 } 1593 1594 /* 1595 * uvm_page_unbusy: unbusy an array of pages. 1596 * 1597 * => pages must either all belong to the same object, or all belong to anons. 1598 * => if pages are object-owned, object must be locked. 1599 * => if pages are anon-owned, anons must be locked. 1600 * => caller must make sure that anon-owned pages are not PG_RELEASED. 1601 */ 1602 1603 void 1604 uvm_page_unbusy(struct vm_page **pgs, int npgs) 1605 { 1606 struct vm_page *pg; 1607 int i, pageout_done; 1608 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1609 1610 pageout_done = 0; 1611 for (i = 0; i < npgs; i++) { 1612 pg = pgs[i]; 1613 if (pg == NULL || pg == PGO_DONTCARE) { 1614 continue; 1615 } 1616 1617 KASSERT(uvm_page_owner_locked_p(pg, true)); 1618 KASSERT(pg->flags & PG_BUSY); 1619 1620 if (pg->flags & PG_PAGEOUT) { 1621 pg->flags &= ~PG_PAGEOUT; 1622 pg->flags |= PG_RELEASED; 1623 pageout_done++; 1624 atomic_inc_uint(&uvmexp.pdfreed); 1625 } 1626 if (pg->flags & PG_RELEASED) { 1627 UVMHIST_LOG(ubchist, "releasing pg %#jx", 1628 (uintptr_t)pg, 0, 0, 0); 1629 KASSERT(pg->uobject != NULL || 1630 (pg->uanon != NULL && pg->uanon->an_ref > 0)); 1631 pg->flags &= ~PG_RELEASED; 1632 uvm_pagefree(pg); 1633 } else { 1634 UVMHIST_LOG(ubchist, "unbusying pg %#jx", 1635 (uintptr_t)pg, 0, 0, 0); 1636 KASSERT((pg->flags & PG_FAKE) == 0); 1637 pg->flags &= ~PG_BUSY; 1638 uvm_pagelock(pg); 1639 uvm_pagewakeup(pg); 1640 uvm_pageunlock(pg); 1641 UVM_PAGE_OWN(pg, NULL); 1642 } 1643 } 1644 if (pageout_done != 0) { 1645 uvm_pageout_done(pageout_done); 1646 } 1647 } 1648 1649 /* 1650 * uvm_pagewait: wait for a busy page 1651 * 1652 * => page must be known PG_BUSY 1653 * => object must be read or write locked 1654 * => object will be unlocked on return 1655 */ 1656 1657 void 1658 uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg) 1659 { 1660 1661 KASSERT(rw_lock_held(lock)); 1662 KASSERT((pg->flags & PG_BUSY) != 0); 1663 KASSERT(uvm_page_owner_locked_p(pg, false)); 1664 1665 mutex_enter(&pg->interlock); 1666 pg->pqflags |= PQ_WANTED; 1667 rw_exit(lock); 1668 UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0); 1669 } 1670 1671 /* 1672 * uvm_pagewakeup: wake anyone waiting on a page 1673 * 1674 * => page interlock must be held 1675 */ 1676 1677 void 1678 uvm_pagewakeup(struct vm_page *pg) 1679 { 1680 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1681 1682 KASSERT(mutex_owned(&pg->interlock)); 1683 1684 UVMHIST_LOG(ubchist, "waking pg %#jx", 
	    (uintptr_t)pg, 0, 0, 0);

	if ((pg->pqflags & PQ_WANTED) != 0) {
		wakeup(pg);
		pg->pqflags &= ~PQ_WANTED;
	}
}

/*
 * uvm_pagewanted_p: return true if someone is waiting on the page
 *
 * => object must be write locked (lock out all concurrent access)
 */

bool
uvm_pagewanted_p(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));

	return (atomic_load_relaxed(&pg->pqflags) & PQ_WANTED) != 0;
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.  it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(struct vm_page *pg, const char *tag)
{

	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
	KASSERT(uvm_page_owner_locked_p(pg, true));

	/* gain ownership? */
	if (tag) {
		KASSERT((pg->flags & PG_BUSY) != 0);
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d.%d [%s]\n", pg,
			    pg->owner, pg->lowner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = curproc->p_pid;
		pg->lowner = curlwp->l_lid;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	KASSERT((pg->flags & PG_BUSY) == 0);
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of a non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	pg->owner_tag = NULL;
}
#endif

/*
 * uvm_pagelookup: look up a page
 *
 * => caller should lock object to keep someone from pulling the page
 *	out from under it
 */

struct vm_page *
uvm_pagelookup(struct uvm_object *obj, voff_t off)
{
	struct vm_page *pg;

	KASSERT(db_active || rw_lock_held(obj->vmobjlock));

	pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT);

	KASSERT(pg == NULL || obj->uo_npages != 0);
	KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
	    (pg->flags & PG_BUSY) != 0);
	return pg;
}

/*
 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
 *
 * => caller must lock objects
 * => caller must hold pg->interlock
 */

void
uvm_pagewire(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));
	KASSERT(mutex_owned(&pg->interlock));
#if defined(READAHEAD_STATS)
	if ((pg->flags & PG_READAHEAD) != 0) {
		uvm_ra_hit.ev_count++;
		pg->flags &= ~PG_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count == 0) {
		uvm_pagedequeue(pg);
		atomic_inc_uint(&uvmexp.wired);
	}
	pg->wire_count++;
	KASSERT(pg->wire_count > 0);	/* detect wraparound */
}

/*
 * uvm_pageunwire: unwire the page.
 *
 * => activate if wire count goes to zero.
1803 * => caller must lock objects 1804 * => caller must hold pg->interlock 1805 */ 1806 1807 void 1808 uvm_pageunwire(struct vm_page *pg) 1809 { 1810 1811 KASSERT(uvm_page_owner_locked_p(pg, true)); 1812 KASSERT(pg->wire_count != 0); 1813 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1814 KASSERT(mutex_owned(&pg->interlock)); 1815 pg->wire_count--; 1816 if (pg->wire_count == 0) { 1817 uvm_pageactivate(pg); 1818 KASSERT(uvmexp.wired != 0); 1819 atomic_dec_uint(&uvmexp.wired); 1820 } 1821 } 1822 1823 /* 1824 * uvm_pagedeactivate: deactivate page 1825 * 1826 * => caller must lock objects 1827 * => caller must check to make sure page is not wired 1828 * => object that page belongs to must be locked (so we can adjust pg->flags) 1829 * => caller must clear the reference on the page before calling 1830 * => caller must hold pg->interlock 1831 */ 1832 1833 void 1834 uvm_pagedeactivate(struct vm_page *pg) 1835 { 1836 1837 KASSERT(uvm_page_owner_locked_p(pg, false)); 1838 KASSERT(mutex_owned(&pg->interlock)); 1839 if (pg->wire_count == 0) { 1840 KASSERT(uvmpdpol_pageisqueued_p(pg)); 1841 uvmpdpol_pagedeactivate(pg); 1842 } 1843 } 1844 1845 /* 1846 * uvm_pageactivate: activate page 1847 * 1848 * => caller must lock objects 1849 * => caller must hold pg->interlock 1850 */ 1851 1852 void 1853 uvm_pageactivate(struct vm_page *pg) 1854 { 1855 1856 KASSERT(uvm_page_owner_locked_p(pg, false)); 1857 KASSERT(mutex_owned(&pg->interlock)); 1858 #if defined(READAHEAD_STATS) 1859 if ((pg->flags & PG_READAHEAD) != 0) { 1860 uvm_ra_hit.ev_count++; 1861 pg->flags &= ~PG_READAHEAD; 1862 } 1863 #endif /* defined(READAHEAD_STATS) */ 1864 if (pg->wire_count == 0) { 1865 uvmpdpol_pageactivate(pg); 1866 } 1867 } 1868 1869 /* 1870 * uvm_pagedequeue: remove a page from any paging queue 1871 * 1872 * => caller must lock objects 1873 * => caller must hold pg->interlock 1874 */ 1875 void 1876 uvm_pagedequeue(struct vm_page *pg) 1877 { 1878 1879 KASSERT(uvm_page_owner_locked_p(pg, true)); 1880 KASSERT(mutex_owned(&pg->interlock)); 1881 if (uvmpdpol_pageisqueued_p(pg)) { 1882 uvmpdpol_pagedequeue(pg); 1883 } 1884 } 1885 1886 /* 1887 * uvm_pageenqueue: add a page to a paging queue without activating. 1888 * used where a page is not really demanded (yet). eg. read-ahead 1889 * 1890 * => caller must lock objects 1891 * => caller must hold pg->interlock 1892 */ 1893 void 1894 uvm_pageenqueue(struct vm_page *pg) 1895 { 1896 1897 KASSERT(uvm_page_owner_locked_p(pg, false)); 1898 KASSERT(mutex_owned(&pg->interlock)); 1899 if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) { 1900 uvmpdpol_pageenqueue(pg); 1901 } 1902 } 1903 1904 /* 1905 * uvm_pagelock: acquire page interlock 1906 */ 1907 void 1908 uvm_pagelock(struct vm_page *pg) 1909 { 1910 1911 mutex_enter(&pg->interlock); 1912 } 1913 1914 /* 1915 * uvm_pagelock2: acquire two page interlocks 1916 */ 1917 void 1918 uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2) 1919 { 1920 1921 if (pg1 < pg2) { 1922 mutex_enter(&pg1->interlock); 1923 mutex_enter(&pg2->interlock); 1924 } else { 1925 mutex_enter(&pg2->interlock); 1926 mutex_enter(&pg1->interlock); 1927 } 1928 } 1929 1930 /* 1931 * uvm_pageunlock: release page interlock, and if a page replacement intent 1932 * is set on the page, pass it to uvmpdpol to make real. 
 *
 * => caller must hold pg->interlock
 */
void
uvm_pageunlock(struct vm_page *pg)
{

	if ((pg->pqflags & PQ_INTENT_SET) == 0 ||
	    (pg->pqflags & PQ_INTENT_QUEUED) != 0) {
		mutex_exit(&pg->interlock);
		return;
	}
	pg->pqflags |= PQ_INTENT_QUEUED;
	mutex_exit(&pg->interlock);
	uvmpdpol_pagerealize(pg);
}

/*
 * uvm_pageunlock2: release two page interlocks, and for both pages if a
 * page replacement intent is set on the page, pass it to uvmpdpol to make
 * real.
 *
 * => caller must hold pg->interlock
 */
void
uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2)
{

	if ((pg1->pqflags & PQ_INTENT_SET) == 0 ||
	    (pg1->pqflags & PQ_INTENT_QUEUED) != 0) {
		mutex_exit(&pg1->interlock);
		pg1 = NULL;
	} else {
		pg1->pqflags |= PQ_INTENT_QUEUED;
		mutex_exit(&pg1->interlock);
	}

	if ((pg2->pqflags & PQ_INTENT_SET) == 0 ||
	    (pg2->pqflags & PQ_INTENT_QUEUED) != 0) {
		mutex_exit(&pg2->interlock);
		pg2 = NULL;
	} else {
		pg2->pqflags |= PQ_INTENT_QUEUED;
		mutex_exit(&pg2->interlock);
	}

	if (pg1 != NULL) {
		uvmpdpol_pagerealize(pg1);
	}
	if (pg2 != NULL) {
		uvmpdpol_pagerealize(pg2);
	}
}

/*
 * uvm_pagezero: zero fill a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagezero(struct vm_page *pg)
{

	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
}

/*
 * uvm_pagecopy: copy a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
{

	uvm_pagemarkdirty(dst, UVM_PAGE_STATUS_DIRTY);
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}

/*
 * uvm_pageismanaged: test to see whether a page (specified by PA) is managed.
 */

bool
uvm_pageismanaged(paddr_t pa)
{

	return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID);
}

/*
 * uvm_page_lookup_freelist: look up the free list for the specified page
 */

int
uvm_page_lookup_freelist(struct vm_page *pg)
{
	uvm_physseg_t upm;

	upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
	KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID);
	return uvm_physseg_get_free_list(upm);
}

/*
 * uvm_page_owner_locked_p: return true if object associated with page is
 * locked.  this is a weak check for runtime assertions only.
 */

bool
uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive)
{

	if (pg->uobject != NULL) {
		return exclusive
		    ? rw_write_held(pg->uobject->vmobjlock)
		    : rw_lock_held(pg->uobject->vmobjlock);
	}
	if (pg->uanon != NULL) {
		return exclusive
		    ?
/*
 * uvm_page_owner_locked_p: return true if object associated with page is
 * locked.  this is a weak check for runtime assertions only.
 */

bool
uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive)
{

	if (pg->uobject != NULL) {
		return exclusive
		    ? rw_write_held(pg->uobject->vmobjlock)
		    : rw_lock_held(pg->uobject->vmobjlock);
	}
	if (pg->uanon != NULL) {
		return exclusive
		    ? rw_write_held(pg->uanon->an_lock)
		    : rw_lock_held(pg->uanon->an_lock);
	}
	return true;
}

/*
 * uvm_pagereadonly_p: return true if the page should be mapped read-only
 */

bool
uvm_pagereadonly_p(struct vm_page *pg)
{
	struct uvm_object * const uobj = pg->uobject;

	KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock));
	KASSERT(uobj != NULL || rw_lock_held(pg->uanon->an_lock));
	if ((pg->flags & PG_RDONLY) != 0) {
		return true;
	}
	if (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) {
		return true;
	}
	if (uobj == NULL) {
		return false;
	}
	return UVM_OBJ_NEEDS_WRITEFAULT(uobj);
}

#ifdef PMAP_DIRECT
/*
 * Call pmap to translate a physical address into a virtual address and to
 * run a callback for it.  Used to avoid actually mapping the pages; the
 * pmap most likely uses a direct map or equivalent.
 */
int
uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len,
    int (*process)(void *, size_t, void *), void *arg)
{
	int error = 0;
	paddr_t pa;
	size_t todo;
	voff_t pgoff = (off & PAGE_MASK);
	struct vm_page *pg;

	KASSERT(npages > 0 && len > 0);

	for (int i = 0; i < npages; i++) {
		pg = pgs[i];

		KASSERT(len > 0);

		/*
		 * Caller is responsible for ensuring all the pages are
		 * available.
		 */
		KASSERT(pg != NULL && pg != PGO_DONTCARE);

		pa = VM_PAGE_TO_PHYS(pg);
		todo = MIN(len, PAGE_SIZE - pgoff);

		error = pmap_direct_process(pa, pgoff, todo, process, arg);
		if (error)
			break;

		pgoff = 0;
		len -= todo;
	}

	KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len);
	return error;
}
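/*
 * Usage sketch (illustrative only; example_copy_cb is hypothetical): the
 * process callback is handed the virtual address of each chunk within
 * the pmap's direct map plus the chunk length, so a copy-out helper
 * could simply forward to uiomove():
 *
 *	static int
 *	example_copy_cb(void *kva, size_t chunk, void *arg)
 *	{
 *		struct uio *uio = arg;
 *
 *		return uiomove(kva, chunk, uio);
 *	}
 *
 *	error = uvm_direct_process(pgs, npages, off, len,
 *	    example_copy_cb, uio);
 */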
#endif /* PMAP_DIRECT */

#if defined(DDB) || defined(DEBUGPRINT)

/*
 * uvm_page_printit: actually print the page
 */

static const char page_flagbits[] = UVM_PGFLAGBITS;
static const char page_pqflagbits[] = UVM_PQFLAGBITS;

void
uvm_page_printit(struct vm_page *pg, bool full,
    void (*pr)(const char *, ...))
{
	struct vm_page *tpg;
	struct uvm_object *uobj;
	struct pgflbucket *pgb;
	struct pgflist *pgl;
	char pgbuf[128];

	(*pr)("PAGE %p:\n", pg);
	snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
	(*pr)(" flags=%s\n", pgbuf);
	snprintb(pgbuf, sizeof(pgbuf), page_pqflagbits, pg->pqflags);
	(*pr)(" pqflags=%s\n", pgbuf);
	(*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n",
	    pg->uobject, pg->uanon, (long long)pg->offset);
	(*pr)(" loan_count=%d wire_count=%d bucket=%d freelist=%d\n",
	    pg->loan_count, pg->wire_count, uvm_page_get_bucket(pg),
	    uvm_page_get_freelist(pg));
	(*pr)(" pa=0x%lx\n", (long)VM_PAGE_TO_PHYS(pg));
#if defined(UVM_PAGE_TRKOWN)
	if (pg->flags & PG_BUSY)
		(*pr)(" owning process = %d.%d, tag=%s\n",
		    pg->owner, pg->lowner, pg->owner_tag);
	else
		(*pr)(" page not busy, no owner\n");
#else
	(*pr)(" [page ownership tracking disabled]\n");
#endif

	if (!full)
		return;

	/* cross-verify object/anon */
	if ((pg->flags & PG_FREE) == 0) {
		if (pg->flags & PG_ANON) {
			if (pg->uanon == NULL || pg->uanon->an_page != pg)
				(*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
				    (pg->uanon) ? pg->uanon->an_page : NULL);
			else
				(*pr)(" anon backpointer is OK\n");
		} else {
			uobj = pg->uobject;
			if (uobj) {
				(*pr)(" checking object list\n");
				tpg = uvm_pagelookup(uobj, pg->offset);
				if (tpg)
					(*pr)(" page found on object list\n");
				else
					(*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
			}
		}
	}

	/* cross-verify page queue */
	if (pg->flags & PG_FREE) {
		int fl = uvm_page_get_freelist(pg);
		int b = uvm_page_get_bucket(pg);
		pgb = uvm.page_free[fl].pgfl_buckets[b];
		pgl = &pgb->pgb_colors[VM_PGCOLOR(pg)];
		(*pr)(" checking pageq list\n");
		LIST_FOREACH(tpg, pgl, pageq.list) {
			if (tpg == pg) {
				break;
			}
		}
		if (tpg)
			(*pr)(" page found on pageq list\n");
		else
			(*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
	}
}

/*
 * uvm_page_printall - print a summary of all managed pages
 */

void
uvm_page_printall(void (*pr)(const char *, ...))
{
	uvm_physseg_t i;
	paddr_t pfn;
	struct vm_page *pg;

	(*pr)("%18s %4s %4s %18s %18s"
#ifdef UVM_PAGE_TRKOWN
	    " OWNER"
#endif
	    "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
	for (i = uvm_physseg_get_first();
	     uvm_physseg_valid_p(i);
	     i = uvm_physseg_get_next(i)) {
		for (pfn = uvm_physseg_get_start(i);
		     pfn < uvm_physseg_get_end(i);
		     pfn++) {
			pg = PHYS_TO_VM_PAGE(ptoa(pfn));

			(*pr)("%18p %04x %08x %18p %18p",
			    pg, pg->flags, pg->pqflags, pg->uobject,
			    pg->uanon);
#ifdef UVM_PAGE_TRKOWN
			if (pg->flags & PG_BUSY)
				(*pr)(" %d [%s]", pg->owner, pg->owner_tag);
#endif
			(*pr)("\n");
		}
	}
}

/*
 * uvm_page_print_freelists - print a summary of the freelists
 */

void
uvm_page_print_freelists(void (*pr)(const char *, ...))
{
	struct pgfreelist *pgfl;
	struct pgflbucket *pgb;
	int fl, b, c;

	(*pr)("There are %d freelists with %d buckets of %d colors.\n\n",
	    VM_NFREELIST, uvm.bucketcount, uvmexp.ncolors);

	for (fl = 0; fl < VM_NFREELIST; fl++) {
		pgfl = &uvm.page_free[fl];
		(*pr)("freelist(%d) @ %p\n", fl, pgfl);
		for (b = 0; b < uvm.bucketcount; b++) {
			pgb = uvm.page_free[fl].pgfl_buckets[b];
			(*pr)(" bucket(%d) @ %p, nfree = %d, lock @ %p:\n",
			    b, pgb, pgb->pgb_nfree,
			    &uvm_freelist_locks[b].lock);
			for (c = 0; c < uvmexp.ncolors; c++) {
				(*pr)(" color(%d) @ %p, ", c,
				    &pgb->pgb_colors[c]);
				(*pr)("first page = %p\n",
				    LIST_FIRST(&pgb->pgb_colors[c]));
			}
		}
	}
}

#endif /* DDB || DEBUGPRINT */
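/*
 * Indexing sketch (illustrative only): the list a free page lives on is
 * selected by its free list, bucket and color, the same chain the
 * printing routines above walk:
 *
 *	pgb = uvm.page_free[uvm_page_get_freelist(pg)].
 *	    pgfl_buckets[uvm_page_get_bucket(pg)];
 *	pgl = &pgb->pgb_colors[VM_PGCOLOR(pg)];
 */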