1 /* $NetBSD: uvm_page.c,v 1.250 2020/12/20 11:11:34 skrll Exp $ */ 2 3 /*- 4 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1997 Charles D. Cranor and Washington University. 34 * Copyright (c) 1991, 1993, The Regents of the University of California. 35 * 36 * All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * The Mach Operating System project at Carnegie-Mellon University. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 66 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp 67 * 68 * 69 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 70 * All rights reserved. 71 * 72 * Permission to use, copy, modify and distribute this software and 73 * its documentation is hereby granted, provided that both the copyright 74 * notice and this permission notice appear in all copies of the 75 * software, derivative works or modified versions, and any portions 76 * thereof, and that both notices appear in supporting documentation. 77 * 78 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 79 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 80 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 81 * 82 * Carnegie Mellon requests users of this software to return to 83 * 84 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 85 * School of Computer Science 86 * Carnegie Mellon University 87 * Pittsburgh PA 15213-3890 88 * 89 * any improvements or extensions that they make and grant Carnegie the 90 * rights to redistribute these changes. 91 */ 92 93 /* 94 * uvm_page.c: page ops. 95 */ 96 97 #include <sys/cdefs.h> 98 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.250 2020/12/20 11:11:34 skrll Exp $"); 99 100 #include "opt_ddb.h" 101 #include "opt_uvm.h" 102 #include "opt_uvmhist.h" 103 #include "opt_readahead.h" 104 105 #include <sys/param.h> 106 #include <sys/systm.h> 107 #include <sys/sched.h> 108 #include <sys/kernel.h> 109 #include <sys/vnode.h> 110 #include <sys/proc.h> 111 #include <sys/radixtree.h> 112 #include <sys/atomic.h> 113 #include <sys/cpu.h> 114 115 #include <uvm/uvm.h> 116 #include <uvm/uvm_ddb.h> 117 #include <uvm/uvm_pdpolicy.h> 118 #include <uvm/uvm_pgflcache.h> 119 120 /* 121 * number of pages per-CPU to reserve for the kernel. 122 */ 123 #ifndef UVM_RESERVED_PAGES_PER_CPU 124 #define UVM_RESERVED_PAGES_PER_CPU 5 125 #endif 126 int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; 127 128 /* 129 * physical memory size; 130 */ 131 psize_t physmem; 132 133 /* 134 * local variables 135 */ 136 137 /* 138 * these variables record the values returned by vm_page_bootstrap, 139 * for debugging purposes. The implementation of uvm_pageboot_alloc 140 * and pmap_startup here also uses them internally. 141 */ 142 143 static vaddr_t virtual_space_start; 144 static vaddr_t virtual_space_end; 145 146 /* 147 * we allocate an initial number of page colors in uvm_page_init(), 148 * and remember them. We may re-color pages as cache sizes are 149 * discovered during the autoconfiguration phase. But we can never 150 * free the initial set of buckets, since they are allocated using 151 * uvm_pageboot_alloc(). 152 */ 153 154 static size_t recolored_pages_memsize /* = 0 */; 155 static char *recolored_pages_mem; 156 157 /* 158 * freelist locks - one per bucket. 159 */ 160 161 union uvm_freelist_lock uvm_freelist_locks[PGFL_MAX_BUCKETS] 162 __cacheline_aligned; 163 164 /* 165 * basic NUMA information. 166 */ 167 168 static struct uvm_page_numa_region { 169 struct uvm_page_numa_region *next; 170 paddr_t start; 171 paddr_t size; 172 u_int numa_id; 173 } *uvm_page_numa_region; 174 175 #ifdef DEBUG 176 kmutex_t uvm_zerochecklock __cacheline_aligned; 177 vaddr_t uvm_zerocheckkva; 178 #endif /* DEBUG */ 179 180 /* 181 * These functions are reserved for uvm(9) internal use and are not 182 * exported in the header file uvm_physseg.h 183 * 184 * Thus they are redefined here. 
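 * (the definitions live in uvm_physseg.c; keep these prototypes in sync
 * with them.)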
185 */ 186 void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *); 187 void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t); 188 189 /* returns a pgs array */ 190 struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t); 191 192 /* 193 * inline functions 194 */ 195 196 /* 197 * uvm_pageinsert: insert a page in the object. 198 * 199 * => caller must lock object 200 * => call should have already set pg's object and offset pointers 201 * and bumped the version counter 202 */ 203 204 static inline void 205 uvm_pageinsert_object(struct uvm_object *uobj, struct vm_page *pg) 206 { 207 208 KASSERT(uobj == pg->uobject); 209 KASSERT(rw_write_held(uobj->vmobjlock)); 210 KASSERT((pg->flags & PG_TABLED) == 0); 211 212 if ((pg->flags & PG_STAT) != 0) { 213 /* Cannot use uvm_pagegetdirty(): not yet in radix tree. */ 214 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 215 216 if ((pg->flags & PG_FILE) != 0) { 217 if (uobj->uo_npages == 0) { 218 struct vnode *vp = (struct vnode *)uobj; 219 mutex_enter(vp->v_interlock); 220 KASSERT((vp->v_iflag & VI_PAGES) == 0); 221 vp->v_iflag |= VI_PAGES; 222 vholdl(vp); 223 mutex_exit(vp->v_interlock); 224 } 225 if (UVM_OBJ_IS_VTEXT(uobj)) { 226 cpu_count(CPU_COUNT_EXECPAGES, 1); 227 } 228 cpu_count(CPU_COUNT_FILEUNKNOWN + status, 1); 229 } else { 230 cpu_count(CPU_COUNT_ANONUNKNOWN + status, 1); 231 } 232 } 233 pg->flags |= PG_TABLED; 234 uobj->uo_npages++; 235 } 236 237 static inline int 238 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) 239 { 240 const uint64_t idx = pg->offset >> PAGE_SHIFT; 241 int error; 242 243 KASSERT(rw_write_held(uobj->vmobjlock)); 244 245 error = radix_tree_insert_node(&uobj->uo_pages, idx, pg); 246 if (error != 0) { 247 return error; 248 } 249 if ((pg->flags & PG_CLEAN) == 0) { 250 uvm_obj_page_set_dirty(pg); 251 } 252 KASSERT(((pg->flags & PG_CLEAN) == 0) == 253 uvm_obj_page_dirty_p(pg)); 254 return 0; 255 } 256 257 /* 258 * uvm_page_remove: remove page from object. 259 * 260 * => caller must lock object 261 */ 262 263 static inline void 264 uvm_pageremove_object(struct uvm_object *uobj, struct vm_page *pg) 265 { 266 267 KASSERT(uobj == pg->uobject); 268 KASSERT(rw_write_held(uobj->vmobjlock)); 269 KASSERT(pg->flags & PG_TABLED); 270 271 if ((pg->flags & PG_STAT) != 0) { 272 /* Cannot use uvm_pagegetdirty(): no longer in radix tree. 
*/ 273 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 274 275 if ((pg->flags & PG_FILE) != 0) { 276 if (uobj->uo_npages == 1) { 277 struct vnode *vp = (struct vnode *)uobj; 278 mutex_enter(vp->v_interlock); 279 KASSERT((vp->v_iflag & VI_PAGES) != 0); 280 vp->v_iflag &= ~VI_PAGES; 281 holdrelel(vp); 282 mutex_exit(vp->v_interlock); 283 } 284 if (UVM_OBJ_IS_VTEXT(uobj)) { 285 cpu_count(CPU_COUNT_EXECPAGES, -1); 286 } 287 cpu_count(CPU_COUNT_FILEUNKNOWN + status, -1); 288 } else { 289 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 290 } 291 } 292 uobj->uo_npages--; 293 pg->flags &= ~PG_TABLED; 294 pg->uobject = NULL; 295 } 296 297 static inline void 298 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg) 299 { 300 struct vm_page *opg __unused; 301 302 KASSERT(rw_write_held(uobj->vmobjlock)); 303 304 opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT); 305 KASSERT(pg == opg); 306 } 307 308 static void 309 uvm_page_init_bucket(struct pgfreelist *pgfl, struct pgflbucket *pgb, int num) 310 { 311 int i; 312 313 pgb->pgb_nfree = 0; 314 for (i = 0; i < uvmexp.ncolors; i++) { 315 LIST_INIT(&pgb->pgb_colors[i]); 316 } 317 pgfl->pgfl_buckets[num] = pgb; 318 } 319 320 /* 321 * uvm_page_init: init the page system. called from uvm_init(). 322 * 323 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 324 */ 325 326 void 327 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) 328 { 329 static struct uvm_cpu boot_cpu __cacheline_aligned; 330 psize_t freepages, pagecount, bucketsize, n; 331 struct pgflbucket *pgb; 332 struct vm_page *pagearray; 333 char *bucketarray; 334 uvm_physseg_t bank; 335 int fl, b; 336 337 KASSERT(ncpu <= 1); 338 339 /* 340 * init the page queues and free page queue locks, except the 341 * free list; we allocate that later (with the initial vm_page 342 * structures). 343 */ 344 345 curcpu()->ci_data.cpu_uvm = &boot_cpu; 346 uvmpdpol_init(); 347 for (b = 0; b < __arraycount(uvm_freelist_locks); b++) { 348 mutex_init(&uvm_freelist_locks[b].lock, MUTEX_DEFAULT, IPL_VM); 349 } 350 351 /* 352 * allocate vm_page structures. 353 */ 354 355 /* 356 * sanity check: 357 * before calling this function the MD code is expected to register 358 * some free RAM with the uvm_page_physload() function. our job 359 * now is to allocate vm_page structures for this memory. 360 */ 361 362 if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID) 363 panic("uvm_page_bootstrap: no memory pre-allocated"); 364 365 /* 366 * first calculate the number of free pages... 367 * 368 * note that we use start/end rather than avail_start/avail_end. 369 * this allows us to allocate extra vm_page structures in case we 370 * want to return some memory to the pool after booting. 371 */ 372 373 freepages = 0; 374 375 for (bank = uvm_physseg_get_first(); 376 uvm_physseg_valid_p(bank) ; 377 bank = uvm_physseg_get_next(bank)) { 378 freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank)); 379 } 380 381 /* 382 * Let MD code initialize the number of colors, or default 383 * to 1 color if MD code doesn't care. 384 */ 385 if (uvmexp.ncolors == 0) 386 uvmexp.ncolors = 1; 387 uvmexp.colormask = uvmexp.ncolors - 1; 388 KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); 389 390 /* We always start with only 1 bucket. */ 391 uvm.bucketcount = 1; 392 393 /* 394 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 395 * use. for each page of memory we use we need a vm_page structure. 
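 * (as a rough illustration only: assuming 4 KiB pages and a struct
 * vm_page of around 128 bytes, the vm_page array consumes on the order
 * of 3% of managed RAM; the exact figure is architecture-dependent.)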
396 * thus, the total number of pages we can use is the total size of 397 * the memory divided by the PAGE_SIZE plus the size of the vm_page 398 * structure. we add one to freepages as a fudge factor to avoid 399 * truncation errors (since we can only allocate in terms of whole 400 * pages). 401 */ 402 pagecount = ((freepages + 1) << PAGE_SHIFT) / 403 (PAGE_SIZE + sizeof(struct vm_page)); 404 bucketsize = offsetof(struct pgflbucket, pgb_colors[uvmexp.ncolors]); 405 bucketsize = roundup2(bucketsize, coherency_unit); 406 bucketarray = (void *)uvm_pageboot_alloc( 407 bucketsize * VM_NFREELIST + 408 pagecount * sizeof(struct vm_page)); 409 pagearray = (struct vm_page *) 410 (bucketarray + bucketsize * VM_NFREELIST); 411 412 for (fl = 0; fl < VM_NFREELIST; fl++) { 413 pgb = (struct pgflbucket *)(bucketarray + bucketsize * fl); 414 uvm_page_init_bucket(&uvm.page_free[fl], pgb, 0); 415 } 416 memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 417 418 /* 419 * init the freelist cache in the disabled state. 420 */ 421 uvm_pgflcache_init(); 422 423 /* 424 * init the vm_page structures and put them in the correct place. 425 */ 426 /* First init the extent */ 427 428 for (bank = uvm_physseg_get_first(), 429 uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount); 430 uvm_physseg_valid_p(bank); 431 bank = uvm_physseg_get_next(bank)) { 432 433 n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank); 434 uvm_physseg_seg_alloc_from_slab(bank, n); 435 uvm_physseg_init_seg(bank, pagearray); 436 437 /* set up page array pointers */ 438 pagearray += n; 439 pagecount -= n; 440 } 441 442 /* 443 * pass up the values of virtual_space_start and 444 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 445 * layers of the VM. 446 */ 447 448 *kvm_startp = round_page(virtual_space_start); 449 *kvm_endp = trunc_page(virtual_space_end); 450 451 /* 452 * init various thresholds. 453 */ 454 455 uvmexp.reserve_pagedaemon = 1; 456 uvmexp.reserve_kernel = vm_page_reserve_kernel; 457 458 /* 459 * done! 460 */ 461 462 uvm.page_init_done = true; 463 } 464 465 /* 466 * uvm_pgfl_lock: lock all freelist buckets 467 */ 468 469 void 470 uvm_pgfl_lock(void) 471 { 472 int i; 473 474 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 475 mutex_spin_enter(&uvm_freelist_locks[i].lock); 476 } 477 } 478 479 /* 480 * uvm_pgfl_unlock: unlock all freelist buckets 481 */ 482 483 void 484 uvm_pgfl_unlock(void) 485 { 486 int i; 487 488 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 489 mutex_spin_exit(&uvm_freelist_locks[i].lock); 490 } 491 } 492 493 /* 494 * uvm_setpagesize: set the page size 495 * 496 * => sets page_shift and page_mask from uvmexp.pagesize. 497 */ 498 499 void 500 uvm_setpagesize(void) 501 { 502 503 /* 504 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE 505 * to be a constant (indicated by being a non-zero value). 
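 * Ports whose page size is only known at run time must have set
 * uvmexp.pagesize from MD code before this function is called, or we
 * panic below.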
506 */ 507 if (uvmexp.pagesize == 0) { 508 if (PAGE_SIZE == 0) 509 panic("uvm_setpagesize: uvmexp.pagesize not set"); 510 uvmexp.pagesize = PAGE_SIZE; 511 } 512 uvmexp.pagemask = uvmexp.pagesize - 1; 513 if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 514 panic("uvm_setpagesize: page size %u (%#x) not a power of two", 515 uvmexp.pagesize, uvmexp.pagesize); 516 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 517 if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 518 break; 519 } 520 521 /* 522 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 523 */ 524 525 vaddr_t 526 uvm_pageboot_alloc(vsize_t size) 527 { 528 static bool initialized = false; 529 vaddr_t addr; 530 #if !defined(PMAP_STEAL_MEMORY) 531 vaddr_t vaddr; 532 paddr_t paddr; 533 #endif 534 535 /* 536 * on first call to this function, initialize ourselves. 537 */ 538 if (initialized == false) { 539 pmap_virtual_space(&virtual_space_start, &virtual_space_end); 540 541 /* round it the way we like it */ 542 virtual_space_start = round_page(virtual_space_start); 543 virtual_space_end = trunc_page(virtual_space_end); 544 545 initialized = true; 546 } 547 548 /* round to page size */ 549 size = round_page(size); 550 uvmexp.bootpages += atop(size); 551 552 #if defined(PMAP_STEAL_MEMORY) 553 554 /* 555 * defer bootstrap allocation to MD code (it may want to allocate 556 * from a direct-mapped segment). pmap_steal_memory should adjust 557 * virtual_space_start/virtual_space_end if necessary. 558 */ 559 560 addr = pmap_steal_memory(size, &virtual_space_start, 561 &virtual_space_end); 562 563 return addr; 564 565 #else /* !PMAP_STEAL_MEMORY */ 566 567 /* 568 * allocate virtual memory for this request 569 */ 570 if (virtual_space_start == virtual_space_end || 571 (virtual_space_end - virtual_space_start) < size) 572 panic("uvm_pageboot_alloc: out of virtual space"); 573 574 addr = virtual_space_start; 575 576 #ifdef PMAP_GROWKERNEL 577 /* 578 * If the kernel pmap can't map the requested space, 579 * then allocate more resources for it. 580 */ 581 if (uvm_maxkaddr < (addr + size)) { 582 uvm_maxkaddr = pmap_growkernel(addr + size); 583 if (uvm_maxkaddr < (addr + size)) 584 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 585 } 586 #endif 587 588 virtual_space_start += size; 589 590 /* 591 * allocate and mapin physical pages to back new virtual pages 592 */ 593 594 for (vaddr = round_page(addr) ; vaddr < addr + size ; 595 vaddr += PAGE_SIZE) { 596 597 if (!uvm_page_physget(&paddr)) 598 panic("uvm_pageboot_alloc: out of memory"); 599 600 /* 601 * Note this memory is no longer managed, so using 602 * pmap_kenter is safe. 603 */ 604 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 605 } 606 pmap_update(pmap_kernel()); 607 return addr; 608 #endif /* PMAP_STEAL_MEMORY */ 609 } 610 611 #if !defined(PMAP_STEAL_MEMORY) 612 /* 613 * uvm_page_physget: "steal" one page from the vm_physmem structure. 614 * 615 * => attempt to allocate it off the end of a segment in which the "avail" 616 * values match the start/end values. if we can't do that, then we 617 * will advance both values (making them equal, and removing some 618 * vm_page structures from the non-avail area). 619 * => return false if out of memory. 
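 * => may only be used during bootstrap: it panics if called after
 *	uvm_page_init() has completed.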
620 */ 621 622 /* subroutine: try to allocate from memory chunks on the specified freelist */ 623 static bool uvm_page_physget_freelist(paddr_t *, int); 624 625 static bool 626 uvm_page_physget_freelist(paddr_t *paddrp, int freelist) 627 { 628 uvm_physseg_t lcv; 629 630 /* pass 1: try allocating from a matching end */ 631 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 632 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 633 #else 634 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 635 #endif 636 { 637 if (uvm.page_init_done == true) 638 panic("uvm_page_physget: called _after_ bootstrap"); 639 640 /* Try to match at front or back on unused segment */ 641 if (uvm_page_physunload(lcv, freelist, paddrp)) 642 return true; 643 } 644 645 /* pass2: forget about matching ends, just allocate something */ 646 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 647 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 648 #else 649 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 650 #endif 651 { 652 /* Try the front regardless. */ 653 if (uvm_page_physunload_force(lcv, freelist, paddrp)) 654 return true; 655 } 656 return false; 657 } 658 659 bool 660 uvm_page_physget(paddr_t *paddrp) 661 { 662 int i; 663 664 /* try in the order of freelist preference */ 665 for (i = 0; i < VM_NFREELIST; i++) 666 if (uvm_page_physget_freelist(paddrp, i) == true) 667 return (true); 668 return (false); 669 } 670 #endif /* PMAP_STEAL_MEMORY */ 671 672 /* 673 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages 674 * back from an I/O mapping (ugh!). used in some MD code as well. 675 */ 676 struct vm_page * 677 uvm_phys_to_vm_page(paddr_t pa) 678 { 679 paddr_t pf = atop(pa); 680 paddr_t off; 681 uvm_physseg_t upm; 682 683 upm = uvm_physseg_find(pf, &off); 684 if (upm != UVM_PHYSSEG_TYPE_INVALID) 685 return uvm_physseg_get_pg(upm, off); 686 return(NULL); 687 } 688 689 paddr_t 690 uvm_vm_page_to_phys(const struct vm_page *pg) 691 { 692 693 return pg->phys_addr & ~(PAGE_SIZE - 1); 694 } 695 696 /* 697 * uvm_page_numa_load: load NUMA range description. 698 */ 699 void 700 uvm_page_numa_load(paddr_t start, paddr_t size, u_int numa_id) 701 { 702 struct uvm_page_numa_region *d; 703 704 KASSERT(numa_id < PGFL_MAX_BUCKETS); 705 706 d = kmem_alloc(sizeof(*d), KM_SLEEP); 707 d->start = start; 708 d->size = size; 709 d->numa_id = numa_id; 710 d->next = uvm_page_numa_region; 711 uvm_page_numa_region = d; 712 } 713 714 /* 715 * uvm_page_numa_lookup: lookup NUMA node for the given page. 716 */ 717 static u_int 718 uvm_page_numa_lookup(struct vm_page *pg) 719 { 720 struct uvm_page_numa_region *d; 721 static bool warned; 722 paddr_t pa; 723 724 KASSERT(uvm_page_numa_region != NULL); 725 726 pa = VM_PAGE_TO_PHYS(pg); 727 for (d = uvm_page_numa_region; d != NULL; d = d->next) { 728 if (pa >= d->start && pa < d->start + d->size) { 729 return d->numa_id; 730 } 731 } 732 733 if (!warned) { 734 printf("uvm_page_numa_lookup: failed, first pg=%p pa=%#" 735 PRIxPADDR "\n", pg, VM_PAGE_TO_PHYS(pg)); 736 warned = true; 737 } 738 739 return 0; 740 } 741 742 /* 743 * uvm_page_redim: adjust freelist dimensions if they have changed. 
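 * (i.e. rebuild the free lists with a new number of page colors and/or
 * free page buckets, and move every free page across to the new
 * structure.)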
 */

static void
uvm_page_redim(int newncolors, int newnbuckets)
{
	struct pgfreelist npgfl;
	struct pgflbucket *opgb, *npgb;
	struct pgflist *ohead, *nhead;
	struct vm_page *pg;
	size_t bucketsize, bucketmemsize, oldbucketmemsize;
	int fl, ob, oc, nb, nc, obuckets, ocolors;
	char *bucketarray, *oldbucketmem, *bucketmem;

	KASSERT(((newncolors - 1) & newncolors) == 0);

	/* Anything to do? */
	if (newncolors <= uvmexp.ncolors &&
	    newnbuckets == uvm.bucketcount) {
		return;
	}
	if (uvm.page_init_done == false) {
		uvmexp.ncolors = newncolors;
		return;
	}

	bucketsize = offsetof(struct pgflbucket, pgb_colors[newncolors]);
	bucketsize = roundup2(bucketsize, coherency_unit);
	bucketmemsize = bucketsize * newnbuckets * VM_NFREELIST +
	    coherency_unit - 1;
	bucketmem = kmem_zalloc(bucketmemsize, KM_SLEEP);
	bucketarray = (char *)roundup2((uintptr_t)bucketmem, coherency_unit);

	ocolors = uvmexp.ncolors;
	obuckets = uvm.bucketcount;

	/* Freelist cache mustn't be enabled. */
	uvm_pgflcache_pause();

	/* Make sure we should still do this. */
	uvm_pgfl_lock();
	if (newncolors <= uvmexp.ncolors &&
	    newnbuckets == uvm.bucketcount) {
		uvm_pgfl_unlock();
		uvm_pgflcache_resume();
		kmem_free(bucketmem, bucketmemsize);
		return;
	}

	uvmexp.ncolors = newncolors;
	uvmexp.colormask = uvmexp.ncolors - 1;
	uvm.bucketcount = newnbuckets;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
		/* Init new buckets in new freelist. */
		memset(&npgfl, 0, sizeof(npgfl));
		for (nb = 0; nb < newnbuckets; nb++) {
			npgb = (struct pgflbucket *)bucketarray;
			uvm_page_init_bucket(&npgfl, npgb, nb);
			bucketarray += bucketsize;
		}
		/* Now transfer pages from the old freelist. */
		for (nb = ob = 0; ob < obuckets; ob++) {
			opgb = uvm.page_free[fl].pgfl_buckets[ob];
			for (oc = 0; oc < ocolors; oc++) {
				ohead = &opgb->pgb_colors[oc];
				while ((pg = LIST_FIRST(ohead)) != NULL) {
					LIST_REMOVE(pg, pageq.list);
					/*
					 * Here we decide on the NEW color &
					 * bucket for the page. For NUMA
					 * we'll use the info that the
					 * hardware gave us. For non-NUMA
					 * we take the physical page frame
					 * number and cache color into
					 * account. We do this to try and
					 * avoid defeating any memory
					 * interleaving in the hardware.
					 */
					KASSERT(
					    uvm_page_get_bucket(pg) == ob);
					KASSERT(fl ==
					    uvm_page_get_freelist(pg));
					if (uvm_page_numa_region != NULL) {
						nb = uvm_page_numa_lookup(pg);
					} else {
						nb = atop(VM_PAGE_TO_PHYS(pg))
						    / uvmexp.ncolors / 8
						    % newnbuckets;
					}
					uvm_page_set_bucket(pg, nb);
					npgb = npgfl.pgfl_buckets[nb];
					npgb->pgb_nfree++;
					nc = VM_PGCOLOR(pg);
					nhead = &npgb->pgb_colors[nc];
					LIST_INSERT_HEAD(nhead, pg, pageq.list);
				}
			}
		}
		/* Install the new freelist. */
		memcpy(&uvm.page_free[fl], &npgfl, sizeof(npgfl));
	}

	/* Unlock and free the old memory. */
	oldbucketmemsize = recolored_pages_memsize;
	oldbucketmem = recolored_pages_mem;
	recolored_pages_memsize = bucketmemsize;
	recolored_pages_mem = bucketmem;

	uvm_pgfl_unlock();
	uvm_pgflcache_resume();

	if (oldbucketmemsize) {
		kmem_free(oldbucketmem, oldbucketmemsize);
	}

	/*
	 * this calls uvm_km_alloc() which may want to hold
	 * uvm_freelist_lock.
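	 * that is why it is deferred until this point, after the freelist
	 * locks have been dropped again (uvm_pgfl_unlock() above).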
862 */ 863 uvm_pager_realloc_emerg(); 864 } 865 866 /* 867 * uvm_page_recolor: Recolor the pages if the new color count is 868 * larger than the old one. 869 */ 870 871 void 872 uvm_page_recolor(int newncolors) 873 { 874 875 uvm_page_redim(newncolors, uvm.bucketcount); 876 } 877 878 /* 879 * uvm_page_rebucket: Determine a bucket structure and redim the free 880 * lists to match. 881 */ 882 883 void 884 uvm_page_rebucket(void) 885 { 886 u_int min_numa, max_numa, npackage, shift; 887 struct cpu_info *ci, *ci2, *ci3; 888 CPU_INFO_ITERATOR cii; 889 890 /* 891 * If we have more than one NUMA node, and the maximum NUMA node ID 892 * is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution 893 * for free pages. 894 */ 895 min_numa = (u_int)-1; 896 max_numa = 0; 897 for (CPU_INFO_FOREACH(cii, ci)) { 898 if (ci->ci_numa_id < min_numa) { 899 min_numa = ci->ci_numa_id; 900 } 901 if (ci->ci_numa_id > max_numa) { 902 max_numa = ci->ci_numa_id; 903 } 904 } 905 if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) { 906 aprint_debug("UVM: using NUMA allocation scheme\n"); 907 for (CPU_INFO_FOREACH(cii, ci)) { 908 ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id; 909 } 910 uvm_page_redim(uvmexp.ncolors, max_numa + 1); 911 return; 912 } 913 914 /* 915 * Otherwise we'll go with a scheme to maximise L2/L3 cache locality 916 * and minimise lock contention. Count the total number of CPU 917 * packages, and then try to distribute the buckets among CPU 918 * packages evenly. 919 */ 920 npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE1ST]; 921 922 /* 923 * Figure out how to arrange the packages & buckets, and the total 924 * number of buckets we need. XXX 2 may not be the best factor. 925 */ 926 for (shift = 0; npackage > PGFL_MAX_BUCKETS; shift++) { 927 npackage >>= 1; 928 } 929 uvm_page_redim(uvmexp.ncolors, npackage); 930 931 /* 932 * Now tell each CPU which bucket to use. In the outer loop, scroll 933 * through all CPU packages. 934 */ 935 npackage = 0; 936 ci = curcpu(); 937 ci2 = ci->ci_sibling[CPUREL_PACKAGE1ST]; 938 do { 939 /* 940 * In the inner loop, scroll through all CPUs in the package 941 * and assign the same bucket ID. 942 */ 943 ci3 = ci2; 944 do { 945 ci3->ci_data.cpu_uvm->pgflbucket = npackage >> shift; 946 ci3 = ci3->ci_sibling[CPUREL_PACKAGE]; 947 } while (ci3 != ci2); 948 npackage++; 949 ci2 = ci2->ci_sibling[CPUREL_PACKAGE1ST]; 950 } while (ci2 != ci->ci_sibling[CPUREL_PACKAGE1ST]); 951 952 aprint_debug("UVM: using package allocation scheme, " 953 "%d package(s) per bucket\n", 1 << shift); 954 } 955 956 /* 957 * uvm_cpu_attach: initialize per-CPU data structures. 958 */ 959 960 void 961 uvm_cpu_attach(struct cpu_info *ci) 962 { 963 struct uvm_cpu *ucpu; 964 965 /* Already done in uvm_page_init(). */ 966 if (!CPU_IS_PRIMARY(ci)) { 967 /* Add more reserve pages for this CPU. */ 968 uvmexp.reserve_kernel += vm_page_reserve_kernel; 969 970 /* Allocate per-CPU data structures. */ 971 ucpu = kmem_zalloc(sizeof(struct uvm_cpu) + coherency_unit - 1, 972 KM_SLEEP); 973 ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu, 974 coherency_unit); 975 ci->ci_data.cpu_uvm = ucpu; 976 } else { 977 ucpu = ci->ci_data.cpu_uvm; 978 } 979 980 uvmpdpol_init_cpu(ucpu); 981 982 /* 983 * Attach RNG source for this CPU's VM events 984 */ 985 rnd_attach_source(&ucpu->rs, ci->ci_data.cpu_name, RND_TYPE_VM, 986 RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE| 987 RND_FLAG_ESTIMATE_VALUE); 988 } 989 990 /* 991 * uvm_availmem: fetch the total amount of free memory in pages. 
this can 992 * have a detrimental effect on performance due to false sharing; don't call 993 * unless needed. 994 * 995 * some users can request the amount of free memory so often that it begins 996 * to impact upon performance. if calling frequently and an inexact value 997 * is okay, call with cached = true. 998 */ 999 1000 int 1001 uvm_availmem(bool cached) 1002 { 1003 int64_t fp; 1004 1005 cpu_count_sync(cached); 1006 if ((fp = cpu_count_get(CPU_COUNT_FREEPAGES)) < 0) { 1007 /* 1008 * XXXAD could briefly go negative because it's impossible 1009 * to get a clean snapshot. address this for other counters 1010 * used as running totals before NetBSD 10 although less 1011 * important for those. 1012 */ 1013 fp = 0; 1014 } 1015 return (int)fp; 1016 } 1017 1018 /* 1019 * uvm_pagealloc_pgb: helper routine that tries to allocate any color from a 1020 * specific freelist and specific bucket only. 1021 * 1022 * => must be at IPL_VM or higher to protect per-CPU data structures. 1023 */ 1024 1025 static struct vm_page * 1026 uvm_pagealloc_pgb(struct uvm_cpu *ucpu, int f, int b, int *trycolorp, int flags) 1027 { 1028 int c, trycolor, colormask; 1029 struct pgflbucket *pgb; 1030 struct vm_page *pg; 1031 kmutex_t *lock; 1032 bool fill; 1033 1034 /* 1035 * Skip the bucket if empty, no lock needed. There could be many 1036 * empty freelists/buckets. 1037 */ 1038 pgb = uvm.page_free[f].pgfl_buckets[b]; 1039 if (pgb->pgb_nfree == 0) { 1040 return NULL; 1041 } 1042 1043 /* Skip bucket if low on memory. */ 1044 lock = &uvm_freelist_locks[b].lock; 1045 mutex_spin_enter(lock); 1046 if (__predict_false(pgb->pgb_nfree <= uvmexp.reserve_kernel)) { 1047 if ((flags & UVM_PGA_USERESERVE) == 0 || 1048 (pgb->pgb_nfree <= uvmexp.reserve_pagedaemon && 1049 curlwp != uvm.pagedaemon_lwp)) { 1050 mutex_spin_exit(lock); 1051 return NULL; 1052 } 1053 fill = false; 1054 } else { 1055 fill = true; 1056 } 1057 1058 /* Try all page colors as needed. */ 1059 c = trycolor = *trycolorp; 1060 colormask = uvmexp.colormask; 1061 do { 1062 pg = LIST_FIRST(&pgb->pgb_colors[c]); 1063 if (__predict_true(pg != NULL)) { 1064 /* 1065 * Got a free page! PG_FREE must be cleared under 1066 * lock because of uvm_pglistalloc(). 1067 */ 1068 LIST_REMOVE(pg, pageq.list); 1069 KASSERT(pg->flags == PG_FREE); 1070 pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE; 1071 pgb->pgb_nfree--; 1072 CPU_COUNT(CPU_COUNT_FREEPAGES, -1); 1073 1074 /* 1075 * While we have the bucket locked and our data 1076 * structures fresh in L1 cache, we have an ideal 1077 * opportunity to grab some pages for the freelist 1078 * cache without causing extra contention. Only do 1079 * so if we found pages in this CPU's preferred 1080 * bucket. 1081 */ 1082 if (__predict_true(b == ucpu->pgflbucket && fill)) { 1083 uvm_pgflcache_fill(ucpu, f, b, c); 1084 } 1085 mutex_spin_exit(lock); 1086 KASSERT(uvm_page_get_bucket(pg) == b); 1087 CPU_COUNT(c == trycolor ? 1088 CPU_COUNT_COLORHIT : CPU_COUNT_COLORMISS, 1); 1089 CPU_COUNT(CPU_COUNT_CPUMISS, 1); 1090 *trycolorp = c; 1091 return pg; 1092 } 1093 c = (c + 1) & colormask; 1094 } while (c != trycolor); 1095 mutex_spin_exit(lock); 1096 1097 return NULL; 1098 } 1099 1100 /* 1101 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat that allocates 1102 * any color from any bucket, in a specific freelist. 1103 * 1104 * => must be at IPL_VM or higher to protect per-CPU data structures. 
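 * => tries the per-CPU free page cache first; on a miss it walks the
 *    buckets, starting with this CPU's preferred bucket.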
1105 */ 1106 1107 static struct vm_page * 1108 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int f, int *trycolorp, int flags) 1109 { 1110 int b, trybucket, bucketcount; 1111 struct vm_page *pg; 1112 1113 /* Try for the exact thing in the per-CPU cache. */ 1114 if ((pg = uvm_pgflcache_alloc(ucpu, f, *trycolorp)) != NULL) { 1115 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1116 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1117 return pg; 1118 } 1119 1120 /* Walk through all buckets, trying our preferred bucket first. */ 1121 trybucket = ucpu->pgflbucket; 1122 b = trybucket; 1123 bucketcount = uvm.bucketcount; 1124 do { 1125 pg = uvm_pagealloc_pgb(ucpu, f, b, trycolorp, flags); 1126 if (pg != NULL) { 1127 return pg; 1128 } 1129 b = (b + 1 == bucketcount ? 0 : b + 1); 1130 } while (b != trybucket); 1131 1132 return NULL; 1133 } 1134 1135 /* 1136 * uvm_pagealloc_strat: allocate vm_page from a particular free list. 1137 * 1138 * => return null if no pages free 1139 * => wake up pagedaemon if number of free pages drops below low water mark 1140 * => if obj != NULL, obj must be locked (to put in obj's tree) 1141 * => if anon != NULL, anon must be locked (to put in anon) 1142 * => only one of obj or anon can be non-null 1143 * => caller must activate/deactivate page if it is not wired. 1144 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 1145 * => policy decision: it is more important to pull a page off of the 1146 * appropriate priority free list than it is to get a page from the 1147 * correct bucket or color bin. This is because we live with the 1148 * consequences of a bad free list decision for the entire 1149 * lifetime of the page, e.g. if the page comes from memory that 1150 * is slower to access. 1151 */ 1152 1153 struct vm_page * 1154 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, 1155 int flags, int strat, int free_list) 1156 { 1157 int color, lcv, error, s; 1158 struct uvm_cpu *ucpu; 1159 struct vm_page *pg; 1160 lwp_t *l; 1161 1162 KASSERT(obj == NULL || anon == NULL); 1163 KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); 1164 KASSERT(off == trunc_page(off)); 1165 KASSERT(obj == NULL || rw_write_held(obj->vmobjlock)); 1166 KASSERT(anon == NULL || anon->an_lock == NULL || 1167 rw_write_held(anon->an_lock)); 1168 1169 /* 1170 * This implements a global round-robin page coloring 1171 * algorithm. 1172 */ 1173 1174 s = splvm(); 1175 ucpu = curcpu()->ci_data.cpu_uvm; 1176 if (flags & UVM_FLAG_COLORMATCH) { 1177 color = atop(off) & uvmexp.colormask; 1178 } else { 1179 color = ucpu->pgflcolor; 1180 } 1181 1182 /* 1183 * fail if any of these conditions is true: 1184 * [1] there really are no free pages, or 1185 * [2] only kernel "reserved" pages remain and 1186 * reserved pages have not been requested. 1187 * [3] only pagedaemon "reserved" pages remain and 1188 * the requestor isn't the pagedaemon. 1189 * we make kernel reserve pages available if called by a 1190 * kernel thread. 1191 */ 1192 l = curlwp; 1193 if (__predict_true(l != NULL) && (l->l_flag & LW_SYSTEM) != 0) { 1194 flags |= UVM_PGA_USERESERVE; 1195 } 1196 1197 again: 1198 switch (strat) { 1199 case UVM_PGA_STRAT_NORMAL: 1200 /* Check freelists: descending priority (ascending id) order. */ 1201 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1202 pg = uvm_pagealloc_pgfl(ucpu, lcv, &color, flags); 1203 if (pg != NULL) { 1204 goto gotit; 1205 } 1206 } 1207 1208 /* No pages free! Have pagedaemon free some memory. 
*/ 1209 splx(s); 1210 uvm_kick_pdaemon(); 1211 return NULL; 1212 1213 case UVM_PGA_STRAT_ONLY: 1214 case UVM_PGA_STRAT_FALLBACK: 1215 /* Attempt to allocate from the specified free list. */ 1216 KASSERT(free_list >= 0 && free_list < VM_NFREELIST); 1217 pg = uvm_pagealloc_pgfl(ucpu, free_list, &color, flags); 1218 if (pg != NULL) { 1219 goto gotit; 1220 } 1221 1222 /* Fall back, if possible. */ 1223 if (strat == UVM_PGA_STRAT_FALLBACK) { 1224 strat = UVM_PGA_STRAT_NORMAL; 1225 goto again; 1226 } 1227 1228 /* No pages free! Have pagedaemon free some memory. */ 1229 splx(s); 1230 uvm_kick_pdaemon(); 1231 return NULL; 1232 1233 case UVM_PGA_STRAT_NUMA: 1234 /* 1235 * NUMA strategy (experimental): allocating from the correct 1236 * bucket is more important than observing freelist 1237 * priority. Look only to the current NUMA node; if that 1238 * fails, we need to look to other NUMA nodes, so retry with 1239 * the normal strategy. 1240 */ 1241 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1242 pg = uvm_pgflcache_alloc(ucpu, lcv, color); 1243 if (pg != NULL) { 1244 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1245 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1246 goto gotit; 1247 } 1248 pg = uvm_pagealloc_pgb(ucpu, lcv, 1249 ucpu->pgflbucket, &color, flags); 1250 if (pg != NULL) { 1251 goto gotit; 1252 } 1253 } 1254 strat = UVM_PGA_STRAT_NORMAL; 1255 goto again; 1256 1257 default: 1258 panic("uvm_pagealloc_strat: bad strat %d", strat); 1259 /* NOTREACHED */ 1260 } 1261 1262 gotit: 1263 /* 1264 * We now know which color we actually allocated from; set 1265 * the next color accordingly. 1266 */ 1267 1268 ucpu->pgflcolor = (color + 1) & uvmexp.colormask; 1269 1270 /* 1271 * while still at IPL_VM, update allocation statistics. 1272 */ 1273 1274 if (anon) { 1275 CPU_COUNT(CPU_COUNT_ANONCLEAN, 1); 1276 } 1277 splx(s); 1278 KASSERT(pg->flags == (PG_BUSY|PG_CLEAN|PG_FAKE)); 1279 1280 /* 1281 * assign the page to the object. as the page was free, we know 1282 * that pg->uobject and pg->uanon are NULL. we only need to take 1283 * the page's interlock if we are changing the values. 1284 */ 1285 if (anon != NULL || obj != NULL) { 1286 mutex_enter(&pg->interlock); 1287 } 1288 pg->offset = off; 1289 pg->uobject = obj; 1290 pg->uanon = anon; 1291 KASSERT(uvm_page_owner_locked_p(pg, true)); 1292 if (anon) { 1293 anon->an_page = pg; 1294 pg->flags |= PG_ANON; 1295 mutex_exit(&pg->interlock); 1296 } else if (obj) { 1297 /* 1298 * set PG_FILE|PG_AOBJ before the first uvm_pageinsert. 1299 */ 1300 if (UVM_OBJ_IS_VNODE(obj)) { 1301 pg->flags |= PG_FILE; 1302 } else if (UVM_OBJ_IS_AOBJ(obj)) { 1303 pg->flags |= PG_AOBJ; 1304 } 1305 uvm_pageinsert_object(obj, pg); 1306 mutex_exit(&pg->interlock); 1307 error = uvm_pageinsert_tree(obj, pg); 1308 if (error != 0) { 1309 mutex_enter(&pg->interlock); 1310 uvm_pageremove_object(obj, pg); 1311 mutex_exit(&pg->interlock); 1312 uvm_pagefree(pg); 1313 return NULL; 1314 } 1315 } 1316 1317 #if defined(UVM_PAGE_TRKOWN) 1318 pg->owner_tag = NULL; 1319 #endif 1320 UVM_PAGE_OWN(pg, "new alloc"); 1321 1322 if (flags & UVM_PGA_ZERO) { 1323 /* A zero'd page is not clean. 
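		 * the zeroed contents are not yet reflected on backing
		 * store, so the page must not be left marked clean.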
*/ 1324 if (obj != NULL || anon != NULL) { 1325 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1326 } 1327 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1328 } 1329 1330 return(pg); 1331 } 1332 1333 /* 1334 * uvm_pagereplace: replace a page with another 1335 * 1336 * => object must be locked 1337 * => page interlocks must be held 1338 */ 1339 1340 void 1341 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) 1342 { 1343 struct uvm_object *uobj = oldpg->uobject; 1344 struct vm_page *pg __diagused; 1345 uint64_t idx; 1346 1347 KASSERT((oldpg->flags & PG_TABLED) != 0); 1348 KASSERT(uobj != NULL); 1349 KASSERT((newpg->flags & PG_TABLED) == 0); 1350 KASSERT(newpg->uobject == NULL); 1351 KASSERT(rw_write_held(uobj->vmobjlock)); 1352 KASSERT(mutex_owned(&oldpg->interlock)); 1353 KASSERT(mutex_owned(&newpg->interlock)); 1354 1355 newpg->uobject = uobj; 1356 newpg->offset = oldpg->offset; 1357 idx = newpg->offset >> PAGE_SHIFT; 1358 pg = radix_tree_replace_node(&uobj->uo_pages, idx, newpg); 1359 KASSERT(pg == oldpg); 1360 if (((oldpg->flags ^ newpg->flags) & PG_CLEAN) != 0) { 1361 if ((newpg->flags & PG_CLEAN) != 0) { 1362 uvm_obj_page_clear_dirty(newpg); 1363 } else { 1364 uvm_obj_page_set_dirty(newpg); 1365 } 1366 } 1367 /* 1368 * oldpg's PG_STAT is stable. newpg is not reachable by others yet. 1369 */ 1370 newpg->flags |= 1371 (newpg->flags & ~PG_STAT) | (oldpg->flags & PG_STAT); 1372 uvm_pageinsert_object(uobj, newpg); 1373 uvm_pageremove_object(uobj, oldpg); 1374 } 1375 1376 /* 1377 * uvm_pagerealloc: reallocate a page from one object to another 1378 * 1379 * => both objects must be locked 1380 */ 1381 1382 int 1383 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) 1384 { 1385 int error = 0; 1386 1387 /* 1388 * remove it from the old object 1389 */ 1390 1391 if (pg->uobject) { 1392 uvm_pageremove_tree(pg->uobject, pg); 1393 uvm_pageremove_object(pg->uobject, pg); 1394 } 1395 1396 /* 1397 * put it in the new object 1398 */ 1399 1400 if (newobj) { 1401 mutex_enter(&pg->interlock); 1402 pg->uobject = newobj; 1403 pg->offset = newoff; 1404 if (UVM_OBJ_IS_VNODE(newobj)) { 1405 pg->flags |= PG_FILE; 1406 } else if (UVM_OBJ_IS_AOBJ(newobj)) { 1407 pg->flags |= PG_AOBJ; 1408 } 1409 uvm_pageinsert_object(newobj, pg); 1410 mutex_exit(&pg->interlock); 1411 error = uvm_pageinsert_tree(newobj, pg); 1412 if (error != 0) { 1413 mutex_enter(&pg->interlock); 1414 uvm_pageremove_object(newobj, pg); 1415 mutex_exit(&pg->interlock); 1416 } 1417 } 1418 1419 return error; 1420 } 1421 1422 /* 1423 * uvm_pagefree: free page 1424 * 1425 * => erase page's identity (i.e. 
remove from object) 1426 * => put page on free list 1427 * => caller must lock owning object (either anon or uvm_object) 1428 * => assumes all valid mappings of pg are gone 1429 */ 1430 1431 void 1432 uvm_pagefree(struct vm_page *pg) 1433 { 1434 struct pgfreelist *pgfl; 1435 struct pgflbucket *pgb; 1436 struct uvm_cpu *ucpu; 1437 kmutex_t *lock; 1438 int bucket, s; 1439 bool locked; 1440 1441 #ifdef DEBUG 1442 if (pg->uobject == (void *)0xdeadbeef && 1443 pg->uanon == (void *)0xdeadbeef) { 1444 panic("uvm_pagefree: freeing free page %p", pg); 1445 } 1446 #endif /* DEBUG */ 1447 1448 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1449 KASSERT(!(pg->flags & PG_FREE)); 1450 KASSERT(pg->uobject == NULL || rw_write_held(pg->uobject->vmobjlock)); 1451 KASSERT(pg->uobject != NULL || pg->uanon == NULL || 1452 rw_write_held(pg->uanon->an_lock)); 1453 1454 /* 1455 * remove the page from the object's tree before acquiring any page 1456 * interlocks: this can acquire locks to free radixtree nodes. 1457 */ 1458 if (pg->uobject != NULL) { 1459 uvm_pageremove_tree(pg->uobject, pg); 1460 } 1461 1462 /* 1463 * if the page is loaned, resolve the loan instead of freeing. 1464 */ 1465 1466 if (pg->loan_count) { 1467 KASSERT(pg->wire_count == 0); 1468 1469 /* 1470 * if the page is owned by an anon then we just want to 1471 * drop anon ownership. the kernel will free the page when 1472 * it is done with it. if the page is owned by an object, 1473 * remove it from the object and mark it dirty for the benefit 1474 * of possible anon owners. 1475 * 1476 * regardless of previous ownership, wakeup any waiters, 1477 * unbusy the page, and we're done. 1478 */ 1479 1480 uvm_pagelock(pg); 1481 locked = true; 1482 if (pg->uobject != NULL) { 1483 uvm_pageremove_object(pg->uobject, pg); 1484 pg->flags &= ~(PG_FILE|PG_AOBJ); 1485 } else if (pg->uanon != NULL) { 1486 if ((pg->flags & PG_ANON) == 0) { 1487 pg->loan_count--; 1488 } else { 1489 const unsigned status = uvm_pagegetdirty(pg); 1490 pg->flags &= ~PG_ANON; 1491 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1492 } 1493 pg->uanon->an_page = NULL; 1494 pg->uanon = NULL; 1495 } 1496 if (pg->pqflags & PQ_WANTED) { 1497 wakeup(pg); 1498 } 1499 pg->pqflags &= ~PQ_WANTED; 1500 pg->flags &= ~(PG_BUSY|PG_RELEASED|PG_PAGER1); 1501 #ifdef UVM_PAGE_TRKOWN 1502 pg->owner_tag = NULL; 1503 #endif 1504 KASSERT((pg->flags & PG_STAT) == 0); 1505 if (pg->loan_count) { 1506 KASSERT(pg->uobject == NULL); 1507 if (pg->uanon == NULL) { 1508 uvm_pagedequeue(pg); 1509 } 1510 uvm_pageunlock(pg); 1511 return; 1512 } 1513 } else if (pg->uobject != NULL || pg->uanon != NULL || 1514 pg->wire_count != 0) { 1515 uvm_pagelock(pg); 1516 locked = true; 1517 } else { 1518 locked = false; 1519 } 1520 1521 /* 1522 * remove page from its object or anon. 1523 */ 1524 if (pg->uobject != NULL) { 1525 uvm_pageremove_object(pg->uobject, pg); 1526 } else if (pg->uanon != NULL) { 1527 const unsigned int status = uvm_pagegetdirty(pg); 1528 pg->uanon->an_page = NULL; 1529 pg->uanon = NULL; 1530 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1531 } 1532 1533 /* 1534 * if the page was wired, unwire it now. 1535 */ 1536 1537 if (pg->wire_count) { 1538 pg->wire_count = 0; 1539 atomic_dec_uint(&uvmexp.wired); 1540 } 1541 if (locked) { 1542 /* 1543 * wake anyone waiting on the page. 1544 */ 1545 if ((pg->pqflags & PQ_WANTED) != 0) { 1546 pg->pqflags &= ~PQ_WANTED; 1547 wakeup(pg); 1548 } 1549 1550 /* 1551 * now remove the page from the queues. 
1552 */ 1553 uvm_pagedequeue(pg); 1554 uvm_pageunlock(pg); 1555 } else { 1556 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1557 } 1558 1559 /* 1560 * and put on free queue 1561 */ 1562 1563 #ifdef DEBUG 1564 pg->uobject = (void *)0xdeadbeef; 1565 pg->uanon = (void *)0xdeadbeef; 1566 #endif /* DEBUG */ 1567 1568 /* Try to send the page to the per-CPU cache. */ 1569 s = splvm(); 1570 ucpu = curcpu()->ci_data.cpu_uvm; 1571 bucket = uvm_page_get_bucket(pg); 1572 if (bucket == ucpu->pgflbucket && uvm_pgflcache_free(ucpu, pg)) { 1573 splx(s); 1574 return; 1575 } 1576 1577 /* Didn't work. Never mind, send it to a global bucket. */ 1578 pgfl = &uvm.page_free[uvm_page_get_freelist(pg)]; 1579 pgb = pgfl->pgfl_buckets[bucket]; 1580 lock = &uvm_freelist_locks[bucket].lock; 1581 1582 mutex_spin_enter(lock); 1583 /* PG_FREE must be set under lock because of uvm_pglistalloc(). */ 1584 pg->flags = PG_FREE; 1585 LIST_INSERT_HEAD(&pgb->pgb_colors[VM_PGCOLOR(pg)], pg, pageq.list); 1586 pgb->pgb_nfree++; 1587 CPU_COUNT(CPU_COUNT_FREEPAGES, 1); 1588 mutex_spin_exit(lock); 1589 splx(s); 1590 } 1591 1592 /* 1593 * uvm_page_unbusy: unbusy an array of pages. 1594 * 1595 * => pages must either all belong to the same object, or all belong to anons. 1596 * => if pages are object-owned, object must be locked. 1597 * => if pages are anon-owned, anons must be locked. 1598 * => caller must make sure that anon-owned pages are not PG_RELEASED. 1599 */ 1600 1601 void 1602 uvm_page_unbusy(struct vm_page **pgs, int npgs) 1603 { 1604 struct vm_page *pg; 1605 int i, pageout_done; 1606 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1607 1608 pageout_done = 0; 1609 for (i = 0; i < npgs; i++) { 1610 pg = pgs[i]; 1611 if (pg == NULL || pg == PGO_DONTCARE) { 1612 continue; 1613 } 1614 1615 KASSERT(uvm_page_owner_locked_p(pg, true)); 1616 KASSERT(pg->flags & PG_BUSY); 1617 1618 if (pg->flags & PG_PAGEOUT) { 1619 pg->flags &= ~PG_PAGEOUT; 1620 pg->flags |= PG_RELEASED; 1621 pageout_done++; 1622 atomic_inc_uint(&uvmexp.pdfreed); 1623 } 1624 if (pg->flags & PG_RELEASED) { 1625 UVMHIST_LOG(ubchist, "releasing pg %#jx", 1626 (uintptr_t)pg, 0, 0, 0); 1627 KASSERT(pg->uobject != NULL || 1628 (pg->uanon != NULL && pg->uanon->an_ref > 0)); 1629 pg->flags &= ~PG_RELEASED; 1630 uvm_pagefree(pg); 1631 } else { 1632 UVMHIST_LOG(ubchist, "unbusying pg %#jx", 1633 (uintptr_t)pg, 0, 0, 0); 1634 KASSERT((pg->flags & PG_FAKE) == 0); 1635 pg->flags &= ~PG_BUSY; 1636 uvm_pagelock(pg); 1637 uvm_pagewakeup(pg); 1638 uvm_pageunlock(pg); 1639 UVM_PAGE_OWN(pg, NULL); 1640 } 1641 } 1642 if (pageout_done != 0) { 1643 uvm_pageout_done(pageout_done); 1644 } 1645 } 1646 1647 /* 1648 * uvm_pagewait: wait for a busy page 1649 * 1650 * => page must be known PG_BUSY 1651 * => object must be read or write locked 1652 * => object will be unlocked on return 1653 */ 1654 1655 void 1656 uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg) 1657 { 1658 1659 KASSERT(rw_lock_held(lock)); 1660 KASSERT((pg->flags & PG_BUSY) != 0); 1661 KASSERT(uvm_page_owner_locked_p(pg, false)); 1662 1663 mutex_enter(&pg->interlock); 1664 pg->pqflags |= PQ_WANTED; 1665 rw_exit(lock); 1666 UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0); 1667 } 1668 1669 /* 1670 * uvm_pagewakeup: wake anyone waiting on a page 1671 * 1672 * => page interlock must be held 1673 */ 1674 1675 void 1676 uvm_pagewakeup(struct vm_page *pg) 1677 { 1678 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1679 1680 KASSERT(mutex_owned(&pg->interlock)); 1681 1682 UVMHIST_LOG(ubchist, "waking pg %#jx", 
	    (uintptr_t)pg, 0, 0, 0);

	if ((pg->pqflags & PQ_WANTED) != 0) {
		wakeup(pg);
		pg->pqflags &= ~PQ_WANTED;
	}
}

/*
 * uvm_pagewanted_p: return true if someone is waiting on the page
 *
 * => object must be write locked (lock out all concurrent access)
 */

bool
uvm_pagewanted_p(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));

	return (atomic_load_relaxed(&pg->pqflags) & PQ_WANTED) != 0;
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it. it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(struct vm_page *pg, const char *tag)
{

	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
	KASSERT(uvm_page_owner_locked_p(pg, true));

	/* gain ownership? */
	if (tag) {
		KASSERT((pg->flags & PG_BUSY) != 0);
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d.%d [%s]\n", pg,
			    pg->owner, pg->lowner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = curproc->p_pid;
		pg->lowner = curlwp->l_lid;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	KASSERT((pg->flags & PG_BUSY) == 0);
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of a non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	pg->owner_tag = NULL;
}
#endif

/*
 * uvm_pagelookup: look up a page
 *
 * => caller should lock object to keep someone from pulling the page
 *	out from under it
 */

struct vm_page *
uvm_pagelookup(struct uvm_object *obj, voff_t off)
{
	struct vm_page *pg;
	bool ddb __diagused = false;
#ifdef DDB
	extern int db_active;
	ddb = db_active != 0;
#endif

	KASSERT(ddb || rw_lock_held(obj->vmobjlock));

	pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT);

	KASSERT(pg == NULL || obj->uo_npages != 0);
	KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
	    (pg->flags & PG_BUSY) != 0);
	return pg;
}

/*
 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
 *
 * => caller must lock objects
 * => caller must hold pg->interlock
 */

void
uvm_pagewire(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));
	KASSERT(mutex_owned(&pg->interlock));
#if defined(READAHEAD_STATS)
	if ((pg->flags & PG_READAHEAD) != 0) {
		uvm_ra_hit.ev_count++;
		pg->flags &= ~PG_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count == 0) {
		uvm_pagedequeue(pg);
		atomic_inc_uint(&uvmexp.wired);
	}
	pg->wire_count++;
	KASSERT(pg->wire_count > 0);	/* detect wraparound */
}

/*
 * uvm_pageunwire: unwire the page.
 *
 * => activate if wire count goes to zero.
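 * => pg->wire_count must be non-zero on entry (asserted below).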
1806 * => caller must lock objects 1807 * => caller must hold pg->interlock 1808 */ 1809 1810 void 1811 uvm_pageunwire(struct vm_page *pg) 1812 { 1813 1814 KASSERT(uvm_page_owner_locked_p(pg, true)); 1815 KASSERT(pg->wire_count != 0); 1816 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1817 KASSERT(mutex_owned(&pg->interlock)); 1818 pg->wire_count--; 1819 if (pg->wire_count == 0) { 1820 uvm_pageactivate(pg); 1821 KASSERT(uvmexp.wired != 0); 1822 atomic_dec_uint(&uvmexp.wired); 1823 } 1824 } 1825 1826 /* 1827 * uvm_pagedeactivate: deactivate page 1828 * 1829 * => caller must lock objects 1830 * => caller must check to make sure page is not wired 1831 * => object that page belongs to must be locked (so we can adjust pg->flags) 1832 * => caller must clear the reference on the page before calling 1833 * => caller must hold pg->interlock 1834 */ 1835 1836 void 1837 uvm_pagedeactivate(struct vm_page *pg) 1838 { 1839 1840 KASSERT(uvm_page_owner_locked_p(pg, false)); 1841 KASSERT(mutex_owned(&pg->interlock)); 1842 if (pg->wire_count == 0) { 1843 KASSERT(uvmpdpol_pageisqueued_p(pg)); 1844 uvmpdpol_pagedeactivate(pg); 1845 } 1846 } 1847 1848 /* 1849 * uvm_pageactivate: activate page 1850 * 1851 * => caller must lock objects 1852 * => caller must hold pg->interlock 1853 */ 1854 1855 void 1856 uvm_pageactivate(struct vm_page *pg) 1857 { 1858 1859 KASSERT(uvm_page_owner_locked_p(pg, false)); 1860 KASSERT(mutex_owned(&pg->interlock)); 1861 #if defined(READAHEAD_STATS) 1862 if ((pg->flags & PG_READAHEAD) != 0) { 1863 uvm_ra_hit.ev_count++; 1864 pg->flags &= ~PG_READAHEAD; 1865 } 1866 #endif /* defined(READAHEAD_STATS) */ 1867 if (pg->wire_count == 0) { 1868 uvmpdpol_pageactivate(pg); 1869 } 1870 } 1871 1872 /* 1873 * uvm_pagedequeue: remove a page from any paging queue 1874 * 1875 * => caller must lock objects 1876 * => caller must hold pg->interlock 1877 */ 1878 void 1879 uvm_pagedequeue(struct vm_page *pg) 1880 { 1881 1882 KASSERT(uvm_page_owner_locked_p(pg, true)); 1883 KASSERT(mutex_owned(&pg->interlock)); 1884 if (uvmpdpol_pageisqueued_p(pg)) { 1885 uvmpdpol_pagedequeue(pg); 1886 } 1887 } 1888 1889 /* 1890 * uvm_pageenqueue: add a page to a paging queue without activating. 1891 * used where a page is not really demanded (yet). eg. read-ahead 1892 * 1893 * => caller must lock objects 1894 * => caller must hold pg->interlock 1895 */ 1896 void 1897 uvm_pageenqueue(struct vm_page *pg) 1898 { 1899 1900 KASSERT(uvm_page_owner_locked_p(pg, false)); 1901 KASSERT(mutex_owned(&pg->interlock)); 1902 if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) { 1903 uvmpdpol_pageenqueue(pg); 1904 } 1905 } 1906 1907 /* 1908 * uvm_pagelock: acquire page interlock 1909 */ 1910 void 1911 uvm_pagelock(struct vm_page *pg) 1912 { 1913 1914 mutex_enter(&pg->interlock); 1915 } 1916 1917 /* 1918 * uvm_pagelock2: acquire two page interlocks 1919 */ 1920 void 1921 uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2) 1922 { 1923 1924 if (pg1 < pg2) { 1925 mutex_enter(&pg1->interlock); 1926 mutex_enter(&pg2->interlock); 1927 } else { 1928 mutex_enter(&pg2->interlock); 1929 mutex_enter(&pg1->interlock); 1930 } 1931 } 1932 1933 /* 1934 * uvm_pageunlock: release page interlock, and if a page replacement intent 1935 * is set on the page, pass it to uvmpdpol to make real. 
 *
 * => caller must hold pg->interlock
 */
void
uvm_pageunlock(struct vm_page *pg)
{

	if ((pg->pqflags & PQ_INTENT_SET) == 0 ||
	    (pg->pqflags & PQ_INTENT_QUEUED) != 0) {
		mutex_exit(&pg->interlock);
		return;
	}
	pg->pqflags |= PQ_INTENT_QUEUED;
	mutex_exit(&pg->interlock);
	uvmpdpol_pagerealize(pg);
}

/*
 * uvm_pageunlock2: release two page interlocks, and for each page, if a
 * page replacement intent is set, pass it to uvmpdpol to make real.
 *
 * => caller must hold the interlock of both pages.
 */
void
uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2)
{

	if ((pg1->pqflags & PQ_INTENT_SET) == 0 ||
	    (pg1->pqflags & PQ_INTENT_QUEUED) != 0) {
		mutex_exit(&pg1->interlock);
		pg1 = NULL;
	} else {
		pg1->pqflags |= PQ_INTENT_QUEUED;
		mutex_exit(&pg1->interlock);
	}

	if ((pg2->pqflags & PQ_INTENT_SET) == 0 ||
	    (pg2->pqflags & PQ_INTENT_QUEUED) != 0) {
		mutex_exit(&pg2->interlock);
		pg2 = NULL;
	} else {
		pg2->pqflags |= PQ_INTENT_QUEUED;
		mutex_exit(&pg2->interlock);
	}

	if (pg1 != NULL) {
		uvmpdpol_pagerealize(pg1);
	}
	if (pg2 != NULL) {
		uvmpdpol_pagerealize(pg2);
	}
}

/*
 * uvm_pagezero: zero fill a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagezero(struct vm_page *pg)
{

	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
}

/*
 * uvm_pagecopy: copy a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
{

	uvm_pagemarkdirty(dst, UVM_PAGE_STATUS_DIRTY);
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}

/*
 * uvm_pageismanaged: test to see whether a page (specified by PA) is managed.
 */

bool
uvm_pageismanaged(paddr_t pa)
{

	return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID);
}

/*
 * uvm_page_lookup_freelist: look up the free list for the specified page
 */

int
uvm_page_lookup_freelist(struct vm_page *pg)
{
	uvm_physseg_t upm;

	upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
	KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID);
	return uvm_physseg_get_free_list(upm);
}

/*
 * uvm_page_owner_locked_p: return true if object associated with page is
 * locked. this is a weak check for runtime assertions only.
 */

bool
uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive)
{

	if (pg->uobject != NULL) {
		return exclusive
		    ? rw_write_held(pg->uobject->vmobjlock)
		    : rw_lock_held(pg->uobject->vmobjlock);
	}
	if (pg->uanon != NULL) {
		return exclusive
		    ?
/*
 * uvm_page_owner_locked_p: return true if the object associated with the
 * page is locked.  This is a weak check for runtime assertions only.
 */

bool
uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive)
{

	if (pg->uobject != NULL) {
		return exclusive
		    ? rw_write_held(pg->uobject->vmobjlock)
		    : rw_lock_held(pg->uobject->vmobjlock);
	}
	if (pg->uanon != NULL) {
		return exclusive
		    ? rw_write_held(pg->uanon->an_lock)
		    : rw_lock_held(pg->uanon->an_lock);
	}
	return true;
}

/*
 * uvm_pagereadonly_p: return true if the page should be mapped read-only
 */

bool
uvm_pagereadonly_p(struct vm_page *pg)
{
	struct uvm_object * const uobj = pg->uobject;

	KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock));
	KASSERT(uobj != NULL || rw_lock_held(pg->uanon->an_lock));
	if ((pg->flags & PG_RDONLY) != 0) {
		return true;
	}
	if (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) {
		return true;
	}
	if (uobj == NULL) {
		return false;
	}
	return UVM_OBJ_NEEDS_WRITEFAULT(uobj);
}

#ifdef PMAP_DIRECT
/*
 * Call pmap to translate a physical address into a virtual one and run a
 * callback on it.  Used to avoid actually mapping the pages; the pmap most
 * likely uses a direct map or equivalent.
 */
int
uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len,
	    int (*process)(void *, size_t, void *), void *arg)
{
	int error = 0;
	paddr_t pa;
	size_t todo;
	voff_t pgoff = (off & PAGE_MASK);
	struct vm_page *pg;

	KASSERT(npages > 0 && len > 0);

	for (int i = 0; i < npages; i++) {
		pg = pgs[i];

		KASSERT(len > 0);

		/*
		 * Caller is responsible for ensuring all the pages are
		 * available.
		 */
		KASSERT(pg != NULL && pg != PGO_DONTCARE);

		pa = VM_PAGE_TO_PHYS(pg);
		todo = MIN(len, PAGE_SIZE - pgoff);

		error = pmap_direct_process(pa, pgoff, todo, process, arg);
		if (error)
			break;

		pgoff = 0;
		len -= todo;
	}

	KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len);
	return error;
}
#endif /* PMAP_DIRECT */

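/*
 * Illustrative sketch (not part of the original source, not compiled, and
 * only meaningful on PMAP_DIRECT kernels): a caller-supplied callback for
 * uvm_direct_process() that sums the bytes covered by an array of pages.
 * The callback is expected to be handed a direct-mapped virtual address
 * for each per-page chunk, so the pages are never entered into kernel_map.
 * The names example_sum, example_sum_cb and example_sum_pages are
 * hypothetical.
 */
#if 0
struct example_sum {
	uint64_t total;
};

static int
example_sum_cb(void *va, size_t len, void *arg)
{
	struct example_sum *s = arg;
	const uint8_t *p = va;

	while (len--)
		s->total += *p++;
	return 0;
}

static int
example_sum_pages(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len,
    uint64_t *totalp)
{
	struct example_sum s = { .total = 0 };
	int error;

	error = uvm_direct_process(pgs, npages, off, len, example_sum_cb, &s);
	if (error == 0)
		*totalp = s.total;
	return error;
}
#endif
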
#if defined(DDB) || defined(DEBUGPRINT)

/*
 * uvm_page_printit: actually print the page
 */

static const char page_flagbits[] = UVM_PGFLAGBITS;
static const char page_pqflagbits[] = UVM_PQFLAGBITS;

void
uvm_page_printit(struct vm_page *pg, bool full,
    void (*pr)(const char *, ...))
{
	struct vm_page *tpg;
	struct uvm_object *uobj;
	struct pgflbucket *pgb;
	struct pgflist *pgl;
	char pgbuf[128];

	(*pr)("PAGE %p:\n", pg);
	snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
	(*pr)("  flags=%s\n", pgbuf);
	snprintb(pgbuf, sizeof(pgbuf), page_pqflagbits, pg->pqflags);
	(*pr)("  pqflags=%s\n", pgbuf);
	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
	    pg->uobject, pg->uanon, (long long)pg->offset);
	(*pr)("  loan_count=%d wire_count=%d bucket=%d freelist=%d\n",
	    pg->loan_count, pg->wire_count, uvm_page_get_bucket(pg),
	    uvm_page_get_freelist(pg));
	(*pr)("  pa=0x%lx\n", (long)VM_PAGE_TO_PHYS(pg));
#if defined(UVM_PAGE_TRKOWN)
	if (pg->flags & PG_BUSY)
		(*pr)("  owning process = %d.%d, tag=%s\n",
		    pg->owner, pg->lowner, pg->owner_tag);
	else
		(*pr)("  page not busy, no owner\n");
#else
	(*pr)("  [page ownership tracking disabled]\n");
#endif

	if (!full)
		return;

	/* cross-verify object/anon */
	if ((pg->flags & PG_FREE) == 0) {
		if (pg->flags & PG_ANON) {
			if (pg->uanon == NULL || pg->uanon->an_page != pg)
				(*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
				    (pg->uanon) ? pg->uanon->an_page : NULL);
			else
				(*pr)("  anon backpointer is OK\n");
		} else {
			uobj = pg->uobject;
			if (uobj) {
				(*pr)("  checking object list\n");
				tpg = uvm_pagelookup(uobj, pg->offset);
				if (tpg)
					(*pr)("  page found on object list\n");
				else
					(*pr)("  >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
			}
		}
	}

	/* cross-verify page queue */
	if (pg->flags & PG_FREE) {
		int fl = uvm_page_get_freelist(pg);
		int b = uvm_page_get_bucket(pg);
		pgb = uvm.page_free[fl].pgfl_buckets[b];
		pgl = &pgb->pgb_colors[VM_PGCOLOR(pg)];
		(*pr)("  checking pageq list\n");
		LIST_FOREACH(tpg, pgl, pageq.list) {
			if (tpg == pg) {
				break;
			}
		}
		if (tpg)
			(*pr)("  page found on pageq list\n");
		else
			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
	}
}

/*
 * uvm_page_printall - print a summary of all managed pages
 */

void
uvm_page_printall(void (*pr)(const char *, ...))
{
	uvm_physseg_t i;
	paddr_t pfn;
	struct vm_page *pg;

	(*pr)("%18s %4s %4s %18s %18s"
#ifdef UVM_PAGE_TRKOWN
	    " OWNER"
#endif
	    "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
	for (i = uvm_physseg_get_first();
	     uvm_physseg_valid_p(i);
	     i = uvm_physseg_get_next(i)) {
		for (pfn = uvm_physseg_get_start(i);
		     pfn < uvm_physseg_get_end(i);
		     pfn++) {
			pg = PHYS_TO_VM_PAGE(ptoa(pfn));

			(*pr)("%18p %04x %08x %18p %18p",
			    pg, pg->flags, pg->pqflags, pg->uobject,
			    pg->uanon);
#ifdef UVM_PAGE_TRKOWN
			if (pg->flags & PG_BUSY)
				(*pr)(" %d [%s]", pg->owner, pg->owner_tag);
#endif
			(*pr)("\n");
		}
	}
}

/*
 * uvm_page_print_freelists - print a summary of the freelists
 */

void
uvm_page_print_freelists(void (*pr)(const char *, ...))
{
	struct pgfreelist *pgfl;
	struct pgflbucket *pgb;
	int fl, b, c;

	(*pr)("There are %d freelists with %d buckets of %d colors.\n\n",
	    VM_NFREELIST, uvm.bucketcount, uvmexp.ncolors);

	for (fl = 0; fl < VM_NFREELIST; fl++) {
		pgfl = &uvm.page_free[fl];
		(*pr)("freelist(%d) @ %p\n", fl, pgfl);
		for (b = 0; b < uvm.bucketcount; b++) {
			pgb = uvm.page_free[fl].pgfl_buckets[b];
			(*pr)("    bucket(%d) @ %p, nfree = %d, lock @ %p:\n",
			    b, pgb, pgb->pgb_nfree,
			    &uvm_freelist_locks[b].lock);
			for (c = 0; c < uvmexp.ncolors; c++) {
				(*pr)("        color(%d) @ %p, ", c,
				    &pgb->pgb_colors[c]);
				(*pr)("first page = %p\n",
				    LIST_FIRST(&pgb->pgb_colors[c]));
			}
		}
	}
}

#endif /* DDB || DEBUGPRINT */
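
/*
 * Illustrative sketch (not part of the original source, not compiled):
 * how the DDB/DEBUGPRINT helpers above might be driven from ad-hoc
 * debugging code.  The pr argument is any printf(9)-style function
 * returning void, so the kernel's printf() can be passed directly.  The
 * helper name is hypothetical, and this assumes a kernel configured with
 * DDB or DEBUGPRINT so that uvm_page_printit() is compiled in.
 */
#if 0
static void
example_dump_page(paddr_t pa)
{
	struct vm_page *pg;

	if (!uvm_pageismanaged(pa)) {
		printf("paddr %#jx is not a managed page\n", (uintmax_t)pa);
		return;
	}
	pg = PHYS_TO_VM_PAGE(pa);
	uvm_page_printit(pg, true, printf);
}
#endif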