/*	$NetBSD: uvm_page.c,v 1.198 2018/05/19 15:03:26 jdolecek Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_page.c	8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.198 2018/05/19 15:03:26 jdolecek Exp $"); 70 71 #include "opt_ddb.h" 72 #include "opt_uvm.h" 73 #include "opt_uvmhist.h" 74 #include "opt_readahead.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/sched.h> 79 #include <sys/kernel.h> 80 #include <sys/vnode.h> 81 #include <sys/proc.h> 82 #include <sys/atomic.h> 83 #include <sys/cpu.h> 84 #include <sys/extent.h> 85 86 #include <uvm/uvm.h> 87 #include <uvm/uvm_ddb.h> 88 #include <uvm/uvm_pdpolicy.h> 89 90 /* 91 * Some supported CPUs in a given architecture don't support all 92 * of the things necessary to do idle page zero'ing efficiently. 93 * We therefore provide a way to enable it from machdep code here. 94 */ 95 bool vm_page_zero_enable = false; 96 97 /* 98 * number of pages per-CPU to reserve for the kernel. 99 */ 100 #ifndef UVM_RESERVED_PAGES_PER_CPU 101 #define UVM_RESERVED_PAGES_PER_CPU 5 102 #endif 103 int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; 104 105 /* 106 * physical memory size; 107 */ 108 psize_t physmem; 109 110 /* 111 * local variables 112 */ 113 114 /* 115 * these variables record the values returned by vm_page_bootstrap, 116 * for debugging purposes. The implementation of uvm_pageboot_alloc 117 * and pmap_startup here also uses them internally. 118 */ 119 120 static vaddr_t virtual_space_start; 121 static vaddr_t virtual_space_end; 122 123 /* 124 * we allocate an initial number of page colors in uvm_page_init(), 125 * and remember them. We may re-color pages as cache sizes are 126 * discovered during the autoconfiguration phase. But we can never 127 * free the initial set of buckets, since they are allocated using 128 * uvm_pageboot_alloc(). 129 */ 130 131 static size_t recolored_pages_memsize /* = 0 */; 132 133 #ifdef DEBUG 134 vaddr_t uvm_zerocheckkva; 135 #endif /* DEBUG */ 136 137 /* 138 * These functions are reserved for uvm(9) internal use and are not 139 * exported in the header file uvm_physseg.h 140 * 141 * Thus they are redefined here. 142 */ 143 void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *); 144 void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t); 145 146 /* returns a pgs array */ 147 struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t); 148 149 /* 150 * local prototypes 151 */ 152 153 static void uvm_pageinsert(struct uvm_object *, struct vm_page *); 154 static void uvm_pageremove(struct uvm_object *, struct vm_page *); 155 156 /* 157 * per-object tree of pages 158 */ 159 160 static signed int 161 uvm_page_compare_nodes(void *ctx, const void *n1, const void *n2) 162 { 163 const struct vm_page *pg1 = n1; 164 const struct vm_page *pg2 = n2; 165 const voff_t a = pg1->offset; 166 const voff_t b = pg2->offset; 167 168 if (a < b) 169 return -1; 170 if (a > b) 171 return 1; 172 return 0; 173 } 174 175 static signed int 176 uvm_page_compare_key(void *ctx, const void *n, const void *key) 177 { 178 const struct vm_page *pg = n; 179 const voff_t a = pg->offset; 180 const voff_t b = *(const voff_t *)key; 181 182 if (a < b) 183 return -1; 184 if (a > b) 185 return 1; 186 return 0; 187 } 188 189 const rb_tree_ops_t uvm_page_tree_ops = { 190 .rbto_compare_nodes = uvm_page_compare_nodes, 191 .rbto_compare_key = uvm_page_compare_key, 192 .rbto_node_offset = offsetof(struct vm_page, rb_node), 193 .rbto_context = NULL 194 }; 195 196 /* 197 * inline functions 198 */ 199 200 /* 201 * uvm_pageinsert: insert a page in the object. 
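 *
 *	(the page ends up in two per-object structures: the rb_tree keyed
 *	on pg->offset, via uvm_pageinsert_tree(), and the uobj->memq list,
 *	via uvm_pageinsert_list(); uvm_pageremove() undoes both.)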
202 * 203 * => caller must lock object 204 * => caller must lock page queues 205 * => call should have already set pg's object and offset pointers 206 * and bumped the version counter 207 */ 208 209 static inline void 210 uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg, 211 struct vm_page *where) 212 { 213 214 KASSERT(uobj == pg->uobject); 215 KASSERT(mutex_owned(uobj->vmobjlock)); 216 KASSERT((pg->flags & PG_TABLED) == 0); 217 KASSERT(where == NULL || (where->flags & PG_TABLED)); 218 KASSERT(where == NULL || (where->uobject == uobj)); 219 220 if (UVM_OBJ_IS_VNODE(uobj)) { 221 if (uobj->uo_npages == 0) { 222 struct vnode *vp = (struct vnode *)uobj; 223 224 vholdl(vp); 225 } 226 if (UVM_OBJ_IS_VTEXT(uobj)) { 227 atomic_inc_uint(&uvmexp.execpages); 228 } else { 229 atomic_inc_uint(&uvmexp.filepages); 230 } 231 } else if (UVM_OBJ_IS_AOBJ(uobj)) { 232 atomic_inc_uint(&uvmexp.anonpages); 233 } 234 235 if (where) 236 TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue); 237 else 238 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); 239 pg->flags |= PG_TABLED; 240 uobj->uo_npages++; 241 } 242 243 244 static inline void 245 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) 246 { 247 struct vm_page *ret __diagused; 248 249 KASSERT(uobj == pg->uobject); 250 ret = rb_tree_insert_node(&uobj->rb_tree, pg); 251 KASSERT(ret == pg); 252 } 253 254 static inline void 255 uvm_pageinsert(struct uvm_object *uobj, struct vm_page *pg) 256 { 257 258 KDASSERT(uobj != NULL); 259 uvm_pageinsert_tree(uobj, pg); 260 uvm_pageinsert_list(uobj, pg, NULL); 261 } 262 263 /* 264 * uvm_page_remove: remove page from object. 265 * 266 * => caller must lock object 267 * => caller must lock page queues 268 */ 269 270 static inline void 271 uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg) 272 { 273 274 KASSERT(uobj == pg->uobject); 275 KASSERT(mutex_owned(uobj->vmobjlock)); 276 KASSERT(pg->flags & PG_TABLED); 277 278 if (UVM_OBJ_IS_VNODE(uobj)) { 279 if (uobj->uo_npages == 1) { 280 struct vnode *vp = (struct vnode *)uobj; 281 282 holdrelel(vp); 283 } 284 if (UVM_OBJ_IS_VTEXT(uobj)) { 285 atomic_dec_uint(&uvmexp.execpages); 286 } else { 287 atomic_dec_uint(&uvmexp.filepages); 288 } 289 } else if (UVM_OBJ_IS_AOBJ(uobj)) { 290 atomic_dec_uint(&uvmexp.anonpages); 291 } 292 293 /* object should be locked */ 294 uobj->uo_npages--; 295 TAILQ_REMOVE(&uobj->memq, pg, listq.queue); 296 pg->flags &= ~PG_TABLED; 297 pg->uobject = NULL; 298 } 299 300 static inline void 301 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg) 302 { 303 304 KASSERT(uobj == pg->uobject); 305 rb_tree_remove_node(&uobj->rb_tree, pg); 306 } 307 308 static inline void 309 uvm_pageremove(struct uvm_object *uobj, struct vm_page *pg) 310 { 311 312 KDASSERT(uobj != NULL); 313 uvm_pageremove_tree(uobj, pg); 314 uvm_pageremove_list(uobj, pg); 315 } 316 317 static void 318 uvm_page_init_buckets(struct pgfreelist *pgfl) 319 { 320 int color, i; 321 322 for (color = 0; color < uvmexp.ncolors; color++) { 323 for (i = 0; i < PGFL_NQUEUES; i++) { 324 LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]); 325 } 326 } 327 } 328 329 /* 330 * uvm_page_init: init the page system. called from uvm_init(). 
331 * 332 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 333 */ 334 335 void 336 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) 337 { 338 static struct uvm_cpu boot_cpu; 339 psize_t freepages, pagecount, bucketcount, n; 340 struct pgflbucket *bucketarray, *cpuarray; 341 struct vm_page *pagearray; 342 uvm_physseg_t bank; 343 int lcv; 344 345 KASSERT(ncpu <= 1); 346 CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *)); 347 348 /* 349 * init the page queues and page queue locks, except the free 350 * list; we allocate that later (with the initial vm_page 351 * structures). 352 */ 353 354 uvm.cpus[0] = &boot_cpu; 355 curcpu()->ci_data.cpu_uvm = &boot_cpu; 356 uvmpdpol_init(); 357 mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE); 358 mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM); 359 360 /* 361 * allocate vm_page structures. 362 */ 363 364 /* 365 * sanity check: 366 * before calling this function the MD code is expected to register 367 * some free RAM with the uvm_page_physload() function. our job 368 * now is to allocate vm_page structures for this memory. 369 */ 370 371 if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID) 372 panic("uvm_page_bootstrap: no memory pre-allocated"); 373 374 /* 375 * first calculate the number of free pages... 376 * 377 * note that we use start/end rather than avail_start/avail_end. 378 * this allows us to allocate extra vm_page structures in case we 379 * want to return some memory to the pool after booting. 380 */ 381 382 freepages = 0; 383 384 for (bank = uvm_physseg_get_first(); 385 uvm_physseg_valid_p(bank) ; 386 bank = uvm_physseg_get_next(bank)) { 387 freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank)); 388 } 389 390 /* 391 * Let MD code initialize the number of colors, or default 392 * to 1 color if MD code doesn't care. 393 */ 394 if (uvmexp.ncolors == 0) 395 uvmexp.ncolors = 1; 396 uvmexp.colormask = uvmexp.ncolors - 1; 397 KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); 398 399 /* 400 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 401 * use. for each page of memory we use we need a vm_page structure. 402 * thus, the total number of pages we can use is the total size of 403 * the memory divided by the PAGE_SIZE plus the size of the vm_page 404 * structure. we add one to freepages as a fudge factor to avoid 405 * truncation errors (since we can only allocate in terms of whole 406 * pages). 407 */ 408 409 bucketcount = uvmexp.ncolors * VM_NFREELIST; 410 pagecount = ((freepages + 1) << PAGE_SHIFT) / 411 (PAGE_SIZE + sizeof(struct vm_page)); 412 413 bucketarray = (void *)uvm_pageboot_alloc((bucketcount * 414 sizeof(struct pgflbucket) * 2) + (pagecount * 415 sizeof(struct vm_page))); 416 cpuarray = bucketarray + bucketcount; 417 pagearray = (struct vm_page *)(bucketarray + bucketcount * 2); 418 419 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 420 uvm.page_free[lcv].pgfl_buckets = 421 (bucketarray + (lcv * uvmexp.ncolors)); 422 uvm_page_init_buckets(&uvm.page_free[lcv]); 423 uvm.cpus[0]->page_free[lcv].pgfl_buckets = 424 (cpuarray + (lcv * uvmexp.ncolors)); 425 uvm_page_init_buckets(&uvm.cpus[0]->page_free[lcv]); 426 } 427 memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 428 429 /* 430 * init the vm_page structures and put them in the correct place. 
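 *	(each physical segment below takes its vm_page structures from the
 *	pagearray slab allocated above; pagearray and pagecount are advanced
 *	past every segment as it is initialized.)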
431 */ 432 /* First init the extent */ 433 434 for (bank = uvm_physseg_get_first(), 435 uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount); 436 uvm_physseg_valid_p(bank); 437 bank = uvm_physseg_get_next(bank)) { 438 439 n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank); 440 uvm_physseg_seg_alloc_from_slab(bank, n); 441 uvm_physseg_init_seg(bank, pagearray); 442 443 /* set up page array pointers */ 444 pagearray += n; 445 pagecount -= n; 446 } 447 448 /* 449 * pass up the values of virtual_space_start and 450 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 451 * layers of the VM. 452 */ 453 454 *kvm_startp = round_page(virtual_space_start); 455 *kvm_endp = trunc_page(virtual_space_end); 456 #ifdef DEBUG 457 /* 458 * steal kva for uvm_pagezerocheck(). 459 */ 460 uvm_zerocheckkva = *kvm_startp; 461 *kvm_startp += PAGE_SIZE; 462 #endif /* DEBUG */ 463 464 /* 465 * init various thresholds. 466 */ 467 468 uvmexp.reserve_pagedaemon = 1; 469 uvmexp.reserve_kernel = vm_page_reserve_kernel; 470 471 /* 472 * determine if we should zero pages in the idle loop. 473 */ 474 475 uvm.cpus[0]->page_idle_zero = vm_page_zero_enable; 476 477 /* 478 * done! 479 */ 480 481 uvm.page_init_done = true; 482 } 483 484 /* 485 * uvm_setpagesize: set the page size 486 * 487 * => sets page_shift and page_mask from uvmexp.pagesize. 488 */ 489 490 void 491 uvm_setpagesize(void) 492 { 493 494 /* 495 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE 496 * to be a constant (indicated by being a non-zero value). 497 */ 498 if (uvmexp.pagesize == 0) { 499 if (PAGE_SIZE == 0) 500 panic("uvm_setpagesize: uvmexp.pagesize not set"); 501 uvmexp.pagesize = PAGE_SIZE; 502 } 503 uvmexp.pagemask = uvmexp.pagesize - 1; 504 if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 505 panic("uvm_setpagesize: page size %u (%#x) not a power of two", 506 uvmexp.pagesize, uvmexp.pagesize); 507 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 508 if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 509 break; 510 } 511 512 /* 513 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 514 */ 515 516 vaddr_t 517 uvm_pageboot_alloc(vsize_t size) 518 { 519 static bool initialized = false; 520 vaddr_t addr; 521 #if !defined(PMAP_STEAL_MEMORY) 522 vaddr_t vaddr; 523 paddr_t paddr; 524 #endif 525 526 /* 527 * on first call to this function, initialize ourselves. 528 */ 529 if (initialized == false) { 530 pmap_virtual_space(&virtual_space_start, &virtual_space_end); 531 532 /* round it the way we like it */ 533 virtual_space_start = round_page(virtual_space_start); 534 virtual_space_end = trunc_page(virtual_space_end); 535 536 initialized = true; 537 } 538 539 /* round to page size */ 540 size = round_page(size); 541 uvmexp.bootpages += atop(size); 542 543 #if defined(PMAP_STEAL_MEMORY) 544 545 /* 546 * defer bootstrap allocation to MD code (it may want to allocate 547 * from a direct-mapped segment). pmap_steal_memory should adjust 548 * virtual_space_start/virtual_space_end if necessary. 
549 */ 550 551 addr = pmap_steal_memory(size, &virtual_space_start, 552 &virtual_space_end); 553 554 return(addr); 555 556 #else /* !PMAP_STEAL_MEMORY */ 557 558 /* 559 * allocate virtual memory for this request 560 */ 561 if (virtual_space_start == virtual_space_end || 562 (virtual_space_end - virtual_space_start) < size) 563 panic("uvm_pageboot_alloc: out of virtual space"); 564 565 addr = virtual_space_start; 566 567 #ifdef PMAP_GROWKERNEL 568 /* 569 * If the kernel pmap can't map the requested space, 570 * then allocate more resources for it. 571 */ 572 if (uvm_maxkaddr < (addr + size)) { 573 uvm_maxkaddr = pmap_growkernel(addr + size); 574 if (uvm_maxkaddr < (addr + size)) 575 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 576 } 577 #endif 578 579 virtual_space_start += size; 580 581 /* 582 * allocate and mapin physical pages to back new virtual pages 583 */ 584 585 for (vaddr = round_page(addr) ; vaddr < addr + size ; 586 vaddr += PAGE_SIZE) { 587 588 if (!uvm_page_physget(&paddr)) 589 panic("uvm_pageboot_alloc: out of memory"); 590 591 /* 592 * Note this memory is no longer managed, so using 593 * pmap_kenter is safe. 594 */ 595 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 596 } 597 pmap_update(pmap_kernel()); 598 return(addr); 599 #endif /* PMAP_STEAL_MEMORY */ 600 } 601 602 #if !defined(PMAP_STEAL_MEMORY) 603 /* 604 * uvm_page_physget: "steal" one page from the vm_physmem structure. 605 * 606 * => attempt to allocate it off the end of a segment in which the "avail" 607 * values match the start/end values. if we can't do that, then we 608 * will advance both values (making them equal, and removing some 609 * vm_page structures from the non-avail area). 610 * => return false if out of memory. 611 */ 612 613 /* subroutine: try to allocate from memory chunks on the specified freelist */ 614 static bool uvm_page_physget_freelist(paddr_t *, int); 615 616 static bool 617 uvm_page_physget_freelist(paddr_t *paddrp, int freelist) 618 { 619 uvm_physseg_t lcv; 620 621 /* pass 1: try allocating from a matching end */ 622 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 623 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 624 #else 625 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 626 #endif 627 { 628 if (uvm.page_init_done == true) 629 panic("uvm_page_physget: called _after_ bootstrap"); 630 631 /* Try to match at front or back on unused segment */ 632 if (uvm_page_physunload(lcv, freelist, paddrp) == false) { 633 if (paddrp == NULL) /* freelist fail, try next */ 634 continue; 635 } else 636 return true; 637 } 638 639 /* pass2: forget about matching ends, just allocate something */ 640 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 641 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 642 #else 643 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 644 #endif 645 { 646 /* Try the front regardless. 
*/ 647 if (uvm_page_physunload_force(lcv, freelist, paddrp) == false) { 648 if (paddrp == NULL) /* freelist fail, try next */ 649 continue; 650 } else 651 return true; 652 } 653 return false; 654 } 655 656 bool 657 uvm_page_physget(paddr_t *paddrp) 658 { 659 int i; 660 661 /* try in the order of freelist preference */ 662 for (i = 0; i < VM_NFREELIST; i++) 663 if (uvm_page_physget_freelist(paddrp, i) == true) 664 return (true); 665 return (false); 666 } 667 #endif /* PMAP_STEAL_MEMORY */ 668 669 /* 670 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages 671 * back from an I/O mapping (ugh!). used in some MD code as well. 672 */ 673 struct vm_page * 674 uvm_phys_to_vm_page(paddr_t pa) 675 { 676 paddr_t pf = atop(pa); 677 paddr_t off; 678 uvm_physseg_t upm; 679 680 upm = uvm_physseg_find(pf, &off); 681 if (upm != UVM_PHYSSEG_TYPE_INVALID) 682 return uvm_physseg_get_pg(upm, off); 683 return(NULL); 684 } 685 686 paddr_t 687 uvm_vm_page_to_phys(const struct vm_page *pg) 688 { 689 690 return pg->phys_addr; 691 } 692 693 /* 694 * uvm_page_recolor: Recolor the pages if the new bucket count is 695 * larger than the old one. 696 */ 697 698 void 699 uvm_page_recolor(int newncolors) 700 { 701 struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray; 702 struct pgfreelist gpgfl, pgfl; 703 struct vm_page *pg; 704 vsize_t bucketcount; 705 size_t bucketmemsize, oldbucketmemsize; 706 int color, i, ocolors; 707 int lcv; 708 struct uvm_cpu *ucpu; 709 710 KASSERT(((newncolors - 1) & newncolors) == 0); 711 712 if (newncolors <= uvmexp.ncolors) 713 return; 714 715 if (uvm.page_init_done == false) { 716 uvmexp.ncolors = newncolors; 717 return; 718 } 719 720 bucketcount = newncolors * VM_NFREELIST; 721 bucketmemsize = bucketcount * sizeof(struct pgflbucket) * 2; 722 bucketarray = kmem_alloc(bucketmemsize, KM_SLEEP); 723 cpuarray = bucketarray + bucketcount; 724 725 mutex_spin_enter(&uvm_fpageqlock); 726 727 /* Make sure we should still do this. */ 728 if (newncolors <= uvmexp.ncolors) { 729 mutex_spin_exit(&uvm_fpageqlock); 730 kmem_free(bucketarray, bucketmemsize); 731 return; 732 } 733 734 oldbucketarray = uvm.page_free[0].pgfl_buckets; 735 ocolors = uvmexp.ncolors; 736 737 uvmexp.ncolors = newncolors; 738 uvmexp.colormask = uvmexp.ncolors - 1; 739 740 ucpu = curcpu()->ci_data.cpu_uvm; 741 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 742 gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors)); 743 pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors)); 744 uvm_page_init_buckets(&gpgfl); 745 uvm_page_init_buckets(&pgfl); 746 for (color = 0; color < ocolors; color++) { 747 for (i = 0; i < PGFL_NQUEUES; i++) { 748 while ((pg = LIST_FIRST(&uvm.page_free[ 749 lcv].pgfl_buckets[color].pgfl_queues[i])) 750 != NULL) { 751 LIST_REMOVE(pg, pageq.list); /* global */ 752 LIST_REMOVE(pg, listq.list); /* cpu */ 753 LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[ 754 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[ 755 i], pg, pageq.list); 756 LIST_INSERT_HEAD(&pgfl.pgfl_buckets[ 757 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[ 758 i], pg, listq.list); 759 } 760 } 761 } 762 uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets; 763 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets; 764 } 765 766 oldbucketmemsize = recolored_pages_memsize; 767 768 recolored_pages_memsize = bucketmemsize; 769 mutex_spin_exit(&uvm_fpageqlock); 770 771 if (oldbucketmemsize) { 772 kmem_free(oldbucketarray, oldbucketmemsize); 773 } 774 775 /* 776 * this calls uvm_km_alloc() which may want to hold 777 * uvm_fpageqlock. 
778 */ 779 uvm_pager_realloc_emerg(); 780 } 781 782 /* 783 * uvm_cpu_attach: initialize per-CPU data structures. 784 */ 785 786 void 787 uvm_cpu_attach(struct cpu_info *ci) 788 { 789 struct pgflbucket *bucketarray; 790 struct pgfreelist pgfl; 791 struct uvm_cpu *ucpu; 792 vsize_t bucketcount; 793 int lcv; 794 795 if (CPU_IS_PRIMARY(ci)) { 796 /* Already done in uvm_page_init(). */ 797 goto attachrnd; 798 } 799 800 /* Add more reserve pages for this CPU. */ 801 uvmexp.reserve_kernel += vm_page_reserve_kernel; 802 803 /* Configure this CPU's free lists. */ 804 bucketcount = uvmexp.ncolors * VM_NFREELIST; 805 bucketarray = kmem_alloc(bucketcount * sizeof(struct pgflbucket), 806 KM_SLEEP); 807 ucpu = kmem_zalloc(sizeof(*ucpu), KM_SLEEP); 808 uvm.cpus[cpu_index(ci)] = ucpu; 809 ci->ci_data.cpu_uvm = ucpu; 810 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 811 pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors)); 812 uvm_page_init_buckets(&pgfl); 813 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets; 814 } 815 816 attachrnd: 817 /* 818 * Attach RNG source for this CPU's VM events 819 */ 820 rnd_attach_source(&uvm.cpus[cpu_index(ci)]->rs, 821 ci->ci_data.cpu_name, RND_TYPE_VM, 822 RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE| 823 RND_FLAG_ESTIMATE_VALUE); 824 825 } 826 827 /* 828 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat 829 */ 830 831 static struct vm_page * 832 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2, 833 int *trycolorp) 834 { 835 struct pgflist *freeq; 836 struct vm_page *pg; 837 int color, trycolor = *trycolorp; 838 struct pgfreelist *gpgfl, *pgfl; 839 840 KASSERT(mutex_owned(&uvm_fpageqlock)); 841 842 color = trycolor; 843 pgfl = &ucpu->page_free[flist]; 844 gpgfl = &uvm.page_free[flist]; 845 do { 846 /* cpu, try1 */ 847 if ((pg = LIST_FIRST((freeq = 848 &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) { 849 KASSERT(pg->pqflags & PQ_FREE); 850 KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); 851 KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); 852 KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg)); 853 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--; 854 uvmexp.cpuhit++; 855 goto gotit; 856 } 857 /* global, try1 */ 858 if ((pg = LIST_FIRST((freeq = 859 &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) { 860 KASSERT(pg->pqflags & PQ_FREE); 861 KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); 862 KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); 863 KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg)); 864 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--; 865 uvmexp.cpumiss++; 866 goto gotit; 867 } 868 /* cpu, try2 */ 869 if ((pg = LIST_FIRST((freeq = 870 &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) { 871 KASSERT(pg->pqflags & PQ_FREE); 872 KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); 873 KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); 874 KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg)); 875 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--; 876 uvmexp.cpuhit++; 877 goto gotit; 878 } 879 /* global, try2 */ 880 if ((pg = LIST_FIRST((freeq = 881 &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) { 882 KASSERT(pg->pqflags & PQ_FREE); 883 KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); 884 KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); 885 KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg)); 886 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--; 887 uvmexp.cpumiss++; 888 goto gotit; 889 } 890 color = (color + 1) & uvmexp.colormask; 891 } while (color != trycolor); 892 893 return (NULL); 894 895 gotit: 896 LIST_REMOVE(pg, 
pageq.list); /* global list */ 897 LIST_REMOVE(pg, listq.list); /* per-cpu list */ 898 uvmexp.free--; 899 900 /* update zero'd page count */ 901 if (pg->flags & PG_ZERO) 902 uvmexp.zeropages--; 903 904 if (color == trycolor) 905 uvmexp.colorhit++; 906 else { 907 uvmexp.colormiss++; 908 *trycolorp = color; 909 } 910 911 return (pg); 912 } 913 914 /* 915 * uvm_pagealloc_strat: allocate vm_page from a particular free list. 916 * 917 * => return null if no pages free 918 * => wake up pagedaemon if number of free pages drops below low water mark 919 * => if obj != NULL, obj must be locked (to put in obj's tree) 920 * => if anon != NULL, anon must be locked (to put in anon) 921 * => only one of obj or anon can be non-null 922 * => caller must activate/deactivate page if it is not wired. 923 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 924 * => policy decision: it is more important to pull a page off of the 925 * appropriate priority free list than it is to get a zero'd or 926 * unknown contents page. This is because we live with the 927 * consequences of a bad free list decision for the entire 928 * lifetime of the page, e.g. if the page comes from memory that 929 * is slower to access. 930 */ 931 932 struct vm_page * 933 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, 934 int flags, int strat, int free_list) 935 { 936 int try1, try2, zeroit = 0, color; 937 int lcv; 938 struct uvm_cpu *ucpu; 939 struct vm_page *pg; 940 lwp_t *l; 941 942 KASSERT(obj == NULL || anon == NULL); 943 KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); 944 KASSERT(off == trunc_page(off)); 945 KASSERT(obj == NULL || mutex_owned(obj->vmobjlock)); 946 KASSERT(anon == NULL || anon->an_lock == NULL || 947 mutex_owned(anon->an_lock)); 948 949 mutex_spin_enter(&uvm_fpageqlock); 950 951 /* 952 * This implements a global round-robin page coloring 953 * algorithm. 954 */ 955 956 ucpu = curcpu()->ci_data.cpu_uvm; 957 if (flags & UVM_FLAG_COLORMATCH) { 958 color = atop(off) & uvmexp.colormask; 959 } else { 960 color = ucpu->page_free_nextcolor; 961 } 962 963 /* 964 * check to see if we need to generate some free pages waking 965 * the pagedaemon. 966 */ 967 968 uvm_kick_pdaemon(); 969 970 /* 971 * fail if any of these conditions is true: 972 * [1] there really are no free pages, or 973 * [2] only kernel "reserved" pages remain and 974 * reserved pages have not been requested. 975 * [3] only pagedaemon "reserved" pages remain and 976 * the requestor isn't the pagedaemon. 977 * we make kernel reserve pages available if called by a 978 * kernel thread or a realtime thread. 979 */ 980 l = curlwp; 981 if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) { 982 flags |= UVM_PGA_USERESERVE; 983 } 984 if ((uvmexp.free <= uvmexp.reserve_kernel && 985 (flags & UVM_PGA_USERESERVE) == 0) || 986 (uvmexp.free <= uvmexp.reserve_pagedaemon && 987 curlwp != uvm.pagedaemon_lwp)) 988 goto fail; 989 990 #if PGFL_NQUEUES != 2 991 #error uvm_pagealloc_strat needs to be updated 992 #endif 993 994 /* 995 * If we want a zero'd page, try the ZEROS queue first, otherwise 996 * we try the UNKNOWN queue first. 
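 * (each color bucket has exactly two queues, PGFL_ZEROS and PGFL_UNKNOWN;
 * the #error check above guards the code against that assumption changing.)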
997 */ 998 if (flags & UVM_PGA_ZERO) { 999 try1 = PGFL_ZEROS; 1000 try2 = PGFL_UNKNOWN; 1001 } else { 1002 try1 = PGFL_UNKNOWN; 1003 try2 = PGFL_ZEROS; 1004 } 1005 1006 again: 1007 switch (strat) { 1008 case UVM_PGA_STRAT_NORMAL: 1009 /* Check freelists: descending priority (ascending id) order */ 1010 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1011 pg = uvm_pagealloc_pgfl(ucpu, lcv, 1012 try1, try2, &color); 1013 if (pg != NULL) 1014 goto gotit; 1015 } 1016 1017 /* No pages free! */ 1018 goto fail; 1019 1020 case UVM_PGA_STRAT_ONLY: 1021 case UVM_PGA_STRAT_FALLBACK: 1022 /* Attempt to allocate from the specified free list. */ 1023 KASSERT(free_list >= 0 && free_list < VM_NFREELIST); 1024 pg = uvm_pagealloc_pgfl(ucpu, free_list, 1025 try1, try2, &color); 1026 if (pg != NULL) 1027 goto gotit; 1028 1029 /* Fall back, if possible. */ 1030 if (strat == UVM_PGA_STRAT_FALLBACK) { 1031 strat = UVM_PGA_STRAT_NORMAL; 1032 goto again; 1033 } 1034 1035 /* No pages free! */ 1036 goto fail; 1037 1038 default: 1039 panic("uvm_pagealloc_strat: bad strat %d", strat); 1040 /* NOTREACHED */ 1041 } 1042 1043 gotit: 1044 /* 1045 * We now know which color we actually allocated from; set 1046 * the next color accordingly. 1047 */ 1048 1049 ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask; 1050 1051 /* 1052 * update allocation statistics and remember if we have to 1053 * zero the page 1054 */ 1055 1056 if (flags & UVM_PGA_ZERO) { 1057 if (pg->flags & PG_ZERO) { 1058 uvmexp.pga_zerohit++; 1059 zeroit = 0; 1060 } else { 1061 uvmexp.pga_zeromiss++; 1062 zeroit = 1; 1063 } 1064 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) { 1065 ucpu->page_idle_zero = vm_page_zero_enable; 1066 } 1067 } 1068 KASSERT(pg->pqflags == PQ_FREE); 1069 1070 pg->offset = off; 1071 pg->uobject = obj; 1072 pg->uanon = anon; 1073 pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE; 1074 if (anon) { 1075 anon->an_page = pg; 1076 pg->pqflags = PQ_ANON; 1077 atomic_inc_uint(&uvmexp.anonpages); 1078 } else { 1079 if (obj) { 1080 uvm_pageinsert(obj, pg); 1081 } 1082 pg->pqflags = 0; 1083 } 1084 mutex_spin_exit(&uvm_fpageqlock); 1085 1086 #if defined(UVM_PAGE_TRKOWN) 1087 pg->owner_tag = NULL; 1088 #endif 1089 UVM_PAGE_OWN(pg, "new alloc"); 1090 1091 if (flags & UVM_PGA_ZERO) { 1092 /* 1093 * A zero'd page is not clean. If we got a page not already 1094 * zero'd, then we have to zero it ourselves. 
1095 */ 1096 pg->flags &= ~PG_CLEAN; 1097 if (zeroit) 1098 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1099 } 1100 1101 return(pg); 1102 1103 fail: 1104 mutex_spin_exit(&uvm_fpageqlock); 1105 return (NULL); 1106 } 1107 1108 /* 1109 * uvm_pagereplace: replace a page with another 1110 * 1111 * => object must be locked 1112 */ 1113 1114 void 1115 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) 1116 { 1117 struct uvm_object *uobj = oldpg->uobject; 1118 1119 KASSERT((oldpg->flags & PG_TABLED) != 0); 1120 KASSERT(uobj != NULL); 1121 KASSERT((newpg->flags & PG_TABLED) == 0); 1122 KASSERT(newpg->uobject == NULL); 1123 KASSERT(mutex_owned(uobj->vmobjlock)); 1124 1125 newpg->uobject = uobj; 1126 newpg->offset = oldpg->offset; 1127 1128 uvm_pageremove_tree(uobj, oldpg); 1129 uvm_pageinsert_tree(uobj, newpg); 1130 uvm_pageinsert_list(uobj, newpg, oldpg); 1131 uvm_pageremove_list(uobj, oldpg); 1132 } 1133 1134 /* 1135 * uvm_pagerealloc: reallocate a page from one object to another 1136 * 1137 * => both objects must be locked 1138 */ 1139 1140 void 1141 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) 1142 { 1143 /* 1144 * remove it from the old object 1145 */ 1146 1147 if (pg->uobject) { 1148 uvm_pageremove(pg->uobject, pg); 1149 } 1150 1151 /* 1152 * put it in the new object 1153 */ 1154 1155 if (newobj) { 1156 pg->uobject = newobj; 1157 pg->offset = newoff; 1158 uvm_pageinsert(newobj, pg); 1159 } 1160 } 1161 1162 #ifdef DEBUG 1163 /* 1164 * check if page is zero-filled 1165 * 1166 * - called with free page queue lock held. 1167 */ 1168 void 1169 uvm_pagezerocheck(struct vm_page *pg) 1170 { 1171 int *p, *ep; 1172 1173 KASSERT(uvm_zerocheckkva != 0); 1174 KASSERT(mutex_owned(&uvm_fpageqlock)); 1175 1176 /* 1177 * XXX assuming pmap_kenter_pa and pmap_kremove never call 1178 * uvm page allocator. 1179 * 1180 * it might be better to have "CPU-local temporary map" pmap interface. 1181 */ 1182 pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0); 1183 p = (int *)uvm_zerocheckkva; 1184 ep = (int *)((char *)p + PAGE_SIZE); 1185 pmap_update(pmap_kernel()); 1186 while (p < ep) { 1187 if (*p != 0) 1188 panic("PG_ZERO page isn't zero-filled"); 1189 p++; 1190 } 1191 pmap_kremove(uvm_zerocheckkva, PAGE_SIZE); 1192 /* 1193 * pmap_update() is not necessary here because no one except us 1194 * uses this VA. 1195 */ 1196 } 1197 #endif /* DEBUG */ 1198 1199 /* 1200 * uvm_pagefree: free page 1201 * 1202 * => erase page's identity (i.e. remove from object) 1203 * => put page on free list 1204 * => caller must lock owning object (either anon or uvm_object) 1205 * => caller must lock page queues 1206 * => assumes all valid mappings of pg are gone 1207 */ 1208 1209 void 1210 uvm_pagefree(struct vm_page *pg) 1211 { 1212 struct pgflist *pgfl; 1213 struct uvm_cpu *ucpu; 1214 int index, color, queue; 1215 bool iszero; 1216 1217 #ifdef DEBUG 1218 if (pg->uobject == (void *)0xdeadbeef && 1219 pg->uanon == (void *)0xdeadbeef) { 1220 panic("uvm_pagefree: freeing free page %p", pg); 1221 } 1222 #endif /* DEBUG */ 1223 1224 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1225 KASSERT(!(pg->pqflags & PQ_FREE)); 1226 //KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg)); 1227 KASSERT(pg->uobject == NULL || mutex_owned(pg->uobject->vmobjlock)); 1228 KASSERT(pg->uobject != NULL || pg->uanon == NULL || 1229 mutex_owned(pg->uanon->an_lock)); 1230 1231 /* 1232 * if the page is loaned, resolve the loan instead of freeing. 
1233 */ 1234 1235 if (pg->loan_count) { 1236 KASSERT(pg->wire_count == 0); 1237 1238 /* 1239 * if the page is owned by an anon then we just want to 1240 * drop anon ownership. the kernel will free the page when 1241 * it is done with it. if the page is owned by an object, 1242 * remove it from the object and mark it dirty for the benefit 1243 * of possible anon owners. 1244 * 1245 * regardless of previous ownership, wakeup any waiters, 1246 * unbusy the page, and we're done. 1247 */ 1248 1249 if (pg->uobject != NULL) { 1250 uvm_pageremove(pg->uobject, pg); 1251 pg->flags &= ~PG_CLEAN; 1252 } else if (pg->uanon != NULL) { 1253 if ((pg->pqflags & PQ_ANON) == 0) { 1254 pg->loan_count--; 1255 } else { 1256 pg->pqflags &= ~PQ_ANON; 1257 atomic_dec_uint(&uvmexp.anonpages); 1258 } 1259 pg->uanon->an_page = NULL; 1260 pg->uanon = NULL; 1261 } 1262 if (pg->flags & PG_WANTED) { 1263 wakeup(pg); 1264 } 1265 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1); 1266 #ifdef UVM_PAGE_TRKOWN 1267 pg->owner_tag = NULL; 1268 #endif 1269 if (pg->loan_count) { 1270 KASSERT(pg->uobject == NULL); 1271 if (pg->uanon == NULL) { 1272 KASSERT(mutex_owned(&uvm_pageqlock)); 1273 uvm_pagedequeue(pg); 1274 } 1275 return; 1276 } 1277 } 1278 1279 /* 1280 * remove page from its object or anon. 1281 */ 1282 1283 if (pg->uobject != NULL) { 1284 uvm_pageremove(pg->uobject, pg); 1285 } else if (pg->uanon != NULL) { 1286 pg->uanon->an_page = NULL; 1287 atomic_dec_uint(&uvmexp.anonpages); 1288 } 1289 1290 /* 1291 * now remove the page from the queues. 1292 */ 1293 if (uvmpdpol_pageisqueued_p(pg)) { 1294 KASSERT(mutex_owned(&uvm_pageqlock)); 1295 uvm_pagedequeue(pg); 1296 } 1297 1298 /* 1299 * if the page was wired, unwire it now. 1300 */ 1301 1302 if (pg->wire_count) { 1303 pg->wire_count = 0; 1304 uvmexp.wired--; 1305 } 1306 1307 /* 1308 * and put on free queue 1309 */ 1310 1311 iszero = (pg->flags & PG_ZERO); 1312 index = uvm_page_lookup_freelist(pg); 1313 color = VM_PGCOLOR_BUCKET(pg); 1314 queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN); 1315 1316 #ifdef DEBUG 1317 pg->uobject = (void *)0xdeadbeef; 1318 pg->uanon = (void *)0xdeadbeef; 1319 #endif 1320 1321 mutex_spin_enter(&uvm_fpageqlock); 1322 pg->pqflags = PQ_FREE; 1323 1324 #ifdef DEBUG 1325 if (iszero) 1326 uvm_pagezerocheck(pg); 1327 #endif /* DEBUG */ 1328 1329 1330 /* global list */ 1331 pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue]; 1332 LIST_INSERT_HEAD(pgfl, pg, pageq.list); 1333 uvmexp.free++; 1334 if (iszero) { 1335 uvmexp.zeropages++; 1336 } 1337 1338 /* per-cpu list */ 1339 ucpu = curcpu()->ci_data.cpu_uvm; 1340 pg->offset = (uintptr_t)ucpu; 1341 pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue]; 1342 LIST_INSERT_HEAD(pgfl, pg, listq.list); 1343 ucpu->pages[queue]++; 1344 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) { 1345 ucpu->page_idle_zero = vm_page_zero_enable; 1346 } 1347 1348 mutex_spin_exit(&uvm_fpageqlock); 1349 } 1350 1351 /* 1352 * uvm_page_unbusy: unbusy an array of pages. 1353 * 1354 * => pages must either all belong to the same object, or all belong to anons. 1355 * => if pages are object-owned, object must be locked. 1356 * => if pages are anon-owned, anons must be locked. 1357 * => caller must lock page queues if pages may be released. 1358 * => caller must make sure that anon-owned pages are not PG_RELEASED. 
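 *
 * => a sketch of typical use (not taken from this file): a pager that
 *	marked a cluster of pages PG_BUSY before starting I/O calls
 *	uvm_page_unbusy(pgs, npgs) once the I/O completes; any page that
 *	was marked PG_RELEASED while it was busy is freed here as well.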
 */

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;
	UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}

		KASSERT(uvm_page_locked_p(pg));
		KASSERT(pg->flags & PG_BUSY);
		KASSERT((pg->flags & PG_PAGEOUT) == 0);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		if (pg->flags & PG_RELEASED) {
			UVMHIST_LOG(ubchist, "releasing pg %#jx",
			    (uintptr_t)pg, 0, 0, 0);
			KASSERT(pg->uobject != NULL ||
			    (pg->uanon != NULL && pg->uanon->an_ref > 0));
			pg->flags &= ~PG_RELEASED;
			uvm_pagefree(pg);
		} else {
			UVMHIST_LOG(ubchist, "unbusying pg %#jx",
			    (uintptr_t)pg, 0, 0, 0);
			KASSERT((pg->flags & PG_FAKE) == 0);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
	}
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.  it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(struct vm_page *pg, const char *tag)
{

	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
	KASSERT((pg->flags & PG_WANTED) == 0);
	KASSERT(uvm_page_locked_p(pg));

	/* gain ownership? */
	if (tag) {
		KASSERT((pg->flags & PG_BUSY) != 0);
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d [%s]\n", pg,
			    pg->owner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = curproc->p_pid;
		pg->lowner = curlwp->l_lid;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	KASSERT((pg->flags & PG_BUSY) == 0);
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of a non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
		    pg->wire_count > 0);
	} else {
		KASSERT(pg->wire_count == 0);
	}
	pg->owner_tag = NULL;
}
#endif

/*
 * uvm_pageidlezero: zero free pages while the system is idle.
 *
 * => try to complete one color bucket at a time, to reduce our impact
 *	on the CPU cache.
 * => we loop until we either reach the target or there is a lwp ready
 *	to run, or MD code detects a reason to break early.
 */
void
uvm_pageidlezero(void)
{
	struct vm_page *pg;
	struct pgfreelist *pgfl, *gpgfl;
	struct uvm_cpu *ucpu;
	int free_list, firstbucket, nextbucket;
	bool lcont = false;

	ucpu = curcpu()->ci_data.cpu_uvm;
	if (!ucpu->page_idle_zero ||
	    ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
		ucpu->page_idle_zero = false;
		return;
	}
	if (!mutex_tryenter(&uvm_fpageqlock)) {
		/* Contention: let other CPUs use the lock.
*/ 1472 return; 1473 } 1474 firstbucket = ucpu->page_free_nextcolor; 1475 nextbucket = firstbucket; 1476 do { 1477 for (free_list = 0; free_list < VM_NFREELIST; free_list++) { 1478 if (sched_curcpu_runnable_p()) { 1479 goto quit; 1480 } 1481 pgfl = &ucpu->page_free[free_list]; 1482 gpgfl = &uvm.page_free[free_list]; 1483 while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[ 1484 nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) { 1485 if (lcont || sched_curcpu_runnable_p()) { 1486 goto quit; 1487 } 1488 LIST_REMOVE(pg, pageq.list); /* global list */ 1489 LIST_REMOVE(pg, listq.list); /* per-cpu list */ 1490 ucpu->pages[PGFL_UNKNOWN]--; 1491 uvmexp.free--; 1492 KASSERT(pg->pqflags == PQ_FREE); 1493 pg->pqflags = 0; 1494 mutex_spin_exit(&uvm_fpageqlock); 1495 #ifdef PMAP_PAGEIDLEZERO 1496 if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) { 1497 1498 /* 1499 * The machine-dependent code detected 1500 * some reason for us to abort zeroing 1501 * pages, probably because there is a 1502 * process now ready to run. 1503 */ 1504 1505 mutex_spin_enter(&uvm_fpageqlock); 1506 pg->pqflags = PQ_FREE; 1507 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[ 1508 nextbucket].pgfl_queues[ 1509 PGFL_UNKNOWN], pg, pageq.list); 1510 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[ 1511 nextbucket].pgfl_queues[ 1512 PGFL_UNKNOWN], pg, listq.list); 1513 ucpu->pages[PGFL_UNKNOWN]++; 1514 uvmexp.free++; 1515 uvmexp.zeroaborts++; 1516 goto quit; 1517 } 1518 #else 1519 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1520 #endif /* PMAP_PAGEIDLEZERO */ 1521 pg->flags |= PG_ZERO; 1522 1523 if (!mutex_tryenter(&uvm_fpageqlock)) { 1524 lcont = true; 1525 mutex_spin_enter(&uvm_fpageqlock); 1526 } else { 1527 lcont = false; 1528 } 1529 pg->pqflags = PQ_FREE; 1530 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[ 1531 nextbucket].pgfl_queues[PGFL_ZEROS], 1532 pg, pageq.list); 1533 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[ 1534 nextbucket].pgfl_queues[PGFL_ZEROS], 1535 pg, listq.list); 1536 ucpu->pages[PGFL_ZEROS]++; 1537 uvmexp.free++; 1538 uvmexp.zeropages++; 1539 } 1540 } 1541 if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) { 1542 break; 1543 } 1544 nextbucket = (nextbucket + 1) & uvmexp.colormask; 1545 } while (nextbucket != firstbucket); 1546 ucpu->page_idle_zero = false; 1547 quit: 1548 mutex_spin_exit(&uvm_fpageqlock); 1549 } 1550 1551 /* 1552 * uvm_pagelookup: look up a page 1553 * 1554 * => caller should lock object to keep someone from pulling the page 1555 * out from under it 1556 */ 1557 1558 struct vm_page * 1559 uvm_pagelookup(struct uvm_object *obj, voff_t off) 1560 { 1561 struct vm_page *pg; 1562 1563 KASSERT(mutex_owned(obj->vmobjlock)); 1564 1565 pg = rb_tree_find_node(&obj->rb_tree, &off); 1566 1567 KASSERT(pg == NULL || obj->uo_npages != 0); 1568 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 || 1569 (pg->flags & PG_BUSY) != 0); 1570 return pg; 1571 } 1572 1573 /* 1574 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp 1575 * 1576 * => caller must lock page queues 1577 */ 1578 1579 void 1580 uvm_pagewire(struct vm_page *pg) 1581 { 1582 KASSERT(mutex_owned(&uvm_pageqlock)); 1583 #if defined(READAHEAD_STATS) 1584 if ((pg->pqflags & PQ_READAHEAD) != 0) { 1585 uvm_ra_hit.ev_count++; 1586 pg->pqflags &= ~PQ_READAHEAD; 1587 } 1588 #endif /* defined(READAHEAD_STATS) */ 1589 if (pg->wire_count == 0) { 1590 uvm_pagedequeue(pg); 1591 uvmexp.wired++; 1592 } 1593 pg->wire_count++; 1594 KASSERT(pg->wire_count > 0); /* detect wraparound */ 1595 } 1596 1597 /* 1598 * uvm_pageunwire: unwire the page. 
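 *	(counterpart of uvm_pagewire() above; uvmexp.wired counts the pages
 *	currently wired and is adjusted by both.)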
 *
 * => activate if wire count goes to zero.
 * => caller must lock page queues
 */

void
uvm_pageunwire(struct vm_page *pg)
{
	KASSERT(mutex_owned(&uvm_pageqlock));
	pg->wire_count--;
	if (pg->wire_count == 0) {
		uvm_pageactivate(pg);
		uvmexp.wired--;
	}
}

/*
 * uvm_pagedeactivate: deactivate page
 *
 * => caller must lock page queues
 * => caller must check to make sure page is not wired
 * => object that page belongs to must be locked (so we can adjust pg->flags)
 * => caller must clear the reference on the page before calling
 */

void
uvm_pagedeactivate(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
	KASSERT(uvm_page_locked_p(pg));
	KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
	uvmpdpol_pagedeactivate(pg);
}

/*
 * uvm_pageactivate: activate page
 *
 * => caller must lock page queues
 */

void
uvm_pageactivate(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
	KASSERT(uvm_page_locked_p(pg));
#if defined(READAHEAD_STATS)
	if ((pg->pqflags & PQ_READAHEAD) != 0) {
		uvm_ra_hit.ev_count++;
		pg->pqflags &= ~PQ_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count != 0) {
		return;
	}
	uvmpdpol_pageactivate(pg);
}

/*
 * uvm_pagedequeue: remove a page from any paging queue
 */

void
uvm_pagedequeue(struct vm_page *pg)
{

	if (uvmpdpol_pageisqueued_p(pg)) {
		KASSERT(mutex_owned(&uvm_pageqlock));
	}

	uvmpdpol_pagedequeue(pg);
}

/*
 * uvm_pageenqueue: add a page to a paging queue without activating.
 * used where a page is not really demanded (yet).  eg. read-ahead
 */

void
uvm_pageenqueue(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
	if (pg->wire_count != 0) {
		return;
	}
	uvmpdpol_pageenqueue(pg);
}

/*
 * uvm_pagezero: zero fill a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagezero(struct vm_page *pg)
{
	pg->flags &= ~PG_CLEAN;
	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
}

/*
 * uvm_pagecopy: copy a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
{

	dst->flags &= ~PG_CLEAN;
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}

/*
 * uvm_pageismanaged: test to see whether a page (specified by PA) is managed.
 */

bool
uvm_pageismanaged(paddr_t pa)
{

	return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID);
}

/*
 * uvm_page_lookup_freelist: look up the free list for the specified page
 */

int
uvm_page_lookup_freelist(struct vm_page *pg)
{
	uvm_physseg_t upm;

	upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
	KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID);
	return uvm_physseg_get_free_list(upm);
}

/*
 * uvm_page_locked_p: return true if object associated with page is
 * locked.  this is a weak check for runtime assertions only.
1746 */ 1747 1748 bool 1749 uvm_page_locked_p(struct vm_page *pg) 1750 { 1751 1752 if (pg->uobject != NULL) { 1753 return mutex_owned(pg->uobject->vmobjlock); 1754 } 1755 if (pg->uanon != NULL) { 1756 return mutex_owned(pg->uanon->an_lock); 1757 } 1758 return true; 1759 } 1760 1761 #ifdef PMAP_DIRECT 1762 /* 1763 * Call pmap to translate physical address into a virtual and to run a callback 1764 * for it. Used to avoid actually mapping the pages, pmap most likely uses direct map 1765 * or equivalent. 1766 */ 1767 int 1768 uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len, 1769 int (*process)(void *, size_t, void *), void *arg) 1770 { 1771 int error = 0; 1772 paddr_t pa; 1773 size_t todo; 1774 voff_t pgoff = (off & PAGE_MASK); 1775 struct vm_page *pg; 1776 1777 KASSERT(npages > 0 && len > 0); 1778 1779 for (int i = 0; i < npages; i++) { 1780 pg = pgs[i]; 1781 1782 KASSERT(len > 0); 1783 1784 /* 1785 * Caller is responsible for ensuring all the pages are 1786 * available. 1787 */ 1788 KASSERT(pg != NULL && pg != PGO_DONTCARE); 1789 1790 pa = VM_PAGE_TO_PHYS(pg); 1791 todo = MIN(len, PAGE_SIZE - pgoff); 1792 1793 error = pmap_direct_process(pa, pgoff, todo, process, arg); 1794 if (error) 1795 break; 1796 1797 pgoff = 0; 1798 len -= todo; 1799 } 1800 1801 KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len); 1802 return error; 1803 } 1804 #endif /* PMAP_DIRECT */ 1805 1806 #if defined(DDB) || defined(DEBUGPRINT) 1807 1808 /* 1809 * uvm_page_printit: actually print the page 1810 */ 1811 1812 static const char page_flagbits[] = UVM_PGFLAGBITS; 1813 static const char page_pqflagbits[] = UVM_PQFLAGBITS; 1814 1815 void 1816 uvm_page_printit(struct vm_page *pg, bool full, 1817 void (*pr)(const char *, ...)) 1818 { 1819 struct vm_page *tpg; 1820 struct uvm_object *uobj; 1821 struct pgflist *pgl; 1822 char pgbuf[128]; 1823 char pqbuf[128]; 1824 1825 (*pr)("PAGE %p:\n", pg); 1826 snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); 1827 snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags); 1828 (*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n", 1829 pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg)); 1830 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 1831 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 1832 #if defined(UVM_PAGE_TRKOWN) 1833 if (pg->flags & PG_BUSY) 1834 (*pr)(" owning process = %d, tag=%s\n", 1835 pg->owner, pg->owner_tag); 1836 else 1837 (*pr)(" page not busy, no owner\n"); 1838 #else 1839 (*pr)(" [page ownership tracking disabled]\n"); 1840 #endif 1841 1842 if (!full) 1843 return; 1844 1845 /* cross-verify object/anon */ 1846 if ((pg->pqflags & PQ_FREE) == 0) { 1847 if (pg->pqflags & PQ_ANON) { 1848 if (pg->uanon == NULL || pg->uanon->an_page != pg) 1849 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 1850 (pg->uanon) ? pg->uanon->an_page : NULL); 1851 else 1852 (*pr)(" anon backpointer is OK\n"); 1853 } else { 1854 uobj = pg->uobject; 1855 if (uobj) { 1856 (*pr)(" checking object list\n"); 1857 TAILQ_FOREACH(tpg, &uobj->memq, listq.queue) { 1858 if (tpg == pg) { 1859 break; 1860 } 1861 } 1862 if (tpg) 1863 (*pr)(" page found on object list\n"); 1864 else 1865 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); 1866 } 1867 } 1868 } 1869 1870 /* cross-verify page queue */ 1871 if (pg->pqflags & PQ_FREE) { 1872 int fl = uvm_page_lookup_freelist(pg); 1873 int color = VM_PGCOLOR_BUCKET(pg); 1874 pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[ 1875 ((pg)->flags & PG_ZERO) ? 
		    PGFL_ZEROS : PGFL_UNKNOWN];
	} else {
		pgl = NULL;
	}

	if (pgl) {
		(*pr)("  checking pageq list\n");
		LIST_FOREACH(tpg, pgl, pageq.list) {
			if (tpg == pg) {
				break;
			}
		}
		if (tpg)
			(*pr)("  page found on pageq list\n");
		else
			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
	}
}

/*
 * uvm_page_printall - print a summary of all managed pages
 */

void
uvm_page_printall(void (*pr)(const char *, ...))
{
	uvm_physseg_t i;
	paddr_t pfn;
	struct vm_page *pg;

	(*pr)("%18s %4s %4s %18s %18s"
#ifdef UVM_PAGE_TRKOWN
	    " OWNER"
#endif
	    "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
	for (i = uvm_physseg_get_first();
	     uvm_physseg_valid_p(i);
	     i = uvm_physseg_get_next(i)) {
		for (pfn = uvm_physseg_get_start(i);
		     pfn < uvm_physseg_get_end(i);
		     pfn++) {
			pg = PHYS_TO_VM_PAGE(ptoa(pfn));

			(*pr)("%18p %04x %04x %18p %18p",
			    pg, pg->flags, pg->pqflags, pg->uobject,
			    pg->uanon);
#ifdef UVM_PAGE_TRKOWN
			if (pg->flags & PG_BUSY)
				(*pr)(" %d [%s]", pg->owner, pg->owner_tag);
#endif
			(*pr)("\n");
		}
	}
}

#endif /* DDB || DEBUGPRINT */