1 /* $NetBSD: uvm_page.c,v 1.187 2015/04/11 19:24:13 joerg Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * Copyright (c) 1991, 1993, The Regents of the University of California. 6 * 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * The Mach Operating System project at Carnegie-Mellon University. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 37 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp 38 * 39 * 40 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 41 * All rights reserved. 42 * 43 * Permission to use, copy, modify and distribute this software and 44 * its documentation is hereby granted, provided that both the copyright 45 * notice and this permission notice appear in all copies of the 46 * software, derivative works or modified versions, and any portions 47 * thereof, and that both notices appear in supporting documentation. 48 * 49 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 50 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 51 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 52 * 53 * Carnegie Mellon requests users of this software to return to 54 * 55 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 56 * School of Computer Science 57 * Carnegie Mellon University 58 * Pittsburgh PA 15213-3890 59 * 60 * any improvements or extensions that they make and grant Carnegie the 61 * rights to redistribute these changes. 62 */ 63 64 /* 65 * uvm_page.c: page ops. 
66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.187 2015/04/11 19:24:13 joerg Exp $"); 70 71 #include "opt_ddb.h" 72 #include "opt_uvm.h" 73 #include "opt_uvmhist.h" 74 #include "opt_readahead.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/sched.h> 79 #include <sys/kernel.h> 80 #include <sys/vnode.h> 81 #include <sys/proc.h> 82 #include <sys/atomic.h> 83 #include <sys/cpu.h> 84 85 #include <uvm/uvm.h> 86 #include <uvm/uvm_ddb.h> 87 #include <uvm/uvm_pdpolicy.h> 88 89 /* 90 * global vars... XXXCDC: move to uvm. structure. 91 */ 92 93 /* 94 * physical memory config is stored in vm_physmem. 95 */ 96 97 struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */ 98 int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */ 99 #define vm_nphysmem vm_nphysseg 100 101 /* 102 * Some supported CPUs in a given architecture don't support all 103 * of the things necessary to do idle page zero'ing efficiently. 104 * We therefore provide a way to enable it from machdep code here. 105 */ 106 bool vm_page_zero_enable = false; 107 108 /* 109 * number of pages per-CPU to reserve for the kernel. 110 */ 111 #ifndef UVM_RESERVED_PAGES_PER_CPU 112 #define UVM_RESERVED_PAGES_PER_CPU 5 113 #endif 114 int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; 115 116 /* 117 * physical memory size; 118 */ 119 int physmem; 120 121 /* 122 * local variables 123 */ 124 125 /* 126 * these variables record the values returned by vm_page_bootstrap, 127 * for debugging purposes. The implementation of uvm_pageboot_alloc 128 * and pmap_startup here also uses them internally. 129 */ 130 131 static vaddr_t virtual_space_start; 132 static vaddr_t virtual_space_end; 133 134 /* 135 * we allocate an initial number of page colors in uvm_page_init(), 136 * and remember them. We may re-color pages as cache sizes are 137 * discovered during the autoconfiguration phase. But we can never 138 * free the initial set of buckets, since they are allocated using 139 * uvm_pageboot_alloc(). 140 */ 141 142 static size_t recolored_pages_memsize /* = 0 */; 143 144 #ifdef DEBUG 145 vaddr_t uvm_zerocheckkva; 146 #endif /* DEBUG */ 147 148 /* 149 * local prototypes 150 */ 151 152 static void uvm_pageinsert(struct uvm_object *, struct vm_page *); 153 static void uvm_pageremove(struct uvm_object *, struct vm_page *); 154 155 /* 156 * per-object tree of pages 157 */ 158 159 static signed int 160 uvm_page_compare_nodes(void *ctx, const void *n1, const void *n2) 161 { 162 const struct vm_page *pg1 = n1; 163 const struct vm_page *pg2 = n2; 164 const voff_t a = pg1->offset; 165 const voff_t b = pg2->offset; 166 167 if (a < b) 168 return -1; 169 if (a > b) 170 return 1; 171 return 0; 172 } 173 174 static signed int 175 uvm_page_compare_key(void *ctx, const void *n, const void *key) 176 { 177 const struct vm_page *pg = n; 178 const voff_t a = pg->offset; 179 const voff_t b = *(const voff_t *)key; 180 181 if (a < b) 182 return -1; 183 if (a > b) 184 return 1; 185 return 0; 186 } 187 188 const rb_tree_ops_t uvm_page_tree_ops = { 189 .rbto_compare_nodes = uvm_page_compare_nodes, 190 .rbto_compare_key = uvm_page_compare_key, 191 .rbto_node_offset = offsetof(struct vm_page, rb_node), 192 .rbto_context = NULL 193 }; 194 195 /* 196 * inline functions 197 */ 198 199 /* 200 * uvm_pageinsert: insert a page in the object. 
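 *    (a rough usage sketch, as done by uvm_pagealloc_strat() later in this
 *     file: with the object locked, the caller sets the page's identity
 *     first and then inserts it --
 *
 *	pg->offset = off;
 *	pg->uobject = uobj;
 *	uvm_pageinsert(uobj, pg);
 *
 *     -- after which pg sits in uobj's rb_tree and memq with PG_TABLED set)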
201 * 202 * => caller must lock object 203 * => caller must lock page queues 204 * => call should have already set pg's object and offset pointers 205 * and bumped the version counter 206 */ 207 208 static inline void 209 uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg, 210 struct vm_page *where) 211 { 212 213 KASSERT(uobj == pg->uobject); 214 KASSERT(mutex_owned(uobj->vmobjlock)); 215 KASSERT((pg->flags & PG_TABLED) == 0); 216 KASSERT(where == NULL || (where->flags & PG_TABLED)); 217 KASSERT(where == NULL || (where->uobject == uobj)); 218 219 if (UVM_OBJ_IS_VNODE(uobj)) { 220 if (uobj->uo_npages == 0) { 221 struct vnode *vp = (struct vnode *)uobj; 222 223 vholdl(vp); 224 } 225 if (UVM_OBJ_IS_VTEXT(uobj)) { 226 atomic_inc_uint(&uvmexp.execpages); 227 } else { 228 atomic_inc_uint(&uvmexp.filepages); 229 } 230 } else if (UVM_OBJ_IS_AOBJ(uobj)) { 231 atomic_inc_uint(&uvmexp.anonpages); 232 } 233 234 if (where) 235 TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue); 236 else 237 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); 238 pg->flags |= PG_TABLED; 239 uobj->uo_npages++; 240 } 241 242 243 static inline void 244 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) 245 { 246 struct vm_page *ret __diagused; 247 248 KASSERT(uobj == pg->uobject); 249 ret = rb_tree_insert_node(&uobj->rb_tree, pg); 250 KASSERT(ret == pg); 251 } 252 253 static inline void 254 uvm_pageinsert(struct uvm_object *uobj, struct vm_page *pg) 255 { 256 257 KDASSERT(uobj != NULL); 258 uvm_pageinsert_tree(uobj, pg); 259 uvm_pageinsert_list(uobj, pg, NULL); 260 } 261 262 /* 263 * uvm_page_remove: remove page from object. 264 * 265 * => caller must lock object 266 * => caller must lock page queues 267 */ 268 269 static inline void 270 uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg) 271 { 272 273 KASSERT(uobj == pg->uobject); 274 KASSERT(mutex_owned(uobj->vmobjlock)); 275 KASSERT(pg->flags & PG_TABLED); 276 277 if (UVM_OBJ_IS_VNODE(uobj)) { 278 if (uobj->uo_npages == 1) { 279 struct vnode *vp = (struct vnode *)uobj; 280 281 holdrelel(vp); 282 } 283 if (UVM_OBJ_IS_VTEXT(uobj)) { 284 atomic_dec_uint(&uvmexp.execpages); 285 } else { 286 atomic_dec_uint(&uvmexp.filepages); 287 } 288 } else if (UVM_OBJ_IS_AOBJ(uobj)) { 289 atomic_dec_uint(&uvmexp.anonpages); 290 } 291 292 /* object should be locked */ 293 uobj->uo_npages--; 294 TAILQ_REMOVE(&uobj->memq, pg, listq.queue); 295 pg->flags &= ~PG_TABLED; 296 pg->uobject = NULL; 297 } 298 299 static inline void 300 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg) 301 { 302 303 KASSERT(uobj == pg->uobject); 304 rb_tree_remove_node(&uobj->rb_tree, pg); 305 } 306 307 static inline void 308 uvm_pageremove(struct uvm_object *uobj, struct vm_page *pg) 309 { 310 311 KDASSERT(uobj != NULL); 312 uvm_pageremove_tree(uobj, pg); 313 uvm_pageremove_list(uobj, pg); 314 } 315 316 static void 317 uvm_page_init_buckets(struct pgfreelist *pgfl) 318 { 319 int color, i; 320 321 for (color = 0; color < uvmexp.ncolors; color++) { 322 for (i = 0; i < PGFL_NQUEUES; i++) { 323 LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]); 324 } 325 } 326 } 327 328 /* 329 * uvm_page_init: init the page system. called from uvm_init(). 
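 *    (illustrative boot ordering only -- the exact calls are machine-
 *     dependent: MD code first registers RAM, then uvm_init() reaches us:
 *
 *	uvm_setpagesize();
 *	uvm_page_physload(atop(start), atop(end),
 *	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
 *	...
 *	uvm_init();	-- which in turn calls uvm_page_init()
 *
 *     where start/end/avail_start/avail_end are MD byte addresses.)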
330 * 331 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 332 */ 333 334 void 335 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) 336 { 337 static struct uvm_cpu boot_cpu; 338 psize_t freepages, pagecount, bucketcount, n; 339 struct pgflbucket *bucketarray, *cpuarray; 340 struct vm_physseg *seg; 341 struct vm_page *pagearray; 342 int lcv; 343 u_int i; 344 paddr_t paddr; 345 346 KASSERT(ncpu <= 1); 347 CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *)); 348 349 /* 350 * init the page queues and page queue locks, except the free 351 * list; we allocate that later (with the initial vm_page 352 * structures). 353 */ 354 355 uvm.cpus[0] = &boot_cpu; 356 curcpu()->ci_data.cpu_uvm = &boot_cpu; 357 uvmpdpol_init(); 358 mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE); 359 mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM); 360 361 /* 362 * allocate vm_page structures. 363 */ 364 365 /* 366 * sanity check: 367 * before calling this function the MD code is expected to register 368 * some free RAM with the uvm_page_physload() function. our job 369 * now is to allocate vm_page structures for this memory. 370 */ 371 372 if (vm_nphysmem == 0) 373 panic("uvm_page_bootstrap: no memory pre-allocated"); 374 375 /* 376 * first calculate the number of free pages... 377 * 378 * note that we use start/end rather than avail_start/avail_end. 379 * this allows us to allocate extra vm_page structures in case we 380 * want to return some memory to the pool after booting. 381 */ 382 383 freepages = 0; 384 for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) { 385 seg = VM_PHYSMEM_PTR(lcv); 386 freepages += (seg->end - seg->start); 387 } 388 389 /* 390 * Let MD code initialize the number of colors, or default 391 * to 1 color if MD code doesn't care. 392 */ 393 if (uvmexp.ncolors == 0) 394 uvmexp.ncolors = 1; 395 uvmexp.colormask = uvmexp.ncolors - 1; 396 KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); 397 398 /* 399 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 400 * use. for each page of memory we use we need a vm_page structure. 401 * thus, the total number of pages we can use is the total size of 402 * the memory divided by the PAGE_SIZE plus the size of the vm_page 403 * structure. we add one to freepages as a fudge factor to avoid 404 * truncation errors (since we can only allocate in terms of whole 405 * pages). 406 */ 407 408 bucketcount = uvmexp.ncolors * VM_NFREELIST; 409 pagecount = ((freepages + 1) << PAGE_SHIFT) / 410 (PAGE_SIZE + sizeof(struct vm_page)); 411 412 bucketarray = (void *)uvm_pageboot_alloc((bucketcount * 413 sizeof(struct pgflbucket) * 2) + (pagecount * 414 sizeof(struct vm_page))); 415 cpuarray = bucketarray + bucketcount; 416 pagearray = (struct vm_page *)(bucketarray + bucketcount * 2); 417 418 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 419 uvm.page_free[lcv].pgfl_buckets = 420 (bucketarray + (lcv * uvmexp.ncolors)); 421 uvm_page_init_buckets(&uvm.page_free[lcv]); 422 uvm.cpus[0]->page_free[lcv].pgfl_buckets = 423 (cpuarray + (lcv * uvmexp.ncolors)); 424 uvm_page_init_buckets(&uvm.cpus[0]->page_free[lcv]); 425 } 426 memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 427 428 /* 429 * init the vm_page structures and put them in the correct place. 
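 * each physseg gets a contiguous slice of pagearray here; pages that
 * fall inside the segment's avail_start/avail_end window are handed to
 * uvm_pagefree() below, which is what initially populates the free lists.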
430 */ 431 432 for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) { 433 seg = VM_PHYSMEM_PTR(lcv); 434 n = seg->end - seg->start; 435 436 /* set up page array pointers */ 437 seg->pgs = pagearray; 438 pagearray += n; 439 pagecount -= n; 440 seg->lastpg = seg->pgs + n; 441 442 /* init and free vm_pages (we've already zeroed them) */ 443 paddr = ctob(seg->start); 444 for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) { 445 seg->pgs[i].phys_addr = paddr; 446 #ifdef __HAVE_VM_PAGE_MD 447 VM_MDPAGE_INIT(&seg->pgs[i]); 448 #endif 449 if (atop(paddr) >= seg->avail_start && 450 atop(paddr) < seg->avail_end) { 451 uvmexp.npages++; 452 /* add page to free pool */ 453 uvm_pagefree(&seg->pgs[i]); 454 } 455 } 456 } 457 458 /* 459 * pass up the values of virtual_space_start and 460 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 461 * layers of the VM. 462 */ 463 464 *kvm_startp = round_page(virtual_space_start); 465 *kvm_endp = trunc_page(virtual_space_end); 466 #ifdef DEBUG 467 /* 468 * steal kva for uvm_pagezerocheck(). 469 */ 470 uvm_zerocheckkva = *kvm_startp; 471 *kvm_startp += PAGE_SIZE; 472 #endif /* DEBUG */ 473 474 /* 475 * init various thresholds. 476 */ 477 478 uvmexp.reserve_pagedaemon = 1; 479 uvmexp.reserve_kernel = vm_page_reserve_kernel; 480 481 /* 482 * determine if we should zero pages in the idle loop. 483 */ 484 485 uvm.cpus[0]->page_idle_zero = vm_page_zero_enable; 486 487 /* 488 * done! 489 */ 490 491 uvm.page_init_done = true; 492 } 493 494 /* 495 * uvm_setpagesize: set the page size 496 * 497 * => sets page_shift and page_mask from uvmexp.pagesize. 498 */ 499 500 void 501 uvm_setpagesize(void) 502 { 503 504 /* 505 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE 506 * to be a constant (indicated by being a non-zero value). 507 */ 508 if (uvmexp.pagesize == 0) { 509 if (PAGE_SIZE == 0) 510 panic("uvm_setpagesize: uvmexp.pagesize not set"); 511 uvmexp.pagesize = PAGE_SIZE; 512 } 513 uvmexp.pagemask = uvmexp.pagesize - 1; 514 if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 515 panic("uvm_setpagesize: page size %u (%#x) not a power of two", 516 uvmexp.pagesize, uvmexp.pagesize); 517 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 518 if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 519 break; 520 } 521 522 /* 523 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 524 */ 525 526 vaddr_t 527 uvm_pageboot_alloc(vsize_t size) 528 { 529 static bool initialized = false; 530 vaddr_t addr; 531 #if !defined(PMAP_STEAL_MEMORY) 532 vaddr_t vaddr; 533 paddr_t paddr; 534 #endif 535 536 /* 537 * on first call to this function, initialize ourselves. 538 */ 539 if (initialized == false) { 540 pmap_virtual_space(&virtual_space_start, &virtual_space_end); 541 542 /* round it the way we like it */ 543 virtual_space_start = round_page(virtual_space_start); 544 virtual_space_end = trunc_page(virtual_space_end); 545 546 initialized = true; 547 } 548 549 /* round to page size */ 550 size = round_page(size); 551 552 #if defined(PMAP_STEAL_MEMORY) 553 554 /* 555 * defer bootstrap allocation to MD code (it may want to allocate 556 * from a direct-mapped segment). pmap_steal_memory should adjust 557 * virtual_space_start/virtual_space_end if necessary. 
558 */ 559 560 addr = pmap_steal_memory(size, &virtual_space_start, 561 &virtual_space_end); 562 563 return(addr); 564 565 #else /* !PMAP_STEAL_MEMORY */ 566 567 /* 568 * allocate virtual memory for this request 569 */ 570 if (virtual_space_start == virtual_space_end || 571 (virtual_space_end - virtual_space_start) < size) 572 panic("uvm_pageboot_alloc: out of virtual space"); 573 574 addr = virtual_space_start; 575 576 #ifdef PMAP_GROWKERNEL 577 /* 578 * If the kernel pmap can't map the requested space, 579 * then allocate more resources for it. 580 */ 581 if (uvm_maxkaddr < (addr + size)) { 582 uvm_maxkaddr = pmap_growkernel(addr + size); 583 if (uvm_maxkaddr < (addr + size)) 584 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 585 } 586 #endif 587 588 virtual_space_start += size; 589 590 /* 591 * allocate and mapin physical pages to back new virtual pages 592 */ 593 594 for (vaddr = round_page(addr) ; vaddr < addr + size ; 595 vaddr += PAGE_SIZE) { 596 597 if (!uvm_page_physget(&paddr)) 598 panic("uvm_pageboot_alloc: out of memory"); 599 600 /* 601 * Note this memory is no longer managed, so using 602 * pmap_kenter is safe. 603 */ 604 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 605 } 606 pmap_update(pmap_kernel()); 607 return(addr); 608 #endif /* PMAP_STEAL_MEMORY */ 609 } 610 611 #if !defined(PMAP_STEAL_MEMORY) 612 /* 613 * uvm_page_physget: "steal" one page from the vm_physmem structure. 614 * 615 * => attempt to allocate it off the end of a segment in which the "avail" 616 * values match the start/end values. if we can't do that, then we 617 * will advance both values (making them equal, and removing some 618 * vm_page structures from the non-avail area). 619 * => return false if out of memory. 620 */ 621 622 /* subroutine: try to allocate from memory chunks on the specified freelist */ 623 static bool uvm_page_physget_freelist(paddr_t *, int); 624 625 static bool 626 uvm_page_physget_freelist(paddr_t *paddrp, int freelist) 627 { 628 struct vm_physseg *seg; 629 int lcv, x; 630 631 /* pass 1: try allocating from a matching end */ 632 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 633 for (lcv = vm_nphysmem - 1 ; lcv >= 0 ; lcv--) 634 #else 635 for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) 636 #endif 637 { 638 seg = VM_PHYSMEM_PTR(lcv); 639 640 if (uvm.page_init_done == true) 641 panic("uvm_page_physget: called _after_ bootstrap"); 642 643 if (seg->free_list != freelist) 644 continue; 645 646 /* try from front */ 647 if (seg->avail_start == seg->start && 648 seg->avail_start < seg->avail_end) { 649 *paddrp = ctob(seg->avail_start); 650 seg->avail_start++; 651 seg->start++; 652 /* nothing left? nuke it */ 653 if (seg->avail_start == seg->end) { 654 if (vm_nphysmem == 1) 655 panic("uvm_page_physget: out of memory!"); 656 vm_nphysmem--; 657 for (x = lcv ; x < vm_nphysmem ; x++) 658 /* structure copy */ 659 VM_PHYSMEM_PTR_SWAP(x, x + 1); 660 } 661 return (true); 662 } 663 664 /* try from rear */ 665 if (seg->avail_end == seg->end && 666 seg->avail_start < seg->avail_end) { 667 *paddrp = ctob(seg->avail_end - 1); 668 seg->avail_end--; 669 seg->end--; 670 /* nothing left? 
nuke it */ 671 if (seg->avail_end == seg->start) { 672 if (vm_nphysmem == 1) 673 panic("uvm_page_physget: out of memory!"); 674 vm_nphysmem--; 675 for (x = lcv ; x < vm_nphysmem ; x++) 676 /* structure copy */ 677 VM_PHYSMEM_PTR_SWAP(x, x + 1); 678 } 679 return (true); 680 } 681 } 682 683 /* pass2: forget about matching ends, just allocate something */ 684 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 685 for (lcv = vm_nphysmem - 1 ; lcv >= 0 ; lcv--) 686 #else 687 for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) 688 #endif 689 { 690 seg = VM_PHYSMEM_PTR(lcv); 691 692 /* any room in this bank? */ 693 if (seg->avail_start >= seg->avail_end) 694 continue; /* nope */ 695 696 *paddrp = ctob(seg->avail_start); 697 seg->avail_start++; 698 /* truncate! */ 699 seg->start = seg->avail_start; 700 701 /* nothing left? nuke it */ 702 if (seg->avail_start == seg->end) { 703 if (vm_nphysmem == 1) 704 panic("uvm_page_physget: out of memory!"); 705 vm_nphysmem--; 706 for (x = lcv ; x < vm_nphysmem ; x++) 707 /* structure copy */ 708 VM_PHYSMEM_PTR_SWAP(x, x + 1); 709 } 710 return (true); 711 } 712 713 return (false); /* whoops! */ 714 } 715 716 bool 717 uvm_page_physget(paddr_t *paddrp) 718 { 719 int i; 720 721 /* try in the order of freelist preference */ 722 for (i = 0; i < VM_NFREELIST; i++) 723 if (uvm_page_physget_freelist(paddrp, i) == true) 724 return (true); 725 return (false); 726 } 727 #endif /* PMAP_STEAL_MEMORY */ 728 729 /* 730 * uvm_page_physload: load physical memory into VM system 731 * 732 * => all args are PFs 733 * => all pages in start/end get vm_page structures 734 * => areas marked by avail_start/avail_end get added to the free page pool 735 * => we are limited to VM_PHYSSEG_MAX physical memory segments 736 */ 737 738 void 739 uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start, 740 paddr_t avail_end, int free_list) 741 { 742 int preload, lcv; 743 psize_t npages; 744 struct vm_page *pgs; 745 struct vm_physseg *ps; 746 747 if (uvmexp.pagesize == 0) 748 panic("uvm_page_physload: page size not set!"); 749 if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT) 750 panic("uvm_page_physload: bad free list %d", free_list); 751 if (start >= end) 752 panic("uvm_page_physload: start >= end"); 753 754 /* 755 * do we have room? 756 */ 757 758 if (vm_nphysmem == VM_PHYSSEG_MAX) { 759 printf("uvm_page_physload: unable to load physical memory " 760 "segment\n"); 761 printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n", 762 VM_PHYSSEG_MAX, (long long)start, (long long)end); 763 printf("\tincrease VM_PHYSSEG_MAX\n"); 764 return; 765 } 766 767 /* 768 * check to see if this is a "preload" (i.e. uvm_page_init hasn't been 769 * called yet, so kmem is not available). 770 */ 771 772 for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) { 773 if (VM_PHYSMEM_PTR(lcv)->pgs) 774 break; 775 } 776 preload = (lcv == vm_nphysmem); 777 778 /* 779 * if VM is already running, attempt to kmem_alloc vm_page structures 780 */ 781 782 if (!preload) { 783 panic("uvm_page_physload: tried to add RAM after vm_mem_init"); 784 } else { 785 pgs = NULL; 786 npages = 0; 787 } 788 789 /* 790 * now insert us in the proper place in vm_physmem[] 791 */ 792 793 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM) 794 /* random: put it at the end (easy!) 
*/ 795 ps = VM_PHYSMEM_PTR(vm_nphysmem); 796 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) 797 { 798 int x; 799 /* sort by address for binary search */ 800 for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) 801 if (start < VM_PHYSMEM_PTR(lcv)->start) 802 break; 803 ps = VM_PHYSMEM_PTR(lcv); 804 /* move back other entries, if necessary ... */ 805 for (x = vm_nphysmem ; x > lcv ; x--) 806 /* structure copy */ 807 VM_PHYSMEM_PTR_SWAP(x, x - 1); 808 } 809 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 810 { 811 int x; 812 /* sort by largest segment first */ 813 for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) 814 if ((end - start) > 815 (VM_PHYSMEM_PTR(lcv)->end - VM_PHYSMEM_PTR(lcv)->start)) 816 break; 817 ps = VM_PHYSMEM_PTR(lcv); 818 /* move back other entries, if necessary ... */ 819 for (x = vm_nphysmem ; x > lcv ; x--) 820 /* structure copy */ 821 VM_PHYSMEM_PTR_SWAP(x, x - 1); 822 } 823 #else 824 panic("uvm_page_physload: unknown physseg strategy selected!"); 825 #endif 826 827 ps->start = start; 828 ps->end = end; 829 ps->avail_start = avail_start; 830 ps->avail_end = avail_end; 831 if (preload) { 832 ps->pgs = NULL; 833 } else { 834 ps->pgs = pgs; 835 ps->lastpg = pgs + npages; 836 } 837 ps->free_list = free_list; 838 vm_nphysmem++; 839 840 if (!preload) { 841 uvmpdpol_reinit(); 842 } 843 } 844 845 /* 846 * when VM_PHYSSEG_MAX is 1, we can simplify these functions 847 */ 848 849 #if VM_PHYSSEG_MAX == 1 850 static inline int vm_physseg_find_contig(struct vm_physseg *, int, paddr_t, int *); 851 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) 852 static inline int vm_physseg_find_bsearch(struct vm_physseg *, int, paddr_t, int *); 853 #else 854 static inline int vm_physseg_find_linear(struct vm_physseg *, int, paddr_t, int *); 855 #endif 856 857 /* 858 * vm_physseg_find: find vm_physseg structure that belongs to a PA 859 */ 860 int 861 vm_physseg_find(paddr_t pframe, int *offp) 862 { 863 864 #if VM_PHYSSEG_MAX == 1 865 return vm_physseg_find_contig(vm_physmem, vm_nphysseg, pframe, offp); 866 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) 867 return vm_physseg_find_bsearch(vm_physmem, vm_nphysseg, pframe, offp); 868 #else 869 return vm_physseg_find_linear(vm_physmem, vm_nphysseg, pframe, offp); 870 #endif 871 } 872 873 #if VM_PHYSSEG_MAX == 1 874 static inline int 875 vm_physseg_find_contig(struct vm_physseg *segs, int nsegs, paddr_t pframe, int *offp) 876 { 877 878 /* 'contig' case */ 879 if (pframe >= segs[0].start && pframe < segs[0].end) { 880 if (offp) 881 *offp = pframe - segs[0].start; 882 return(0); 883 } 884 return(-1); 885 } 886 887 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) 888 889 static inline int 890 vm_physseg_find_bsearch(struct vm_physseg *segs, int nsegs, paddr_t pframe, int *offp) 891 { 892 /* binary search for it */ 893 u_int start, len, guess; 894 895 /* 896 * if try is too large (thus target is less than try) we reduce 897 * the length to trunc(len/2) [i.e. everything smaller than "try"] 898 * 899 * if the try is too small (thus target is greater than try) then 900 * we set the new start to be (try + 1). this means we need to 901 * reduce the length to (round(len/2) - 1). 902 * 903 * note "adjust" below which takes advantage of the fact that 904 * (round(len/2) - 1) == trunc((len - 1) / 2) 905 * for any value of len we may have 906 */ 907 908 for (start = 0, len = nsegs ; len != 0 ; len = len / 2) { 909 guess = start + (len / 2); /* try in the middle */ 910 911 /* start past our try? */ 912 if (pframe >= segs[guess].start) { 913 /* was try correct? 
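 * (we already know pframe >= segs[guess].start; if it is also below
 *  segs[guess].end, the target lies inside this segment)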
*/ 914 if (pframe < segs[guess].end) { 915 if (offp) 916 *offp = pframe - segs[guess].start; 917 return guess; /* got it */ 918 } 919 start = guess + 1; /* next time, start here */ 920 len--; /* "adjust" */ 921 } else { 922 /* 923 * pframe before try, just reduce length of 924 * region, done in "for" loop 925 */ 926 } 927 } 928 return(-1); 929 } 930 931 #else 932 933 static inline int 934 vm_physseg_find_linear(struct vm_physseg *segs, int nsegs, paddr_t pframe, int *offp) 935 { 936 /* linear search for it */ 937 int lcv; 938 939 for (lcv = 0; lcv < nsegs; lcv++) { 940 if (pframe >= segs[lcv].start && 941 pframe < segs[lcv].end) { 942 if (offp) 943 *offp = pframe - segs[lcv].start; 944 return(lcv); /* got it */ 945 } 946 } 947 return(-1); 948 } 949 #endif 950 951 /* 952 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages 953 * back from an I/O mapping (ugh!). used in some MD code as well. 954 */ 955 struct vm_page * 956 uvm_phys_to_vm_page(paddr_t pa) 957 { 958 paddr_t pf = atop(pa); 959 int off; 960 int psi; 961 962 psi = vm_physseg_find(pf, &off); 963 if (psi != -1) 964 return(&VM_PHYSMEM_PTR(psi)->pgs[off]); 965 return(NULL); 966 } 967 968 paddr_t 969 uvm_vm_page_to_phys(const struct vm_page *pg) 970 { 971 972 return pg->phys_addr; 973 } 974 975 /* 976 * uvm_page_recolor: Recolor the pages if the new bucket count is 977 * larger than the old one. 978 */ 979 980 void 981 uvm_page_recolor(int newncolors) 982 { 983 struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray; 984 struct pgfreelist gpgfl, pgfl; 985 struct vm_page *pg; 986 vsize_t bucketcount; 987 size_t bucketmemsize, oldbucketmemsize; 988 int lcv, color, i, ocolors; 989 struct uvm_cpu *ucpu; 990 991 KASSERT(((newncolors - 1) & newncolors) == 0); 992 993 if (newncolors <= uvmexp.ncolors) 994 return; 995 996 if (uvm.page_init_done == false) { 997 uvmexp.ncolors = newncolors; 998 return; 999 } 1000 1001 bucketcount = newncolors * VM_NFREELIST; 1002 bucketmemsize = bucketcount * sizeof(struct pgflbucket) * 2; 1003 bucketarray = kmem_alloc(bucketmemsize, KM_SLEEP); 1004 cpuarray = bucketarray + bucketcount; 1005 if (bucketarray == NULL) { 1006 printf("WARNING: unable to allocate %ld page color buckets\n", 1007 (long) bucketcount); 1008 return; 1009 } 1010 1011 mutex_spin_enter(&uvm_fpageqlock); 1012 1013 /* Make sure we should still do this. 
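 * (kmem_alloc() may have slept, and someone else could have recolored
 *  to >= newncolors in the meantime.)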
*/ 1014 if (newncolors <= uvmexp.ncolors) { 1015 mutex_spin_exit(&uvm_fpageqlock); 1016 kmem_free(bucketarray, bucketmemsize); 1017 return; 1018 } 1019 1020 oldbucketarray = uvm.page_free[0].pgfl_buckets; 1021 ocolors = uvmexp.ncolors; 1022 1023 uvmexp.ncolors = newncolors; 1024 uvmexp.colormask = uvmexp.ncolors - 1; 1025 1026 ucpu = curcpu()->ci_data.cpu_uvm; 1027 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1028 gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors)); 1029 pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors)); 1030 uvm_page_init_buckets(&gpgfl); 1031 uvm_page_init_buckets(&pgfl); 1032 for (color = 0; color < ocolors; color++) { 1033 for (i = 0; i < PGFL_NQUEUES; i++) { 1034 while ((pg = LIST_FIRST(&uvm.page_free[ 1035 lcv].pgfl_buckets[color].pgfl_queues[i])) 1036 != NULL) { 1037 LIST_REMOVE(pg, pageq.list); /* global */ 1038 LIST_REMOVE(pg, listq.list); /* cpu */ 1039 LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[ 1040 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[ 1041 i], pg, pageq.list); 1042 LIST_INSERT_HEAD(&pgfl.pgfl_buckets[ 1043 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[ 1044 i], pg, listq.list); 1045 } 1046 } 1047 } 1048 uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets; 1049 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets; 1050 } 1051 1052 oldbucketmemsize = recolored_pages_memsize; 1053 1054 recolored_pages_memsize = bucketmemsize; 1055 mutex_spin_exit(&uvm_fpageqlock); 1056 1057 if (oldbucketmemsize) { 1058 kmem_free(oldbucketarray, oldbucketmemsize); 1059 } 1060 1061 /* 1062 * this calls uvm_km_alloc() which may want to hold 1063 * uvm_fpageqlock. 1064 */ 1065 uvm_pager_realloc_emerg(); 1066 } 1067 1068 /* 1069 * uvm_cpu_attach: initialize per-CPU data structures. 1070 */ 1071 1072 void 1073 uvm_cpu_attach(struct cpu_info *ci) 1074 { 1075 struct pgflbucket *bucketarray; 1076 struct pgfreelist pgfl; 1077 struct uvm_cpu *ucpu; 1078 vsize_t bucketcount; 1079 int lcv; 1080 1081 if (CPU_IS_PRIMARY(ci)) { 1082 /* Already done in uvm_page_init(). */ 1083 goto attachrnd; 1084 } 1085 1086 /* Add more reserve pages for this CPU. */ 1087 uvmexp.reserve_kernel += vm_page_reserve_kernel; 1088 1089 /* Configure this CPU's free lists.
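 * Each secondary CPU gets its own bucket array here; the global free
 * lists and the boot CPU's buckets were set up back in uvm_page_init().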
*/ 1090 bucketcount = uvmexp.ncolors * VM_NFREELIST; 1091 bucketarray = kmem_alloc(bucketcount * sizeof(struct pgflbucket), 1092 KM_SLEEP); 1093 ucpu = kmem_zalloc(sizeof(*ucpu), KM_SLEEP); 1094 uvm.cpus[cpu_index(ci)] = ucpu; 1095 ci->ci_data.cpu_uvm = ucpu; 1096 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1097 pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors)); 1098 uvm_page_init_buckets(&pgfl); 1099 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets; 1100 } 1101 1102 attachrnd: 1103 /* 1104 * Attach RNG source for this CPU's VM events 1105 */ 1106 rnd_attach_source(&uvm.cpus[cpu_index(ci)]->rs, 1107 ci->ci_data.cpu_name, RND_TYPE_VM, 1108 RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE| 1109 RND_FLAG_ESTIMATE_VALUE); 1110 1111 } 1112 1113 /* 1114 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat 1115 */ 1116 1117 static struct vm_page * 1118 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2, 1119 int *trycolorp) 1120 { 1121 struct pgflist *freeq; 1122 struct vm_page *pg; 1123 int color, trycolor = *trycolorp; 1124 struct pgfreelist *gpgfl, *pgfl; 1125 1126 KASSERT(mutex_owned(&uvm_fpageqlock)); 1127 1128 color = trycolor; 1129 pgfl = &ucpu->page_free[flist]; 1130 gpgfl = &uvm.page_free[flist]; 1131 do { 1132 /* cpu, try1 */ 1133 if ((pg = LIST_FIRST((freeq = 1134 &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) { 1135 KASSERT(pg->pqflags & PQ_FREE); 1136 KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); 1137 KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); 1138 KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg)); 1139 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--; 1140 uvmexp.cpuhit++; 1141 goto gotit; 1142 } 1143 /* global, try1 */ 1144 if ((pg = LIST_FIRST((freeq = 1145 &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) { 1146 KASSERT(pg->pqflags & PQ_FREE); 1147 KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); 1148 KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); 1149 KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg)); 1150 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--; 1151 uvmexp.cpumiss++; 1152 goto gotit; 1153 } 1154 /* cpu, try2 */ 1155 if ((pg = LIST_FIRST((freeq = 1156 &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) { 1157 KASSERT(pg->pqflags & PQ_FREE); 1158 KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); 1159 KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); 1160 KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg)); 1161 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--; 1162 uvmexp.cpuhit++; 1163 goto gotit; 1164 } 1165 /* global, try2 */ 1166 if ((pg = LIST_FIRST((freeq = 1167 &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) { 1168 KASSERT(pg->pqflags & PQ_FREE); 1169 KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); 1170 KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); 1171 KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg)); 1172 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--; 1173 uvmexp.cpumiss++; 1174 goto gotit; 1175 } 1176 color = (color + 1) & uvmexp.colormask; 1177 } while (color != trycolor); 1178 1179 return (NULL); 1180 1181 gotit: 1182 LIST_REMOVE(pg, pageq.list); /* global list */ 1183 LIST_REMOVE(pg, listq.list); /* per-cpu list */ 1184 uvmexp.free--; 1185 1186 /* update zero'd page count */ 1187 if (pg->flags & PG_ZERO) 1188 uvmexp.zeropages--; 1189 1190 if (color == trycolor) 1191 uvmexp.colorhit++; 1192 else { 1193 uvmexp.colormiss++; 1194 *trycolorp = color; 1195 } 1196 1197 return (pg); 1198 } 1199 1200 /* 1201 * uvm_pagealloc_strat: allocate vm_page from a particular free list. 
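 *    (an illustrative call only -- most callers are assumed to go through
 *     the uvm_pagealloc() wrapper rather than call this directly, and the
 *     flags and wait-channel name below are examples, not fixed API:
 *
 *	pg = uvm_pagealloc_strat(uobj, off, NULL, UVM_PGA_ZERO,
 *	    UVM_PGA_STRAT_NORMAL, 0);
 *	if (pg == NULL) {
 *		mutex_exit(uobj->vmobjlock);
 *		uvm_wait("pgalloc");
 *		-- re-lock and retry --
 *	}
 *
 *     on success pg comes back PG_BUSY|PG_CLEAN|PG_FAKE and, since uobj
 *     was passed, already inserted into uobj at offset off.)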
1202 * 1203 * => return null if no pages free 1204 * => wake up pagedaemon if number of free pages drops below low water mark 1205 * => if obj != NULL, obj must be locked (to put in obj's tree) 1206 * => if anon != NULL, anon must be locked (to put in anon) 1207 * => only one of obj or anon can be non-null 1208 * => caller must activate/deactivate page if it is not wired. 1209 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 1210 * => policy decision: it is more important to pull a page off of the 1211 * appropriate priority free list than it is to get a zero'd or 1212 * unknown contents page. This is because we live with the 1213 * consequences of a bad free list decision for the entire 1214 * lifetime of the page, e.g. if the page comes from memory that 1215 * is slower to access. 1216 */ 1217 1218 struct vm_page * 1219 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, 1220 int flags, int strat, int free_list) 1221 { 1222 int lcv, try1, try2, zeroit = 0, color; 1223 struct uvm_cpu *ucpu; 1224 struct vm_page *pg; 1225 lwp_t *l; 1226 1227 KASSERT(obj == NULL || anon == NULL); 1228 KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); 1229 KASSERT(off == trunc_page(off)); 1230 KASSERT(obj == NULL || mutex_owned(obj->vmobjlock)); 1231 KASSERT(anon == NULL || anon->an_lock == NULL || 1232 mutex_owned(anon->an_lock)); 1233 1234 mutex_spin_enter(&uvm_fpageqlock); 1235 1236 /* 1237 * This implements a global round-robin page coloring 1238 * algorithm. 1239 */ 1240 1241 ucpu = curcpu()->ci_data.cpu_uvm; 1242 if (flags & UVM_FLAG_COLORMATCH) { 1243 color = atop(off) & uvmexp.colormask; 1244 } else { 1245 color = ucpu->page_free_nextcolor; 1246 } 1247 1248 /* 1249 * check to see if we need to generate some free pages waking 1250 * the pagedaemon. 1251 */ 1252 1253 uvm_kick_pdaemon(); 1254 1255 /* 1256 * fail if any of these conditions is true: 1257 * [1] there really are no free pages, or 1258 * [2] only kernel "reserved" pages remain and 1259 * reserved pages have not been requested. 1260 * [3] only pagedaemon "reserved" pages remain and 1261 * the requestor isn't the pagedaemon. 1262 * we make kernel reserve pages available if called by a 1263 * kernel thread or a realtime thread. 1264 */ 1265 l = curlwp; 1266 if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) { 1267 flags |= UVM_PGA_USERESERVE; 1268 } 1269 if ((uvmexp.free <= uvmexp.reserve_kernel && 1270 (flags & UVM_PGA_USERESERVE) == 0) || 1271 (uvmexp.free <= uvmexp.reserve_pagedaemon && 1272 curlwp != uvm.pagedaemon_lwp)) 1273 goto fail; 1274 1275 #if PGFL_NQUEUES != 2 1276 #error uvm_pagealloc_strat needs to be updated 1277 #endif 1278 1279 /* 1280 * If we want a zero'd page, try the ZEROS queue first, otherwise 1281 * we try the UNKNOWN queue first. 1282 */ 1283 if (flags & UVM_PGA_ZERO) { 1284 try1 = PGFL_ZEROS; 1285 try2 = PGFL_UNKNOWN; 1286 } else { 1287 try1 = PGFL_UNKNOWN; 1288 try2 = PGFL_ZEROS; 1289 } 1290 1291 again: 1292 switch (strat) { 1293 case UVM_PGA_STRAT_NORMAL: 1294 /* Check freelists: descending priority (ascending id) order */ 1295 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1296 pg = uvm_pagealloc_pgfl(ucpu, lcv, 1297 try1, try2, &color); 1298 if (pg != NULL) 1299 goto gotit; 1300 } 1301 1302 /* No pages free! */ 1303 goto fail; 1304 1305 case UVM_PGA_STRAT_ONLY: 1306 case UVM_PGA_STRAT_FALLBACK: 1307 /* Attempt to allocate from the specified free list. 
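 * (UVM_PGA_STRAT_ONLY fails outright if this list is exhausted, while
 *  UVM_PGA_STRAT_FALLBACK falls back to UVM_PGA_STRAT_NORMAL below.)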
*/ 1308 KASSERT(free_list >= 0 && free_list < VM_NFREELIST); 1309 pg = uvm_pagealloc_pgfl(ucpu, free_list, 1310 try1, try2, &color); 1311 if (pg != NULL) 1312 goto gotit; 1313 1314 /* Fall back, if possible. */ 1315 if (strat == UVM_PGA_STRAT_FALLBACK) { 1316 strat = UVM_PGA_STRAT_NORMAL; 1317 goto again; 1318 } 1319 1320 /* No pages free! */ 1321 goto fail; 1322 1323 default: 1324 panic("uvm_pagealloc_strat: bad strat %d", strat); 1325 /* NOTREACHED */ 1326 } 1327 1328 gotit: 1329 /* 1330 * We now know which color we actually allocated from; set 1331 * the next color accordingly. 1332 */ 1333 1334 ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask; 1335 1336 /* 1337 * update allocation statistics and remember if we have to 1338 * zero the page 1339 */ 1340 1341 if (flags & UVM_PGA_ZERO) { 1342 if (pg->flags & PG_ZERO) { 1343 uvmexp.pga_zerohit++; 1344 zeroit = 0; 1345 } else { 1346 uvmexp.pga_zeromiss++; 1347 zeroit = 1; 1348 } 1349 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) { 1350 ucpu->page_idle_zero = vm_page_zero_enable; 1351 } 1352 } 1353 KASSERT(pg->pqflags == PQ_FREE); 1354 1355 pg->offset = off; 1356 pg->uobject = obj; 1357 pg->uanon = anon; 1358 pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE; 1359 if (anon) { 1360 anon->an_page = pg; 1361 pg->pqflags = PQ_ANON; 1362 atomic_inc_uint(&uvmexp.anonpages); 1363 } else { 1364 if (obj) { 1365 uvm_pageinsert(obj, pg); 1366 } 1367 pg->pqflags = 0; 1368 } 1369 mutex_spin_exit(&uvm_fpageqlock); 1370 1371 #if defined(UVM_PAGE_TRKOWN) 1372 pg->owner_tag = NULL; 1373 #endif 1374 UVM_PAGE_OWN(pg, "new alloc"); 1375 1376 if (flags & UVM_PGA_ZERO) { 1377 /* 1378 * A zero'd page is not clean. If we got a page not already 1379 * zero'd, then we have to zero it ourselves. 1380 */ 1381 pg->flags &= ~PG_CLEAN; 1382 if (zeroit) 1383 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1384 } 1385 1386 return(pg); 1387 1388 fail: 1389 mutex_spin_exit(&uvm_fpageqlock); 1390 return (NULL); 1391 } 1392 1393 /* 1394 * uvm_pagereplace: replace a page with another 1395 * 1396 * => object must be locked 1397 */ 1398 1399 void 1400 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) 1401 { 1402 struct uvm_object *uobj = oldpg->uobject; 1403 1404 KASSERT((oldpg->flags & PG_TABLED) != 0); 1405 KASSERT(uobj != NULL); 1406 KASSERT((newpg->flags & PG_TABLED) == 0); 1407 KASSERT(newpg->uobject == NULL); 1408 KASSERT(mutex_owned(uobj->vmobjlock)); 1409 1410 newpg->uobject = uobj; 1411 newpg->offset = oldpg->offset; 1412 1413 uvm_pageremove_tree(uobj, oldpg); 1414 uvm_pageinsert_tree(uobj, newpg); 1415 uvm_pageinsert_list(uobj, newpg, oldpg); 1416 uvm_pageremove_list(uobj, oldpg); 1417 } 1418 1419 /* 1420 * uvm_pagerealloc: reallocate a page from one object to another 1421 * 1422 * => both objects must be locked 1423 */ 1424 1425 void 1426 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) 1427 { 1428 /* 1429 * remove it from the old object 1430 */ 1431 1432 if (pg->uobject) { 1433 uvm_pageremove(pg->uobject, pg); 1434 } 1435 1436 /* 1437 * put it in the new object 1438 */ 1439 1440 if (newobj) { 1441 pg->uobject = newobj; 1442 pg->offset = newoff; 1443 uvm_pageinsert(newobj, pg); 1444 } 1445 } 1446 1447 #ifdef DEBUG 1448 /* 1449 * check if page is zero-filled 1450 * 1451 * - called with free page queue lock held. 
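 * - maps the page read-only at uvm_zerocheckkva (KVA stolen in
 *   uvm_page_init()) and scans it word by word, panicking if a
 *   PG_ZERO page turns out not to be zero-filled.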
1452 */ 1453 void 1454 uvm_pagezerocheck(struct vm_page *pg) 1455 { 1456 int *p, *ep; 1457 1458 KASSERT(uvm_zerocheckkva != 0); 1459 KASSERT(mutex_owned(&uvm_fpageqlock)); 1460 1461 /* 1462 * XXX assuming pmap_kenter_pa and pmap_kremove never call 1463 * uvm page allocator. 1464 * 1465 * it might be better to have "CPU-local temporary map" pmap interface. 1466 */ 1467 pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0); 1468 p = (int *)uvm_zerocheckkva; 1469 ep = (int *)((char *)p + PAGE_SIZE); 1470 pmap_update(pmap_kernel()); 1471 while (p < ep) { 1472 if (*p != 0) 1473 panic("PG_ZERO page isn't zero-filled"); 1474 p++; 1475 } 1476 pmap_kremove(uvm_zerocheckkva, PAGE_SIZE); 1477 /* 1478 * pmap_update() is not necessary here because no one except us 1479 * uses this VA. 1480 */ 1481 } 1482 #endif /* DEBUG */ 1483 1484 /* 1485 * uvm_pagefree: free page 1486 * 1487 * => erase page's identity (i.e. remove from object) 1488 * => put page on free list 1489 * => caller must lock owning object (either anon or uvm_object) 1490 * => caller must lock page queues 1491 * => assumes all valid mappings of pg are gone 1492 */ 1493 1494 void 1495 uvm_pagefree(struct vm_page *pg) 1496 { 1497 struct pgflist *pgfl; 1498 struct uvm_cpu *ucpu; 1499 int index, color, queue; 1500 bool iszero; 1501 1502 #ifdef DEBUG 1503 if (pg->uobject == (void *)0xdeadbeef && 1504 pg->uanon == (void *)0xdeadbeef) { 1505 panic("uvm_pagefree: freeing free page %p", pg); 1506 } 1507 #endif /* DEBUG */ 1508 1509 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1510 KASSERT(!(pg->pqflags & PQ_FREE)); 1511 //KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg)); 1512 KASSERT(pg->uobject == NULL || mutex_owned(pg->uobject->vmobjlock)); 1513 KASSERT(pg->uobject != NULL || pg->uanon == NULL || 1514 mutex_owned(pg->uanon->an_lock)); 1515 1516 /* 1517 * if the page is loaned, resolve the loan instead of freeing. 1518 */ 1519 1520 if (pg->loan_count) { 1521 KASSERT(pg->wire_count == 0); 1522 1523 /* 1524 * if the page is owned by an anon then we just want to 1525 * drop anon ownership. the kernel will free the page when 1526 * it is done with it. if the page is owned by an object, 1527 * remove it from the object and mark it dirty for the benefit 1528 * of possible anon owners. 1529 * 1530 * regardless of previous ownership, wakeup any waiters, 1531 * unbusy the page, and we're done. 1532 */ 1533 1534 if (pg->uobject != NULL) { 1535 uvm_pageremove(pg->uobject, pg); 1536 pg->flags &= ~PG_CLEAN; 1537 } else if (pg->uanon != NULL) { 1538 if ((pg->pqflags & PQ_ANON) == 0) { 1539 pg->loan_count--; 1540 } else { 1541 pg->pqflags &= ~PQ_ANON; 1542 atomic_dec_uint(&uvmexp.anonpages); 1543 } 1544 pg->uanon->an_page = NULL; 1545 pg->uanon = NULL; 1546 } 1547 if (pg->flags & PG_WANTED) { 1548 wakeup(pg); 1549 } 1550 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1); 1551 #ifdef UVM_PAGE_TRKOWN 1552 pg->owner_tag = NULL; 1553 #endif 1554 if (pg->loan_count) { 1555 KASSERT(pg->uobject == NULL); 1556 if (pg->uanon == NULL) { 1557 KASSERT(mutex_owned(&uvm_pageqlock)); 1558 uvm_pagedequeue(pg); 1559 } 1560 return; 1561 } 1562 } 1563 1564 /* 1565 * remove page from its object or anon. 1566 */ 1567 1568 if (pg->uobject != NULL) { 1569 uvm_pageremove(pg->uobject, pg); 1570 } else if (pg->uanon != NULL) { 1571 pg->uanon->an_page = NULL; 1572 atomic_dec_uint(&uvmexp.anonpages); 1573 } 1574 1575 /* 1576 * now remove the page from the queues. 
1577 */ 1578 if (uvmpdpol_pageisqueued_p(pg)) { 1579 KASSERT(mutex_owned(&uvm_pageqlock)); 1580 uvm_pagedequeue(pg); 1581 } 1582 1583 /* 1584 * if the page was wired, unwire it now. 1585 */ 1586 1587 if (pg->wire_count) { 1588 pg->wire_count = 0; 1589 uvmexp.wired--; 1590 } 1591 1592 /* 1593 * and put on free queue 1594 */ 1595 1596 iszero = (pg->flags & PG_ZERO); 1597 index = uvm_page_lookup_freelist(pg); 1598 color = VM_PGCOLOR_BUCKET(pg); 1599 queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN); 1600 1601 #ifdef DEBUG 1602 pg->uobject = (void *)0xdeadbeef; 1603 pg->uanon = (void *)0xdeadbeef; 1604 #endif 1605 1606 mutex_spin_enter(&uvm_fpageqlock); 1607 pg->pqflags = PQ_FREE; 1608 1609 #ifdef DEBUG 1610 if (iszero) 1611 uvm_pagezerocheck(pg); 1612 #endif /* DEBUG */ 1613 1614 1615 /* global list */ 1616 pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue]; 1617 LIST_INSERT_HEAD(pgfl, pg, pageq.list); 1618 uvmexp.free++; 1619 if (iszero) { 1620 uvmexp.zeropages++; 1621 } 1622 1623 /* per-cpu list */ 1624 ucpu = curcpu()->ci_data.cpu_uvm; 1625 pg->offset = (uintptr_t)ucpu; 1626 pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue]; 1627 LIST_INSERT_HEAD(pgfl, pg, listq.list); 1628 ucpu->pages[queue]++; 1629 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) { 1630 ucpu->page_idle_zero = vm_page_zero_enable; 1631 } 1632 1633 mutex_spin_exit(&uvm_fpageqlock); 1634 } 1635 1636 /* 1637 * uvm_page_unbusy: unbusy an array of pages. 1638 * 1639 * => pages must either all belong to the same object, or all belong to anons. 1640 * => if pages are object-owned, object must be locked. 1641 * => if pages are anon-owned, anons must be locked. 1642 * => caller must lock page queues if pages may be released. 1643 * => caller must make sure that anon-owned pages are not PG_RELEASED. 1644 */ 1645 1646 void 1647 uvm_page_unbusy(struct vm_page **pgs, int npgs) 1648 { 1649 struct vm_page *pg; 1650 int i; 1651 UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist); 1652 1653 for (i = 0; i < npgs; i++) { 1654 pg = pgs[i]; 1655 if (pg == NULL || pg == PGO_DONTCARE) { 1656 continue; 1657 } 1658 1659 KASSERT(uvm_page_locked_p(pg)); 1660 KASSERT(pg->flags & PG_BUSY); 1661 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1662 if (pg->flags & PG_WANTED) { 1663 wakeup(pg); 1664 } 1665 if (pg->flags & PG_RELEASED) { 1666 UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0); 1667 KASSERT(pg->uobject != NULL || 1668 (pg->uanon != NULL && pg->uanon->an_ref > 0)); 1669 pg->flags &= ~PG_RELEASED; 1670 uvm_pagefree(pg); 1671 } else { 1672 UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0); 1673 KASSERT((pg->flags & PG_FAKE) == 0); 1674 pg->flags &= ~(PG_WANTED|PG_BUSY); 1675 UVM_PAGE_OWN(pg, NULL); 1676 } 1677 } 1678 } 1679 1680 #if defined(UVM_PAGE_TRKOWN) 1681 /* 1682 * uvm_page_own: set or release page ownership 1683 * 1684 * => this is a debugging function that keeps track of who sets PG_BUSY 1685 * and where they do it. it can be used to track down problems 1686 * such a process setting "PG_BUSY" and never releasing it. 1687 * => page's object [if any] must be locked 1688 * => if "tag" is NULL then we are releasing page ownership 1689 */ 1690 void 1691 uvm_page_own(struct vm_page *pg, const char *tag) 1692 { 1693 1694 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0); 1695 KASSERT((pg->flags & PG_WANTED) == 0); 1696 KASSERT(uvm_page_locked_p(pg)); 1697 1698 /* gain ownership? 
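 * (record the owning proc/lwp and the caller's tag; a busy page that
 *  already has an owner is a bug and we panic on it)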
*/ 1699 if (tag) { 1700 KASSERT((pg->flags & PG_BUSY) != 0); 1701 if (pg->owner_tag) { 1702 printf("uvm_page_own: page %p already owned " 1703 "by proc %d [%s]\n", pg, 1704 pg->owner, pg->owner_tag); 1705 panic("uvm_page_own"); 1706 } 1707 pg->owner = curproc->p_pid; 1708 pg->lowner = curlwp->l_lid; 1709 pg->owner_tag = tag; 1710 return; 1711 } 1712 1713 /* drop ownership */ 1714 KASSERT((pg->flags & PG_BUSY) == 0); 1715 if (pg->owner_tag == NULL) { 1716 printf("uvm_page_own: dropping ownership of an non-owned " 1717 "page (%p)\n", pg); 1718 panic("uvm_page_own"); 1719 } 1720 if (!uvmpdpol_pageisqueued_p(pg)) { 1721 KASSERT((pg->uanon == NULL && pg->uobject == NULL) || 1722 pg->wire_count > 0); 1723 } else { 1724 KASSERT(pg->wire_count == 0); 1725 } 1726 pg->owner_tag = NULL; 1727 } 1728 #endif 1729 1730 /* 1731 * uvm_pageidlezero: zero free pages while the system is idle. 1732 * 1733 * => try to complete one color bucket at a time, to reduce our impact 1734 * on the CPU cache. 1735 * => we loop until we either reach the target or there is a lwp ready 1736 * to run, or MD code detects a reason to break early. 1737 */ 1738 void 1739 uvm_pageidlezero(void) 1740 { 1741 struct vm_page *pg; 1742 struct pgfreelist *pgfl, *gpgfl; 1743 struct uvm_cpu *ucpu; 1744 int free_list, firstbucket, nextbucket; 1745 bool lcont = false; 1746 1747 ucpu = curcpu()->ci_data.cpu_uvm; 1748 if (!ucpu->page_idle_zero || 1749 ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) { 1750 ucpu->page_idle_zero = false; 1751 return; 1752 } 1753 if (!mutex_tryenter(&uvm_fpageqlock)) { 1754 /* Contention: let other CPUs to use the lock. */ 1755 return; 1756 } 1757 firstbucket = ucpu->page_free_nextcolor; 1758 nextbucket = firstbucket; 1759 do { 1760 for (free_list = 0; free_list < VM_NFREELIST; free_list++) { 1761 if (sched_curcpu_runnable_p()) { 1762 goto quit; 1763 } 1764 pgfl = &ucpu->page_free[free_list]; 1765 gpgfl = &uvm.page_free[free_list]; 1766 while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[ 1767 nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) { 1768 if (lcont || sched_curcpu_runnable_p()) { 1769 goto quit; 1770 } 1771 LIST_REMOVE(pg, pageq.list); /* global list */ 1772 LIST_REMOVE(pg, listq.list); /* per-cpu list */ 1773 ucpu->pages[PGFL_UNKNOWN]--; 1774 uvmexp.free--; 1775 KASSERT(pg->pqflags == PQ_FREE); 1776 pg->pqflags = 0; 1777 mutex_spin_exit(&uvm_fpageqlock); 1778 #ifdef PMAP_PAGEIDLEZERO 1779 if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) { 1780 1781 /* 1782 * The machine-dependent code detected 1783 * some reason for us to abort zeroing 1784 * pages, probably because there is a 1785 * process now ready to run. 
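 * Put the page back on the PGFL_UNKNOWN queues and give up for now.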
1786 */ 1787 1788 mutex_spin_enter(&uvm_fpageqlock); 1789 pg->pqflags = PQ_FREE; 1790 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[ 1791 nextbucket].pgfl_queues[ 1792 PGFL_UNKNOWN], pg, pageq.list); 1793 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[ 1794 nextbucket].pgfl_queues[ 1795 PGFL_UNKNOWN], pg, listq.list); 1796 ucpu->pages[PGFL_UNKNOWN]++; 1797 uvmexp.free++; 1798 uvmexp.zeroaborts++; 1799 goto quit; 1800 } 1801 #else 1802 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1803 #endif /* PMAP_PAGEIDLEZERO */ 1804 pg->flags |= PG_ZERO; 1805 1806 if (!mutex_tryenter(&uvm_fpageqlock)) { 1807 lcont = true; 1808 mutex_spin_enter(&uvm_fpageqlock); 1809 } else { 1810 lcont = false; 1811 } 1812 pg->pqflags = PQ_FREE; 1813 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[ 1814 nextbucket].pgfl_queues[PGFL_ZEROS], 1815 pg, pageq.list); 1816 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[ 1817 nextbucket].pgfl_queues[PGFL_ZEROS], 1818 pg, listq.list); 1819 ucpu->pages[PGFL_ZEROS]++; 1820 uvmexp.free++; 1821 uvmexp.zeropages++; 1822 } 1823 } 1824 if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) { 1825 break; 1826 } 1827 nextbucket = (nextbucket + 1) & uvmexp.colormask; 1828 } while (nextbucket != firstbucket); 1829 ucpu->page_idle_zero = false; 1830 quit: 1831 mutex_spin_exit(&uvm_fpageqlock); 1832 } 1833 1834 /* 1835 * uvm_pagelookup: look up a page 1836 * 1837 * => caller should lock object to keep someone from pulling the page 1838 * out from under it 1839 */ 1840 1841 struct vm_page * 1842 uvm_pagelookup(struct uvm_object *obj, voff_t off) 1843 { 1844 struct vm_page *pg; 1845 1846 KASSERT(mutex_owned(obj->vmobjlock)); 1847 1848 pg = rb_tree_find_node(&obj->rb_tree, &off); 1849 1850 KASSERT(pg == NULL || obj->uo_npages != 0); 1851 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 || 1852 (pg->flags & PG_BUSY) != 0); 1853 return pg; 1854 } 1855 1856 /* 1857 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp 1858 * 1859 * => caller must lock page queues 1860 */ 1861 1862 void 1863 uvm_pagewire(struct vm_page *pg) 1864 { 1865 KASSERT(mutex_owned(&uvm_pageqlock)); 1866 #if defined(READAHEAD_STATS) 1867 if ((pg->pqflags & PQ_READAHEAD) != 0) { 1868 uvm_ra_hit.ev_count++; 1869 pg->pqflags &= ~PQ_READAHEAD; 1870 } 1871 #endif /* defined(READAHEAD_STATS) */ 1872 if (pg->wire_count == 0) { 1873 uvm_pagedequeue(pg); 1874 uvmexp.wired++; 1875 } 1876 pg->wire_count++; 1877 } 1878 1879 /* 1880 * uvm_pageunwire: unwire the page. 1881 * 1882 * => activate if wire count goes to zero. 
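 * => (illustrative) each uvm_pagewire() is expected to be balanced by a
 *    matching uvm_pageunwire(), both called with uvm_pageqlock held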
1883 * => caller must lock page queues 1884 */ 1885 1886 void 1887 uvm_pageunwire(struct vm_page *pg) 1888 { 1889 KASSERT(mutex_owned(&uvm_pageqlock)); 1890 pg->wire_count--; 1891 if (pg->wire_count == 0) { 1892 uvm_pageactivate(pg); 1893 uvmexp.wired--; 1894 } 1895 } 1896 1897 /* 1898 * uvm_pagedeactivate: deactivate page 1899 * 1900 * => caller must lock page queues 1901 * => caller must check to make sure page is not wired 1902 * => object that page belongs to must be locked (so we can adjust pg->flags) 1903 * => caller must clear the reference on the page before calling 1904 */ 1905 1906 void 1907 uvm_pagedeactivate(struct vm_page *pg) 1908 { 1909 1910 KASSERT(mutex_owned(&uvm_pageqlock)); 1911 KASSERT(uvm_page_locked_p(pg)); 1912 KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg)); 1913 uvmpdpol_pagedeactivate(pg); 1914 } 1915 1916 /* 1917 * uvm_pageactivate: activate page 1918 * 1919 * => caller must lock page queues 1920 */ 1921 1922 void 1923 uvm_pageactivate(struct vm_page *pg) 1924 { 1925 1926 KASSERT(mutex_owned(&uvm_pageqlock)); 1927 KASSERT(uvm_page_locked_p(pg)); 1928 #if defined(READAHEAD_STATS) 1929 if ((pg->pqflags & PQ_READAHEAD) != 0) { 1930 uvm_ra_hit.ev_count++; 1931 pg->pqflags &= ~PQ_READAHEAD; 1932 } 1933 #endif /* defined(READAHEAD_STATS) */ 1934 if (pg->wire_count != 0) { 1935 return; 1936 } 1937 uvmpdpol_pageactivate(pg); 1938 } 1939 1940 /* 1941 * uvm_pagedequeue: remove a page from any paging queue 1942 */ 1943 1944 void 1945 uvm_pagedequeue(struct vm_page *pg) 1946 { 1947 1948 if (uvmpdpol_pageisqueued_p(pg)) { 1949 KASSERT(mutex_owned(&uvm_pageqlock)); 1950 } 1951 1952 uvmpdpol_pagedequeue(pg); 1953 } 1954 1955 /* 1956 * uvm_pageenqueue: add a page to a paging queue without activating. 1957 * used where a page is not really demanded (yet). eg. read-ahead 1958 */ 1959 1960 void 1961 uvm_pageenqueue(struct vm_page *pg) 1962 { 1963 1964 KASSERT(mutex_owned(&uvm_pageqlock)); 1965 if (pg->wire_count != 0) { 1966 return; 1967 } 1968 uvmpdpol_pageenqueue(pg); 1969 } 1970 1971 /* 1972 * uvm_pagezero: zero fill a page 1973 * 1974 * => if page is part of an object then the object should be locked 1975 * to protect pg->flags. 1976 */ 1977 1978 void 1979 uvm_pagezero(struct vm_page *pg) 1980 { 1981 pg->flags &= ~PG_CLEAN; 1982 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1983 } 1984 1985 /* 1986 * uvm_pagecopy: copy a page 1987 * 1988 * => if page is part of an object then the object should be locked 1989 * to protect pg->flags. 1990 */ 1991 1992 void 1993 uvm_pagecopy(struct vm_page *src, struct vm_page *dst) 1994 { 1995 1996 dst->flags &= ~PG_CLEAN; 1997 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); 1998 } 1999 2000 /* 2001 * uvm_pageismanaged: test it see that a page (specified by PA) is managed. 2002 */ 2003 2004 bool 2005 uvm_pageismanaged(paddr_t pa) 2006 { 2007 2008 return (vm_physseg_find(atop(pa), NULL) != -1); 2009 } 2010 2011 /* 2012 * uvm_page_lookup_freelist: look up the free list for the specified page 2013 */ 2014 2015 int 2016 uvm_page_lookup_freelist(struct vm_page *pg) 2017 { 2018 int lcv; 2019 2020 lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL); 2021 KASSERT(lcv != -1); 2022 return (VM_PHYSMEM_PTR(lcv)->free_list); 2023 } 2024 2025 /* 2026 * uvm_page_locked_p: return true if object associated with page is 2027 * locked. this is a weak check for runtime assertions only. 
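 * typical use, as elsewhere in this file: KASSERT(uvm_page_locked_p(pg));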
2028 */ 2029 2030 bool 2031 uvm_page_locked_p(struct vm_page *pg) 2032 { 2033 2034 if (pg->uobject != NULL) { 2035 return mutex_owned(pg->uobject->vmobjlock); 2036 } 2037 if (pg->uanon != NULL) { 2038 return mutex_owned(pg->uanon->an_lock); 2039 } 2040 return true; 2041 } 2042 2043 #if defined(DDB) || defined(DEBUGPRINT) 2044 2045 /* 2046 * uvm_page_printit: actually print the page 2047 */ 2048 2049 static const char page_flagbits[] = UVM_PGFLAGBITS; 2050 static const char page_pqflagbits[] = UVM_PQFLAGBITS; 2051 2052 void 2053 uvm_page_printit(struct vm_page *pg, bool full, 2054 void (*pr)(const char *, ...)) 2055 { 2056 struct vm_page *tpg; 2057 struct uvm_object *uobj; 2058 struct pgflist *pgl; 2059 char pgbuf[128]; 2060 char pqbuf[128]; 2061 2062 (*pr)("PAGE %p:\n", pg); 2063 snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); 2064 snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags); 2065 (*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n", 2066 pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg)); 2067 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 2068 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 2069 #if defined(UVM_PAGE_TRKOWN) 2070 if (pg->flags & PG_BUSY) 2071 (*pr)(" owning process = %d, tag=%s\n", 2072 pg->owner, pg->owner_tag); 2073 else 2074 (*pr)(" page not busy, no owner\n"); 2075 #else 2076 (*pr)(" [page ownership tracking disabled]\n"); 2077 #endif 2078 2079 if (!full) 2080 return; 2081 2082 /* cross-verify object/anon */ 2083 if ((pg->pqflags & PQ_FREE) == 0) { 2084 if (pg->pqflags & PQ_ANON) { 2085 if (pg->uanon == NULL || pg->uanon->an_page != pg) 2086 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 2087 (pg->uanon) ? pg->uanon->an_page : NULL); 2088 else 2089 (*pr)(" anon backpointer is OK\n"); 2090 } else { 2091 uobj = pg->uobject; 2092 if (uobj) { 2093 (*pr)(" checking object list\n"); 2094 TAILQ_FOREACH(tpg, &uobj->memq, listq.queue) { 2095 if (tpg == pg) { 2096 break; 2097 } 2098 } 2099 if (tpg) 2100 (*pr)(" page found on object list\n"); 2101 else 2102 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); 2103 } 2104 } 2105 } 2106 2107 /* cross-verify page queue */ 2108 if (pg->pqflags & PQ_FREE) { 2109 int fl = uvm_page_lookup_freelist(pg); 2110 int color = VM_PGCOLOR_BUCKET(pg); 2111 pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[ 2112 ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN]; 2113 } else { 2114 pgl = NULL; 2115 } 2116 2117 if (pgl) { 2118 (*pr)(" checking pageq list\n"); 2119 LIST_FOREACH(tpg, pgl, pageq.list) { 2120 if (tpg == pg) { 2121 break; 2122 } 2123 } 2124 if (tpg) 2125 (*pr)(" page found on pageq list\n"); 2126 else 2127 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 2128 } 2129 } 2130 2131 /* 2132 * uvm_pages_printthem - print a summary of all managed pages 2133 */ 2134 2135 void 2136 uvm_page_printall(void (*pr)(const char *, ...)) 2137 { 2138 unsigned i; 2139 struct vm_page *pg; 2140 2141 (*pr)("%18s %4s %4s %18s %18s" 2142 #ifdef UVM_PAGE_TRKOWN 2143 " OWNER" 2144 #endif 2145 "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON"); 2146 for (i = 0; i < vm_nphysmem; i++) { 2147 for (pg = VM_PHYSMEM_PTR(i)->pgs; pg < VM_PHYSMEM_PTR(i)->lastpg; pg++) { 2148 (*pr)("%18p %04x %04x %18p %18p", 2149 pg, pg->flags, pg->pqflags, pg->uobject, 2150 pg->uanon); 2151 #ifdef UVM_PAGE_TRKOWN 2152 if (pg->flags & PG_BUSY) 2153 (*pr)(" %d [%s]", pg->owner, pg->owner_tag); 2154 #endif 2155 (*pr)("\n"); 2156 } 2157 } 2158 } 2159 2160 #endif /* DDB || DEBUGPRINT */ 2161