/*	$NetBSD: uvm_page.c,v 1.141 2008/12/13 11:34:43 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.141 2008/12/13 11:34:43 ad Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sched.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

/*
 * global vars... XXXCDC: move to uvm. structure.
 */

/*
 * physical memory config is stored in vm_physmem.
 */

struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */

/*
 * Some supported CPUs in a given architecture don't support all
 * of the things necessary to do idle page zero'ing efficiently.
 * We therefore provide a way to disable it from machdep code here.
 */
/*
 * XXX disabled until we can find a way to do this without causing
 * problems for either CPU caches or DMA latency.
 */
bool vm_page_zero_enable = false;

/*
 * number of pages per-CPU to reserve for the kernel.
 */
int vm_page_reserve_kernel = 5;

/*
 * local variables
 */

/*
 * these variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of uvm_pageboot_alloc
 * and pmap_startup here also uses them internally.
 */

static vaddr_t      virtual_space_start;
static vaddr_t      virtual_space_end;

/*
 * we allocate an initial number of page colors in uvm_page_init(),
 * and remember them.  We may re-color pages as cache sizes are
 * discovered during the autoconfiguration phase.  But we can never
 * free the initial set of buckets, since they are allocated using
 * uvm_pageboot_alloc().
 */

static bool have_recolored_pages /* = false */;

MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");

#ifdef DEBUG
vaddr_t uvm_zerocheckkva;
#endif /* DEBUG */

/*
 * local prototypes
 */

static void uvm_pageinsert(struct vm_page *);
static void uvm_pageremove(struct vm_page *);

/*
 * per-object tree of pages
 */

static signed int
uvm_page_compare_nodes(const struct rb_node *n1, const struct rb_node *n2)
{
	const struct vm_page *pg1 = (const void *)n1;
	const struct vm_page *pg2 = (const void *)n2;
	const voff_t a = pg1->offset;
	const voff_t b = pg2->offset;

	if (a < b)
		return 1;
	if (a > b)
		return -1;
	return 0;
}

static signed int
uvm_page_compare_key(const struct rb_node *n, const void *key)
{
	const struct vm_page *pg = (const void *)n;
	const voff_t a = pg->offset;
	const voff_t b = *(const voff_t *)key;

	if (a < b)
		return 1;
	if (a > b)
		return -1;
	return 0;
}

const struct rb_tree_ops uvm_page_tree_ops = {
	.rbto_compare_nodes = uvm_page_compare_nodes,
	.rbto_compare_key = uvm_page_compare_key,
};

/*
 * inline functions
 */

/*
 * uvm_pageinsert: insert a page in the object.
 *
 * => caller must lock object
 * => caller must lock page queues
 * => call should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

static inline void
uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page *where)
{

	KASSERT(uobj == pg->uobject);
	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT((pg->flags & PG_TABLED) == 0);
	KASSERT(where == NULL || (where->flags & PG_TABLED));
	KASSERT(where == NULL || (where->uobject == uobj));

	if (UVM_OBJ_IS_VNODE(uobj)) {
		if (uobj->uo_npages == 0) {
			struct vnode *vp = (struct vnode *)uobj;

			vholdl(vp);
		}
		if (UVM_OBJ_IS_VTEXT(uobj)) {
			atomic_inc_uint(&uvmexp.execpages);
		} else {
			atomic_inc_uint(&uvmexp.filepages);
		}
	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
		atomic_inc_uint(&uvmexp.anonpages);
	}

	if (where)
		TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue);
	else
		TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
	pg->flags |= PG_TABLED;
	uobj->uo_npages++;
}


static inline void
uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)
{
	bool success;

	KASSERT(uobj == pg->uobject);
	success = rb_tree_insert_node(&uobj->rb_tree, &pg->rb_node);
	KASSERT(success);
}

static inline void
uvm_pageinsert(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	uvm_pageinsert_tree(uobj, pg);
	uvm_pageinsert_list(uobj, pg, NULL);
}

/*
 * uvm_page_remove: remove page from object.
 *
 * => caller must lock object
 * => caller must lock page queues
 */

static inline void
uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg)
{

	KASSERT(uobj == pg->uobject);
	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT(pg->flags & PG_TABLED);

	if (UVM_OBJ_IS_VNODE(uobj)) {
		if (uobj->uo_npages == 1) {
			struct vnode *vp = (struct vnode *)uobj;

			holdrelel(vp);
		}
		if (UVM_OBJ_IS_VTEXT(uobj)) {
			atomic_dec_uint(&uvmexp.execpages);
		} else {
			atomic_dec_uint(&uvmexp.filepages);
		}
	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
		atomic_dec_uint(&uvmexp.anonpages);
	}

	/* object should be locked */
	uobj->uo_npages--;
	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	pg->flags &= ~PG_TABLED;
	pg->uobject = NULL;
}

static inline void
uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
{

	KASSERT(uobj == pg->uobject);
	rb_tree_remove_node(&uobj->rb_tree, &pg->rb_node);
}

static inline void
uvm_pageremove(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	uvm_pageremove_tree(uobj, pg);
	uvm_pageremove_list(uobj, pg);
}

static void
uvm_page_init_buckets(struct pgfreelist *pgfl)
{
	int color, i;

	for (color = 0; color < uvmexp.ncolors; color++) {
		for (i = 0; i < PGFL_NQUEUES; i++) {
			LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
		}
	}
}

/*
 * uvm_page_init: init the page system.   called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 */

void
uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
{
	vsize_t freepages, pagecount, bucketcount, n;
	struct pgflbucket *bucketarray, *cpuarray;
	struct vm_page *pagearray;
	int lcv;
	u_int i;
	paddr_t paddr;

	KASSERT(ncpu <= 1);
	CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *));

	/*
	 * init the page queues and page queue locks, except the free
	 * list; we allocate that later (with the initial vm_page
	 * structures).
	 */

	curcpu()->ci_data.cpu_uvm = &uvm.cpus[0];
	uvmpdpol_init();
	mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE);
	mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);

	/*
	 * allocate vm_page structures.
	 */

	/*
	 * sanity check:
	 * before calling this function the MD code is expected to register
	 * some free RAM with the uvm_page_physload() function.   our job
	 * now is to allocate vm_page structures for this memory.
	 */

	if (vm_nphysseg == 0)
		panic("uvm_page_bootstrap: no memory pre-allocated");

	/*
	 * first calculate the number of free pages...
	 *
	 * note that we use start/end rather than avail_start/avail_end.
	 * this allows us to allocate extra vm_page structures in case we
	 * want to return some memory to the pool after booting.
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);

	/*
	 * Let MD code initialize the number of colors, or default
	 * to 1 color if MD code doesn't care.
	 */
	if (uvmexp.ncolors == 0)
		uvmexp.ncolors = 1;
	uvmexp.colormask = uvmexp.ncolors - 1;

	/*
	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
	 * use.   for each page of memory we use we need a vm_page structure.
	 * thus, the total number of pages we can use is the total size of
	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
	 * structure.   we add one to freepages as a fudge factor to avoid
	 * truncation errors (since we can only allocate in terms of whole
	 * pages).
	 */

	bucketcount = uvmexp.ncolors * VM_NFREELIST;
	pagecount = ((freepages + 1) << PAGE_SHIFT) /
	    (PAGE_SIZE + sizeof(struct vm_page));

	bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
	    sizeof(struct pgflbucket) * 2) + (pagecount *
	    sizeof(struct vm_page)));
	cpuarray = bucketarray + bucketcount;
	pagearray = (struct vm_page *)(bucketarray + bucketcount * 2);

	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		uvm.page_free[lcv].pgfl_buckets =
		    (bucketarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&uvm.page_free[lcv]);
		uvm.cpus[0].page_free[lcv].pgfl_buckets =
		    (cpuarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&uvm.cpus[0].page_free[lcv]);
	}
	memset(pagearray, 0, pagecount * sizeof(struct vm_page));

	/*
	 * init the vm_page structures and put them in the correct place.
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		n = vm_physmem[lcv].end - vm_physmem[lcv].start;

		/* set up page array pointers */
		vm_physmem[lcv].pgs = pagearray;
		pagearray += n;
		pagecount -= n;
		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);

		/* init and free vm_pages (we've already zeroed them) */
		paddr = ptoa(vm_physmem[lcv].start);
		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
			vm_physmem[lcv].pgs[i].phys_addr = paddr;
#ifdef __HAVE_VM_PAGE_MD
			VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
#endif
			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
			    atop(paddr) <= vm_physmem[lcv].avail_end) {
				uvmexp.npages++;
				/* add page to free pool */
				uvm_pagefree(&vm_physmem[lcv].pgs[i]);
			}
		}
	}

	/*
	 * pass up the values of virtual_space_start and
	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
	 * layers of the VM.
	 */

	*kvm_startp = round_page(virtual_space_start);
	*kvm_endp = trunc_page(virtual_space_end);
#ifdef DEBUG
	/*
	 * steal kva for uvm_pagezerocheck().
	 */
	uvm_zerocheckkva = *kvm_startp;
	*kvm_startp += PAGE_SIZE;
#endif /* DEBUG */

	/*
	 * init various thresholds.
	 */

	uvmexp.reserve_pagedaemon = 1;
	uvmexp.reserve_kernel = vm_page_reserve_kernel;

	/*
	 * determine if we should zero pages in the idle loop.
	 */

	uvm.cpus[0].page_idle_zero = vm_page_zero_enable;

	/*
	 * done!
	 */

	uvm.page_init_done = true;
}

/*
 * uvm_setpagesize: set the page size
 *
 * => sets page_shift and page_mask from uvmexp.pagesize.
 */

void
uvm_setpagesize(void)
{

	/*
	 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
	 * to be a constant (indicated by being a non-zero value).
	 */
	if (uvmexp.pagesize == 0) {
		if (PAGE_SIZE == 0)
			panic("uvm_setpagesize: uvmexp.pagesize not set");
		uvmexp.pagesize = PAGE_SIZE;
	}
	uvmexp.pagemask = uvmexp.pagesize - 1;
	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
		panic("uvm_setpagesize: page size not a power of two");
	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
			break;
}

/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
 */

vaddr_t
uvm_pageboot_alloc(vsize_t size)
{
	static bool initialized = false;
	vaddr_t addr;
#if !defined(PMAP_STEAL_MEMORY)
	vaddr_t vaddr;
	paddr_t paddr;
#endif

	/*
	 * on first call to this function, initialize ourselves.
	 */
	if (initialized == false) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/* round it the way we like it */
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);

		initialized = true;
	}

	/* round to page size */
	size = round_page(size);

#if defined(PMAP_STEAL_MEMORY)

	/*
	 * defer bootstrap allocation to MD code (it may want to allocate
	 * from a direct-mapped segment).  pmap_steal_memory should adjust
	 * virtual_space_start/virtual_space_end if necessary.
	 */

	addr = pmap_steal_memory(size, &virtual_space_start,
	    &virtual_space_end);

	return(addr);

#else /* !PMAP_STEAL_MEMORY */

	/*
	 * allocate virtual memory for this request
	 */
	if (virtual_space_start == virtual_space_end ||
	    (virtual_space_end - virtual_space_start) < size)
		panic("uvm_pageboot_alloc: out of virtual space");

	addr = virtual_space_start;

#ifdef PMAP_GROWKERNEL
	/*
	 * If the kernel pmap can't map the requested space,
	 * then allocate more resources for it.
	 */
	if (uvm_maxkaddr < (addr + size)) {
		uvm_maxkaddr = pmap_growkernel(addr + size);
		if (uvm_maxkaddr < (addr + size))
			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
	}
#endif

	virtual_space_start += size;

	/*
	 * allocate and mapin physical pages to back new virtual pages
	 */

	for (vaddr = round_page(addr) ; vaddr < addr + size ;
	    vaddr += PAGE_SIZE) {

		if (!uvm_page_physget(&paddr))
			panic("uvm_pageboot_alloc: out of memory");

		/*
		 * Note this memory is no longer managed, so using
		 * pmap_kenter is safe.
		 */
		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
	}
	pmap_update(pmap_kernel());
	return(addr);
#endif /* PMAP_STEAL_MEMORY */
}

#if !defined(PMAP_STEAL_MEMORY)
/*
 * uvm_page_physget: "steal" one page from the vm_physmem structure.
 *
 * => attempt to allocate it off the end of a segment in which the "avail"
 *    values match the start/end values.   if we can't do that, then we
 *    will advance both values (making them equal, and removing some
 *    vm_page structures from the non-avail area).
 * => return false if out of memory.
 */

/* subroutine: try to allocate from memory chunks on the specified freelist */
static bool uvm_page_physget_freelist(paddr_t *, int);

static bool
uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
{
	int lcv, x;

	/* pass 1: try allocating from a matching end */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		if (uvm.page_init_done == true)
			panic("uvm_page_physget: called _after_ bootstrap");

		if (vm_physmem[lcv].free_list != freelist)
			continue;

		/* try from front */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_start);
			vm_physmem[lcv].avail_start++;
			vm_physmem[lcv].start++;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_start ==
			    vm_physmem[lcv].end) {
				if (vm_nphysseg == 1)
					panic("uvm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (true);
		}

		/* try from rear */
		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
			vm_physmem[lcv].avail_end--;
			vm_physmem[lcv].end--;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_end ==
			    vm_physmem[lcv].start) {
				if (vm_nphysseg == 1)
					panic("uvm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (true);
		}
	}

	/* pass2: forget about matching ends, just allocate something */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		/* any room in this bank? */
		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
			continue;  /* nope */

		*paddrp = ptoa(vm_physmem[lcv].avail_start);
		vm_physmem[lcv].avail_start++;
		/* truncate! */
		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;

		/* nothing left?   nuke it */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
			if (vm_nphysseg == 1)
				panic("uvm_page_physget: out of memory!");
			vm_nphysseg--;
			for (x = lcv ; x < vm_nphysseg ; x++)
				/* structure copy */
				vm_physmem[x] = vm_physmem[x+1];
		}
		return (true);
	}

	return (false);	/* whoops! */
}

bool
uvm_page_physget(paddr_t *paddrp)
{
	int i;

	/* try in the order of freelist preference */
	for (i = 0; i < VM_NFREELIST; i++)
		if (uvm_page_physget_freelist(paddrp, i) == true)
			return (true);
	return (false);
}
#endif /* PMAP_STEAL_MEMORY */

/*
 * uvm_page_physload: load physical memory into VM system
 *
 * => all args are PFs
 * => all pages in start/end get vm_page structures
 * => areas marked by avail_start/avail_end get added to the free page pool
 * => we are limited to VM_PHYSSEG_MAX physical memory segments
 */

void
uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
    paddr_t avail_end, int free_list)
{
	int preload, lcv;
	psize_t npages;
	struct vm_page *pgs;
	struct vm_physseg *ps;

	if (uvmexp.pagesize == 0)
		panic("uvm_page_physload: page size not set!");
	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
		panic("uvm_page_physload: bad free list %d", free_list);
	if (start >= end)
		panic("uvm_page_physload: start >= end");

	/*
	 * do we have room?
	 */

	if (vm_nphysseg == VM_PHYSSEG_MAX) {
		printf("uvm_page_physload: unable to load physical memory "
		    "segment\n");
		printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
		    VM_PHYSSEG_MAX, (long long)start, (long long)end);
		printf("\tincrease VM_PHYSSEG_MAX\n");
		return;
	}

	/*
	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
	 * called yet, so malloc is not available).
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		if (vm_physmem[lcv].pgs)
			break;
	}
	preload = (lcv == vm_nphysseg);

	/*
	 * if VM is already running, attempt to malloc() vm_page structures
	 */

	if (!preload) {
#if defined(VM_PHYSSEG_NOADD)
		panic("uvm_page_physload: tried to add RAM after vm_mem_init");
#else
		/* XXXCDC: need some sort of lockout for this case */
		paddr_t paddr;
		npages = end - start;  /* # of pages */
		pgs = malloc(sizeof(struct vm_page) * npages,
		    M_VMPAGE, M_NOWAIT);
		if (pgs == NULL) {
			printf("uvm_page_physload: can not malloc vm_page "
			    "structs for segment\n");
			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
			return;
		}
		/* zero data, init phys_addr and free_list, and free pages */
		memset(pgs, 0, sizeof(struct vm_page) * npages);
		for (lcv = 0, paddr = ptoa(start) ;
		    lcv < npages ; lcv++, paddr += PAGE_SIZE) {
			pgs[lcv].phys_addr = paddr;
			pgs[lcv].free_list = free_list;
			if (atop(paddr) >= avail_start &&
			    atop(paddr) <= avail_end)
				uvm_pagefree(&pgs[lcv]);
		}
		/* XXXCDC: incomplete: need to update uvmexp.free, what else? */
		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
#endif
	} else {
		pgs = NULL;
		npages = 0;
	}

	/*
	 * now insert us in the proper place in vm_physmem[]
	 */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
	/* random: put it at the end (easy!) */
	ps = &vm_physmem[vm_nphysseg];
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
	{
		int x;
		/* sort by address for binary search */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if (start < vm_physmem[lcv].start)
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	{
		int x;
		/* sort by largest segment first */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if ((end - start) >
			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}
#else
	panic("uvm_page_physload: unknown physseg strategy selected!");
#endif

	ps->start = start;
	ps->end = end;
	ps->avail_start = avail_start;
	ps->avail_end = avail_end;
	if (preload) {
		ps->pgs = NULL;
	} else {
		ps->pgs = pgs;
		ps->lastpg = pgs + npages - 1;
	}
	ps->free_list = free_list;
	vm_nphysseg++;

	if (!preload) {
		uvmpdpol_reinit();
	}
}

/*
 * uvm_page_recolor: Recolor the pages if the new bucket count is
 * larger than the old one.
 */

void
uvm_page_recolor(int newncolors)
{
	struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray;
	struct pgfreelist gpgfl, pgfl;
	struct vm_page *pg;
	vsize_t bucketcount;
	int lcv, color, i, ocolors;
	struct uvm_cpu *ucpu;

	if (newncolors <= uvmexp.ncolors)
		return;

	if (uvm.page_init_done == false) {
		uvmexp.ncolors = newncolors;
		return;
	}

	bucketcount = newncolors * VM_NFREELIST;
	bucketarray = malloc(bucketcount * sizeof(struct pgflbucket) * 2,
	    M_VMPAGE, M_NOWAIT);
	cpuarray = bucketarray + bucketcount;
	if (bucketarray == NULL) {
		printf("WARNING: unable to allocate %ld page color buckets\n",
		    (long) bucketcount);
		return;
	}

	mutex_spin_enter(&uvm_fpageqlock);

	/* Make sure we should still do this. */
	if (newncolors <= uvmexp.ncolors) {
		mutex_spin_exit(&uvm_fpageqlock);
		free(bucketarray, M_VMPAGE);
		return;
	}

	oldbucketarray = uvm.page_free[0].pgfl_buckets;
	ocolors = uvmexp.ncolors;

	uvmexp.ncolors = newncolors;
	uvmexp.colormask = uvmexp.ncolors - 1;

	ucpu = curcpu()->ci_data.cpu_uvm;
	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
		pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&gpgfl);
		uvm_page_init_buckets(&pgfl);
		for (color = 0; color < ocolors; color++) {
			for (i = 0; i < PGFL_NQUEUES; i++) {
				while ((pg = LIST_FIRST(&uvm.page_free[
				    lcv].pgfl_buckets[color].pgfl_queues[i]))
				    != NULL) {
					LIST_REMOVE(pg, pageq.list); /* global */
					LIST_REMOVE(pg, listq.list); /* cpu */
					LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[
					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
					    i], pg, pageq.list);
					LIST_INSERT_HEAD(&pgfl.pgfl_buckets[
					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
					    i], pg, listq.list);
				}
			}
		}
		uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets;
		ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
	}

	if (have_recolored_pages) {
		mutex_spin_exit(&uvm_fpageqlock);
		free(oldbucketarray, M_VMPAGE);
		return;
	}

	have_recolored_pages = true;
	mutex_spin_exit(&uvm_fpageqlock);
}

/*
 * uvm_cpu_attach: initialize per-CPU data structures.
 */

void
uvm_cpu_attach(struct cpu_info *ci)
{
	struct pgflbucket *bucketarray;
	struct pgfreelist pgfl;
	struct uvm_cpu *ucpu;
	vsize_t bucketcount;
	int lcv;

	if (CPU_IS_PRIMARY(ci)) {
		/* Already done in uvm_page_init(). */
		return;
	}

	/* Add more reserve pages for this CPU. */
	uvmexp.reserve_kernel += vm_page_reserve_kernel;

	/* Configure this CPU's free lists. */
	bucketcount = uvmexp.ncolors * VM_NFREELIST;
	bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
	    M_VMPAGE, M_WAITOK);
	ucpu = &uvm.cpus[cpu_index(ci)];
	ci->ci_data.cpu_uvm = ucpu;
	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&pgfl);
		ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
	}
}

/*
 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
 */

static struct vm_page *
uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2,
    int *trycolorp)
{
	struct pgflist *freeq;
	struct vm_page *pg;
	int color, trycolor = *trycolorp;
	struct pgfreelist *gpgfl, *pgfl;

	KASSERT(mutex_owned(&uvm_fpageqlock));

	color = trycolor;
	pgfl = &ucpu->page_free[flist];
	gpgfl = &uvm.page_free[flist];
	do {
		/* cpu, try1 */
		if ((pg = LIST_FIRST((freeq =
		    &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
			VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
			uvmexp.cpuhit++;
			goto gotit;
		}
		/* global, try1 */
		if ((pg = LIST_FIRST((freeq =
		    &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
			VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
			uvmexp.cpumiss++;
			goto gotit;
		}
		/* cpu, try2 */
		if ((pg = LIST_FIRST((freeq =
		    &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
			VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
			uvmexp.cpuhit++;
			goto gotit;
		}
		/* global, try2 */
		if ((pg = LIST_FIRST((freeq =
		    &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
			VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
			uvmexp.cpumiss++;
			goto gotit;
		}
		color = (color + 1) & uvmexp.colormask;
	} while (color != trycolor);

	return (NULL);

 gotit:
	LIST_REMOVE(pg, pageq.list);	/* global list */
	LIST_REMOVE(pg, listq.list);	/* per-cpu list */
	uvmexp.free--;

	/* update zero'd page count */
	if (pg->flags & PG_ZERO)
		uvmexp.zeropages--;

	if (color == trycolor)
		uvmexp.colorhit++;
	else {
		uvmexp.colormiss++;
		*trycolorp = color;
	}

	return (pg);
}

/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in obj's tree)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 * => policy decision: it is more important to pull a page off of the
 *    appropriate priority free list than it is to get a zero'd or
 *    unknown contents page.  This is because we live with the
 *    consequences of a bad free list decision for the entire
 *    lifetime of the page, e.g. if the page comes from memory that
 *    is slower to access.
 */

struct vm_page *
uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
    int flags, int strat, int free_list)
{
	int lcv, try1, try2, zeroit = 0, color;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;
	lwp_t *l;

	KASSERT(obj == NULL || anon == NULL);
	KASSERT(anon == NULL || off == 0);
	KASSERT(off == trunc_page(off));
	KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock));
	KASSERT(anon == NULL || mutex_owned(&anon->an_lock));

	mutex_spin_enter(&uvm_fpageqlock);

	/*
	 * This implements a global round-robin page coloring
	 * algorithm.
	 *
	 * XXXJRT: What about virtually-indexed caches?
	 */

	ucpu = curcpu()->ci_data.cpu_uvm;
	color = ucpu->page_free_nextcolor;

	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();

	/*
	 * fail if any of these conditions is true:
	 * [1]  there really are no free pages, or
	 * [2]  only kernel "reserved" pages remain and
	 *        reserved pages have not been requested.
	 * [3]  only pagedaemon "reserved" pages remain and
	 *        the requestor isn't the pagedaemon.
	 * we make kernel reserve pages available if called by a
	 * kernel thread or a realtime thread.
	 */
	l = curlwp;
	if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
		flags |= UVM_PGA_USERESERVE;
	}
	if ((uvmexp.free <= uvmexp.reserve_kernel &&
	    (flags & UVM_PGA_USERESERVE) == 0) ||
	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
	    curlwp != uvm.pagedaemon_lwp))
		goto fail;

#if PGFL_NQUEUES != 2
#error uvm_pagealloc_strat needs to be updated
#endif

	/*
	 * If we want a zero'd page, try the ZEROS queue first, otherwise
	 * we try the UNKNOWN queue first.
	 */
	if (flags & UVM_PGA_ZERO) {
		try1 = PGFL_ZEROS;
		try2 = PGFL_UNKNOWN;
	} else {
		try1 = PGFL_UNKNOWN;
		try2 = PGFL_ZEROS;
	}

 again:
	switch (strat) {
	case UVM_PGA_STRAT_NORMAL:
		/* Check all freelists in descending priority order. */
		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
			pg = uvm_pagealloc_pgfl(ucpu, lcv,
			    try1, try2, &color);
			if (pg != NULL)
				goto gotit;
		}

		/* No pages free! */
		goto fail;

	case UVM_PGA_STRAT_ONLY:
	case UVM_PGA_STRAT_FALLBACK:
		/* Attempt to allocate from the specified free list. */
		KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
		pg = uvm_pagealloc_pgfl(ucpu, free_list,
		    try1, try2, &color);
		if (pg != NULL)
			goto gotit;

		/* Fall back, if possible. */
		if (strat == UVM_PGA_STRAT_FALLBACK) {
			strat = UVM_PGA_STRAT_NORMAL;
			goto again;
		}

		/* No pages free! */
		goto fail;

	default:
		panic("uvm_pagealloc_strat: bad strat %d", strat);
		/* NOTREACHED */
	}

 gotit:
	/*
	 * We now know which color we actually allocated from; set
	 * the next color accordingly.
	 */

	ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask;

	/*
	 * update allocation statistics and remember if we have to
	 * zero the page
	 */

	if (flags & UVM_PGA_ZERO) {
		if (pg->flags & PG_ZERO) {
			uvmexp.pga_zerohit++;
			zeroit = 0;
		} else {
			uvmexp.pga_zeromiss++;
			zeroit = 1;
		}
		if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
			ucpu->page_idle_zero = vm_page_zero_enable;
		}
	}
	mutex_spin_exit(&uvm_fpageqlock);

	pg->offset = off;
	pg->uobject = obj;
	pg->uanon = anon;
	pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
	if (anon) {
		anon->an_page = pg;
		pg->pqflags = PQ_ANON;
		atomic_inc_uint(&uvmexp.anonpages);
	} else {
		if (obj) {
			uvm_pageinsert(pg);
		}
		pg->pqflags = 0;
	}
#if defined(UVM_PAGE_TRKOWN)
	pg->owner_tag = NULL;
#endif
	UVM_PAGE_OWN(pg, "new alloc");

	if (flags & UVM_PGA_ZERO) {
		/*
		 * A zero'd page is not clean.  If we got a page not already
		 * zero'd, then we have to zero it ourselves.
		 */
		pg->flags &= ~PG_CLEAN;
		if (zeroit)
			pmap_zero_page(VM_PAGE_TO_PHYS(pg));
	}

	return(pg);

 fail:
	mutex_spin_exit(&uvm_fpageqlock);
	return (NULL);
}

/*
 * uvm_pagereplace: replace a page with another
 *
 * => object must be locked
 */

void
uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
{
	struct uvm_object *uobj = oldpg->uobject;

	KASSERT((oldpg->flags & PG_TABLED) != 0);
	KASSERT(uobj != NULL);
	KASSERT((newpg->flags & PG_TABLED) == 0);
	KASSERT(newpg->uobject == NULL);
	KASSERT(mutex_owned(&uobj->vmobjlock));

	newpg->uobject = uobj;
	newpg->offset = oldpg->offset;

	uvm_pageremove_tree(uobj, oldpg);
	uvm_pageinsert_tree(uobj, newpg);
	uvm_pageinsert_list(uobj, newpg, oldpg);
	uvm_pageremove_list(uobj, oldpg);
}

/*
 * uvm_pagerealloc: reallocate a page from one object to another
 *
 * => both objects must be locked
 */

void
uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
{
	/*
	 * remove it from the old object
	 */

	if (pg->uobject) {
		uvm_pageremove(pg);
	}

	/*
	 * put it in the new object
	 */

	if (newobj) {
		pg->uobject = newobj;
		pg->offset = newoff;
		uvm_pageinsert(pg);
	}
}

#ifdef DEBUG
/*
 * check if page is zero-filled
 *
 *  - called with free page queue lock held.
 */
void
uvm_pagezerocheck(struct vm_page *pg)
{
	int *p, *ep;

	KASSERT(uvm_zerocheckkva != 0);
	KASSERT(mutex_owned(&uvm_fpageqlock));

	/*
	 * XXX assuming pmap_kenter_pa and pmap_kremove never call
	 * uvm page allocator.
	 *
	 * it might be better to have "CPU-local temporary map" pmap interface.
	 */
	pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
	p = (int *)uvm_zerocheckkva;
	ep = (int *)((char *)p + PAGE_SIZE);
	pmap_update(pmap_kernel());
	while (p < ep) {
		if (*p != 0)
			panic("PG_ZERO page isn't zero-filled");
		p++;
	}
	pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
	/*
	 * pmap_update() is not necessary here because no one except us
	 * uses this VA.
	 */
}
#endif /* DEBUG */

/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
 */

void
uvm_pagefree(struct vm_page *pg)
{
	struct pgflist *pgfl;
	struct uvm_cpu *ucpu;
	int index, color, queue;
	bool iszero;

#ifdef DEBUG
	if (pg->uobject == (void *)0xdeadbeef &&
	    pg->uanon == (void *)0xdeadbeef) {
		panic("uvm_pagefree: freeing free page %p", pg);
	}
#endif /* DEBUG */

	KASSERT((pg->flags & PG_PAGEOUT) == 0);
	KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg));
	KASSERT(pg->uobject == NULL || mutex_owned(&pg->uobject->vmobjlock));
	KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
	    mutex_owned(&pg->uanon->an_lock));

	/*
	 * if the page is loaned, resolve the loan instead of freeing.
	 */

	if (pg->loan_count) {
		KASSERT(pg->wire_count == 0);

		/*
		 * if the page is owned by an anon then we just want to
		 * drop anon ownership.  the kernel will free the page when
		 * it is done with it.  if the page is owned by an object,
		 * remove it from the object and mark it dirty for the benefit
		 * of possible anon owners.
		 *
		 * regardless of previous ownership, wakeup any waiters,
		 * unbusy the page, and we're done.
		 */

		if (pg->uobject != NULL) {
			uvm_pageremove(pg);
			pg->flags &= ~PG_CLEAN;
		} else if (pg->uanon != NULL) {
			if ((pg->pqflags & PQ_ANON) == 0) {
				pg->loan_count--;
			} else {
				pg->pqflags &= ~PQ_ANON;
				atomic_dec_uint(&uvmexp.anonpages);
			}
			pg->uanon->an_page = NULL;
			pg->uanon = NULL;
		}
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
#ifdef UVM_PAGE_TRKOWN
		pg->owner_tag = NULL;
#endif
		if (pg->loan_count) {
			KASSERT(pg->uobject == NULL);
			if (pg->uanon == NULL) {
				uvm_pagedequeue(pg);
			}
			return;
		}
	}

	/*
	 * remove page from its object or anon.
	 */

	if (pg->uobject != NULL) {
		uvm_pageremove(pg);
	} else if (pg->uanon != NULL) {
		pg->uanon->an_page = NULL;
		atomic_dec_uint(&uvmexp.anonpages);
	}

	/*
	 * now remove the page from the queues.
	 */

	uvm_pagedequeue(pg);

	/*
	 * if the page was wired, unwire it now.
	 */

	if (pg->wire_count) {
		pg->wire_count = 0;
		uvmexp.wired--;
	}

	/*
	 * and put on free queue
	 */

	iszero = (pg->flags & PG_ZERO);
	index = uvm_page_lookup_freelist(pg);
	color = VM_PGCOLOR_BUCKET(pg);
	queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN);

	pg->pqflags = PQ_FREE;
#ifdef DEBUG
	pg->uobject = (void *)0xdeadbeef;
	pg->uanon = (void *)0xdeadbeef;
#endif

	mutex_spin_enter(&uvm_fpageqlock);

#ifdef DEBUG
	if (iszero)
		uvm_pagezerocheck(pg);
#endif /* DEBUG */


	/* global list */
	pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue];
	LIST_INSERT_HEAD(pgfl, pg, pageq.list);
	uvmexp.free++;
	if (iszero) {
		uvmexp.zeropages++;
	}

	/* per-cpu list */
	ucpu = curcpu()->ci_data.cpu_uvm;
	pg->offset = (uintptr_t)ucpu;
	pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue];
	LIST_INSERT_HEAD(pgfl, pg, listq.list);
	ucpu->pages[queue]++;
	if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
		ucpu->page_idle_zero = vm_page_zero_enable;
	}

	mutex_spin_exit(&uvm_fpageqlock);
}

/*
 * uvm_page_unbusy: unbusy an array of pages.
 *
 * => pages must either all belong to the same object, or all belong to anons.
 * => if pages are object-owned, object must be locked.
 * => if pages are anon-owned, anons must be locked.
 * => caller must lock page queues if pages may be released.
 * => caller must make sure that anon-owned pages are not PG_RELEASED.
 */

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;
	UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}

		KASSERT(pg->uobject == NULL ||
		    mutex_owned(&pg->uobject->vmobjlock));
		KASSERT(pg->uobject != NULL ||
		    (pg->uanon != NULL && mutex_owned(&pg->uanon->an_lock)));

		KASSERT(pg->flags & PG_BUSY);
		KASSERT((pg->flags & PG_PAGEOUT) == 0);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		if (pg->flags & PG_RELEASED) {
			UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
			KASSERT(pg->uobject != NULL ||
			    (pg->uanon != NULL && pg->uanon->an_ref > 0));
			pg->flags &= ~PG_RELEASED;
			uvm_pagefree(pg);
		} else {
			UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
	}
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.   it can be used to track down problems
 *	such a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(struct vm_page *pg, const char *tag)
{
	struct uvm_object *uobj;
	struct vm_anon *anon;

	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);

	uobj = pg->uobject;
	anon = pg->uanon;
	if (uobj != NULL) {
		KASSERT(mutex_owned(&uobj->vmobjlock));
	} else if (anon != NULL) {
		KASSERT(mutex_owned(&anon->an_lock));
	}

	KASSERT((pg->flags & PG_WANTED) == 0);

	/* gain ownership? */
	if (tag) {
		KASSERT((pg->flags & PG_BUSY) != 0);
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d [%s]\n", pg,
			    pg->owner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
		pg->lowner = (curlwp) ? curlwp->l_lid : (lwpid_t) -1;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	KASSERT((pg->flags & PG_BUSY) == 0);
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of an non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
		    pg->wire_count > 0);
	} else {
		KASSERT(pg->wire_count == 0);
	}
	pg->owner_tag = NULL;
}
#endif

/*
 * uvm_pageidlezero: zero free pages while the system is idle.
 *
 * => try to complete one color bucket at a time, to reduce our impact
 *	on the CPU cache.
 * => we loop until we either reach the target or there is a lwp ready
 *	to run, or MD code detects a reason to break early.
 */
void
uvm_pageidlezero(void)
{
	struct vm_page *pg;
	struct pgfreelist *pgfl, *gpgfl;
	struct uvm_cpu *ucpu;
	int free_list, firstbucket, nextbucket;

	ucpu = curcpu()->ci_data.cpu_uvm;
	if (!ucpu->page_idle_zero ||
	    ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
		ucpu->page_idle_zero = false;
		return;
	}
	mutex_enter(&uvm_fpageqlock);
	firstbucket = ucpu->page_free_nextcolor;
	nextbucket = firstbucket;
	do {
		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
			if (sched_curcpu_runnable_p()) {
				goto quit;
			}
			pgfl = &ucpu->page_free[free_list];
			gpgfl = &uvm.page_free[free_list];
			while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[
			    nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
				if (sched_curcpu_runnable_p()) {
					goto quit;
				}
				LIST_REMOVE(pg, pageq.list); /* global list */
				LIST_REMOVE(pg, listq.list); /* per-cpu list */
				ucpu->pages[PGFL_UNKNOWN]--;
				uvmexp.free--;
				mutex_spin_exit(&uvm_fpageqlock);
#ifdef PMAP_PAGEIDLEZERO
				if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {

					/*
					 * The machine-dependent code detected
					 * some reason for us to abort zeroing
					 * pages, probably because there is a
					 * process now ready to run.
					 */

					mutex_spin_enter(&uvm_fpageqlock);
					LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
					    nextbucket].pgfl_queues[
					    PGFL_UNKNOWN], pg, pageq.list);
					LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
					    nextbucket].pgfl_queues[
					    PGFL_UNKNOWN], pg, listq.list);
					ucpu->pages[PGFL_UNKNOWN]++;
					uvmexp.free++;
					uvmexp.zeroaborts++;
					goto quit;
				}
#else
				pmap_zero_page(VM_PAGE_TO_PHYS(pg));
#endif /* PMAP_PAGEIDLEZERO */
				pg->flags |= PG_ZERO;

				mutex_spin_enter(&uvm_fpageqlock);
				LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
				    nextbucket].pgfl_queues[PGFL_ZEROS],
				    pg, pageq.list);
				LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
				    nextbucket].pgfl_queues[PGFL_ZEROS],
				    pg, listq.list);
				ucpu->pages[PGFL_ZEROS]++;
				uvmexp.free++;
				uvmexp.zeropages++;
			}
		}
		if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
			break;
		}
		nextbucket = (nextbucket + 1) & uvmexp.colormask;
	} while (nextbucket != firstbucket);
	ucpu->page_idle_zero = false;
 quit:
	mutex_spin_exit(&uvm_fpageqlock);
}

/*
 * uvm_pagelookup: look up a page
 *
 * => caller should lock object to keep someone from pulling the page
 *	out from under it
 */

struct vm_page *
uvm_pagelookup(struct uvm_object *obj, voff_t off)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(&obj->vmobjlock));

	pg = (struct vm_page *)rb_tree_find_node(&obj->rb_tree, &off);

	KASSERT(pg == NULL || obj->uo_npages != 0);
	KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
	    (pg->flags & PG_BUSY) != 0);
	return(pg);
}

/*
 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
 *
 * => caller must lock page queues
 */

void
uvm_pagewire(struct vm_page *pg)
{
	KASSERT(mutex_owned(&uvm_pageqlock));
#if defined(READAHEAD_STATS)
	if ((pg->pqflags & PQ_READAHEAD) != 0) {
		uvm_ra_hit.ev_count++;
		pg->pqflags &= ~PQ_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count == 0) {
		uvm_pagedequeue(pg);
		uvmexp.wired++;
	}
	pg->wire_count++;
}

/*
 * uvm_pageunwire: unwire the page.
 *
 * => activate if wire count goes to zero.
 * => caller must lock page queues
 */

void
uvm_pageunwire(struct vm_page *pg)
{
	KASSERT(mutex_owned(&uvm_pageqlock));
	pg->wire_count--;
	if (pg->wire_count == 0) {
		uvm_pageactivate(pg);
		uvmexp.wired--;
	}
}

/*
 * uvm_pagedeactivate: deactivate page
 *
 * => caller must lock page queues
 * => caller must check to make sure page is not wired
 * => object that page belongs to must be locked (so we can adjust pg->flags)
 * => caller must clear the reference on the page before calling
 */

void
uvm_pagedeactivate(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
	KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
	uvmpdpol_pagedeactivate(pg);
}

/*
 * uvm_pageactivate: activate page
 *
 * => caller must lock page queues
 */

void
uvm_pageactivate(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
#if defined(READAHEAD_STATS)
	if ((pg->pqflags & PQ_READAHEAD) != 0) {
		uvm_ra_hit.ev_count++;
		pg->pqflags &= ~PQ_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count != 0) {
		return;
	}
	uvmpdpol_pageactivate(pg);
}

/*
 * uvm_pagedequeue: remove a page from any paging queue
 */

void
uvm_pagedequeue(struct vm_page *pg)
{

	if (uvmpdpol_pageisqueued_p(pg)) {
		KASSERT(mutex_owned(&uvm_pageqlock));
	}

	uvmpdpol_pagedequeue(pg);
}

/*
 * uvm_pageenqueue: add a page to a paging queue without activating.
 * used where a page is not really demanded (yet).  eg. read-ahead
 */

void
uvm_pageenqueue(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
	if (pg->wire_count != 0) {
		return;
	}
	uvmpdpol_pageenqueue(pg);
}

/*
 * uvm_pagezero: zero fill a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagezero(struct vm_page *pg)
{
	pg->flags &= ~PG_CLEAN;
	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
}

/*
 * uvm_pagecopy: copy a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
{

	dst->flags &= ~PG_CLEAN;
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}

/*
 * uvm_page_lookup_freelist: look up the free list for the specified page
 */

int
uvm_page_lookup_freelist(struct vm_page *pg)
{
	int lcv;

	lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
	KASSERT(lcv != -1);
	return (vm_physmem[lcv].free_list);
}