/*	$NetBSD: uvm_page.c,v 1.153 2010/01/27 03:56:33 uebayasi Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.153 2010/01/27 03:56:33 uebayasi Exp $");

#include "opt_ddb.h"
#include "opt_uvmhist.h"
#include "opt_readahead.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sched.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>
#include <uvm/uvm_pdpolicy.h>

/*
 * global vars... XXXCDC: move to uvm. structure.
 */

/*
 * physical memory config is stored in vm_physmem.
 */

struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */

/*
 * Some supported CPUs in a given architecture don't support all
 * of the things necessary to do idle page zero'ing efficiently.
 * We therefore provide a way to disable it from machdep code here.
 */
/*
 * XXX disabled until we can find a way to do this without causing
 * problems for either CPU caches or DMA latency.
 */
bool vm_page_zero_enable = false;

/*
 * number of pages per-CPU to reserve for the kernel.
 */
int vm_page_reserve_kernel = 5;

/*
 * physical memory size;
 */
int physmem;

/*
 * local variables
 */

/*
 * these variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of uvm_pageboot_alloc
 * and pmap_startup here also uses them internally.
 */

static vaddr_t virtual_space_start;
static vaddr_t virtual_space_end;

/*
 * we allocate an initial number of page colors in uvm_page_init(),
 * and remember them.  We may re-color pages as cache sizes are
 * discovered during the autoconfiguration phase.  But we can never
 * free the initial set of buckets, since they are allocated using
 * uvm_pageboot_alloc().
 */

static bool have_recolored_pages /* = false */;

MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");

#ifdef DEBUG
vaddr_t uvm_zerocheckkva;
#endif /* DEBUG */

/*
 * local prototypes
 */

static void uvm_pageinsert(struct uvm_object *, struct vm_page *);
static void uvm_pageremove(struct uvm_object *, struct vm_page *);

/*
 * per-object tree of pages
 */

static signed int
uvm_page_compare_nodes(const struct rb_node *n1, const struct rb_node *n2)
{
	const struct vm_page *pg1 = (const void *)n1;
	const struct vm_page *pg2 = (const void *)n2;
	const voff_t a = pg1->offset;
	const voff_t b = pg2->offset;

	if (a < b)
		return 1;
	if (a > b)
		return -1;
	return 0;
}

static signed int
uvm_page_compare_key(const struct rb_node *n, const void *key)
{
	const struct vm_page *pg = (const void *)n;
	const voff_t a = pg->offset;
	const voff_t b = *(const voff_t *)key;

	if (a < b)
		return 1;
	if (a > b)
		return -1;
	return 0;
}

const struct rb_tree_ops uvm_page_tree_ops = {
	.rbto_compare_nodes = uvm_page_compare_nodes,
	.rbto_compare_key = uvm_page_compare_key,
};
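
/*
 * Illustrative note (not part of the original file): the comparators above
 * key the per-object tree on pg->offset, so a page can be looked up by
 * offset the same way uvm_pagelookup() does later in this file.  A minimal
 * sketch, assuming a locked object "uobj" and an offset "off" supplied by
 * the caller:
 *
 *	struct vm_page *pg;
 *
 *	pg = (struct vm_page *)rb_tree_find_node(&uobj->rb_tree, &off);
 */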

/*
 * inline functions
 */

/*
 * uvm_pageinsert: insert a page in the object.
 *
 * => caller must lock object
 * => caller must lock page queues
 * => caller should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

static inline void
uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page *where)
{

	KASSERT(uobj == pg->uobject);
	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT((pg->flags & PG_TABLED) == 0);
	KASSERT(where == NULL || (where->flags & PG_TABLED));
	KASSERT(where == NULL || (where->uobject == uobj));

	if (UVM_OBJ_IS_VNODE(uobj)) {
		if (uobj->uo_npages == 0) {
			struct vnode *vp = (struct vnode *)uobj;

			vholdl(vp);
		}
		if (UVM_OBJ_IS_VTEXT(uobj)) {
			atomic_inc_uint(&uvmexp.execpages);
		} else {
			atomic_inc_uint(&uvmexp.filepages);
		}
	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
		atomic_inc_uint(&uvmexp.anonpages);
	}

	if (where)
		TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue);
	else
		TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
	pg->flags |= PG_TABLED;
	uobj->uo_npages++;
}

static inline void
uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)
{
	bool success;

	KASSERT(uobj == pg->uobject);
	success = rb_tree_insert_node(&uobj->rb_tree, &pg->rb_node);
	KASSERT(success);
}

static inline void
uvm_pageinsert(struct uvm_object *uobj, struct vm_page *pg)
{

	KDASSERT(uobj != NULL);
	uvm_pageinsert_tree(uobj, pg);
	uvm_pageinsert_list(uobj, pg, NULL);
}

/*
 * uvm_page_remove: remove page from object.
 *
 * => caller must lock object
 * => caller must lock page queues
 */

static inline void
uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg)
{

	KASSERT(uobj == pg->uobject);
	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT(pg->flags & PG_TABLED);

	if (UVM_OBJ_IS_VNODE(uobj)) {
		if (uobj->uo_npages == 1) {
			struct vnode *vp = (struct vnode *)uobj;

			holdrelel(vp);
		}
		if (UVM_OBJ_IS_VTEXT(uobj)) {
			atomic_dec_uint(&uvmexp.execpages);
		} else {
			atomic_dec_uint(&uvmexp.filepages);
		}
	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
		atomic_dec_uint(&uvmexp.anonpages);
	}

	/* object should be locked */
	uobj->uo_npages--;
	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
	pg->flags &= ~PG_TABLED;
	pg->uobject = NULL;
}

static inline void
uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
{

	KASSERT(uobj == pg->uobject);
	rb_tree_remove_node(&uobj->rb_tree, &pg->rb_node);
}

static inline void
uvm_pageremove(struct uvm_object *uobj, struct vm_page *pg)
{

	KDASSERT(uobj != NULL);
	uvm_pageremove_tree(uobj, pg);
	uvm_pageremove_list(uobj, pg);
}

static void
uvm_page_init_buckets(struct pgfreelist *pgfl)
{
	int color, i;

	for (color = 0; color < uvmexp.ncolors; color++) {
		for (i = 0; i < PGFL_NQUEUES; i++) {
			LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
		}
	}
}

/*
 * uvm_page_init: init the page system.  called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 */

void
uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
{
	vsize_t freepages, pagecount, bucketcount, n;
	struct pgflbucket *bucketarray, *cpuarray;
	struct vm_page *pagearray;
	int lcv;
	u_int i;
	paddr_t paddr;

	KASSERT(ncpu <= 1);
	CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *));

	/*
	 * init the page queues and page queue locks, except the free
	 * list; we allocate that later (with the initial vm_page
	 * structures).
	 */

	curcpu()->ci_data.cpu_uvm = &uvm.cpus[0];
	uvm_reclaim_init();
	uvmpdpol_init();
	mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE);
	mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);

	/*
	 * allocate vm_page structures.
	 */

	/*
	 * sanity check:
	 * before calling this function the MD code is expected to register
	 * some free RAM with the uvm_page_physload() function.  our job
	 * now is to allocate vm_page structures for this memory.
	 */

	if (vm_nphysseg == 0)
		panic("uvm_page_bootstrap: no memory pre-allocated");

	/*
	 * first calculate the number of free pages...
	 *
	 * note that we use start/end rather than avail_start/avail_end.
	 * this allows us to allocate extra vm_page structures in case we
	 * want to return some memory to the pool after booting.
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);

	/*
	 * Let MD code initialize the number of colors, or default
	 * to 1 color if MD code doesn't care.
	 */
	if (uvmexp.ncolors == 0)
		uvmexp.ncolors = 1;
	uvmexp.colormask = uvmexp.ncolors - 1;

	/*
	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
	 * use.  for each page of memory we use we need a vm_page structure.
	 * thus, the total number of pages we can use is the total size of
	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
	 * structure.  we add one to freepages as a fudge factor to avoid
	 * truncation errors (since we can only allocate in terms of whole
	 * pages).
	 */

	bucketcount = uvmexp.ncolors * VM_NFREELIST;
	pagecount = ((freepages + 1) << PAGE_SHIFT) /
	    (PAGE_SIZE + sizeof(struct vm_page));

	bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
	    sizeof(struct pgflbucket) * 2) + (pagecount *
	    sizeof(struct vm_page)));
	cpuarray = bucketarray + bucketcount;
	pagearray = (struct vm_page *)(bucketarray + bucketcount * 2);

	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		uvm.page_free[lcv].pgfl_buckets =
		    (bucketarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&uvm.page_free[lcv]);
		uvm.cpus[0].page_free[lcv].pgfl_buckets =
		    (cpuarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&uvm.cpus[0].page_free[lcv]);
	}
	memset(pagearray, 0, pagecount * sizeof(struct vm_page));
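
	/*
	 * Worked illustration of the pagecount computation above (all
	 * figures are assumptions for illustration only): with
	 * PAGE_SIZE == 4096, sizeof(struct vm_page) == 120 and
	 * freepages == 262144 (1GB of RAM), pagecount is
	 * (262145 << 12) / (4096 + 120), i.e. roughly 254,683 vm_page
	 * structures carved out of the stolen boot memory.
	 */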

	/*
	 * init the vm_page structures and put them in the correct place.
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		n = vm_physmem[lcv].end - vm_physmem[lcv].start;

		/* set up page array pointers */
		vm_physmem[lcv].pgs = pagearray;
		pagearray += n;
		pagecount -= n;
		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);

		/* init and free vm_pages (we've already zeroed them) */
		paddr = ptoa(vm_physmem[lcv].start);
		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
			vm_physmem[lcv].pgs[i].phys_addr = paddr;
#ifdef __HAVE_VM_PAGE_MD
			VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
#endif
			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
			    atop(paddr) <= vm_physmem[lcv].avail_end) {
				uvmexp.npages++;
				/* add page to free pool */
				uvm_pagefree(&vm_physmem[lcv].pgs[i]);
			}
		}
	}

	/*
	 * pass up the values of virtual_space_start and
	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
	 * layers of the VM.
	 */

	*kvm_startp = round_page(virtual_space_start);
	*kvm_endp = trunc_page(virtual_space_end);
#ifdef DEBUG
	/*
	 * steal kva for uvm_pagezerocheck().
	 */
	uvm_zerocheckkva = *kvm_startp;
	*kvm_startp += PAGE_SIZE;
#endif /* DEBUG */

	/*
	 * init various thresholds.
	 */

	uvmexp.reserve_pagedaemon = 1;
	uvmexp.reserve_kernel = vm_page_reserve_kernel;

	/*
	 * determine if we should zero pages in the idle loop.
	 */

	uvm.cpus[0].page_idle_zero = vm_page_zero_enable;

	/*
	 * done!
	 */

	uvm.page_init_done = true;
}

/*
 * uvm_setpagesize: set the page size
 *
 * => sets page_shift and page_mask from uvmexp.pagesize.
 */

void
uvm_setpagesize(void)
{

	/*
	 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
	 * to be a constant (indicated by being a non-zero value).
	 */
	if (uvmexp.pagesize == 0) {
		if (PAGE_SIZE == 0)
			panic("uvm_setpagesize: uvmexp.pagesize not set");
		uvmexp.pagesize = PAGE_SIZE;
	}
	uvmexp.pagemask = uvmexp.pagesize - 1;
	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
		panic("uvm_setpagesize: page size not a power of two");
	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
			break;
}
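
/*
 * Illustrative example (not part of the original file): with
 * uvmexp.pagesize == 8192 the code above yields pagemask == 0x1fff and
 * pageshift == 13.
 */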

/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
 */

vaddr_t
uvm_pageboot_alloc(vsize_t size)
{
	static bool initialized = false;
	vaddr_t addr;
#if !defined(PMAP_STEAL_MEMORY)
	vaddr_t vaddr;
	paddr_t paddr;
#endif

	/*
	 * on first call to this function, initialize ourselves.
	 */
	if (initialized == false) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/* round it the way we like it */
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);

		initialized = true;
	}

	/* round to page size */
	size = round_page(size);

#if defined(PMAP_STEAL_MEMORY)

	/*
	 * defer bootstrap allocation to MD code (it may want to allocate
	 * from a direct-mapped segment).  pmap_steal_memory should adjust
	 * virtual_space_start/virtual_space_end if necessary.
	 */

	addr = pmap_steal_memory(size, &virtual_space_start,
	    &virtual_space_end);

	return(addr);

#else /* !PMAP_STEAL_MEMORY */

	/*
	 * allocate virtual memory for this request
	 */
	if (virtual_space_start == virtual_space_end ||
	    (virtual_space_end - virtual_space_start) < size)
		panic("uvm_pageboot_alloc: out of virtual space");

	addr = virtual_space_start;

#ifdef PMAP_GROWKERNEL
	/*
	 * If the kernel pmap can't map the requested space,
	 * then allocate more resources for it.
	 */
	if (uvm_maxkaddr < (addr + size)) {
		uvm_maxkaddr = pmap_growkernel(addr + size);
		if (uvm_maxkaddr < (addr + size))
			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
	}
#endif

	virtual_space_start += size;

	/*
	 * allocate and mapin physical pages to back new virtual pages
	 */

	for (vaddr = round_page(addr) ; vaddr < addr + size ;
	    vaddr += PAGE_SIZE) {

		if (!uvm_page_physget(&paddr))
			panic("uvm_pageboot_alloc: out of memory");

		/*
		 * Note this memory is no longer managed, so using
		 * pmap_kenter is safe.
		 */
		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());
	return(addr);
#endif /* PMAP_STEAL_MEMORY */
}

#if !defined(PMAP_STEAL_MEMORY)
/*
 * uvm_page_physget: "steal" one page from the vm_physmem structure.
 *
 * => attempt to allocate it off the end of a segment in which the "avail"
 *    values match the start/end values.  if we can't do that, then we
 *    will advance both values (making them equal, and removing some
 *    vm_page structures from the non-avail area).
 * => return false if out of memory.
 */

/* subroutine: try to allocate from memory chunks on the specified freelist */
static bool uvm_page_physget_freelist(paddr_t *, int);

static bool
uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
{
	int lcv, x;

	/* pass 1: try allocating from a matching end */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		if (uvm.page_init_done == true)
			panic("uvm_page_physget: called _after_ bootstrap");

		if (vm_physmem[lcv].free_list != freelist)
			continue;

		/* try from front */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_start);
			vm_physmem[lcv].avail_start++;
			vm_physmem[lcv].start++;
			/* nothing left?  nuke it */
			if (vm_physmem[lcv].avail_start ==
			    vm_physmem[lcv].end) {
				if (vm_nphysseg == 1)
					panic("uvm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (true);
		}

		/* try from rear */
		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
			vm_physmem[lcv].avail_end--;
			vm_physmem[lcv].end--;
			/* nothing left?  nuke it */
			if (vm_physmem[lcv].avail_end ==
			    vm_physmem[lcv].start) {
				if (vm_nphysseg == 1)
					panic("uvm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (true);
		}
	}

	/* pass2: forget about matching ends, just allocate something */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		/* any room in this bank? */
		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
			continue;  /* nope */

		*paddrp = ptoa(vm_physmem[lcv].avail_start);
		vm_physmem[lcv].avail_start++;
		/* truncate! */
		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;

		/* nothing left?  nuke it */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
			if (vm_nphysseg == 1)
				panic("uvm_page_physget: out of memory!");
			vm_nphysseg--;
			for (x = lcv ; x < vm_nphysseg ; x++)
				/* structure copy */
				vm_physmem[x] = vm_physmem[x+1];
		}
		return (true);
	}

	return (false);	/* whoops! */
}

bool
uvm_page_physget(paddr_t *paddrp)
{
	int i;

	/* try in the order of freelist preference */
	for (i = 0; i < VM_NFREELIST; i++)
		if (uvm_page_physget_freelist(paddrp, i) == true)
			return (true);
	return (false);
}
#endif /* PMAP_STEAL_MEMORY */

/*
 * uvm_page_physload: load physical memory into VM system
 *
 * => all args are PFs
 * => all pages in start/end get vm_page structures
 * => areas marked by avail_start/avail_end get added to the free page pool
 * => we are limited to VM_PHYSSEG_MAX physical memory segments
 */

void
uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
    paddr_t avail_end, int free_list)
{
	int preload, lcv;
	psize_t npages;
	struct vm_page *pgs;
	struct vm_physseg *ps;

	if (uvmexp.pagesize == 0)
		panic("uvm_page_physload: page size not set!");
	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
		panic("uvm_page_physload: bad free list %d", free_list);
	if (start >= end)
		panic("uvm_page_physload: start >= end");

	/*
	 * do we have room?
	 */

	if (vm_nphysseg == VM_PHYSSEG_MAX) {
		printf("uvm_page_physload: unable to load physical memory "
		    "segment\n");
		printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
		    VM_PHYSSEG_MAX, (long long)start, (long long)end);
		printf("\tincrease VM_PHYSSEG_MAX\n");
		return;
	}

	/*
	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
	 * called yet, so malloc is not available).
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		if (vm_physmem[lcv].pgs)
			break;
	}
	preload = (lcv == vm_nphysseg);

	/*
	 * if VM is already running, attempt to malloc() vm_page structures
	 */

	if (!preload) {
#if defined(VM_PHYSSEG_NOADD)
		panic("uvm_page_physload: tried to add RAM after vm_mem_init");
#else
		/* XXXCDC: need some sort of lockout for this case */
		paddr_t paddr;
		npages = end - start;  /* # of pages */
		pgs = malloc(sizeof(struct vm_page) * npages,
		    M_VMPAGE, M_NOWAIT);
		if (pgs == NULL) {
			printf("uvm_page_physload: can not malloc vm_page "
			    "structs for segment\n");
			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
			return;
		}
		/* zero data, init phys_addr and free_list, and free pages */
		memset(pgs, 0, sizeof(struct vm_page) * npages);
		for (lcv = 0, paddr = ptoa(start) ;
		    lcv < npages ; lcv++, paddr += PAGE_SIZE) {
			pgs[lcv].phys_addr = paddr;
			pgs[lcv].free_list = free_list;
			if (atop(paddr) >= avail_start &&
			    atop(paddr) <= avail_end)
				uvm_pagefree(&pgs[lcv]);
		}
		/* XXXCDC: incomplete: need to update uvmexp.free, what else? */
		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
#endif
	} else {
		pgs = NULL;
		npages = 0;
	}

	/*
	 * now insert us in the proper place in vm_physmem[]
	 */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
	/* random: put it at the end (easy!) */
	ps = &vm_physmem[vm_nphysseg];
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
	{
		int x;
		/* sort by address for binary search */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if (start < vm_physmem[lcv].start)
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	{
		int x;
		/* sort by largest segment first */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if ((end - start) >
			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}
#else
	panic("uvm_page_physload: unknown physseg strategy selected!");
#endif

	ps->start = start;
	ps->end = end;
	ps->avail_start = avail_start;
	ps->avail_end = avail_end;
	if (preload) {
		ps->pgs = NULL;
	} else {
		ps->pgs = pgs;
		ps->lastpg = pgs + npages - 1;
	}
	ps->free_list = free_list;
	vm_nphysseg++;

	if (!preload) {
		uvmpdpol_reinit();
	}
}
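
/*
 * Illustrative sketch (not part of the original file; addresses and
 * variable names are assumptions): machine-dependent bootstrap code
 * registers each chunk of free RAM with a call roughly like the one
 * below, before uvm_page_init() runs:
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(seg_avail_start), atop(seg_avail_end),
 *	    VM_FREELIST_DEFAULT);
 *
 * where seg_* are hypothetical physical addresses of the segment and of
 * the portion of it that is actually free for the page pool.
 */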

/*
 * uvm_page_recolor: Recolor the pages if the new bucket count is
 * larger than the old one.
 */

void
uvm_page_recolor(int newncolors)
{
	struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray;
	struct pgfreelist gpgfl, pgfl;
	struct vm_page *pg;
	vsize_t bucketcount;
	int lcv, color, i, ocolors;
	struct uvm_cpu *ucpu;

	if (newncolors <= uvmexp.ncolors)
		return;

	if (uvm.page_init_done == false) {
		uvmexp.ncolors = newncolors;
		return;
	}

	bucketcount = newncolors * VM_NFREELIST;
	bucketarray = malloc(bucketcount * sizeof(struct pgflbucket) * 2,
	    M_VMPAGE, M_NOWAIT);
	cpuarray = bucketarray + bucketcount;
	if (bucketarray == NULL) {
		printf("WARNING: unable to allocate %ld page color buckets\n",
		    (long) bucketcount);
		return;
	}

	mutex_spin_enter(&uvm_fpageqlock);

	/* Make sure we should still do this. */
	if (newncolors <= uvmexp.ncolors) {
		mutex_spin_exit(&uvm_fpageqlock);
		free(bucketarray, M_VMPAGE);
		return;
	}

	oldbucketarray = uvm.page_free[0].pgfl_buckets;
	ocolors = uvmexp.ncolors;

	uvmexp.ncolors = newncolors;
	uvmexp.colormask = uvmexp.ncolors - 1;

	ucpu = curcpu()->ci_data.cpu_uvm;
	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
		pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&gpgfl);
		uvm_page_init_buckets(&pgfl);
		for (color = 0; color < ocolors; color++) {
			for (i = 0; i < PGFL_NQUEUES; i++) {
				while ((pg = LIST_FIRST(&uvm.page_free[
				    lcv].pgfl_buckets[color].pgfl_queues[i]))
				    != NULL) {
					LIST_REMOVE(pg, pageq.list); /* global */
					LIST_REMOVE(pg, listq.list); /* cpu */
					LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[
					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
					    i], pg, pageq.list);
					LIST_INSERT_HEAD(&pgfl.pgfl_buckets[
					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
					    i], pg, listq.list);
				}
			}
		}
		uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets;
		ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
	}

	if (have_recolored_pages) {
		mutex_spin_exit(&uvm_fpageqlock);
		free(oldbucketarray, M_VMPAGE);
		return;
	}

	have_recolored_pages = true;
	mutex_spin_exit(&uvm_fpageqlock);
}
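
/*
 * Illustrative sketch (not part of the original file; variable names are
 * assumptions): MD code that discovers cache geometry during
 * autoconfiguration might choose one color per page of a cache way and
 * call, roughly:
 *
 *	uvm_page_recolor((cache_size / cache_assoc) >> PAGE_SHIFT);
 */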

/*
 * uvm_cpu_attach: initialize per-CPU data structures.
 */

void
uvm_cpu_attach(struct cpu_info *ci)
{
	struct pgflbucket *bucketarray;
	struct pgfreelist pgfl;
	struct uvm_cpu *ucpu;
	vsize_t bucketcount;
	int lcv;

	if (CPU_IS_PRIMARY(ci)) {
		/* Already done in uvm_page_init(). */
		return;
	}

	/* Add more reserve pages for this CPU. */
	uvmexp.reserve_kernel += vm_page_reserve_kernel;

	/* Configure this CPU's free lists. */
	bucketcount = uvmexp.ncolors * VM_NFREELIST;
	bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
	    M_VMPAGE, M_WAITOK);
	ucpu = &uvm.cpus[cpu_index(ci)];
	ci->ci_data.cpu_uvm = ucpu;
	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&pgfl);
		ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
	}
}

/*
 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
 */

static struct vm_page *
uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2,
    int *trycolorp)
{
	struct pgflist *freeq;
	struct vm_page *pg;
	int color, trycolor = *trycolorp;
	struct pgfreelist *gpgfl, *pgfl;

	KASSERT(mutex_owned(&uvm_fpageqlock));

	color = trycolor;
	pgfl = &ucpu->page_free[flist];
	gpgfl = &uvm.page_free[flist];
	do {
		/* cpu, try1 */
		if ((pg = LIST_FIRST((freeq =
		    &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
			VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
			uvmexp.cpuhit++;
			goto gotit;
		}
		/* global, try1 */
		if ((pg = LIST_FIRST((freeq =
		    &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
			VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
			uvmexp.cpumiss++;
			goto gotit;
		}
		/* cpu, try2 */
		if ((pg = LIST_FIRST((freeq =
		    &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
			VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
			uvmexp.cpuhit++;
			goto gotit;
		}
		/* global, try2 */
		if ((pg = LIST_FIRST((freeq =
		    &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
			VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
			uvmexp.cpumiss++;
			goto gotit;
		}
		color = (color + 1) & uvmexp.colormask;
	} while (color != trycolor);

	return (NULL);

gotit:
	LIST_REMOVE(pg, pageq.list);	/* global list */
	LIST_REMOVE(pg, listq.list);	/* per-cpu list */
	uvmexp.free--;

	/* update zero'd page count */
	if (pg->flags & PG_ZERO)
		uvmexp.zeropages--;

	if (color == trycolor)
		uvmexp.colorhit++;
	else {
		uvmexp.colormiss++;
		*trycolorp = color;
	}

	return (pg);
}

/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in obj's tree)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 * => policy decision: it is more important to pull a page off of the
 *    appropriate priority free list than it is to get a zero'd or
 *    unknown contents page.  This is because we live with the
 *    consequences of a bad free list decision for the entire
 *    lifetime of the page, e.g. if the page comes from memory that
 *    is slower to access.
 */
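
/*
 * Illustrative caller sketch (not part of the original file): most callers
 * go through the uvm_pagealloc() wrapper from uvm_page.h, which is
 * uvm_pagealloc_strat(obj, off, anon, flags, UVM_PGA_STRAT_NORMAL, 0).
 * Assuming a locked object "uobj", an offset "off" and an illustrative
 * wait message, a typical allocate-or-wait loop looks roughly like:
 *
 *	struct vm_page *pg;
 *
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO)) == NULL) {
 *		mutex_exit(&uobj->vmobjlock);
 *		uvm_wait("pgalloc");
 *		mutex_enter(&uobj->vmobjlock);
 *	}
 */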

struct vm_page *
uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
    int flags, int strat, int free_list)
{
	int lcv, try1, try2, zeroit = 0, color;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;
	lwp_t *l;

	KASSERT(obj == NULL || anon == NULL);
	KASSERT(anon == NULL || off == 0);
	KASSERT(off == trunc_page(off));
	KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock));
	KASSERT(anon == NULL || mutex_owned(&anon->an_lock));

	mutex_spin_enter(&uvm_fpageqlock);

	/*
	 * This implements a global round-robin page coloring
	 * algorithm.
	 *
	 * XXXJRT: What about virtually-indexed caches?
	 */

	ucpu = curcpu()->ci_data.cpu_uvm;
	color = ucpu->page_free_nextcolor;

	/*
	 * check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();

	/*
	 * fail if any of these conditions is true:
	 * [1]  there really are no free pages, or
	 * [2]  only kernel "reserved" pages remain and
	 *        reserved pages have not been requested.
	 * [3]  only pagedaemon "reserved" pages remain and
	 *        the requestor isn't the pagedaemon.
	 * we make kernel reserve pages available if called by a
	 * kernel thread or a realtime thread.
	 */
	l = curlwp;
	if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
		flags |= UVM_PGA_USERESERVE;
	}
	if ((uvmexp.free <= uvmexp.reserve_kernel &&
	    (flags & UVM_PGA_USERESERVE) == 0) ||
	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
	    curlwp != uvm.pagedaemon_lwp))
		goto fail;

#if PGFL_NQUEUES != 2
#error uvm_pagealloc_strat needs to be updated
#endif

	/*
	 * If we want a zero'd page, try the ZEROS queue first, otherwise
	 * we try the UNKNOWN queue first.
	 */
	if (flags & UVM_PGA_ZERO) {
		try1 = PGFL_ZEROS;
		try2 = PGFL_UNKNOWN;
	} else {
		try1 = PGFL_UNKNOWN;
		try2 = PGFL_ZEROS;
	}

again:
	switch (strat) {
	case UVM_PGA_STRAT_NORMAL:
		/* Check freelists: descending priority (ascending id) order */
		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
			pg = uvm_pagealloc_pgfl(ucpu, lcv,
			    try1, try2, &color);
			if (pg != NULL)
				goto gotit;
		}

		/* No pages free! */
		goto fail;

	case UVM_PGA_STRAT_ONLY:
	case UVM_PGA_STRAT_FALLBACK:
		/* Attempt to allocate from the specified free list. */
		KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
		pg = uvm_pagealloc_pgfl(ucpu, free_list,
		    try1, try2, &color);
		if (pg != NULL)
			goto gotit;

		/* Fall back, if possible. */
		if (strat == UVM_PGA_STRAT_FALLBACK) {
			strat = UVM_PGA_STRAT_NORMAL;
			goto again;
		}

		/* No pages free! */
		goto fail;

	default:
		panic("uvm_pagealloc_strat: bad strat %d", strat);
		/* NOTREACHED */
	}

gotit:
	/*
	 * We now know which color we actually allocated from; set
	 * the next color accordingly.
	 */

	ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask;

	/*
	 * update allocation statistics and remember if we have to
	 * zero the page
	 */

	if (flags & UVM_PGA_ZERO) {
		if (pg->flags & PG_ZERO) {
			uvmexp.pga_zerohit++;
			zeroit = 0;
		} else {
			uvmexp.pga_zeromiss++;
			zeroit = 1;
		}
		if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
			ucpu->page_idle_zero = vm_page_zero_enable;
		}
	}
	KASSERT(pg->pqflags == PQ_FREE);

	pg->offset = off;
	pg->uobject = obj;
	pg->uanon = anon;
	pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
	if (anon) {
		anon->an_page = pg;
		pg->pqflags = PQ_ANON;
		atomic_inc_uint(&uvmexp.anonpages);
	} else {
		if (obj) {
			uvm_pageinsert(obj, pg);
		}
		pg->pqflags = 0;
	}
	mutex_spin_exit(&uvm_fpageqlock);

#if defined(UVM_PAGE_TRKOWN)
	pg->owner_tag = NULL;
#endif
	UVM_PAGE_OWN(pg, "new alloc");

	if (flags & UVM_PGA_ZERO) {
		/*
		 * A zero'd page is not clean.  If we got a page not already
		 * zero'd, then we have to zero it ourselves.
		 */
		pg->flags &= ~PG_CLEAN;
		if (zeroit)
			pmap_zero_page(VM_PAGE_TO_PHYS(pg));
	}

	return(pg);

fail:
	mutex_spin_exit(&uvm_fpageqlock);
	return (NULL);
}

/*
 * uvm_pagereplace: replace a page with another
 *
 * => object must be locked
 */

void
uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
{
	struct uvm_object *uobj = oldpg->uobject;

	KASSERT((oldpg->flags & PG_TABLED) != 0);
	KASSERT(uobj != NULL);
	KASSERT((newpg->flags & PG_TABLED) == 0);
	KASSERT(newpg->uobject == NULL);
	KASSERT(mutex_owned(&uobj->vmobjlock));

	newpg->uobject = uobj;
	newpg->offset = oldpg->offset;

	uvm_pageremove_tree(uobj, oldpg);
	uvm_pageinsert_tree(uobj, newpg);
	uvm_pageinsert_list(uobj, newpg, oldpg);
	uvm_pageremove_list(uobj, oldpg);
}

/*
 * uvm_pagerealloc: reallocate a page from one object to another
 *
 * => both objects must be locked
 */

void
uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
{
	/*
	 * remove it from the old object
	 */

	if (pg->uobject) {
		uvm_pageremove(pg->uobject, pg);
	}

	/*
	 * put it in the new object
	 */

	if (newobj) {
		pg->uobject = newobj;
		pg->offset = newoff;
		uvm_pageinsert(newobj, pg);
	}
}

#ifdef DEBUG
/*
 * check if page is zero-filled
 *
 *  - called with free page queue lock held.
 */
void
uvm_pagezerocheck(struct vm_page *pg)
{
	int *p, *ep;

	KASSERT(uvm_zerocheckkva != 0);
	KASSERT(mutex_owned(&uvm_fpageqlock));

	/*
	 * XXX assuming pmap_kenter_pa and pmap_kremove never call
	 * uvm page allocator.
	 *
	 * it might be better to have "CPU-local temporary map" pmap interface.
	 */
	pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0);
	p = (int *)uvm_zerocheckkva;
	ep = (int *)((char *)p + PAGE_SIZE);
	pmap_update(pmap_kernel());
	while (p < ep) {
		if (*p != 0)
			panic("PG_ZERO page isn't zero-filled");
		p++;
	}
	pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
	/*
	 * pmap_update() is not necessary here because no one except us
	 * uses this VA.
	 */
}
#endif /* DEBUG */

/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
 */

void
uvm_pagefree(struct vm_page *pg)
{
	struct pgflist *pgfl;
	struct uvm_cpu *ucpu;
	int index, color, queue;
	bool iszero;

#ifdef DEBUG
	if (pg->uobject == (void *)0xdeadbeef &&
	    pg->uanon == (void *)0xdeadbeef) {
		panic("uvm_pagefree: freeing free page %p", pg);
	}
#endif /* DEBUG */

	KASSERT((pg->flags & PG_PAGEOUT) == 0);
	KASSERT(!(pg->pqflags & PQ_FREE));
	KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg));
	KASSERT(pg->uobject == NULL || mutex_owned(&pg->uobject->vmobjlock));
	KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
	    mutex_owned(&pg->uanon->an_lock));

	/*
	 * if the page is loaned, resolve the loan instead of freeing.
	 */

	if (pg->loan_count) {
		KASSERT(pg->wire_count == 0);

		/*
		 * if the page is owned by an anon then we just want to
		 * drop anon ownership.  the kernel will free the page when
		 * it is done with it.  if the page is owned by an object,
		 * remove it from the object and mark it dirty for the benefit
		 * of possible anon owners.
		 *
		 * regardless of previous ownership, wakeup any waiters,
		 * unbusy the page, and we're done.
		 */

		if (pg->uobject != NULL) {
			uvm_pageremove(pg->uobject, pg);
			pg->flags &= ~PG_CLEAN;
		} else if (pg->uanon != NULL) {
			if ((pg->pqflags & PQ_ANON) == 0) {
				pg->loan_count--;
			} else {
				pg->pqflags &= ~PQ_ANON;
				atomic_dec_uint(&uvmexp.anonpages);
			}
			pg->uanon->an_page = NULL;
			pg->uanon = NULL;
		}
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
#ifdef UVM_PAGE_TRKOWN
		pg->owner_tag = NULL;
#endif
		if (pg->loan_count) {
			KASSERT(pg->uobject == NULL);
			if (pg->uanon == NULL) {
				uvm_pagedequeue(pg);
			}
			return;
		}
	}

	/*
	 * remove page from its object or anon.
	 */

	if (pg->uobject != NULL) {
		uvm_pageremove(pg->uobject, pg);
	} else if (pg->uanon != NULL) {
		pg->uanon->an_page = NULL;
		atomic_dec_uint(&uvmexp.anonpages);
	}

	/*
	 * now remove the page from the queues.
	 */

	uvm_pagedequeue(pg);

	/*
	 * if the page was wired, unwire it now.
	 */

	if (pg->wire_count) {
		pg->wire_count = 0;
		uvmexp.wired--;
	}

	/*
	 * and put on free queue
	 */

	iszero = (pg->flags & PG_ZERO);
	index = uvm_page_lookup_freelist(pg);
	color = VM_PGCOLOR_BUCKET(pg);
	queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN);

#ifdef DEBUG
	pg->uobject = (void *)0xdeadbeef;
	pg->uanon = (void *)0xdeadbeef;
#endif

	mutex_spin_enter(&uvm_fpageqlock);
	pg->pqflags = PQ_FREE;

#ifdef DEBUG
	if (iszero)
		uvm_pagezerocheck(pg);
#endif /* DEBUG */

	/* global list */
	pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue];
	LIST_INSERT_HEAD(pgfl, pg, pageq.list);
	uvmexp.free++;
	if (iszero) {
		uvmexp.zeropages++;
	}

	/* per-cpu list */
	ucpu = curcpu()->ci_data.cpu_uvm;
	pg->offset = (uintptr_t)ucpu;
	pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue];
	LIST_INSERT_HEAD(pgfl, pg, listq.list);
	ucpu->pages[queue]++;
	if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
		ucpu->page_idle_zero = vm_page_zero_enable;
	}

	mutex_spin_exit(&uvm_fpageqlock);
}

/*
 * uvm_page_unbusy: unbusy an array of pages.
 *
 * => pages must either all belong to the same object, or all belong to anons.
 * => if pages are object-owned, object must be locked.
 * => if pages are anon-owned, anons must be locked.
 * => caller must lock page queues if pages may be released.
 * => caller must make sure that anon-owned pages are not PG_RELEASED.
 */

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;
	UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}

		KASSERT(pg->uobject == NULL ||
		    mutex_owned(&pg->uobject->vmobjlock));
		KASSERT(pg->uobject != NULL ||
		    (pg->uanon != NULL && mutex_owned(&pg->uanon->an_lock)));

		KASSERT(pg->flags & PG_BUSY);
		KASSERT((pg->flags & PG_PAGEOUT) == 0);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		if (pg->flags & PG_RELEASED) {
			UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
			KASSERT(pg->uobject != NULL ||
			    (pg->uanon != NULL && pg->uanon->an_ref > 0));
			pg->flags &= ~PG_RELEASED;
			uvm_pagefree(pg);
		} else {
			UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
			KASSERT((pg->flags & PG_FAKE) == 0);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
	}
}
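
/*
 * Illustrative sketch (not part of the original file): a pager that has
 * finished I/O on an array of busy pages belonging to an assumed object
 * "uobj" typically releases them with:
 *
 *	mutex_enter(&uobj->vmobjlock);
 *	uvm_page_unbusy(pgs, npages);
 *	mutex_exit(&uobj->vmobjlock);
 */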

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.  it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(struct vm_page *pg, const char *tag)
{
	struct uvm_object *uobj;
	struct vm_anon *anon;

	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);

	uobj = pg->uobject;
	anon = pg->uanon;
	if (uobj != NULL) {
		KASSERT(mutex_owned(&uobj->vmobjlock));
	} else if (anon != NULL) {
		KASSERT(mutex_owned(&anon->an_lock));
	}

	KASSERT((pg->flags & PG_WANTED) == 0);

	/* gain ownership? */
	if (tag) {
		KASSERT((pg->flags & PG_BUSY) != 0);
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d [%s]\n", pg,
			    pg->owner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
		pg->lowner = (curlwp) ? curlwp->l_lid : (lwpid_t) -1;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	KASSERT((pg->flags & PG_BUSY) == 0);
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of a non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
		    pg->wire_count > 0);
	} else {
		KASSERT(pg->wire_count == 0);
	}
	pg->owner_tag = NULL;
}
#endif

/*
 * uvm_pageidlezero: zero free pages while the system is idle.
 *
 * => try to complete one color bucket at a time, to reduce our impact
 *	on the CPU cache.
 * => we loop until we either reach the target or there is a lwp ready
 *	to run, or MD code detects a reason to break early.
 */
void
uvm_pageidlezero(void)
{
	struct vm_page *pg;
	struct pgfreelist *pgfl, *gpgfl;
	struct uvm_cpu *ucpu;
	int free_list, firstbucket, nextbucket;

	ucpu = curcpu()->ci_data.cpu_uvm;
	if (!ucpu->page_idle_zero ||
	    ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
		ucpu->page_idle_zero = false;
		return;
	}
	mutex_enter(&uvm_fpageqlock);
	firstbucket = ucpu->page_free_nextcolor;
	nextbucket = firstbucket;
	do {
		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
			if (sched_curcpu_runnable_p()) {
				goto quit;
			}
			pgfl = &ucpu->page_free[free_list];
			gpgfl = &uvm.page_free[free_list];
			while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[
			    nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
				if (sched_curcpu_runnable_p()) {
					goto quit;
				}
				LIST_REMOVE(pg, pageq.list); /* global list */
				LIST_REMOVE(pg, listq.list); /* per-cpu list */
				ucpu->pages[PGFL_UNKNOWN]--;
				uvmexp.free--;
				KASSERT(pg->pqflags == PQ_FREE);
				pg->pqflags = 0;
				mutex_spin_exit(&uvm_fpageqlock);
#ifdef PMAP_PAGEIDLEZERO
				if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {

					/*
					 * The machine-dependent code detected
					 * some reason for us to abort zeroing
					 * pages, probably because there is a
					 * process now ready to run.
					 */

					mutex_spin_enter(&uvm_fpageqlock);
					pg->pqflags = PQ_FREE;
					LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
					    nextbucket].pgfl_queues[
					    PGFL_UNKNOWN], pg, pageq.list);
					LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
					    nextbucket].pgfl_queues[
					    PGFL_UNKNOWN], pg, listq.list);
					ucpu->pages[PGFL_UNKNOWN]++;
					uvmexp.free++;
					uvmexp.zeroaborts++;
					goto quit;
				}
#else
				pmap_zero_page(VM_PAGE_TO_PHYS(pg));
#endif /* PMAP_PAGEIDLEZERO */
				pg->flags |= PG_ZERO;

				mutex_spin_enter(&uvm_fpageqlock);
				pg->pqflags = PQ_FREE;
				LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
				    nextbucket].pgfl_queues[PGFL_ZEROS],
				    pg, pageq.list);
				LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
				    nextbucket].pgfl_queues[PGFL_ZEROS],
				    pg, listq.list);
				ucpu->pages[PGFL_ZEROS]++;
				uvmexp.free++;
				uvmexp.zeropages++;
			}
		}
		if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
			break;
		}
		nextbucket = (nextbucket + 1) & uvmexp.colormask;
	} while (nextbucket != firstbucket);
	ucpu->page_idle_zero = false;
quit:
	mutex_spin_exit(&uvm_fpageqlock);
}

/*
 * uvm_pagelookup: look up a page
 *
 * => caller should lock object to keep someone from pulling the page
 *	out from under it
 */

struct vm_page *
uvm_pagelookup(struct uvm_object *obj, voff_t off)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(&obj->vmobjlock));

	pg = (struct vm_page *)rb_tree_find_node(&obj->rb_tree, &off);

	KASSERT(pg == NULL || obj->uo_npages != 0);
	KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
	    (pg->flags & PG_BUSY) != 0);
	return(pg);
}

/*
 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
 *
 * => caller must lock page queues
 */

void
uvm_pagewire(struct vm_page *pg)
{
	KASSERT(mutex_owned(&uvm_pageqlock));
#if defined(READAHEAD_STATS)
	if ((pg->pqflags & PQ_READAHEAD) != 0) {
		uvm_ra_hit.ev_count++;
		pg->pqflags &= ~PQ_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count == 0) {
		uvm_pagedequeue(pg);
		uvmexp.wired++;
	}
	pg->wire_count++;
}
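
/*
 * Illustrative sketch (not part of the original file): wiring a page and
 * later unwiring it is done under the page queue lock, e.g.:
 *
 *	mutex_enter(&uvm_pageqlock);
 *	uvm_pagewire(pg);
 *	mutex_exit(&uvm_pageqlock);
 *	...
 *	mutex_enter(&uvm_pageqlock);
 *	uvm_pageunwire(pg);
 *	mutex_exit(&uvm_pageqlock);
 */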

/*
 * uvm_pageunwire: unwire the page.
 *
 * => activate if wire count goes to zero.
 * => caller must lock page queues
 */

void
uvm_pageunwire(struct vm_page *pg)
{
	KASSERT(mutex_owned(&uvm_pageqlock));
	pg->wire_count--;
	if (pg->wire_count == 0) {
		uvm_pageactivate(pg);
		uvmexp.wired--;
	}
}

/*
 * uvm_pagedeactivate: deactivate page
 *
 * => caller must lock page queues
 * => caller must check to make sure page is not wired
 * => object that page belongs to must be locked (so we can adjust pg->flags)
 * => caller must clear the reference on the page before calling
 */

void
uvm_pagedeactivate(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
	KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
	uvmpdpol_pagedeactivate(pg);
}

/*
 * uvm_pageactivate: activate page
 *
 * => caller must lock page queues
 */

void
uvm_pageactivate(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
#if defined(READAHEAD_STATS)
	if ((pg->pqflags & PQ_READAHEAD) != 0) {
		uvm_ra_hit.ev_count++;
		pg->pqflags &= ~PQ_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count != 0) {
		return;
	}
	uvmpdpol_pageactivate(pg);
}

/*
 * uvm_pagedequeue: remove a page from any paging queue
 */

void
uvm_pagedequeue(struct vm_page *pg)
{

	if (uvmpdpol_pageisqueued_p(pg)) {
		KASSERT(mutex_owned(&uvm_pageqlock));
	}

	uvmpdpol_pagedequeue(pg);
}

/*
 * uvm_pageenqueue: add a page to a paging queue without activating.
 * used where a page is not really demanded (yet).  eg. read-ahead
 */

void
uvm_pageenqueue(struct vm_page *pg)
{

	KASSERT(mutex_owned(&uvm_pageqlock));
	if (pg->wire_count != 0) {
		return;
	}
	uvmpdpol_pageenqueue(pg);
}

/*
 * uvm_pagezero: zero fill a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagezero(struct vm_page *pg)
{
	pg->flags &= ~PG_CLEAN;
	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
}

/*
 * uvm_pagecopy: copy a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
{

	dst->flags &= ~PG_CLEAN;
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}

/*
 * uvm_pageismanaged: test to see that a page (specified by PA) is managed.
 */

bool
uvm_pageismanaged(paddr_t pa)
{

	return (vm_physseg_find(atop(pa), NULL) != -1);
}

/*
 * uvm_page_lookup_freelist: look up the free list for the specified page
 */

int
uvm_page_lookup_freelist(struct vm_page *pg)
{
	int lcv;

	lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
	KASSERT(lcv != -1);
	return (vm_physmem[lcv].free_list);
}

#if defined(DDB) || defined(DEBUGPRINT)

/*
 * uvm_page_printit: actually print the page
 */

static const char page_flagbits[] = UVM_PGFLAGBITS;
static const char page_pqflagbits[] = UVM_PQFLAGBITS;

void
uvm_page_printit(struct vm_page *pg, bool full,
    void (*pr)(const char *, ...))
{
	struct vm_page *tpg;
	struct uvm_object *uobj;
	struct pgflist *pgl;
	char pgbuf[128];
	char pqbuf[128];

	(*pr)("PAGE %p:\n", pg);
	snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
	snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags);
	(*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n",
	    pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg));
	(*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
	    pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
#if defined(UVM_PAGE_TRKOWN)
	if (pg->flags & PG_BUSY)
		(*pr)(" owning process = %d, tag=%s\n",
		    pg->owner, pg->owner_tag);
	else
		(*pr)(" page not busy, no owner\n");
#else
	(*pr)(" [page ownership tracking disabled]\n");
#endif

	if (!full)
		return;

	/* cross-verify object/anon */
	if ((pg->pqflags & PQ_FREE) == 0) {
		if (pg->pqflags & PQ_ANON) {
			if (pg->uanon == NULL || pg->uanon->an_page != pg)
				(*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
				    (pg->uanon) ? pg->uanon->an_page : NULL);
			else
				(*pr)(" anon backpointer is OK\n");
		} else {
			uobj = pg->uobject;
			if (uobj) {
				(*pr)(" checking object list\n");
				TAILQ_FOREACH(tpg, &uobj->memq, listq.queue) {
					if (tpg == pg) {
						break;
					}
				}
				if (tpg)
					(*pr)(" page found on object list\n");
				else
					(*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
			}
		}
	}

	/* cross-verify page queue */
	if (pg->pqflags & PQ_FREE) {
		int fl = uvm_page_lookup_freelist(pg);
		int color = VM_PGCOLOR_BUCKET(pg);
		pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[
		    ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN];
	} else {
		pgl = NULL;
	}

	if (pgl) {
		(*pr)(" checking pageq list\n");
		LIST_FOREACH(tpg, pgl, pageq.list) {
			if (tpg == pg) {
				break;
			}
		}
		if (tpg)
			(*pr)(" page found on pageq list\n");
		else
			(*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
	}
}

/*
 * uvm_page_printall - print a summary of all managed pages
 */

void
uvm_page_printall(void (*pr)(const char *, ...))
{
	unsigned i;
	struct vm_page *pg;

	(*pr)("%18s %4s %4s %18s %18s"
#ifdef UVM_PAGE_TRKOWN
	    " OWNER"
#endif
	    "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
	for (i = 0; i < vm_nphysseg; i++) {
		for (pg = vm_physmem[i].pgs; pg <= vm_physmem[i].lastpg; pg++) {
			(*pr)("%18p %04x %04x %18p %18p",
			    pg, pg->flags, pg->pqflags, pg->uobject,
			    pg->uanon);
#ifdef UVM_PAGE_TRKOWN
			if (pg->flags & PG_BUSY)
				(*pr)(" %d [%s]", pg->owner, pg->owner_tag);
#endif
			(*pr)("\n");
		}
	}
}

#endif /* DDB || DEBUGPRINT */