/*	$NetBSD: uvm_pdaemon.c,v 1.36 2001/06/27 18:52:10 thorpej Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "opt_uvmhist.h"

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/vnode.h>

#include <uvm/uvm.h>

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16


/*
 * local prototypes
 */

static void		uvmpd_scan __P((void));
static boolean_t	uvmpd_scan_inactive __P((struct pglist *));
static void		uvmpd_tune __P((void));

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(wmsg)
	const char *wmsg;
{
	int timo = 0;
	int s = splbio();

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curproc == uvm.pagedaemon_proc) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	simple_lock(&uvm.pagedaemon_lock);
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg,
	    timo);

	splx(s);
}
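
/*
 * Illustrative sketch of the usual caller-side pattern for uvm_wait():
 * when a page allocation fails, drop all locks, sleep in uvm_wait() until
 * the page daemon has freed some memory, then retry.  The function name,
 * the guard macro and the wait message below are hypothetical; the calls
 * themselves are the standard UVM interfaces used elsewhere in the kernel.
 */
#ifdef UVM_PDAEMON_EXAMPLE	/* hypothetical guard; never defined */
static struct vm_page *
example_alloc_page(void)
{
	struct vm_page *pg;

	for (;;) {
		/* try to allocate a page that is not owned by any object */
		pg = uvm_pagealloc(NULL, 0, NULL, 0);
		if (pg != NULL)
			break;

		/* out of memory: wake the page daemon and sleep */
		uvm_wait("examplewt");
	}
	return (pg);
}
#endif /* UVM_PDAEMON_EXAMPLE */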

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

static void
uvmpd_tune()
{
	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	uvmexp.freemin = uvmexp.npages / 20;

	/* between 16k and 256k */
	/* XXX:  what are these values good for? */
	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
	uvmexp.freemin = min(uvmexp.freemin, (256*1024) >> PAGE_SHIFT);

	/* Make sure there's always a user page free. */
	if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
		uvmexp.freemin = uvmexp.reserve_kernel + 1;

	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
	if (uvmexp.freetarg <= uvmexp.freemin)
		uvmexp.freetarg = uvmexp.freemin + 1;

	/* uvmexp.inactarg: computed in main daemon loop */

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}
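
/*
 * Worked example of the tuning above (illustrative only, assuming 4KB
 * pages, i.e. PAGE_SHIFT == 12): with uvmexp.npages == 16384 (64MB of
 * managed memory), npages / 20 == 819, but the 256KB ceiling is
 * (256*1024) >> 12 == 64 pages, so freemin becomes 64.  freetarg is then
 * (64 * 4) / 3 == 85 pages and wiredmax is 16384 / 3 == 5461 pages.
 */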

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout(void *arg)
{
	int npages = 0;
	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvm_lock_pageq();
	npages = uvmexp.npages;
	uvmpd_tune();
	uvm_unlock_pageq();

	/*
	 * main loop
	 */

	for (;;) {
		simple_lock(&uvm.pagedaemon_lock);

		UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
		UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
		    &uvm.pagedaemon_lock, FALSE, "pgdaemon", 0);
		uvmexp.pdwoke++;
		UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);

		/* drain pool resources */
		pool_drain(0);

		/*
		 * now lock page queues and recompute inactive count
		 */

		uvm_lock_pageq();
		if (npages != uvmexp.npages) {	/* check for new pages? */
			npages = uvmexp.npages;
			uvmpd_tune();
		}

		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}

		UVMHIST_LOG(pdhist,"  free/ftarg=%d/%d, inact/itarg=%d/%d",
		    uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
		    uvmexp.inactarg);

		/*
		 * scan if needed
		 */

		if (uvmexp.free + uvmexp.paging < uvmexp.freetarg ||
		    uvmexp.inactive < uvmexp.inactarg) {
			uvmpd_scan();
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */

		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */

		uvm_unlock_pageq();
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_daemon: main loop for the aiodone daemon.
 */

void
uvm_aiodone_daemon(void *arg)
{
	int s, free;
	struct buf *bp, *nbp;
	UVMHIST_FUNC("uvm_aiodoned"); UVMHIST_CALLED(pdhist);

	for (;;) {

		/*
		 * carefully attempt to go to sleep (without losing "wakeups"!).
		 * we need splbio because we want to make sure the aio_done list
		 * is totally empty before we go to sleep.
		 */

		s = splbio();
		simple_lock(&uvm.aiodoned_lock);
		if (TAILQ_FIRST(&uvm.aio_done) == NULL) {
			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.aiodoned,
			    &uvm.aiodoned_lock, FALSE, "aiodoned", 0);
			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);

			/* relock aiodoned_lock, still at splbio */
			simple_lock(&uvm.aiodoned_lock);
		}

		/*
		 * check for done aio structures
		 */

		bp = TAILQ_FIRST(&uvm.aio_done);
		if (bp) {
			TAILQ_INIT(&uvm.aio_done);
		}

		simple_unlock(&uvm.aiodoned_lock);
		splx(s);

		/*
		 * process each i/o that's done.
		 */

		free = uvmexp.free;
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			(*bp->b_iodone)(bp);
			bp = nbp;
		}
		if (free <= uvmexp.reserve_kernel) {
			s = uvm_lock_fpageq();
			wakeup(&uvm.pagedaemon);
			uvm_unlock_fpageq(s);
		} else {
			simple_lock(&uvm.pagedaemon_lock);
			wakeup(&uvmexp.free);
			simple_unlock(&uvm.pagedaemon_lock);
		}
	}
}
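
/*
 * Illustrative sketch of the producer side this daemon consumes from:
 * an async-i/o completion handler queues the finished buf on uvm.aio_done
 * and wakes uvm_aiodone_daemon().  The function name and guard macro are
 * hypothetical; the locking mirrors what the daemon above expects
 * (aiodoned_lock held at splbio while touching aio_done, bufs linked
 * through b_freelist).
 */
#ifdef UVM_PDAEMON_EXAMPLE	/* hypothetical guard; never defined */
static void
example_aio_done(struct buf *bp)
{
	int s;

	s = splbio();
	simple_lock(&uvm.aiodoned_lock);
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);
	simple_unlock(&uvm.aiodoned_lock);
	splx(s);
}
#endif /* UVM_PDAEMON_EXAMPLE */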


/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we return TRUE if we are exiting because we met our target
 */

static boolean_t
uvmpd_scan_inactive(pglst)
	struct pglist *pglst;
{
	boolean_t retval = FALSE;	/* assume we haven't hit target */
	int s, free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
	int npages;
	struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT];	/* XXX: see below */
	int swnpages, swcpages;				/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts, t;
	UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */

	swslot = 0;
	swnpages = swcpages = 0;
	free = 0;
	dirtyreacts = 0;

	for (p = TAILQ_FIRST(pglst); p != NULL || swslot != 0; p = nextpg) {

		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */

		uobj = NULL;
		anon = NULL;

		if (p) {

			/*
			 * update our copy of "free" and see if we've met
			 * our target
			 */

			s = uvm_lock_fpageq();
			free = uvmexp.free;
			uvm_unlock_fpageq(s);

			if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				UVMHIST_LOG(pdhist,"  met free target: "
				    "exit loop", 0, 0, 0, 0);
				retval = TRUE;

				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
			}
		}

		if (p) {	/* if (we have a new page to consider) */

			/*
			 * we are below target and have a new page to consider.
			 */

			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			/*
			 * move referenced pages back to active queue and
			 * skip to next page.
			 */

			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				uvmexp.pdreact++;
				continue;
			}

			/*
			 * enforce the minimum thresholds on different
			 * types of memory usage.  if reusing the current
			 * page would reduce that type of usage below its
			 * minimum, reactivate the page instead and move
			 * on to the next page.
			 */

			t = uvmexp.active + uvmexp.inactive + uvmexp.free;
			if (p->uanon &&
			    uvmexp.anonpages <= (t * uvmexp.anonmin) >> 8) {
				uvm_pageactivate(p);
				uvmexp.pdreanon++;
				continue;
			}
			if (p->uobject && UVM_OBJ_IS_VTEXT(p->uobject) &&
			    uvmexp.vtextpages <= (t * uvmexp.vtextmin) >> 8) {
				uvm_pageactivate(p);
				uvmexp.pdrevtext++;
				continue;
			}
			if (p->uobject && UVM_OBJ_IS_VNODE(p->uobject) &&
			    !UVM_OBJ_IS_VTEXT(p->uobject) &&
			    uvmexp.vnodepages <= (t * uvmexp.vnodemin) >> 8) {
				uvm_pageactivate(p);
				uvmexp.pdrevnode++;
				continue;
			}
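
			/*
			 * note: since the products above are shifted right
			 * by 8, anonmin/vtextmin/vnodemin are fractions with
			 * denominator 256; e.g. a value of 25 reserves
			 * roughly 10% of the "t" managed pages for that
			 * type of memory.
			 */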

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 *
			 * the only time we expect to see an ownerless page
			 * (i.e. a page with no uobject and !PQ_ANON) is if an
			 * anon has loaned a page from a uvm_object and the
			 * uvm_object has dropped the ownership.  in that
			 * case, the anon can "take over" the loaned page
			 * and make it its own.
			 */

			/* is page part of an anon or ownerless ? */
			if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
				anon = p->uanon;
				KASSERT(anon != NULL);
				if (!simple_lock_try(&anon->an_lock)) {
					/* lock failed, skip this page */
					continue;
				}

				/*
				 * if the page is ownerless, claim it in the
				 * name of "anon"!
				 */

				if ((p->pqflags & PQ_ANON) == 0) {
					KASSERT(p->loan_count > 0);
					p->loan_count--;
					p->pqflags |= PQ_ANON;
					/* anon now owns it */
				}
				if (p->flags & PG_BUSY) {
					simple_unlock(&anon->an_lock);
					uvmexp.pdbusy++;
					/* someone else owns page, skip it */
					continue;
				}
				uvmexp.pdanscan++;
			} else {
				uobj = p->uobject;
				KASSERT(uobj != NULL);
				if (!simple_lock_try(&uobj->vmobjlock)) {
					/* lock failed, skip this page */
					continue;
				}
				if (p->flags & PG_BUSY) {
					simple_unlock(&uobj->vmobjlock);
					uvmexp.pdbusy++;
					/* someone else owns page, skip it */
					continue;
				}
				uvmexp.pdobscan++;
			}

			/*
			 * we now have the object and the page queues locked.
			 * the page is not busy.  remove all the permissions
			 * from the page so we can sync the modified info
			 * without any race conditions.  if the page is clean
			 * we can free it now and continue.
			 */

			pmap_page_protect(p, VM_PROT_NONE);
			if ((p->flags & PG_CLEAN) != 0 && pmap_is_modified(p)) {
				p->flags &= ~PG_CLEAN;
			}

			if (p->flags & PG_CLEAN) {
				if (p->pqflags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					simple_lock(&uvm.swap_data_lock);
					uvmexp.swpgonly++;
					simple_unlock(&uvm.swap_data_lock);
				}

				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {

					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */

					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->u.an_page = NULL;
					simple_unlock(&anon->an_lock);
				} else {
					/* pagefree has already removed the
					 * page from the object */
					simple_unlock(&uobj->vmobjlock);
				}
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */

			if (free + uvmexp.paging > uvmexp.freetarg << 2) {
				if (anon) {
					simple_unlock(&anon->an_lock);
				} else {
					simple_unlock(&uobj->vmobjlock);
				}
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out:
			 * every page of swap is in use by a page whose only
			 * copy is in swap, so there is no swap space to put
			 * it in.  reactivate it so that we eventually cycle
			 * all pages thru the inactive queue.
			 */

			KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
			if ((p->pqflags & PQ_SWAPBACKED) &&
			    uvmexp.swpgonly == uvmexp.swpages) {
				dirtyreacts++;
				uvm_pageactivate(p);
				if (anon) {
					simple_unlock(&anon->an_lock);
				} else {
					simple_unlock(&uobj->vmobjlock);
				}
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */

			KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
			if ((p->pqflags & PQ_SWAPBACKED) &&
			    uvmexp.swpginuse == uvmexp.swpages) {

				if ((p->pqflags & PQ_ANON) &&
				    p->uanon->an_swslot) {
					uvm_swap_free(p->uanon->an_swslot, 1);
					p->uanon->an_swslot = 0;
				}
				if (p->pqflags & PQ_AOBJ) {
					uao_dropswap(p->uobject,
					    p->offset >> PAGE_SHIFT);
				}
			}

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.
			 */

			swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
			p->flags |= PG_BUSY;		/* now we own it */
			UVM_PAGE_OWN(p, "scan_inactive");
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */

			if (swap_backed) {

				/*
				 * free old swap slot (if any)
				 */

				if (anon) {
					if (anon->an_swslot) {
						uvm_swap_free(anon->an_swslot,
						    1);
						anon->an_swslot = 0;
					}
				} else {
					uao_dropswap(uobj,
					    p->offset >> PAGE_SHIFT);
				}

				/*
				 * start new cluster (if necessary)
				 */

				if (swslot == 0) {
					swnpages = MAXBSIZE >> PAGE_SHIFT;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						p->flags &= ~PG_BUSY;
						UVM_PAGE_OWN(p, NULL);
						if (anon)
							simple_unlock(
							    &anon->an_lock);
						else
							simple_unlock(
							    &uobj->vmobjlock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/*
				 * add block to cluster
				 */

				if (anon) {
					anon->an_swslot = swslot + swcpages;
				} else {
					result = uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
					if (result == -1) {
						p->flags &= ~PG_BUSY;
						UVM_PAGE_OWN(p, NULL);
						simple_unlock(&uobj->vmobjlock);
						continue;
					}
				}
				swpps[swcpages] = p;
				swcpages++;
			}
		} else {

			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}
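
		/*
		 * cluster size note: a swap cluster holds at most
		 * MAXBSIZE >> PAGE_SHIFT pages (e.g. 16 pages with a 64KB
		 * MAXBSIZE and 4KB pages).  slots that were allocated but
		 * never filled are returned to the swap pool below, just
		 * before the i/o is started.
		 */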

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */

		if (swap_backed) {
			if (p) {	/* if we just added a page to cluster */
				if (anon)
					simple_unlock(&anon->an_lock);
				else
					simple_unlock(&uobj->vmobjlock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}

			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: uobj (if !swap_backed), page queues
		 * OUT: !locked: pageqs, uobj
		 */

		/* locked: uobj (if !swap_backed), page queues */
		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
		/* unlocked: pageqs, uobj */

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * if the pageout failed, reactivate the page and continue.
		 */

		if (result == EIO && curproc == uvm.pagedaemon_proc) {
			uvm_lock_pageq();
			nextpg = TAILQ_NEXT(p, pageq);
			uvm_pageactivate(p);
			continue;
		}

		/*
		 * the pageout is in progress.  bump counters and set up
		 * for the next loop.
		 */

		uvm_lock_pageq();
		uvmexp.paging += npages;
		uvmexp.pdpending++;
		if (p) {
			if (p->pqflags & PQ_INACTIVE)
				nextpg = TAILQ_NEXT(p, pageq);
			else
				nextpg = TAILQ_FIRST(pglst);
		} else {
			nextpg = NULL;
		}
	}
	return (retval);
}
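
/*
 * note on the uvmexp.paging counter bumped above: it is decremented when
 * the write completes, by the aiodone daemon (see uvm_aiodone_daemon(),
 * which subtracts bp->b_bufsize >> PAGE_SHIFT for B_PDAEMON bufs).  this
 * is why "free + paging" is used as an estimate of how much memory will
 * be free once the pending pageouts finish.
 */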

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan()
{
	int s, free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	boolean_t got_it;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;		/* counter */
	uobj = NULL;

	/*
	 * get current "free" page count
	 */
	s = uvm_lock_fpageq();
	free = uvmexp.free;
	uvm_unlock_fpageq(s);

#ifndef __SWAP_BROKEN
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {
		uvmexp.pdswout++;
		UVMHIST_LOG(pdhist,"  free %d < target %d: swapout", free,
		    uvmexp.freetarg, 0, 0);
		uvm_unlock_pageq();
		uvm_swapout_threads();
		uvm_lock_pageq();

	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);

	/*
	 * alternate starting queue between swap and object based on the
	 * low bit of uvmexp.pdrevs (which we bump by one each call).
	 */

	got_it = FALSE;
	pages_freed = uvmexp.pdfreed;
	(void) uvmpd_scan_inactive(&uvm.page_inactive);
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */

	inactive_shortage = uvmexp.inactarg - uvmexp.inactive;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */

	swap_shortage = 0;
	if (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.swpginuse == uvmexp.swpages &&
	    uvmexp.swpgonly < uvmexp.swpages &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - uvmexp.free;
	}

	UVMHIST_LOG(pdhist, "  loop 2: inactive_shortage=%d swap_shortage=%d",
	    inactive_shortage, swap_shortage,0,0);
	for (p = TAILQ_FIRST(&uvm.page_active);
	     p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	     p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->flags & PG_BUSY)
			continue;	/* quick check before trying to lock */

		/*
		 * lock the page's owner.
		 */
		/* is page anon owned or ownerless? */
		if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
			KASSERT(p->uanon != NULL);
			if (!simple_lock_try(&p->uanon->an_lock))
				continue;

			/* take over the page? */
			if ((p->pqflags & PQ_ANON) == 0) {
				KASSERT(p->loan_count > 0);
				p->loan_count--;
				p->pqflags |= PQ_ANON;
			}
		} else {
			if (!simple_lock_try(&p->uobject->vmobjlock))
				continue;
		}

		/*
		 * skip this page if it's busy.
		 */

		if ((p->flags & PG_BUSY) != 0) {
			if (p->pqflags & PQ_ANON)
				simple_unlock(&p->uanon->an_lock);
			else
				simple_unlock(&p->uobject->vmobjlock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */

		if (swap_shortage > 0) {
			if ((p->pqflags & PQ_ANON) && p->uanon->an_swslot) {
				uvm_swap_free(p->uanon->an_swslot, 1);
				p->uanon->an_swslot = 0;
				p->flags &= ~PG_CLEAN;
				swap_shortage--;
			}
			if (p->pqflags & PQ_AOBJ) {
				int slot = uao_set_swslot(p->uobject,
				    p->offset >> PAGE_SHIFT, 0);
				if (slot) {
					uvm_swap_free(slot, 1);
					p->flags &= ~PG_CLEAN;
					swap_shortage--;
				}
			}
		}

		/*
		 * If we're short on inactive pages, move this over
		 * to the inactive list.  The second hand will sweep
		 * it later, and if it has been referenced again, it
		 * will be moved back to active.
		 */

		if (inactive_shortage > 0) {
			pmap_clear_reference(p);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}
		if (p->pqflags & PQ_ANON)
			simple_unlock(&p->uanon->an_lock);
		else
			simple_unlock(&p->uobject->vmobjlock);
	}
}
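
/*
 * note on the pmap_clear_reference() call in the loop above: it acts as
 * the first hand of the two-handed clock.  uvmpd_scan_inactive() is the
 * second hand: when it reaches the page it calls pmap_is_referenced(),
 * and a page that was touched in the meantime is moved back to the
 * active queue instead of being paged out.
 */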