/*	$NetBSD: uvm_pdaemon.c,v 1.101 2010/06/02 15:48:49 pooka Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.101 2010/06/02 15:48:49 pooka Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/module.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16

#define	UVMPD_NUMTRYLOCKOWNER	16

/*
 * local prototypes
 */

static void	uvmpd_scan(void);
static void	uvmpd_scan_queue(void);
static void	uvmpd_tune(void);

static unsigned int uvm_pagedaemon_waiters;

/*
 * XXX hack to avoid hangs when large processes fork.
 */
u_int uvm_extrapages;

static kmutex_t uvm_reclaim_lock;

SLIST_HEAD(uvm_reclaim_hooks, uvm_reclaim_hook) uvm_reclaim_list;

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	int timo = 0;

	mutex_spin_enter(&uvm_fpageqlock);

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo);
}

/*
 * uvm_kick_pdaemon: perform checks to determine if we need to
 * give the pagedaemon a nudge, and do so if necessary.
 *
 * => called with uvm_fpageqlock held.
 */

void
uvm_kick_pdaemon(void)
{

	KASSERT(mutex_owned(&uvm_fpageqlock));

	if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
	    (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
	     uvmpdpol_needsscan_p())) {
		wakeup(&uvm.pagedaemon);
	}
}
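/*
 * Illustrative caller pattern (a sketch only, not part of this file):
 * an allocation path that cannot make progress typically loops on
 * uvm_pagealloc(), consulting uvm_reclaimable() before blocking in
 * uvm_wait().  The wait message "foowait" is a hypothetical name.
 *
 *	struct vm_page *pg;
 *
 *	while ((pg = uvm_pagealloc(NULL, 0, NULL, 0)) == NULL) {
 *		if (!uvm_reclaimable())
 *			return ENOMEM;		(give up; see uvm_reclaimable)
 *		uvm_wait("foowait");		(sleep until pages are freed)
 *	}
 */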
/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

static void
uvmpd_tune(void)
{
	int val;

	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	/*
	 * try to keep 0.5% of available RAM free, but limit to between
	 * 128k and 1024k per-CPU.  XXX: what are these values good for?
	 */
	val = uvmexp.npages / 200;
	val = MAX(val, (128*1024) >> PAGE_SHIFT);
	val = MIN(val, (1024*1024) >> PAGE_SHIFT);
	val *= ncpu;

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}
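/*
 * Worked example for uvmpd_tune() above (illustrative numbers only,
 * assuming 4 KiB pages, 2 GiB of RAM, 4 CPUs and a small reserve_kernel):
 * npages = 524288, so npages / 200 = 2621 pages, which the MIN() clamps
 * to the 1024 KiB per-CPU cap of 256 pages; times 4 CPUs gives
 * freemin = 1024 pages (4 MiB), and freetarg = 1024 * 4 / 3 = 1365 pages
 * (plus any uvm_extrapages).
 */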
/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout(void *arg)
{
	int bufcnt, npages = 0;
	int extrapages = 0;
	struct pool *pp;
	uint64_t where;
	struct uvm_reclaim_hook *hook;

	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_lwp = curlwp;
	mutex_enter(&uvm_pageqlock);
	npages = uvmexp.npages;
	uvmpd_tune();
	mutex_exit(&uvm_pageqlock);

	/*
	 * main loop
	 */

	for (;;) {
		bool needsscan, needsfree;

		mutex_spin_enter(&uvm_fpageqlock);
		if (uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) {
			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvm_fpageqlock, false, "pgdaemon", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
		} else {
			mutex_spin_exit(&uvm_fpageqlock);
		}

		/*
		 * now lock page queues and recompute inactive count
		 */

		mutex_enter(&uvm_pageqlock);
		if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
			npages = uvmexp.npages;
			extrapages = uvm_extrapages;
			mutex_spin_enter(&uvm_fpageqlock);
			uvmpd_tune();
			mutex_spin_exit(&uvm_fpageqlock);
		}

		uvmpdpol_tune();

		/*
		 * Estimate a hint.  Note that bufmem is returned to the
		 * system only when an entire pool page is empty.
		 */
		mutex_spin_enter(&uvm_fpageqlock);
		bufcnt = uvmexp.freetarg - uvmexp.free;
		if (bufcnt < 0)
			bufcnt = 0;

		UVMHIST_LOG(pdhist,"  free/ftarg=%d/%d",
		    uvmexp.free, uvmexp.freetarg, 0,0);

		needsfree = uvmexp.free + uvmexp.paging < uvmexp.freetarg;
		needsscan = needsfree || uvmpdpol_needsscan_p();

		/*
		 * scan if needed
		 */
		if (needsscan) {
			mutex_spin_exit(&uvm_fpageqlock);
			uvmpd_scan();
			mutex_spin_enter(&uvm_fpageqlock);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
			uvm_pagedaemon_waiters = 0;
		}
		mutex_spin_exit(&uvm_fpageqlock);

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		mutex_exit(&uvm_pageqlock);

		/*
		 * if we don't need free memory, we're done.
		 */

		if (!needsfree)
			continue;

		/*
		 * start draining pool resources now that we're not
		 * holding any locks.
		 */
		pool_drain_start(&pp, &where);

		/*
		 * kill unused metadata buffers.
		 */
		mutex_enter(&bufcache_lock);
		buf_drain(bufcnt << PAGE_SHIFT);
		mutex_exit(&bufcache_lock);

		mutex_enter(&uvm_reclaim_lock);
		SLIST_FOREACH(hook, &uvm_reclaim_list, uvm_reclaim_next) {
			(*hook->uvm_reclaim_hook)();
		}
		mutex_exit(&uvm_reclaim_lock);

		/*
		 * complete draining the pools.
		 */
		pool_drain_end(pp, where);
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_worker: a workqueue callback for the aiodone daemon.
 */

void
uvm_aiodone_worker(struct work *wk, void *dummy)
{
	struct buf *bp = (void *)wk;

	KASSERT(&bp->b_work == wk);

	/*
	 * process an i/o that's done.
	 */

	(*bp->b_iodone)(bp);
}

void
uvm_pageout_start(int npages)
{

	mutex_spin_enter(&uvm_fpageqlock);
	uvmexp.paging += npages;
	mutex_spin_exit(&uvm_fpageqlock);
}

void
uvm_pageout_done(int npages)
{

	mutex_spin_enter(&uvm_fpageqlock);
	KASSERT(uvmexp.paging >= npages);
	uvmexp.paging -= npages;

	/*
	 * wake up either the pagedaemon or the LWPs waiting for it.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
		uvm_pagedaemon_waiters = 0;
	}
	mutex_spin_exit(&uvm_fpageqlock);
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => called with pageq locked.
 * => resolve an orphaned O->A loaned page.
 * => return the locked mutex on success.  otherwise, return NULL.
 */

kmutex_t *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	kmutex_t *slock;

	KASSERT(mutex_owned(&uvm_pageqlock));

	if (uobj != NULL) {
		slock = &uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = &anon->an_lock;
	}

	if (!mutex_tryenter(slock)) {
		return NULL;
	}

	if (uobj == NULL) {

		/*
		 * set PQ_ANON if it isn't set already.
		 */

		if ((pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
			/* anon now owns it */
		}
	}

	return slock;
}
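/*
 * A sizing note for the swap clustering code below (illustrative; the
 * actual values are configuration-dependent): with a MAXPHYS of 64 KiB
 * and 4 KiB pages, a cluster holds up to 64 KiB / 4 KiB = 16 pages, so
 * swc_pages has 16 entries and swapcluster_allocslots() asks the swap
 * layer for 16 contiguous slots at a time.
 */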
#if defined(VMSWAP)
struct swapcluster {
	int swc_slot;
	int swc_nallocated;
	int swc_nused;
	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
};

static void
swapcluster_init(struct swapcluster *swc)
{

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_allocslots(struct swapcluster *swc)
{
	int slot;
	int npages;

	if (swc->swc_slot != 0) {
		return 0;
	}

	/*
	 * Even with strange MAXPHYS, the shift
	 * implicitly rounds down to a page.
	 */
	npages = MAXPHYS >> PAGE_SHIFT;
	slot = uvm_swap_alloc(&npages, true);
	if (slot == 0) {
		return ENOMEM;
	}
	swc->swc_slot = slot;
	swc->swc_nallocated = npages;
	swc->swc_nused = 0;

	return 0;
}

static int
swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
{
	int slot;
	struct uvm_object *uobj;

	KASSERT(swc->swc_slot != 0);
	KASSERT(swc->swc_nused < swc->swc_nallocated);
	KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0);

	slot = swc->swc_slot + swc->swc_nused;
	uobj = pg->uobject;
	if (uobj == NULL) {
		KASSERT(mutex_owned(&pg->uanon->an_lock));
		pg->uanon->an_swslot = slot;
	} else {
		int result;

		KASSERT(mutex_owned(&uobj->vmobjlock));
		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
		if (result == -1) {
			return ENOMEM;
		}
	}
	swc->swc_pages[swc->swc_nused] = pg;
	swc->swc_nused++;

	return 0;
}

static void
swapcluster_flush(struct swapcluster *swc, bool now)
{
	int slot;
	int nused;
	int nallocated;
	int error;

	if (swc->swc_slot == 0) {
		return;
	}
	KASSERT(swc->swc_nused <= swc->swc_nallocated);

	slot = swc->swc_slot;
	nused = swc->swc_nused;
	nallocated = swc->swc_nallocated;

	/*
	 * if this is the final pageout we could have a few
	 * unused swap blocks.  if so, free them now.
	 */

	if (nused < nallocated) {
		if (!now) {
			return;
		}
		uvm_swap_free(slot + nused, nallocated - nused);
	}

	/*
	 * now start the pageout.
	 */

	if (nused > 0) {
		uvmexp.pdpageouts++;
		uvm_pageout_start(nused);
		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
		KASSERT(error == 0 || error == ENOMEM);
	}

	/*
	 * zero swslot to indicate that we are
	 * no longer building a swap-backed cluster.
	 */

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_nused(struct swapcluster *swc)
{

	return swc->swc_nused;
}
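/*
 * Typical life cycle of a swapcluster, as driven by uvmpd_scan_queue()
 * below (an illustrative sketch of the calling pattern, not extra API):
 *
 *	struct swapcluster swc;
 *
 *	swapcluster_init(&swc);
 *	while (scanning) {
 *		if (swapcluster_allocslots(&swc) != 0)
 *			...swap is full, reactivate the page instead...
 *		if (swapcluster_add(&swc, pg) != 0)
 *			...back the page out and reactivate it...
 *		swapcluster_flush(&swc, false);	   (only full clusters go out)
 *	}
 *	swapcluster_flush(&swc, true);	   (final flush of any partial cluster)
 */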
/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return true if a page had an associated slot.
 */

static bool
uvmpd_dropswap(struct vm_page *pg)
{
	bool result = false;
	struct vm_anon *anon = pg->uanon;

	if ((pg->pqflags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		pg->flags &= ~PG_CLEAN;
		result = true;
	} else if (pg->pqflags & PQ_AOBJ) {
		int slot = uao_set_swslot(pg->uobject,
		    pg->offset >> PAGE_SHIFT, 0);
		if (slot) {
			uvm_swap_free(slot, 1);
			pg->flags &= ~PG_CLEAN;
			result = true;
		}
	}

	return result;
}

/*
 * uvmpd_trydropswap: try to free any swap allocated to this page.
 *
 * => return true if a slot is successfully freed.
 */

bool
uvmpd_trydropswap(struct vm_page *pg)
{
	kmutex_t *slock;
	bool result;

	if ((pg->flags & PG_BUSY) != 0) {
		return false;
	}

	/*
	 * lock the page's owner.
	 */

	slock = uvmpd_trylockowner(pg);
	if (slock == NULL) {
		return false;
	}

	/*
	 * skip this page if it's busy.
	 */

	if ((pg->flags & PG_BUSY) != 0) {
		mutex_exit(slock);
		return false;
	}

	result = uvmpd_dropswap(pg);

	mutex_exit(slock);

	return result;
}

#endif /* defined(VMSWAP) */

/*
 * uvmpd_scan_queue: scan the replace-candidate list for pages
 * to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 */

static void
uvmpd_scan_queue(void)
{
	struct vm_page *p;
	struct uvm_object *uobj;
	struct vm_anon *anon;
#if defined(VMSWAP)
	struct swapcluster swc;
#endif /* defined(VMSWAP) */
	int dirtyreacts;
	int lockownerfail;
	kmutex_t *slock;
	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */

#if defined(VMSWAP)
	swapcluster_init(&swc);
#endif /* defined(VMSWAP) */

	dirtyreacts = 0;
	lockownerfail = 0;
	uvmpdpol_scaninit();

	while (/* CONSTCOND */ 1) {

		/*
		 * see if we've met the free target.
		 */

		if (uvmexp.free + uvmexp.paging
#if defined(VMSWAP)
		    + swapcluster_nused(&swc)
#endif /* defined(VMSWAP) */
		    >= uvmexp.freetarg << 2 ||
		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
			UVMHIST_LOG(pdhist,"  met free target: "
			    "exit loop", 0, 0, 0, 0);
			break;
		}

		p = uvmpdpol_selectvictim();
		if (p == NULL) {
			break;
		}
		KASSERT(uvmpdpol_pageisqueued_p(p));
		KASSERT(p->wire_count == 0);

		/*
		 * we are below target and have a new page to consider.
		 */

		anon = p->uanon;
		uobj = p->uobject;

		/*
		 * first we attempt to lock the object that this page
		 * belongs to.  if our attempt fails we skip on to
		 * the next page (no harm done).  it is important to
		 * "try" locking the object as we are locking in the
		 * wrong order (pageq -> object) and we don't want to
		 * deadlock.
		 *
		 * the only time we expect to see an ownerless page
		 * (i.e. a page with no uobject and !PQ_ANON) is if an
		 * anon has loaned a page from a uvm_object and the
		 * uvm_object has dropped the ownership.  in that
		 * case, the anon can "take over" the loaned page
		 * and make it its own.
		 */

		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			/*
			 * yield the cpu to give an LWP holding the lock a
			 * chance to run.  otherwise we can busy-loop for too
			 * long if the page queue is filled with many pages
			 * from only a few objects.
			 */
			lockownerfail++;
			if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) {
				mutex_exit(&uvm_pageqlock);
				/* XXX Better than yielding but inadequate. */
				kpause("livelock", false, 1, NULL);
				mutex_enter(&uvm_pageqlock);
				lockownerfail = 0;
			}
			continue;
		}
		if (p->flags & PG_BUSY) {
			mutex_exit(slock);
			uvmexp.pdbusy++;
			continue;
		}

		/* does the page belong to an object? */
		if (uobj != NULL) {
			uvmexp.pdobscan++;
		} else {
#if defined(VMSWAP)
			KASSERT(anon != NULL);
			uvmexp.pdanscan++;
#else /* defined(VMSWAP) */
			panic("%s: anon", __func__);
#endif /* defined(VMSWAP) */
		}


		/*
		 * we now have the object and the page queues locked.
		 * if the page is not swap-backed, call the object's
		 * pager to flush and free the page.
		 */

#if defined(READAHEAD_STATS)
		if ((p->pqflags & PQ_READAHEAD) != 0) {
			p->pqflags &= ~PQ_READAHEAD;
			uvm_ra_miss.ev_count++;
		}
#endif /* defined(READAHEAD_STATS) */

		if ((p->pqflags & PQ_SWAPBACKED) == 0) {
			KASSERT(uobj != NULL);
			mutex_exit(&uvm_pageqlock);
			(void) (uobj->pgops->pgo_put)(uobj, p->offset,
			    p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
			mutex_enter(&uvm_pageqlock);
			continue;
		}

		/*
		 * the page is swap-backed.  remove all the permissions
		 * from the page so we can sync the modified info
		 * without any race conditions.  if the page is clean
		 * we can free it now and continue.
		 */

		pmap_page_protect(p, VM_PROT_NONE);
		if ((p->flags & PG_CLEAN) && pmap_clear_modify(p)) {
			p->flags &= ~(PG_CLEAN);
		}
		if (p->flags & PG_CLEAN) {
			int slot;
			int pageidx;

			pageidx = p->offset >> PAGE_SHIFT;
			uvm_pagefree(p);
			uvmexp.pdfreed++;

			/*
			 * for anons, we need to remove the page
			 * from the anon ourselves.  for aobjs,
			 * pagefree did that for us.
			 */

			if (anon) {
				KASSERT(anon->an_swslot != 0);
				anon->an_page = NULL;
				slot = anon->an_swslot;
			} else {
				slot = uao_find_swslot(uobj, pageidx);
			}
			mutex_exit(slock);

			if (slot > 0) {
				/* this page is now only in swap. */
				mutex_enter(&uvm_swap_data_lock);
				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
				uvmexp.swpgonly++;
				mutex_exit(&uvm_swap_data_lock);
			}
			continue;
		}

#if defined(VMSWAP)
		/*
		 * this page is dirty, skip it if we'll have met our
		 * free target when all the current pageouts complete.
		 */

		if (uvmexp.free + uvmexp.paging > uvmexp.freetarg << 2) {
			mutex_exit(slock);
			continue;
		}

		/*
		 * free any swap space allocated to the page since
		 * we'll have to write it again with its new data.
		 */

		uvmpd_dropswap(p);

		/*
		 * start new swap pageout cluster (if necessary).
		 *
		 * if swap is full reactivate this page so that
		 * we eventually cycle all pages through the
		 * inactive queue.
		 */

		if (swapcluster_allocslots(&swc)) {
			dirtyreacts++;
			uvm_pageactivate(p);
			mutex_exit(slock);
			continue;
		}

		/*
		 * at this point, we're definitely going to reuse this
		 * page.  mark the page busy and delayed-free.
		 * we should remove the page from the page queues
		 * so we don't ever look at it again.
		 * adjust counters and such.
		 */

		p->flags |= PG_BUSY;
		UVM_PAGE_OWN(p, "scan_queue");

		p->flags |= PG_PAGEOUT;
		uvm_pagedequeue(p);

		uvmexp.pgswapout++;
		mutex_exit(&uvm_pageqlock);

		/*
		 * add the new page to the cluster.
		 */

		if (swapcluster_add(&swc, p)) {
			p->flags &= ~(PG_BUSY|PG_PAGEOUT);
			UVM_PAGE_OWN(p, NULL);
			mutex_enter(&uvm_pageqlock);
			dirtyreacts++;
			uvm_pageactivate(p);
			mutex_exit(slock);
			continue;
		}
		mutex_exit(slock);

		swapcluster_flush(&swc, false);
		mutex_enter(&uvm_pageqlock);

		/*
		 * the pageout is in progress.  bump counters and set up
		 * for the next loop.
		 */

		uvmexp.pdpending++;

#else /* defined(VMSWAP) */
		uvm_pageactivate(p);
		mutex_exit(slock);
#endif /* defined(VMSWAP) */
	}

#if defined(VMSWAP)
	mutex_exit(&uvm_pageqlock);
	swapcluster_flush(&swc, true);
	mutex_enter(&uvm_pageqlock);
#endif /* defined(VMSWAP) */
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

static void
uvmpd_scan(void)
{
	int swap_shortage, pages_freed;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;

	/*
	 * work on meeting our targets.  first we work on our free target
	 * by converting inactive pages into free pages.  then we work on
	 * meeting our inactive target by converting active pages to
	 * inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_queue();
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */

	swap_shortage = 0;
	if (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.swpginuse >= uvmexp.swpgavail &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - uvmexp.free;
	}

	uvmpdpol_balancequeue(swap_shortage);

	/*
	 * if still below the minimum target, try unloading kernel
	 * modules.
	 */

	if (uvmexp.free < uvmexp.freemin) {
		module_thread_kick();
	}
}

/*
 * uvm_reclaimable: decide whether to wait for the pagedaemon.
 *
 * => return true if it seems worth it to do uvm_wait.
 *
 * XXX should be tunable.
 * XXX should consider pools, etc?
 */

bool
uvm_reclaimable(void)
{
	int filepages;
	int active, inactive;

	/*
	 * if swap is not full, no problem.
	 */

	if (!uvm_swapisfull()) {
		return true;
	}

	/*
	 * file-backed pages can be reclaimed even when swap is full.
	 * if we have more than 1/16 of pageable memory or 5MB, try to reclaim.
	 *
	 * XXX assume the worst case, ie. all wired pages are file-backed.
	 *
	 * XXX should consider other reclaimable memory,
	 * XXX ie. pools, traditional buffer cache.
	 */

	filepages = uvmexp.filepages + uvmexp.execpages - uvmexp.wired;
	uvm_estimatepageable(&active, &inactive);
	if (filepages >= MIN((active + inactive) >> 4,
	    5 * 1024 * 1024 >> PAGE_SHIFT)) {
		return true;
	}

	/*
	 * kill the process, fail allocation, etc..
	 */

	return false;
}
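/*
 * Worked example for the uvm_reclaimable() threshold above (illustrative,
 * assuming 4 KiB pages): the 5MB term is 5 * 1024 * 1024 >> 12 = 1280
 * pages.  With, say, 100000 pageable pages, (active + inactive) >> 4 is
 * 6250 pages, so the MIN() picks 1280 and roughly 5MB worth of
 * file-backed pages is enough to keep waiting on the pagedaemon.
 */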
void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}

void
uvm_reclaim_init(void)
{

	/* Initialize UVM reclaim hooks. */
	mutex_init(&uvm_reclaim_lock, MUTEX_DEFAULT, IPL_NONE);
	SLIST_INIT(&uvm_reclaim_list);
}

void
uvm_reclaim_hook_add(struct uvm_reclaim_hook *hook)
{

	KASSERT(hook != NULL);

	mutex_enter(&uvm_reclaim_lock);
	SLIST_INSERT_HEAD(&uvm_reclaim_list, hook, uvm_reclaim_next);
	mutex_exit(&uvm_reclaim_lock);
}

void
uvm_reclaim_hook_del(struct uvm_reclaim_hook *hook_entry)
{
	struct uvm_reclaim_hook *hook;

	KASSERT(hook_entry != NULL);

	mutex_enter(&uvm_reclaim_lock);
	SLIST_FOREACH(hook, &uvm_reclaim_list, uvm_reclaim_next) {
		if (hook != hook_entry) {
			continue;
		}

		SLIST_REMOVE(&uvm_reclaim_list, hook, uvm_reclaim_hook,
		    uvm_reclaim_next);
		break;
	}

	mutex_exit(&uvm_reclaim_lock);
}
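/*
 * Illustrative use of the reclaim hook interface above (a sketch;
 * "foo_reclaim" and "foo_hook" are hypothetical names).  The hook
 * function is called with no arguments from the pagedaemon's main loop,
 * with uvm_reclaim_lock held but without the page queue locks:
 *
 *	static void foo_reclaim(void);
 *
 *	static struct uvm_reclaim_hook foo_hook = {
 *		.uvm_reclaim_hook = foo_reclaim,
 *	};
 *
 *	uvm_reclaim_hook_add(&foo_hook);
 *	...
 *	uvm_reclaim_hook_del(&foo_hook);
 */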