/*	$OpenBSD: uvm_pdaemon.c,v 1.134 2025/01/25 08:55:52 mpi Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern unsigned long drmbackoff(long);
#endif

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define UVMPD_NUMDIRTYREACTS 16


/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *, int, int);
int		uvmpd_scan_inactive(struct uvm_pmalloc *, int);
void		uvmpd_scan_active(struct uvm_pmalloc *, int, int);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
int		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) >= 4)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}

/*
 * uvmpd_tune: tune paging parameters
 */
void
uvmpd_tune(void)
{
	int val;

	val = uvmexp.npages / 30;

	/* XXX:  what are these values good for? */
	val = max(val, (16*1024) >> PAGE_SHIFT);

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val;

	uvmexp.wiredmax = uvmexp.npages / 3;
}

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
struct uvm_pmalloc nowait_pma;

static inline int
uvmpd_pma_done(struct uvm_pmalloc *pma)
{
	if (pma == NULL || (pma->pm_flags & UVM_PMA_FREED))
		return 1;
	return 0;
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int shortage, inactive_shortage;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvmpd_tune();

	/*
	 * XXX realistically, this is what our nowait callers probably
	 * care about.
	 */
	nowait_pma.pm_constraint = dma_constraint;
	nowait_pma.pm_size = (16 << PAGE_SHIFT); /* XXX */
	nowait_pma.pm_flags = 0;

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (TAILQ_EMPTY(&uvm.pmr_control.allocs) || uvmexp.paging > 0) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			constraint = no_constraint;
		}
		/* How many pages do we need to free during this round? */
		shortage = uvmexp.freetarg -
		    (uvmexp.free + uvmexp.paging) + BUFPAGES_DEFICIT;
		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();
		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}
		inactive_shortage =
		    uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;
		uvm_unlock_pageq();

		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (shortage > 0)
			size += shortage;

		if (size == 0) {
			/*
			 * Since the inactive target just got updated
			 * above, both `size' and `inactive_shortage' can
			 * be 0.
			 */
			if (inactive_shortage) {
				uvm_lock_pageq();
				uvmpd_scan_active(NULL, 0, inactive_shortage);
				uvm_unlock_pageq();
			}
			continue;
		}

		/* Reclaim pages from the buffer cache if possible. */
		shortage -= bufbackoff(&constraint, size * 2);
#if NDRM > 0
		shortage -= drmbackoff(size * 2);
#endif
		if (shortage > 0)
			shortage -= uvm_pmr_cache_drain();

		/*
		 * scan if needed
		 */
		uvm_lock_pageq();
		if (!uvmpd_pma_done(pma) ||
		    (shortage > 0) || (inactive_shortage > 0)) {
			uvmpd_scan(pma, shortage, inactive_shortage);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel || uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma, pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_daemon:  main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, npages;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;
	KERNEL_UNLOCK();

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		npages = 0;
		KERNEL_LOCK();
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				npages += bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
		KERNEL_UNLOCK();

		uvm_lock_fpageq();
		atomic_sub_int(&uvmexp.paging, npages);
		wakeup(uvmexp.free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success. otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return 1 if the page had an associated swap slot.
 */
int
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;
	int slot, result = 0;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		result = 1;
	} else if (pg->pg_flags & PQ_AOBJ) {
		slot = uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
		if (slot)
			result = 1;
	}

	return result;
}

/*
 * Return 1 if the page `p' belongs to the memory range described by
 * `constraint', 0 otherwise.
 */
static inline int
uvmpd_match_constraint(struct vm_page *p,
    struct uvm_constraint_range *constraint)
{
	paddr_t paddr;

	paddr = atop(VM_PAGE_TO_PHYS(p));
	if (paddr >= constraint->ucr_low && paddr < constraint->ucr_high)
		return 1;

	return 0;
}

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we return the number of pages freed
 */
int
uvmpd_scan_inactive(struct uvm_pmalloc *pma, int shortage)
{
	struct pglist *pglst = &uvm.page_inactive;
	int result, freed = 0;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES];	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;			/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/*
	 * If a thread is waiting for us to release memory from a specific
	 * memory range start with the first page on the list that fits in
	 * it.
	 */
	TAILQ_FOREACH(p, pglst, pageq) {
		if (uvmpd_pma_done(pma) ||
		    uvmpd_match_constraint(p, &pma->pm_constraint))
			break;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
			if ((uvmpd_pma_done(pma) &&
			    (uvmexp.paging >= (shortage - freed))) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			/*
			 * If we are not short on memory and only interested
			 * in releasing pages from a given memory range, do not
			 * bother with other pages.
			 */
			if (uvmexp.paging >= (shortage - freed) &&
			    !uvmpd_pma_done(pma) &&
			    !uvmpd_match_constraint(p, &pma->pm_constraint))
				continue;

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				freed++;

				if (anon) {

					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */

					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if (uvmpd_pma_done(pma) &&
			    (uvmexp.paging > (shortage - freed))) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out
			 * since all pages in swap are only in swap.
			 * reactivate it so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
				uvmpd_dropswap(p);

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.  we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.  in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.  if that happens, our page "p"
		 * may no longer be on the inactive queue.  so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */

		if (result == VM_PAGER_PEND) {
			atomic_add_int(&uvmexp.paging, npages);
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.  we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).  do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}

	return freed;
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan(struct uvm_pmalloc *pma, int shortage, int inactive_shortage)
{
	int swap_shortage, pages_freed;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */


#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (shortage > 0) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		shortage -= uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */
	pages_freed = uvmpd_scan_inactive(pma, shortage);
	uvmexp.pdfreed += pages_freed;
	shortage -= pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 *
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	swap_shortage = 0;
	if ((shortage > 0) && uvm_swapisfilled() && !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = shortage;
	}

	uvmpd_scan_active(pma, swap_shortage, inactive_shortage);
}

void
uvmpd_scan_active(struct uvm_pmalloc *pma, int swap_shortage,
    int inactive_shortage)
{
	struct vm_page *p, *nextpg;
	struct rwlock *slock;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	for (p = TAILQ_FIRST(&uvm.page_active);
	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	    p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * If we couldn't release enough pages from a given memory
		 * range try to deactivate them first...
		 *
		 * ...unless we are low on swap slots, in which case we are
		 * probably OOM and want to release swap resources as quickly
		 * as possible.
		 */
		if (inactive_shortage > 0 && swap_shortage == 0 &&
		    !uvmpd_pma_done(pma) &&
		    !uvmpd_match_constraint(p, &pma->pm_constraint))
			continue;

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if (uvmpd_dropswap(p)) {
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object *uobj = p->uobject;

			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif