/*	$OpenBSD: uvm_pdaemon.c,v 1.134 2025/01/25 08:55:52 mpi Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern unsigned long drmbackoff(long);
#endif

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define UVMPD_NUMDIRTYREACTS 16

/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *, int, int);
int		uvmpd_scan_inactive(struct uvm_pmalloc *, int);
void		uvmpd_scan_active(struct uvm_pmalloc *, int, int);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
int		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
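		/*
		 * If the buffer cache can give back a handful of pages,
		 * take them and return rather than falling into the
		 * deadlock handling below.
		 */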
		if (bufbackoff(NULL, 4) >= 4)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.   but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}

/*
 * uvmpd_tune: tune paging parameters
 */
void
uvmpd_tune(void)
{
	int val;

	val = uvmexp.npages / 30;

	/* XXX:  what are these values good for? */
	val = max(val, (16*1024) >> PAGE_SHIFT);

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val;
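	/*
	 * Example with 4KB pages and 1GB of memory (npages == 262144):
	 * freemin == 262144 / 30 == 8738 pages (~34MB), and
	 * freetarg == (8738 * 4) / 3 == 11650 pages (~45MB).
	 */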

	uvmexp.wiredmax = uvmexp.npages / 3;
}

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
struct uvm_pmalloc nowait_pma;

static inline int
uvmpd_pma_done(struct uvm_pmalloc *pma)
{
	if (pma == NULL || (pma->pm_flags & UVM_PMA_FREED))
		return 1;
	return 0;
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int shortage, inactive_shortage;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvmpd_tune();

	/*
	 * XXX realistically, this is what our nowait callers probably
	 * care about.
	 */
	nowait_pma.pm_constraint = dma_constraint;
	nowait_pma.pm_size = (16 << PAGE_SHIFT); /* XXX */
	nowait_pma.pm_flags = 0;

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (TAILQ_EMPTY(&uvm.pmr_control.allocs) || uvmexp.paging > 0) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			constraint = no_constraint;
		}
		/* How many pages do we need to free during this round? */
		shortage = uvmexp.freetarg -
		    (uvmexp.free + uvmexp.paging) + BUFPAGES_DEFICIT;
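		/*
		 * A positive `shortage' means the free target is missed
		 * even counting in-flight pageouts (uvmexp.paging);
		 * BUFPAGES_DEFICIT adds the pages the buffer cache is
		 * below its low water mark, so we free on its behalf too.
		 */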
		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();
		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}
		inactive_shortage =
			uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;
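		/*
		 * The inactive target is a third of all active + inactive
		 * pages, but always above the free target; clean buffer
		 * cache pages (BUFPAGES_INACT) count as already inactive.
		 */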
		uvm_unlock_pageq();

		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (shortage > 0)
			size += shortage;

		if (size == 0) {
			/*
			 * Since the inactive target just got updated
			 * above, both `size' and `inactive_shortage' can
			 * be 0.
			 */
			if (inactive_shortage) {
				uvm_lock_pageq();
				uvmpd_scan_active(NULL, 0, inactive_shortage);
				uvm_unlock_pageq();
			}
			continue;
		}

		/* Reclaim pages from the buffer cache if possible. */
		shortage -= bufbackoff(&constraint, size * 2);
#if NDRM > 0
		shortage -= drmbackoff(size * 2);
#endif
		if (shortage > 0)
			shortage -= uvm_pmr_cache_drain();

		/*
		 * scan if needed
		 */
		uvm_lock_pageq();
		if (!uvmpd_pma_done(pma) ||
		    (shortage > 0) || (inactive_shortage > 0)) {
			uvmpd_scan(pma, shortage, inactive_shortage);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel || uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma, pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}

/*
 * uvm_aiodone_daemon:  main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, npages;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;
	KERNEL_UNLOCK();

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		npages = 0;
		KERNEL_LOCK();
		while (bp != NULL) {
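			/*
			 * Buffers marked B_PDAEMON were queued by the page
			 * daemon; tally their pages so uvmexp.paging can be
			 * decremented once after the loop.
			 */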
			if (bp->b_flags & B_PDAEMON) {
				npages += bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
		KERNEL_UNLOCK();

		uvm_lock_fpageq();
		atomic_sub_int(&uvmexp.paging, npages);
		wakeup(uvmexp.free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success.  otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

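	/*
	 * Callers may hold the page queue lock, which nests the wrong
	 * way around (the owner lock is normally taken first), so only
	 * try the lock and report failure instead of sleeping.
	 */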
	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return 1 if a page had an associated slot.
 */
int
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;
	int slot, result = 0;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		result = 1;
	} else if (pg->pg_flags & PQ_AOBJ) {
		slot = uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
		if (slot)
			result = 1;
	}

	return result;
}

/*
 * Return 1 if the page `p' belongs to the memory range described by
 * `constraint', 0 otherwise.
 */
static inline int
uvmpd_match_constraint(struct vm_page *p,
    struct uvm_constraint_range *constraint)
{
	paddr_t paddr;

	paddr = atop(VM_PAGE_TO_PHYS(p));
	if (paddr >= constraint->ucr_low && paddr < constraint->ucr_high)
		return 1;

	return 0;
}

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we return the number of pages freed
 */
int
uvmpd_scan_inactive(struct uvm_pmalloc *pma, int shortage)
{
	struct pglist *pglst = &uvm.page_inactive;
	int result, freed = 0;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES];	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;			/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/*
	 * If a thread is waiting for us to release memory from a specific
	 * memory range, start with the first page on the list that fits in
	 * it.
	 */
	TAILQ_FOREACH(p, pglst, pageq) {
		if (uvmpd_pma_done(pma) ||
		    uvmpd_match_constraint(p, &pma->pm_constraint))
			break;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
			if ((uvmpd_pma_done(pma) &&
			    (uvmexp.paging >= (shortage - freed))) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			/*
			 * If we are not short on memory and only interested
			 * in releasing pages from a given memory range, do not
			 * bother with other pages.
			 */
			if (uvmexp.paging >= (shortage - freed) &&
			    !uvmpd_pma_done(pma) &&
			    !uvmpd_match_constraint(p, &pma->pm_constraint))
				continue;

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.   if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				freed++;

				if (anon) {
					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */
					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if (uvmpd_pma_done(pma) &&
			    (uvmexp.paging > (shortage - freed))) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out:
			 * swap is full and every allocated slot backs a
			 * page that now lives only in swap.
			 * reactivate it so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
				uvmpd_dropswap(p);

			/*
			 * the page we are looking at is dirty.   we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.   we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */
		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.   in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.   if that happens, our page "p"
		 * may no longer be on the inactive queue.   so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */
		if (result == VM_PAGER_PEND) {
			atomic_add_int(&uvmexp.paging, npages);
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.   we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).   do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}

	return freed;
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */
void
uvmpd_scan(struct uvm_pmalloc *pma, int shortage, int inactive_shortage)
{
	int swap_shortage, pages_freed;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */

#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (shortage > 0) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		shortage -= uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.   first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */
	pages_freed = uvmpd_scan_inactive(pma, shortage);
	uvmexp.pdfreed += pages_freed;
	shortage -= pages_freed;

	/*
	 * we have done the scan to get free pages.   now we work on meeting
	 * our inactive target.
	 *
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	swap_shortage = 0;
	if ((shortage > 0) && uvm_swapisfilled() && !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = shortage;
	}

	uvmpd_scan_active(pma, swap_shortage, inactive_shortage);
}

void
uvmpd_scan_active(struct uvm_pmalloc *pma, int swap_shortage,
    int inactive_shortage)
{
	struct vm_page *p, *nextpg;
	struct rwlock *slock;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	for (p = TAILQ_FIRST(&uvm.page_active);
	     p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	     p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * If we couldn't release enough pages from a given memory
		 * range, try to deactivate them first...
		 *
		 * ...unless we are low on swap slots, in which case we are
		 * probably OOM and want to release swap resources as quickly
		 * as possible.
		 */
		if (inactive_shortage > 0 && swap_shortage == 0 &&
		    !uvmpd_pma_done(pma) &&
		    !uvmpd_match_constraint(p, &pma->pm_constraint))
			continue;

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if (uvmpd_dropswap(p)) {
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

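		/*
		 * only object-backed pages are handled here, since the
		 * code below relies on taking the object's vmobjlock.
		 */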
		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object *uobj = p->uobject;

			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.   if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif