xref: /netbsd-src/sys/uvm/uvm_pdpolicy_clock.c (revision 06ddeb9f13adf7283ddbc0ff517ec4b59ab5f739)
1*06ddeb9fSandvar /*	$NetBSD: uvm_pdpolicy_clock.c,v 1.40 2022/04/12 20:27:56 andvar Exp $	*/
29d3e3eabSyamt /*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/
39d3e3eabSyamt 
494843b13Sad /*-
59d385320Sad  * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
694843b13Sad  * All rights reserved.
794843b13Sad  *
894843b13Sad  * This code is derived from software contributed to The NetBSD Foundation
994843b13Sad  * by Andrew Doran.
1094843b13Sad  *
1194843b13Sad  * Redistribution and use in source and binary forms, with or without
1294843b13Sad  * modification, are permitted provided that the following conditions
1394843b13Sad  * are met:
1494843b13Sad  * 1. Redistributions of source code must retain the above copyright
1594843b13Sad  *    notice, this list of conditions and the following disclaimer.
1694843b13Sad  * 2. Redistributions in binary form must reproduce the above copyright
1794843b13Sad  *    notice, this list of conditions and the following disclaimer in the
1894843b13Sad  *    documentation and/or other materials provided with the distribution.
1994843b13Sad  *
2094843b13Sad  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
2194843b13Sad  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
2294843b13Sad  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2394843b13Sad  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
2494843b13Sad  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2594843b13Sad  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2694843b13Sad  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2794843b13Sad  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2894843b13Sad  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2994843b13Sad  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
3094843b13Sad  * POSSIBILITY OF SUCH DAMAGE.
3194843b13Sad  */
3294843b13Sad 
339d3e3eabSyamt /*
349d3e3eabSyamt  * Copyright (c) 1997 Charles D. Cranor and Washington University.
359d3e3eabSyamt  * Copyright (c) 1991, 1993, The Regents of the University of California.
369d3e3eabSyamt  *
379d3e3eabSyamt  * All rights reserved.
389d3e3eabSyamt  *
399d3e3eabSyamt  * This code is derived from software contributed to Berkeley by
409d3e3eabSyamt  * The Mach Operating System project at Carnegie-Mellon University.
419d3e3eabSyamt  *
429d3e3eabSyamt  * Redistribution and use in source and binary forms, with or without
439d3e3eabSyamt  * modification, are permitted provided that the following conditions
449d3e3eabSyamt  * are met:
459d3e3eabSyamt  * 1. Redistributions of source code must retain the above copyright
469d3e3eabSyamt  *    notice, this list of conditions and the following disclaimer.
479d3e3eabSyamt  * 2. Redistributions in binary form must reproduce the above copyright
489d3e3eabSyamt  *    notice, this list of conditions and the following disclaimer in the
499d3e3eabSyamt  *    documentation and/or other materials provided with the distribution.
5040ec801aSchuck  * 3. Neither the name of the University nor the names of its contributors
519d3e3eabSyamt  *    may be used to endorse or promote products derived from this software
529d3e3eabSyamt  *    without specific prior written permission.
539d3e3eabSyamt  *
549d3e3eabSyamt  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
559d3e3eabSyamt  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
569d3e3eabSyamt  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
579d3e3eabSyamt  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
589d3e3eabSyamt  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
599d3e3eabSyamt  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
609d3e3eabSyamt  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
619d3e3eabSyamt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
629d3e3eabSyamt  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
639d3e3eabSyamt  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
649d3e3eabSyamt  * SUCH DAMAGE.
659d3e3eabSyamt  *
669d3e3eabSyamt  *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
679d3e3eabSyamt  * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
689d3e3eabSyamt  *
699d3e3eabSyamt  *
709d3e3eabSyamt  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
719d3e3eabSyamt  * All rights reserved.
729d3e3eabSyamt  *
739d3e3eabSyamt  * Permission to use, copy, modify and distribute this software and
749d3e3eabSyamt  * its documentation is hereby granted, provided that both the copyright
759d3e3eabSyamt  * notice and this permission notice appear in all copies of the
769d3e3eabSyamt  * software, derivative works or modified versions, and any portions
779d3e3eabSyamt  * thereof, and that both notices appear in supporting documentation.
789d3e3eabSyamt  *
799d3e3eabSyamt  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
809d3e3eabSyamt  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
819d3e3eabSyamt  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
829d3e3eabSyamt  *
839d3e3eabSyamt  * Carnegie Mellon requests users of this software to return to
849d3e3eabSyamt  *
859d3e3eabSyamt  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
869d3e3eabSyamt  *  School of Computer Science
879d3e3eabSyamt  *  Carnegie Mellon University
889d3e3eabSyamt  *  Pittsburgh PA 15213-3890
899d3e3eabSyamt  *
909d3e3eabSyamt  * any improvements or extensions that they make and grant Carnegie the
919d3e3eabSyamt  * rights to redistribute these changes.
929d3e3eabSyamt  */
939d3e3eabSyamt 
949d3e3eabSyamt #if defined(PDSIM)
959d3e3eabSyamt 
969d3e3eabSyamt #include "pdsim.h"
979d3e3eabSyamt 
989d3e3eabSyamt #else /* defined(PDSIM) */
999d3e3eabSyamt 
1009d3e3eabSyamt #include <sys/cdefs.h>
101*06ddeb9fSandvar __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.40 2022/04/12 20:27:56 andvar Exp $");
1029d3e3eabSyamt 
1039d3e3eabSyamt #include <sys/param.h>
1049d3e3eabSyamt #include <sys/proc.h>
1059d3e3eabSyamt #include <sys/systm.h>
1069d3e3eabSyamt #include <sys/kernel.h>
10794843b13Sad #include <sys/kmem.h>
108c3c98c15Smlelstv #include <sys/atomic.h>
1099d3e3eabSyamt 
1109d3e3eabSyamt #include <uvm/uvm.h>
1119d3e3eabSyamt #include <uvm/uvm_pdpolicy.h>
1129d3e3eabSyamt #include <uvm/uvm_pdpolicy_impl.h>
1135978ddc6Sad #include <uvm/uvm_stat.h>
1149d3e3eabSyamt 
1159d3e3eabSyamt #endif /* defined(PDSIM) */
1169d3e3eabSyamt 
11794843b13Sad /*
11894843b13Sad  * per-CPU queue of pending page status changes.  128 entries makes for a
11994843b13Sad  * 1kB queue on _LP64 and has been found to be a reasonable compromise that
12094843b13Sad  * keeps lock contention events and wait times low, while not using too much
12194843b13Sad  * memory nor allowing global state to fall too far behind.
12294843b13Sad  */
12394843b13Sad #if !defined(CLOCK_PDQ_SIZE)
12494843b13Sad #define	CLOCK_PDQ_SIZE	128
12594843b13Sad #endif /* !defined(CLOCK_PDQ_SIZE) */
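
/*
 * Sizing sketch (illustrative, assuming 8-byte pointers as on _LP64):
 *
 *	CLOCK_PDQ_SIZE * sizeof(struct vm_page *) = 128 * 8 = 1024 bytes
 */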
12694843b13Sad 
12794843b13Sad #define PQ_INACTIVE	0x00000010	/* page is in inactive list */
12894843b13Sad #define PQ_ACTIVE	0x00000020	/* page is in active list */
1299d3e3eabSyamt 
1309d3e3eabSyamt #if !defined(CLOCK_INACTIVEPCT)
1319d3e3eabSyamt #define	CLOCK_INACTIVEPCT	33
1329d3e3eabSyamt #endif /* !defined(CLOCK_INACTIVEPCT) */
1339d3e3eabSyamt 
1349d3e3eabSyamt struct uvmpdpol_globalstate {
1355978ddc6Sad 	kmutex_t lock;			/* lock on state */
1365978ddc6Sad 					/* <= compiler pads here */
1375978ddc6Sad 	struct pglist s_activeq		/* allocated pages, in use */
1385978ddc6Sad 	    __aligned(COHERENCY_UNIT);
1399d3e3eabSyamt 	struct pglist s_inactiveq;	/* pages between the clock hands */
1409d3e3eabSyamt 	int s_active;
1419d3e3eabSyamt 	int s_inactive;
1429d3e3eabSyamt 	int s_inactarg;
1439d3e3eabSyamt 	struct uvm_pctparam s_anonmin;
1449d3e3eabSyamt 	struct uvm_pctparam s_filemin;
1459d3e3eabSyamt 	struct uvm_pctparam s_execmin;
1469d3e3eabSyamt 	struct uvm_pctparam s_anonmax;
1479d3e3eabSyamt 	struct uvm_pctparam s_filemax;
1489d3e3eabSyamt 	struct uvm_pctparam s_execmax;
1499d3e3eabSyamt 	struct uvm_pctparam s_inactivepct;
1509d3e3eabSyamt };
1519d3e3eabSyamt 
1529d3e3eabSyamt struct uvmpdpol_scanstate {
153712239e3Sthorpej 	bool ss_anonreact, ss_filereact, ss_execreact;
1549344a595Sad 	struct vm_page ss_marker;
1559d3e3eabSyamt };
1569d3e3eabSyamt 
1575978ddc6Sad static void	uvmpdpol_pageactivate_locked(struct vm_page *);
1585978ddc6Sad static void	uvmpdpol_pagedeactivate_locked(struct vm_page *);
1595978ddc6Sad static void	uvmpdpol_pagedequeue_locked(struct vm_page *);
16094843b13Sad static bool	uvmpdpol_pagerealize_locked(struct vm_page *);
16194843b13Sad static struct uvm_cpu *uvmpdpol_flush(void);
1625978ddc6Sad 
1635978ddc6Sad static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
1649d3e3eabSyamt static struct uvmpdpol_scanstate pdpol_scanstate;
1659d3e3eabSyamt 
1669d3e3eabSyamt PDPOL_EVCNT_DEFINE(reactexec)
1679d3e3eabSyamt PDPOL_EVCNT_DEFINE(reactfile)
1689d3e3eabSyamt PDPOL_EVCNT_DEFINE(reactanon)
1699d3e3eabSyamt 
1709d3e3eabSyamt static void
1719d3e3eabSyamt clock_tune(void)
1729d3e3eabSyamt {
1739d3e3eabSyamt 	struct uvmpdpol_globalstate *s = &pdpol_state;
1749d3e3eabSyamt 
1759d3e3eabSyamt 	s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
1769d3e3eabSyamt 	    s->s_active + s->s_inactive);
1779d3e3eabSyamt 	if (s->s_inactarg <= uvmexp.freetarg) {
1789d3e3eabSyamt 		s->s_inactarg = uvmexp.freetarg + 1;
1799d3e3eabSyamt 	}
1809d3e3eabSyamt }
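
/*
 * Worked example for clock_tune() (hypothetical figures): with
 * s_active = 6000, s_inactive = 3000 and the default CLOCK_INACTIVEPCT
 * of 33, s_inactarg becomes 33% of 9000 = 2970.  If uvmexp.freetarg
 * were 3000, s_inactarg would instead be bumped to freetarg + 1 = 3001.
 */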
1819d3e3eabSyamt 
1829d3e3eabSyamt void
1839d3e3eabSyamt uvmpdpol_scaninit(void)
1849d3e3eabSyamt {
1859d3e3eabSyamt 	struct uvmpdpol_globalstate *s = &pdpol_state;
1869d3e3eabSyamt 	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
1879d3e3eabSyamt 	int t;
188712239e3Sthorpej 	bool anonunder, fileunder, execunder;
189712239e3Sthorpej 	bool anonover, fileover, execover;
190712239e3Sthorpej 	bool anonreact, filereact, execreact;
191a98966d3Sad 	int64_t freepg, anonpg, filepg, execpg;
1929d3e3eabSyamt 
1939d3e3eabSyamt 	/*
1949d3e3eabSyamt 	 * decide which types of pages we want to reactivate instead of freeing
1959d3e3eabSyamt 	 * to keep usage within the minimum and maximum usage limits.
196ba90a6baSad 	 * uvm_availmem() will sync the counters.
1979d3e3eabSyamt 	 */
1989d3e3eabSyamt 
1994b8a875aSad 	freepg = uvm_availmem(false);
200ba90a6baSad 	anonpg = cpu_count_get(CPU_COUNT_ANONCLEAN) +
201ba90a6baSad 	    cpu_count_get(CPU_COUNT_ANONDIRTY) +
202ba90a6baSad 	    cpu_count_get(CPU_COUNT_ANONUNKNOWN);
203a98966d3Sad 	execpg = cpu_count_get(CPU_COUNT_EXECPAGES);
204ba90a6baSad 	filepg = cpu_count_get(CPU_COUNT_FILECLEAN) +
205ba90a6baSad 	    cpu_count_get(CPU_COUNT_FILEDIRTY) +
206ba90a6baSad 	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) -
207ba90a6baSad 	    execpg;
208a98966d3Sad 
2095978ddc6Sad 	mutex_enter(&s->lock);
210a98966d3Sad 	t = s->s_active + s->s_inactive + freepg;
211a98966d3Sad 	anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
212a98966d3Sad 	fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
213a98966d3Sad 	execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
214a98966d3Sad 	anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
215a98966d3Sad 	fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
216a98966d3Sad 	execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
2179d3e3eabSyamt 	anonreact = anonunder || (!anonover && (fileover || execover));
2189d3e3eabSyamt 	filereact = fileunder || (!fileover && (anonover || execover));
2199d3e3eabSyamt 	execreact = execunder || (!execover && (anonover || fileover));
2209d3e3eabSyamt 	if (filereact && execreact && (anonreact || uvm_swapisfull())) {
221b3667adaSthorpej 		anonreact = filereact = execreact = false;
2229d3e3eabSyamt 	}
2239d3e3eabSyamt 	ss->ss_anonreact = anonreact;
2249d3e3eabSyamt 	ss->ss_filereact = filereact;
2259d3e3eabSyamt 	ss->ss_execreact = execreact;
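
	/*
	 * illustrative reading of the rules above (hypothetical figures):
	 * with t = 10000 and anonmin at its default of 10%, anonpg = 800
	 * puts anon under its minimum, so anonreact is set and inactive
	 * anon pages are reactivated rather than freed.  a type over its
	 * maximum is not reactivated, making it the preferred source of
	 * pages to reclaim.
	 */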
2269344a595Sad 	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
2279344a595Sad 	ss->ss_marker.flags = PG_MARKER;
2289344a595Sad 	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
2299344a595Sad 	mutex_exit(&s->lock);
2309344a595Sad }
2319d3e3eabSyamt 
2329344a595Sad void
2339344a595Sad uvmpdpol_scanfini(void)
2349344a595Sad {
2359344a595Sad 	struct uvmpdpol_globalstate *s = &pdpol_state;
2369344a595Sad 	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
2379344a595Sad 
2389344a595Sad 	mutex_enter(&s->lock);
2399344a595Sad 	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
2405978ddc6Sad 	mutex_exit(&s->lock);
2419d3e3eabSyamt }
2429d3e3eabSyamt 
2439d3e3eabSyamt struct vm_page *
244d2a0ebb6Sad uvmpdpol_selectvictim(krwlock_t **plock)
2459d3e3eabSyamt {
2465978ddc6Sad 	struct uvmpdpol_globalstate *s = &pdpol_state;
2479d3e3eabSyamt 	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
2489d3e3eabSyamt 	struct vm_page *pg;
249d2a0ebb6Sad 	krwlock_t *lock;
2509d3e3eabSyamt 
2515978ddc6Sad 	mutex_enter(&s->lock);
2529d3e3eabSyamt 	while (/* CONSTCOND */ 1) {
2539d3e3eabSyamt 		struct vm_anon *anon;
2549d3e3eabSyamt 		struct uvm_object *uobj;
2559d3e3eabSyamt 
2569344a595Sad 		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
2579d3e3eabSyamt 		if (pg == NULL) {
2589d3e3eabSyamt 			break;
2599d3e3eabSyamt 		}
2609344a595Sad 		KASSERT((pg->flags & PG_MARKER) == 0);
2619d3e3eabSyamt 		uvmexp.pdscans++;
2629d3e3eabSyamt 
2639d3e3eabSyamt 		/*
264*06ddeb9fSandvar 		 * acquire interlock to stabilize page identity.
2655978ddc6Sad 		 * if we have caught the page in a state of flux
26694843b13Sad 		 * deal with it and retry.
2679d3e3eabSyamt 		 */
2685978ddc6Sad 		mutex_enter(&pg->interlock);
26994843b13Sad 		if (uvmpdpol_pagerealize_locked(pg)) {
2705978ddc6Sad 			mutex_exit(&pg->interlock);
2719d3e3eabSyamt 			continue;
2729d3e3eabSyamt 		}
2739d3e3eabSyamt 
2749d3e3eabSyamt 		/*
2759344a595Sad 		 * now prepare to move on to the next page.
2769344a595Sad 		 */
2779344a595Sad 		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
2789344a595Sad 		    pdqueue);
2799344a595Sad 		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
2809344a595Sad 		    &ss->ss_marker, pdqueue);
2819344a595Sad 
2829344a595Sad 		/*
2839d3e3eabSyamt 		 * enforce the minimum thresholds on different
2849d3e3eabSyamt 		 * types of memory usage.  if reusing the current
2859d3e3eabSyamt 		 * page would reduce that type of usage below its
2869d3e3eabSyamt 		 * minimum, reactivate the page instead and move
2879d3e3eabSyamt 		 * on to the next page.
2889d3e3eabSyamt 		 */
2895978ddc6Sad 		anon = pg->uanon;
2905978ddc6Sad 		uobj = pg->uobject;
2919d3e3eabSyamt 		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
2925978ddc6Sad 			uvmpdpol_pageactivate_locked(pg);
29394843b13Sad 			mutex_exit(&pg->interlock);
2949d3e3eabSyamt 			PDPOL_EVCNT_INCR(reactexec);
2959d3e3eabSyamt 			continue;
2969d3e3eabSyamt 		}
2979d3e3eabSyamt 		if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
2989d3e3eabSyamt 		    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
2995978ddc6Sad 			uvmpdpol_pageactivate_locked(pg);
30094843b13Sad 			mutex_exit(&pg->interlock);
3019d3e3eabSyamt 			PDPOL_EVCNT_INCR(reactfile);
3029d3e3eabSyamt 			continue;
3039d3e3eabSyamt 		}
3049d3e3eabSyamt 		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
3055978ddc6Sad 			uvmpdpol_pageactivate_locked(pg);
30694843b13Sad 			mutex_exit(&pg->interlock);
3079d3e3eabSyamt 			PDPOL_EVCNT_INCR(reactanon);
3089d3e3eabSyamt 			continue;
3099d3e3eabSyamt 		}
3109d3e3eabSyamt 
3115978ddc6Sad 		/*
3125978ddc6Sad 		 * try to lock the object that owns the page.
3135978ddc6Sad 		 *
3145978ddc6Sad 		 * with the page interlock held, we can drop s->lock, which
3155978ddc6Sad 		 * could otherwise serve as a barrier to us getting the
3165978ddc6Sad 		 * object locked, because the owner of the object's lock may
3175978ddc6Sad 		 * be blocked on s->lock (i.e. a deadlock).
3185978ddc6Sad 		 *
3195978ddc6Sad 		 * whatever happens, uvmpd_trylockowner() will release the
3205978ddc6Sad 		 * interlock.  with the interlock dropped we can then
3215978ddc6Sad 		 * re-acquire our own lock.  the order is:
3225978ddc6Sad 		 *
3235978ddc6Sad 		 *	object -> pdpol -> interlock.
3245978ddc6Sad 	         */
3255978ddc6Sad 	        mutex_exit(&s->lock);
3265978ddc6Sad         	lock = uvmpd_trylockowner(pg);
3275978ddc6Sad         	/* pg->interlock now released */
3285978ddc6Sad         	mutex_enter(&s->lock);
3295978ddc6Sad 		if (lock == NULL) {
3305978ddc6Sad 			/* didn't get it - try the next page. */
3315978ddc6Sad 			continue;
3329d3e3eabSyamt 		}
3339d3e3eabSyamt 
3345978ddc6Sad 		/*
3355978ddc6Sad 		 * move referenced pages back to active queue and skip to
3365978ddc6Sad 		 * next page.
3375978ddc6Sad 		 */
3385978ddc6Sad 		if (pmap_is_referenced(pg)) {
33994843b13Sad 			mutex_enter(&pg->interlock);
3405978ddc6Sad 			uvmpdpol_pageactivate_locked(pg);
34194843b13Sad 			mutex_exit(&pg->interlock);
3425978ddc6Sad 			uvmexp.pdreact++;
343d2a0ebb6Sad 			rw_exit(lock);
3445978ddc6Sad 			continue;
3455978ddc6Sad 		}
3465978ddc6Sad 
3475978ddc6Sad 		/* we have a potential victim. */
3485978ddc6Sad 		*plock = lock;
3495978ddc6Sad 		break;
3505978ddc6Sad 	}
3515978ddc6Sad 	mutex_exit(&s->lock);
3529d3e3eabSyamt 	return pg;
3539d3e3eabSyamt }
3549d3e3eabSyamt 
3559d3e3eabSyamt void
3569d3e3eabSyamt uvmpdpol_balancequeue(int swap_shortage)
3579d3e3eabSyamt {
3585978ddc6Sad 	struct uvmpdpol_globalstate *s = &pdpol_state;
3599d3e3eabSyamt 	int inactive_shortage;
3609344a595Sad 	struct vm_page *p, marker;
361d2a0ebb6Sad 	krwlock_t *lock;
3629d3e3eabSyamt 
3639d3e3eabSyamt 	/*
3649d3e3eabSyamt 	 * we have done the scan to get free pages.   now we work on meeting
3659d3e3eabSyamt 	 * our inactive target.
3669d3e3eabSyamt 	 */
3679d3e3eabSyamt 
3689344a595Sad 	memset(&marker, 0, sizeof(marker));
3699344a595Sad 	marker.flags = PG_MARKER;
3709344a595Sad 
3715978ddc6Sad 	mutex_enter(&s->lock);
3729344a595Sad 	TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
3739344a595Sad 	for (;;) {
3749344a595Sad 		inactive_shortage =
3759344a595Sad 		    pdpol_state.s_inactarg - pdpol_state.s_inactive;
3769344a595Sad 		if (inactive_shortage <= 0 && swap_shortage <= 0) {
3779344a595Sad 			break;
3789d3e3eabSyamt 		}
3799344a595Sad 		p = TAILQ_NEXT(&marker, pdqueue);
3809344a595Sad 		if (p == NULL) {
3819344a595Sad 			break;
3829d3e3eabSyamt 		}
3839344a595Sad 		KASSERT((p->flags & PG_MARKER) == 0);
384e225b7bdSrmind 
3855978ddc6Sad 		/*
386*06ddeb9fSandvar 		 * acquire interlock to stabilize page identity.
3875978ddc6Sad 		 * if we have caught the page in a state of flux
38894843b13Sad 		 * deal with it and retry.
3895978ddc6Sad 		 */
3905978ddc6Sad 		mutex_enter(&p->interlock);
39194843b13Sad 		if (uvmpdpol_pagerealize_locked(p)) {
3925978ddc6Sad 			mutex_exit(&p->interlock);
3935978ddc6Sad 			continue;
3945978ddc6Sad 		}
3959344a595Sad 
3969344a595Sad 		/*
3979344a595Sad 		 * now prepare to move on to the next page.
3989344a595Sad 		 */
3999344a595Sad 		TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
4009344a595Sad 		TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
4019344a595Sad 		    pdqueue);
4029344a595Sad 
4039344a595Sad 		/*
4049344a595Sad 		 * try to lock the object that owns the page.  see comments
4059344a595Sad 		 * in uvmpdpol_selectvictim().
4069344a595Sad 	         */
4075978ddc6Sad 	        mutex_exit(&s->lock);
408e225b7bdSrmind         	lock = uvmpd_trylockowner(p);
4095978ddc6Sad         	/* p->interlock now released */
4105978ddc6Sad         	mutex_enter(&s->lock);
4119344a595Sad 		if (lock == NULL) {
4129344a595Sad 			/* didn't get it - try the next page. */
4139344a595Sad 			continue;
4149344a595Sad 		}
4159344a595Sad 
4169344a595Sad 		/*
4179344a595Sad 		 * if there's a shortage of swap slots, try to free it.
4189344a595Sad 		 */
4199344a595Sad 		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
4209344a595Sad 		    (p->flags & PG_BUSY) == 0) {
4219344a595Sad 			if (uvmpd_dropswap(p)) {
4229344a595Sad 				swap_shortage--;
4239344a595Sad 			}
4249344a595Sad 		}
4259344a595Sad 
4269344a595Sad 		/*
4279344a595Sad 		 * if there's a shortage of inactive pages, deactivate.
4289344a595Sad 		 */
4299344a595Sad 		if (inactive_shortage > 0) {
43094843b13Sad 			pmap_clear_reference(p);
43194843b13Sad 			mutex_enter(&p->interlock);
4325978ddc6Sad 			uvmpdpol_pagedeactivate_locked(p);
43394843b13Sad 			mutex_exit(&p->interlock);
4349d3e3eabSyamt 			uvmexp.pddeact++;
4359d3e3eabSyamt 			inactive_shortage--;
4369344a595Sad 		}
437d2a0ebb6Sad 		rw_exit(lock);
4389d3e3eabSyamt 	}
4399344a595Sad 	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
4405978ddc6Sad 	mutex_exit(&s->lock);
4419d3e3eabSyamt }
4429d3e3eabSyamt 
4435978ddc6Sad static void
4445978ddc6Sad uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
4459d3e3eabSyamt {
44694843b13Sad 	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
4479d3e3eabSyamt 
44894843b13Sad 	KASSERT(mutex_owned(&s->lock));
44994843b13Sad 	KASSERT(mutex_owned(&pg->interlock));
45094843b13Sad 	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
45194843b13Sad 	    (PQ_INTENT_D | PQ_INTENT_SET));
452e225b7bdSrmind 
4539d3e3eabSyamt 	if (pg->pqflags & PQ_ACTIVE) {
4546c2dc768Sad 		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
4559d3e3eabSyamt 		KASSERT(pdpol_state.s_active > 0);
4569d3e3eabSyamt 		pdpol_state.s_active--;
4579d3e3eabSyamt 	}
4589d3e3eabSyamt 	if ((pg->pqflags & PQ_INACTIVE) == 0) {
4599d3e3eabSyamt 		KASSERT(pg->wire_count == 0);
4606c2dc768Sad 		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
4619d3e3eabSyamt 		pdpol_state.s_inactive++;
4629d3e3eabSyamt 	}
4639d385320Sad 	pg->pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
4649d385320Sad 	pg->pqflags |= PQ_INACTIVE;
4659d3e3eabSyamt }
4669d3e3eabSyamt 
4679d3e3eabSyamt void
4685978ddc6Sad uvmpdpol_pagedeactivate(struct vm_page *pg)
4695978ddc6Sad {
4705978ddc6Sad 
471231cabb5Sad 	KASSERT(uvm_page_owner_locked_p(pg, false));
47294843b13Sad 	KASSERT(mutex_owned(&pg->interlock));
47394843b13Sad 
47494843b13Sad 	/*
47594843b13Sad 	 * we have to clear the reference bit now, as when it comes time to
47694843b13Sad 	 * realize the intent we won't have the object locked any more.
47794843b13Sad 	 */
47894843b13Sad 	pmap_clear_reference(pg);
47994843b13Sad 	uvmpdpol_set_intent(pg, PQ_INTENT_I);
4805978ddc6Sad }
4815978ddc6Sad 
4825978ddc6Sad static void
4835978ddc6Sad uvmpdpol_pageactivate_locked(struct vm_page *pg)
4849d3e3eabSyamt {
48594843b13Sad 	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
48694843b13Sad 
48794843b13Sad 	KASSERT(mutex_owned(&s->lock));
48894843b13Sad 	KASSERT(mutex_owned(&pg->interlock));
48994843b13Sad 	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
49094843b13Sad 	    (PQ_INTENT_D | PQ_INTENT_SET));
4919d3e3eabSyamt 
4925978ddc6Sad 	uvmpdpol_pagedequeue_locked(pg);
4936c2dc768Sad 	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
4949d3e3eabSyamt 	pdpol_state.s_active++;
4959d385320Sad 	pg->pqflags &= ~(PQ_INACTIVE | PQ_INTENT_SET);
4969d385320Sad 	pg->pqflags |= PQ_ACTIVE;
4979d3e3eabSyamt }
4989d3e3eabSyamt 
4999d3e3eabSyamt void
5005978ddc6Sad uvmpdpol_pageactivate(struct vm_page *pg)
5015978ddc6Sad {
5025978ddc6Sad 
503231cabb5Sad 	KASSERT(uvm_page_owner_locked_p(pg, false));
50494843b13Sad 	KASSERT(mutex_owned(&pg->interlock));
50594843b13Sad 
50694843b13Sad 	uvmpdpol_set_intent(pg, PQ_INTENT_A);
5075978ddc6Sad }
5085978ddc6Sad 
5095978ddc6Sad static void
5105978ddc6Sad uvmpdpol_pagedequeue_locked(struct vm_page *pg)
5119d3e3eabSyamt {
51294843b13Sad 	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
51394843b13Sad 
51494843b13Sad 	KASSERT(mutex_owned(&s->lock));
51594843b13Sad 	KASSERT(mutex_owned(&pg->interlock));
5169d3e3eabSyamt 
5179d3e3eabSyamt 	if (pg->pqflags & PQ_ACTIVE) {
5186c2dc768Sad 		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
51994843b13Sad 		KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
5209d3e3eabSyamt 		KASSERT(pdpol_state.s_active > 0);
5219d3e3eabSyamt 		pdpol_state.s_active--;
5229d3e3eabSyamt 	} else if (pg->pqflags & PQ_INACTIVE) {
5236c2dc768Sad 		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
5249d3e3eabSyamt 		KASSERT(pdpol_state.s_inactive > 0);
5259d3e3eabSyamt 		pdpol_state.s_inactive--;
5269d3e3eabSyamt 	}
5279d385320Sad 	pg->pqflags &= ~(PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_SET);
5289d3e3eabSyamt }
5299d3e3eabSyamt 
5309d3e3eabSyamt void
5315978ddc6Sad uvmpdpol_pagedequeue(struct vm_page *pg)
5325978ddc6Sad {
5335978ddc6Sad 
534d2a0ebb6Sad 	KASSERT(uvm_page_owner_locked_p(pg, true));
53594843b13Sad 	KASSERT(mutex_owned(&pg->interlock));
53694843b13Sad 
53794843b13Sad 	uvmpdpol_set_intent(pg, PQ_INTENT_D);
5385978ddc6Sad }
5395978ddc6Sad 
5405978ddc6Sad void
5419d3e3eabSyamt uvmpdpol_pageenqueue(struct vm_page *pg)
5429d3e3eabSyamt {
5439d3e3eabSyamt 
544231cabb5Sad 	KASSERT(uvm_page_owner_locked_p(pg, false));
54594843b13Sad 	KASSERT(mutex_owned(&pg->interlock));
54694843b13Sad 
54794843b13Sad 	uvmpdpol_set_intent(pg, PQ_INTENT_E);
5489d3e3eabSyamt }
5499d3e3eabSyamt 
5509d3e3eabSyamt void
5511a7bc55dSyamt uvmpdpol_anfree(struct vm_anon *an)
5529d3e3eabSyamt {
5539d3e3eabSyamt }
5549d3e3eabSyamt 
555712239e3Sthorpej bool
5569d3e3eabSyamt uvmpdpol_pageisqueued_p(struct vm_page *pg)
5579d3e3eabSyamt {
55894843b13Sad 	uint32_t pqflags;
5599d3e3eabSyamt 
56094843b13Sad 	/*
56194843b13Sad 	 * if there's an intent set, we have to consider it.  otherwise,
56294843b13Sad 	 * return the actual state.  we may be called unlocked for the
56394843b13Sad 	 * purpose of assertions, which is safe due to the page lifecycle.
56494843b13Sad 	 */
56594843b13Sad 	pqflags = atomic_load_relaxed(&pg->pqflags);
56694843b13Sad 	if ((pqflags & PQ_INTENT_SET) != 0) {
56794843b13Sad 		return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
56894843b13Sad 	} else {
56994843b13Sad 		return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
57094843b13Sad 	}
5719d3e3eabSyamt }
5729d3e3eabSyamt 
573ff872804Sad bool
574ff872804Sad uvmpdpol_pageactivate_p(struct vm_page *pg)
575ff872804Sad {
576ff872804Sad 	uint32_t pqflags;
577ff872804Sad 
578ff872804Sad 	/* consider intent in preference to actual state. */
579ff872804Sad 	pqflags = atomic_load_relaxed(&pg->pqflags);
580ff872804Sad 	if ((pqflags & PQ_INTENT_SET) != 0) {
581ff872804Sad 		pqflags &= PQ_INTENT_MASK;
582ff872804Sad 		return pqflags != PQ_INTENT_A && pqflags != PQ_INTENT_E;
583ff872804Sad 	} else {
584ff872804Sad 		/*
585ff872804Sad 		 * TODO: Enabling this may be too much of a big hammer,
586ff872804Sad 		 * since we do get useful information from activations.
587ff872804Sad 		 * Think about it more and maybe come up with a heuristic
588ff872804Sad 		 * or something.
589ff872804Sad 		 *
590ff872804Sad 		 * return (pqflags & PQ_ACTIVE) == 0;
591ff872804Sad 		 */
592ff872804Sad 		return true;
593ff872804Sad 	}
594ff872804Sad }
595ff872804Sad 
5969d3e3eabSyamt void
5979d3e3eabSyamt uvmpdpol_estimatepageable(int *active, int *inactive)
5989d3e3eabSyamt {
5995978ddc6Sad 	struct uvmpdpol_globalstate *s = &pdpol_state;
6009d3e3eabSyamt 
601da84a45cSad 	/*
602da84a45cSad 	 * Don't take any locks here.  This can be called from DDB, and in
603da84a45cSad 	 * any case the numbers are stale the instant the lock is dropped,
604da84a45cSad 	 * so it just doesn't matter.
605da84a45cSad 	 */
6069d3e3eabSyamt 	if (active) {
607da84a45cSad 		*active = s->s_active;
6089d3e3eabSyamt 	}
6099d3e3eabSyamt 	if (inactive) {
610da84a45cSad 		*inactive = s->s_inactive;
6119d3e3eabSyamt 	}
6129d3e3eabSyamt }
6139d3e3eabSyamt 
6149d3e3eabSyamt #if !defined(PDSIM)
6159d3e3eabSyamt static int
6169d3e3eabSyamt min_check(struct uvm_pctparam *pct, int t)
6179d3e3eabSyamt {
6189d3e3eabSyamt 	struct uvmpdpol_globalstate *s = &pdpol_state;
6199d3e3eabSyamt 	int total = t;
6209d3e3eabSyamt 
6219d3e3eabSyamt 	if (pct != &s->s_anonmin) {
6229d3e3eabSyamt 		total += uvm_pctparam_get(&s->s_anonmin);
6239d3e3eabSyamt 	}
6249d3e3eabSyamt 	if (pct != &s->s_filemin) {
6259d3e3eabSyamt 		total += uvm_pctparam_get(&s->s_filemin);
6269d3e3eabSyamt 	}
6279d3e3eabSyamt 	if (pct != &s->s_execmin) {
6289d3e3eabSyamt 		total += uvm_pctparam_get(&s->s_execmin);
6299d3e3eabSyamt 	}
6309d3e3eabSyamt 	if (total > 95) {
6319d3e3eabSyamt 		return EINVAL;
6329d3e3eabSyamt 	}
6339d3e3eabSyamt 	return 0;
6349d3e3eabSyamt }
6359d3e3eabSyamt #endif /* !defined(PDSIM) */
6369d3e3eabSyamt 
6379d3e3eabSyamt void
6389d3e3eabSyamt uvmpdpol_init(void)
6399d3e3eabSyamt {
6409d3e3eabSyamt 	struct uvmpdpol_globalstate *s = &pdpol_state;
6419d3e3eabSyamt 
6425978ddc6Sad 	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
6439d3e3eabSyamt 	TAILQ_INIT(&s->s_activeq);
6449d3e3eabSyamt 	TAILQ_INIT(&s->s_inactiveq);
6459d3e3eabSyamt 	uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
6469d3e3eabSyamt 	uvm_pctparam_init(&s->s_anonmin, 10, min_check);
6479d3e3eabSyamt 	uvm_pctparam_init(&s->s_filemin, 10, min_check);
6489d3e3eabSyamt 	uvm_pctparam_init(&s->s_execmin,  5, min_check);
6499d3e3eabSyamt 	uvm_pctparam_init(&s->s_anonmax, 80, NULL);
6509d3e3eabSyamt 	uvm_pctparam_init(&s->s_filemax, 50, NULL);
6519d3e3eabSyamt 	uvm_pctparam_init(&s->s_execmax, 30, NULL);
6529d3e3eabSyamt }
6539d3e3eabSyamt 
6549d3e3eabSyamt void
65594843b13Sad uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
65694843b13Sad {
65794843b13Sad 
65894843b13Sad 	ucpu->pdq =
65994843b13Sad 	    kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
66094843b13Sad 	ucpu->pdqhead = CLOCK_PDQ_SIZE;
66194843b13Sad 	ucpu->pdqtail = CLOCK_PDQ_SIZE;
66294843b13Sad }
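
/*
 * Queue convention (as used by uvmpdpol_pagerealize() and
 * uvmpdpol_flush() below): pdqhead == pdqtail == CLOCK_PDQ_SIZE means
 * the per-CPU queue is empty.  pages are pushed downward, stack-like,
 * with pdq[--pdqhead] = pg, and drained forward from pdqhead towards
 * pdqtail.
 */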
66394843b13Sad 
66494843b13Sad void
6659d3e3eabSyamt uvmpdpol_reinit(void)
6669d3e3eabSyamt {
6679d3e3eabSyamt }
6689d3e3eabSyamt 
669712239e3Sthorpej bool
6709d3e3eabSyamt uvmpdpol_needsscan_p(void)
6719d3e3eabSyamt {
6729d3e3eabSyamt 
67394843b13Sad 	/*
67494843b13Sad 	 * this must be an unlocked check: can be called from interrupt.
67594843b13Sad 	 */
6764db6dbc1Spara 	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
6779d3e3eabSyamt }
6789d3e3eabSyamt 
6799d3e3eabSyamt void
6809d3e3eabSyamt uvmpdpol_tune(void)
6819d3e3eabSyamt {
6825978ddc6Sad 	struct uvmpdpol_globalstate *s = &pdpol_state;
6839d3e3eabSyamt 
6845978ddc6Sad 	mutex_enter(&s->lock);
6859d3e3eabSyamt 	clock_tune();
6865978ddc6Sad 	mutex_exit(&s->lock);
6879d3e3eabSyamt }
6889d3e3eabSyamt 
68994843b13Sad /*
6908764f427Sad  * uvmpdpol_pagerealize_locked: take the intended state set on a page and
6918764f427Sad  * make it real.  return true if any work was done.
69294843b13Sad  */
69394843b13Sad static bool
69494843b13Sad uvmpdpol_pagerealize_locked(struct vm_page *pg)
69594843b13Sad {
69694843b13Sad 	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
69794843b13Sad 
69894843b13Sad 	KASSERT(mutex_owned(&s->lock));
69994843b13Sad 	KASSERT(mutex_owned(&pg->interlock));
70094843b13Sad 
70194843b13Sad 	switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
70294843b13Sad 	case PQ_INTENT_A | PQ_INTENT_SET:
70394843b13Sad 	case PQ_INTENT_E | PQ_INTENT_SET:
70494843b13Sad 		uvmpdpol_pageactivate_locked(pg);
70594843b13Sad 		return true;
70694843b13Sad 	case PQ_INTENT_I | PQ_INTENT_SET:
70794843b13Sad 		uvmpdpol_pagedeactivate_locked(pg);
70894843b13Sad 		return true;
70994843b13Sad 	case PQ_INTENT_D | PQ_INTENT_SET:
71094843b13Sad 		uvmpdpol_pagedequeue_locked(pg);
71194843b13Sad 		return true;
71294843b13Sad 	default:
71394843b13Sad 		return false;
71494843b13Sad 	}
71594843b13Sad }
71694843b13Sad 
71794843b13Sad /*
71894843b13Sad  * uvmpdpol_flush: return the current uvm_cpu with all of its pending
71994843b13Sad  * updates flushed to the global queues.  this routine may block, and
72094843b13Sad  * so can switch cpu.  the idea is to empty to queue on whatever cpu
72194843b13Sad  * we finally end up on.
72294843b13Sad  */
72394843b13Sad static struct uvm_cpu *
72494843b13Sad uvmpdpol_flush(void)
72594843b13Sad {
72694843b13Sad 	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
72794843b13Sad 	struct uvm_cpu *ucpu;
72894843b13Sad 	struct vm_page *pg;
72994843b13Sad 
73094843b13Sad 	KASSERT(kpreempt_disabled());
73194843b13Sad 
73294843b13Sad 	mutex_enter(&s->lock);
73394843b13Sad 	for (;;) {
73494843b13Sad 		/*
73594843b13Sad 		 * prefer scanning forwards (even though mutex_enter() is
73694843b13Sad 		 * serializing) so as to not defeat any prefetch logic in
73794843b13Sad 		 * the CPU.  that means elsewhere enqueuing backwards, like
73894843b13Sad 		 * a stack, but not so important there as pages are being
73994843b13Sad 		 * added singularly.
74094843b13Sad 		 *
74194843b13Sad 		 * prefetch the next "struct vm_page" while working on the
74294843b13Sad 		 * current one.  this has a measurable and very positive
74394843b13Sad 		 * effect in reducing the amount of time spent here under
74494843b13Sad 		 * the global lock.
74594843b13Sad 		 */
74694843b13Sad 		ucpu = curcpu()->ci_data.cpu_uvm;
74794843b13Sad 		KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
74894843b13Sad 		if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
74994843b13Sad 			break;
75094843b13Sad 		}
75194843b13Sad 		pg = ucpu->pdq[ucpu->pdqhead++];
75294843b13Sad 		if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
75394843b13Sad 			__builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
75494843b13Sad 		}
75594843b13Sad 		mutex_enter(&pg->interlock);
75694843b13Sad 		pg->pqflags &= ~PQ_INTENT_QUEUED;
75794843b13Sad 		(void)uvmpdpol_pagerealize_locked(pg);
75894843b13Sad 		mutex_exit(&pg->interlock);
75994843b13Sad 	}
76094843b13Sad 	mutex_exit(&s->lock);
76194843b13Sad 	return ucpu;
76294843b13Sad }
76394843b13Sad 
76494843b13Sad /*
76594843b13Sad  * uvmpdpol_pagerealize: realize any intent set on the page.  in this
76694843b13Sad  * implementation, that means putting the page on a per-CPU queue to be
76794843b13Sad  * dealt with later.
76894843b13Sad  */
76994843b13Sad void
77094843b13Sad uvmpdpol_pagerealize(struct vm_page *pg)
77194843b13Sad {
77294843b13Sad 	struct uvm_cpu *ucpu;
77394843b13Sad 
77494843b13Sad 	/*
77594843b13Sad 	 * drain the per-CPU queue if full, then enter the page.
77694843b13Sad 	 */
77794843b13Sad 	kpreempt_disable();
77894843b13Sad 	ucpu = curcpu()->ci_data.cpu_uvm;
77994843b13Sad 	if (__predict_false(ucpu->pdqhead == 0)) {
78094843b13Sad 		ucpu = uvmpdpol_flush();
78194843b13Sad 	}
78294843b13Sad 	ucpu->pdq[--(ucpu->pdqhead)] = pg;
78394843b13Sad 	kpreempt_enable();
78494843b13Sad }
78594843b13Sad 
78694843b13Sad /*
78794843b13Sad  * uvmpdpol_idle: called from the system idle loop.  periodically purge any
78894843b13Sad  * pending updates back to the global queues.
78994843b13Sad  */
79094843b13Sad void
79194843b13Sad uvmpdpol_idle(struct uvm_cpu *ucpu)
79294843b13Sad {
79394843b13Sad 	struct uvmpdpol_globalstate *s = &pdpol_state;
79494843b13Sad 	struct vm_page *pg;
79594843b13Sad 
79694843b13Sad 	KASSERT(kpreempt_disabled());
79794843b13Sad 
79894843b13Sad 	/*
79994843b13Sad 	 * if no pages in the queue, we have nothing to do.
80094843b13Sad 	 */
80194843b13Sad 	if (ucpu->pdqhead == ucpu->pdqtail) {
802fd2e91e6Smaxv 		ucpu->pdqtime = getticks();
80394843b13Sad 		return;
80494843b13Sad 	}
80594843b13Sad 
80694843b13Sad 	/*
80794843b13Sad 	 * don't do this more than ~8 times a second as it would needlessly
80894843b13Sad 	 * exert pressure.
80994843b13Sad 	 */
810fd2e91e6Smaxv 	if (getticks() - ucpu->pdqtime < (hz >> 3)) {
81194843b13Sad 		return;
81294843b13Sad 	}
81394843b13Sad 
81494843b13Sad 	/*
81594843b13Sad 	 * the idle LWP can't block, so we have to try for the lock.  if we
81694843b13Sad 	 * get it, purge the per-CPU pending update queue.  continually
81794843b13Sad 	 * check for a pending resched: in that case exit immediately.
81894843b13Sad 	 */
81994843b13Sad 	if (mutex_tryenter(&s->lock)) {
82094843b13Sad 		while (ucpu->pdqhead != ucpu->pdqtail) {
82194843b13Sad 			pg = ucpu->pdq[ucpu->pdqhead];
82294843b13Sad 			if (!mutex_tryenter(&pg->interlock)) {
82394843b13Sad 				break;
82494843b13Sad 			}
82594843b13Sad 			ucpu->pdqhead++;
82694843b13Sad 			pg->pqflags &= ~PQ_INTENT_QUEUED;
82794843b13Sad 			(void)uvmpdpol_pagerealize_locked(pg);
82894843b13Sad 			mutex_exit(&pg->interlock);
82994843b13Sad 			if (curcpu()->ci_want_resched) {
83094843b13Sad 				break;
83194843b13Sad 			}
83294843b13Sad 		}
83394843b13Sad 		if (ucpu->pdqhead == ucpu->pdqtail) {
834fd2e91e6Smaxv 			ucpu->pdqtime = getticks();
83594843b13Sad 		}
83694843b13Sad 		mutex_exit(&s->lock);
83794843b13Sad 	}
83894843b13Sad }
83994843b13Sad 
8409d3e3eabSyamt #if !defined(PDSIM)
8419d3e3eabSyamt 
8429d3e3eabSyamt #include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */
8439d3e3eabSyamt 
8449d3e3eabSyamt void
8459d3e3eabSyamt uvmpdpol_sysctlsetup(void)
8469d3e3eabSyamt {
8479d3e3eabSyamt 	struct uvmpdpol_globalstate *s = &pdpol_state;
8489d3e3eabSyamt 
8499d3e3eabSyamt 	uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
8509d3e3eabSyamt 	    SYSCTL_DESCR("Percentage of physical memory reserved "
8519d3e3eabSyamt 	    "for anonymous application data"));
8529d3e3eabSyamt 	uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
8539d3e3eabSyamt 	    SYSCTL_DESCR("Percentage of physical memory reserved "
854813190b4Smartin 	    "for cached file data"));
8559d3e3eabSyamt 	uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
8569d3e3eabSyamt 	    SYSCTL_DESCR("Percentage of physical memory reserved "
857813190b4Smartin 	    "for cached executable data"));
8589d3e3eabSyamt 
8599d3e3eabSyamt 	uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
8609d3e3eabSyamt 	    SYSCTL_DESCR("Percentage of physical memory which will "
8619d3e3eabSyamt 	    "be reclaimed from other usage for "
8629d3e3eabSyamt 	    "anonymous application data"));
8639d3e3eabSyamt 	uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
8649d3e3eabSyamt 	    SYSCTL_DESCR("Percentage of physical memory which will "
8659d3e3eabSyamt 	    "be reclaimed from other usage for cached "
8669d3e3eabSyamt 	    "file data"));
8679d3e3eabSyamt 	uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
8689d3e3eabSyamt 	    SYSCTL_DESCR("Percentage of physical memory which will "
8699d3e3eabSyamt 	    "be reclaimed from other usage for cached "
8709d3e3eabSyamt 	    "executable data"));
8719d3e3eabSyamt 
8729d3e3eabSyamt 	uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
8739d3e3eabSyamt 	    SYSCTL_DESCR("Percentage of inactive queue of "
8749d3e3eabSyamt 	    "the entire (active + inactive) queue"));
8759d3e3eabSyamt }
8769d3e3eabSyamt 
8779d3e3eabSyamt #endif /* !defined(PDSIM) */
8789d3e3eabSyamt 
8799d3e3eabSyamt #if defined(PDSIM)
8809d3e3eabSyamt void
8819d3e3eabSyamt pdsim_dump(const char *id)
8829d3e3eabSyamt {
8839d3e3eabSyamt #if defined(DEBUG)
8849d3e3eabSyamt 	/* XXX */
8859d3e3eabSyamt #endif /* defined(DEBUG) */
8869d3e3eabSyamt }
8879d3e3eabSyamt #endif /* defined(PDSIM) */