xref: /netbsd-src/sys/uvm/uvm_pdpolicy_clockpro.c (revision 06ddeb9f13adf7283ddbc0ff517ec4b59ab5f739)
1 /*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.27 2022/04/12 20:27:56 andvar Exp $	*/
2 
3 /*-
4  * Copyright (c)2005, 2006 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * CLOCK-Pro replacement policy:
31  *	http://web.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-3.html
32  *
33  * approximation of the list of non-resident pages using hash:
34  *	http://linux-mm.org/ClockProApproximation
35  */
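/*
 * rough overview of the implementation below (see the cited paper for
 * the full algorithm):
 *
 * - resident pages live on a small NEWQ (which lets the initial
 *   reference settle), a COLDQ and a HOTQ; cold pages may be in their
 *   "test period" (PQ_TEST).
 *
 * - the cold hand (handcold_advance) scans the cold queue for eviction
 *   victims, while the hot hand (handhot_advance) demotes unreferenced
 *   hot pages until there are at least s_coldtarget cold pages.
 *
 * - pages reclaimed at the end of an unreferenced test period are
 *   remembered as 32-bit cookies in small hash buckets (struct bucket);
 *   if such a page is faulted back in while its cookie survives, the
 *   reuse distance was short and the page may be re-admitted as hot.
 */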
36 
37 /* #define	CLOCKPRO_DEBUG */
38 
39 #if defined(PDSIM)
40 
41 #include "pdsim.h"
42 
43 #else /* defined(PDSIM) */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.27 2022/04/12 20:27:56 andvar Exp $");
47 
48 #include "opt_ddb.h"
49 
50 #include <sys/param.h>
51 #include <sys/proc.h>
52 #include <sys/systm.h>
53 #include <sys/kernel.h>
54 #include <sys/hash.h>
55 
56 #include <uvm/uvm.h>
57 #include <uvm/uvm_pdaemon.h>	/* for uvmpd_trylockowner */
58 #include <uvm/uvm_pdpolicy.h>
59 #include <uvm/uvm_pdpolicy_impl.h>
60 
61 #if ((__STDC_VERSION__ - 0) >= 199901L)
62 #define	DPRINTF(...)	/* nothing */
63 #define	WARN(...)	printf(__VA_ARGS__)
64 #else /* ((__STDC_VERSION__ - 0) >= 199901L) */
65 #define	DPRINTF(a...)	/* nothing */	/* GCC */
66 #define	WARN(a...)	printf(a)
67 #endif /* ((__STDC_VERSION__ - 0) >= 199901L) */
68 
69 #define	dump(a)		/* nothing */
70 
71 #undef	USEONCE2
72 #define	LISTQ
73 #undef	ADAPTIVE
74 
75 #endif /* defined(PDSIM) */
76 
77 #if !defined(CLOCKPRO_COLDPCT)
78 #define	CLOCKPRO_COLDPCT	10
79 #endif /* !defined(CLOCKPRO_COLDPCT) */
80 
81 #define	CLOCKPRO_COLDPCTMAX	90
82 
83 #if !defined(CLOCKPRO_HASHFACTOR)
84 #define	CLOCKPRO_HASHFACTOR	2
85 #endif /* !defined(CLOCKPRO_HASHFACTOR) */
86 
87 #define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */
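/* with the common 4 KiB page size (PAGE_SHIFT == 12), this is 256 pages */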
88 
89 int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;
90 
91 PDPOL_EVCNT_DEFINE(nresrecordobj)
92 PDPOL_EVCNT_DEFINE(nresrecordanon)
93 PDPOL_EVCNT_DEFINE(nreslookupobj)
94 PDPOL_EVCNT_DEFINE(nreslookupanon)
95 PDPOL_EVCNT_DEFINE(nresfoundobj)
96 PDPOL_EVCNT_DEFINE(nresfoundanon)
97 PDPOL_EVCNT_DEFINE(nresanonfree)
98 PDPOL_EVCNT_DEFINE(nresconflict)
99 PDPOL_EVCNT_DEFINE(nresoverwritten)
100 PDPOL_EVCNT_DEFINE(nreshandhot)
101 
102 PDPOL_EVCNT_DEFINE(hhottakeover)
103 PDPOL_EVCNT_DEFINE(hhotref)
104 PDPOL_EVCNT_DEFINE(hhotunref)
105 PDPOL_EVCNT_DEFINE(hhotcold)
106 PDPOL_EVCNT_DEFINE(hhotcoldtest)
107 
108 PDPOL_EVCNT_DEFINE(hcoldtakeover)
109 PDPOL_EVCNT_DEFINE(hcoldref)
110 PDPOL_EVCNT_DEFINE(hcoldunref)
111 PDPOL_EVCNT_DEFINE(hcoldreftest)
112 PDPOL_EVCNT_DEFINE(hcoldunreftest)
113 PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
114 PDPOL_EVCNT_DEFINE(hcoldhot)
115 
116 PDPOL_EVCNT_DEFINE(speculativeenqueue)
117 PDPOL_EVCNT_DEFINE(speculativehit1)
118 PDPOL_EVCNT_DEFINE(speculativehit2)
119 PDPOL_EVCNT_DEFINE(speculativemiss)
120 
121 PDPOL_EVCNT_DEFINE(locksuccess)
122 PDPOL_EVCNT_DEFINE(lockfail)
123 
124 #define	PQ_REFERENCED	0x000000010
125 #define	PQ_HOT		0x000000020
126 #define	PQ_TEST		0x000000040
127 #define	PQ_INITIALREF	0x000000080
128 #define	PQ_QMASK	0x000000700
129 #define	PQ_QFACTOR	0x000000100
130 #define	PQ_SPECULATIVE	0x000000800
131 
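/*
 * the queue index (CLOCKPRO_NOQUEUE .. CLOCKPRO_NQUEUE below) is kept in
 * bits 8-10 of pg->pqflags via PQ_QMASK/PQ_QFACTOR; see clockpro_setq()
 * and clockpro_getq().
 */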
132 #define	CLOCKPRO_NOQUEUE	0
133 #define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
134 #if defined(LISTQ)
135 #define	CLOCKPRO_COLDQ		2
136 #define	CLOCKPRO_HOTQ		3
137 #else /* defined(LISTQ) */
138 #define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
139 #define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
140 #endif /* defined(LISTQ) */
141 #define	CLOCKPRO_LISTQ		4
142 #define	CLOCKPRO_NQUEUE		4
143 
144 static bool	uvmpdpol_pagerealize_locked(struct vm_page *);
145 
146 static inline void
147 clockpro_setq(struct vm_page *pg, int qidx)
148 {
149 	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
150 	KASSERT(qidx <= CLOCKPRO_NQUEUE);
151 
152 	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
153 }
154 
155 static inline int
156 clockpro_getq(struct vm_page *pg)
157 {
158 	int qidx;
159 
160 	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
161 	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
162 	KASSERT(qidx <= CLOCKPRO_NQUEUE);
163 	return qidx;
164 }
165 
166 typedef struct {
167 	struct pglist q_q;
168 	int q_len;
169 } pageq_t;
170 
171 struct clockpro_state {
172 	kmutex_t lock;
173 	int s_npages;
174 	int s_coldtarget;
175 	int s_ncold;
176 
177 	int s_newqlenmax;
178 	pageq_t s_q[CLOCKPRO_NQUEUE];
179 
180 	struct uvm_pctparam s_coldtargetpct;
181 };
182 
183 static pageq_t *
184 clockpro_queue(struct clockpro_state *s, int qidx)
185 {
186 
187 	KASSERT(CLOCKPRO_NOQUEUE < qidx);
188 	KASSERT(qidx <= CLOCKPRO_NQUEUE);
189 
190 	return &s->s_q[qidx - 1];
191 }
192 
193 #if !defined(LISTQ)
194 
195 static int coldqidx;
196 
197 static void
198 clockpro_switchqueue(void)
199 {
200 
201 	coldqidx = 1 - coldqidx;
202 }
203 
204 #endif /* !defined(LISTQ) */
205 
206 static struct clockpro_state clockpro __cacheline_aligned;
207 static struct clockpro_scanstate {
208 	int ss_nscanned;
209 } scanstate;
210 
211 /* ---------------------------------------- */
212 
213 static void
214 pageq_init(pageq_t *q)
215 {
216 
217 	TAILQ_INIT(&q->q_q);
218 	q->q_len = 0;
219 }
220 
221 static int
222 pageq_len(const pageq_t *q)
223 {
224 
225 	return q->q_len;
226 }
227 
228 static struct vm_page *
229 pageq_first(const pageq_t *q)
230 {
231 
232 	return TAILQ_FIRST(&q->q_q);
233 }
234 
235 static void
236 pageq_insert_tail(pageq_t *q, struct vm_page *pg)
237 {
238 
239 	TAILQ_INSERT_TAIL(&q->q_q, pg, pdqueue);
240 	q->q_len++;
241 }
242 
243 #if defined(LISTQ)
244 static void
245 pageq_insert_head(pageq_t *q, struct vm_page *pg)
246 {
247 
248 	TAILQ_INSERT_HEAD(&q->q_q, pg, pdqueue);
249 	q->q_len++;
250 }
251 #endif
252 
253 static void
254 pageq_remove(pageq_t *q, struct vm_page *pg)
255 {
256 
257 #if 1
258 	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
259 #endif
260 	KASSERT(q->q_len > 0);
261 	TAILQ_REMOVE(&q->q_q, pg, pdqueue);
262 	q->q_len--;
263 }
264 
265 static struct vm_page *
266 pageq_remove_head(pageq_t *q)
267 {
268 	struct vm_page *pg;
269 
270 	pg = TAILQ_FIRST(&q->q_q);
271 	if (pg == NULL) {
272 		KASSERT(q->q_len == 0);
273 		return NULL;
274 	}
275 	pageq_remove(q, pg);
276 	return pg;
277 }
278 
279 /* ---------------------------------------- */
280 
281 static void
282 clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
283 {
284 	pageq_t *q = clockpro_queue(s, qidx);
285 
286 	clockpro_setq(pg, qidx);
287 	pageq_insert_tail(q, pg);
288 }
289 
290 #if defined(LISTQ)
291 static void
292 clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
293 {
294 	pageq_t *q = clockpro_queue(s, qidx);
295 
296 	clockpro_setq(pg, qidx);
297 	pageq_insert_head(q, pg);
298 }
299 
300 #endif
301 /* ---------------------------------------- */
302 
303 typedef uint32_t nonres_cookie_t;
304 #define	NONRES_COOKIE_INVAL	0
305 
306 typedef uintptr_t objid_t;
307 
308 /*
309  * XXX maybe these hash functions need reconsideration,
310  * given that hash distribution is critical here.
311  */
312 
313 static uint32_t
314 pageidentityhash1(objid_t obj, off_t idx)
315 {
316 	uint32_t hash = HASH32_BUF_INIT;
317 
318 #if 1
319 	hash = hash32_buf(&idx, sizeof(idx), hash);
320 	hash = hash32_buf(&obj, sizeof(obj), hash);
321 #else
322 	hash = hash32_buf(&obj, sizeof(obj), hash);
323 	hash = hash32_buf(&idx, sizeof(idx), hash);
324 #endif
325 	return hash;
326 }
327 
328 static uint32_t
329 pageidentityhash2(objid_t obj, off_t idx)
330 {
331 	uint32_t hash = HASH32_BUF_INIT;
332 
333 	hash = hash32_buf(&obj, sizeof(obj), hash);
334 	hash = hash32_buf(&idx, sizeof(idx), hash);
335 	return hash;
336 }
337 
338 static nonres_cookie_t
339 calccookie(objid_t obj, off_t idx)
340 {
341 	uint32_t hash = pageidentityhash2(obj, idx);
342 	nonres_cookie_t cookie = hash;
343 
344 	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
345 		cookie++; /* XXX */
346 	}
347 	return cookie;
348 }
349 
350 #define	BUCKETSIZE	14
351 struct bucket {
352 	int cycle;
353 	int cur;
354 	nonres_cookie_t pages[BUCKETSIZE];
355 };
356 static int cycle_target;
357 static int cycle_target_frac;
358 
359 static struct bucket static_bucket;
360 static struct bucket *buckets = &static_bucket;
361 static size_t hashsize = 1;
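/*
 * each bucket is a small ring of BUCKETSIZE cookies: b->cur is the next
 * slot to overwrite and b->cycle records the value of cycle_target at the
 * bucket's last rotation, so that nonresident_rotate() can lazily expire
 * entries older than roughly one full rotation of the hot hand.
 */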
362 
363 static int coldadj;
364 #define	COLDTARGET_ADJ(d)	coldadj += (d)
365 
366 #if defined(PDSIM)
367 
368 static void *
369 clockpro_hashalloc(int n)
370 {
371 	size_t allocsz = sizeof(*buckets) * n;
372 
373 	return malloc(allocsz);
374 }
375 
376 static void
377 clockpro_hashfree(void *p, int n)
378 {
379 
380 	free(p);
381 }
382 
383 #else /* defined(PDSIM) */
384 
385 static void *
386 clockpro_hashalloc(int n)
387 {
388 	size_t allocsz = round_page(sizeof(*buckets) * n);
389 
390 	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
391 }
392 
393 static void
394 clockpro_hashfree(void *p, int n)
395 {
396 	size_t allocsz = round_page(sizeof(*buckets) * n);
397 
398 	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
399 }
400 
401 #endif /* defined(PDSIM) */
402 
403 static void
404 clockpro_hashinit(uint64_t n)
405 {
406 	struct bucket *newbuckets;
407 	struct bucket *oldbuckets;
408 	size_t sz;
409 	size_t oldsz;
410 	int i;
411 
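	/*
	 * e.g. n = 262144 resident pages (1 GiB of 4 KiB pages) with the
	 * default clockpro_hashfactor of 2 gives 18725 * 2 = 37450 buckets,
	 * i.e. roughly 2.3 MB of non-resident history (64 bytes per bucket,
	 * assuming 4-byte ints and no padding).
	 */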
412 	sz = howmany(n, BUCKETSIZE);
413 	sz *= clockpro_hashfactor;
414 	newbuckets = clockpro_hashalloc(sz);
415 	if (newbuckets == NULL) {
416 		panic("%s: allocation failure", __func__);
417 	}
418 	for (i = 0; i < sz; i++) {
419 		struct bucket *b = &newbuckets[i];
420 		int j;
421 
422 		b->cycle = cycle_target;
423 		b->cur = 0;
424 		for (j = 0; j < BUCKETSIZE; j++) {
425 			b->pages[j] = NONRES_COOKIE_INVAL;
426 		}
427 	}
428 	/* XXX lock */
429 	oldbuckets = buckets;
430 	oldsz = hashsize;
431 	buckets = newbuckets;
432 	hashsize = sz;
433 	/* XXX unlock */
434 	if (oldbuckets != &static_bucket) {
435 		clockpro_hashfree(oldbuckets, oldsz);
436 	}
437 }
438 
439 static struct bucket *
440 nonresident_getbucket(objid_t obj, off_t idx)
441 {
442 	uint32_t hash;
443 
444 	hash = pageidentityhash1(obj, idx);
445 	return &buckets[hash % hashsize];
446 }
447 
448 static void
449 nonresident_rotate(struct bucket *b)
450 {
451 	const int target = cycle_target;
452 	const int cycle = b->cycle;
453 	int cur;
454 	int todo;
455 
456 	todo = target - cycle;
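	/*
	 * a single full sweep already clears every slot, so cap the work;
	 * the "% BUCKETSIZE" term keeps b->cur at the position it would
	 * have reached without the cap.  e.g. with BUCKETSIZE 14 and
	 * todo 37, do 9 + 14 = 23 steps instead of 37.
	 */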
457 	if (todo >= BUCKETSIZE * 2) {
458 		todo = (todo % BUCKETSIZE) + BUCKETSIZE;
459 	}
460 	cur = b->cur;
461 	while (todo > 0) {
462 		if (b->pages[cur] != NONRES_COOKIE_INVAL) {
463 			PDPOL_EVCNT_INCR(nreshandhot);
464 			COLDTARGET_ADJ(-1);
465 		}
466 		b->pages[cur] = NONRES_COOKIE_INVAL;
467 		cur++;
468 		if (cur == BUCKETSIZE) {
469 			cur = 0;
470 		}
471 		todo--;
472 	}
473 	b->cycle = target;
474 	b->cur = cur;
475 }
476 
477 static bool
478 nonresident_lookupremove(objid_t obj, off_t idx)
479 {
480 	struct bucket *b = nonresident_getbucket(obj, idx);
481 	nonres_cookie_t cookie = calccookie(obj, idx);
482 	int i;
483 
484 	nonresident_rotate(b);
485 	for (i = 0; i < BUCKETSIZE; i++) {
486 		if (b->pages[i] == cookie) {
487 			b->pages[i] = NONRES_COOKIE_INVAL;
488 			return true;
489 		}
490 	}
491 	return false;
492 }
493 
494 static objid_t
495 pageobj(struct vm_page *pg)
496 {
497 	const void *obj;
498 
499 	/*
500 	 * XXX the object pointer is often freed and reused for an unrelated object.
501 	 * for vnodes, it would be better to use something like
502 	 * a hash of fsid/fileid/generation.
503 	 */
504 
505 	obj = pg->uobject;
506 	if (obj == NULL) {
507 		obj = pg->uanon;
508 		KASSERT(obj != NULL);
509 	}
510 	return (objid_t)obj;
511 }
512 
513 static off_t
514 pageidx(struct vm_page *pg)
515 {
516 
517 	KASSERT((pg->offset & PAGE_MASK) == 0);
518 	return pg->offset >> PAGE_SHIFT;
519 }
520 
521 static bool
522 nonresident_pagelookupremove(struct vm_page *pg)
523 {
524 	bool found = nonresident_lookupremove(pageobj(pg), pageidx(pg));
525 
526 	if (pg->uobject) {
527 		PDPOL_EVCNT_INCR(nreslookupobj);
528 	} else {
529 		PDPOL_EVCNT_INCR(nreslookupanon);
530 	}
531 	if (found) {
532 		if (pg->uobject) {
533 			PDPOL_EVCNT_INCR(nresfoundobj);
534 		} else {
535 			PDPOL_EVCNT_INCR(nresfoundanon);
536 		}
537 	}
538 	return found;
539 }
540 
541 static void
542 nonresident_pagerecord(struct vm_page *pg)
543 {
544 	objid_t obj = pageobj(pg);
545 	off_t idx = pageidx(pg);
546 	struct bucket *b = nonresident_getbucket(obj, idx);
547 	nonres_cookie_t cookie = calccookie(obj, idx);
548 
549 #if defined(DEBUG)
550 	int i;
551 
552 	for (i = 0; i < BUCKETSIZE; i++) {
553 		if (b->pages[i] == cookie) {
554 			PDPOL_EVCNT_INCR(nresconflict);
555 		}
556 	}
557 #endif /* defined(DEBUG) */
558 
559 	if (pg->uobject) {
560 		PDPOL_EVCNT_INCR(nresrecordobj);
561 	} else {
562 		PDPOL_EVCNT_INCR(nresrecordanon);
563 	}
564 	nonresident_rotate(b);
565 	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
566 		PDPOL_EVCNT_INCR(nresoverwritten);
567 		COLDTARGET_ADJ(-1);
568 	}
569 	b->pages[b->cur] = cookie;
570 	b->cur = (b->cur + 1) % BUCKETSIZE;
571 }
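
/*
 * illustrative sketch (not compiled): how the non-resident history above
 * is used by the rest of this file.  handcold_advance() records a cold
 * page whose test period ends unreferenced, and clockpro_pageenqueue()
 * consults the history when the page is faulted back in; a hit means the
 * reuse distance was short and the page may be re-admitted as hot.
 */
#if 0
	/* eviction side (handcold_advance, PQ_TEST set and unreferenced): */
	nonresident_pagerecord(pg);

	/* re-admission side (mirrors clockpro_pageenqueue): */
	bool hot = nonresident_pagelookupremove(pg);
	if (hot) {
		COLDTARGET_ADJ(1);
		pg->pqflags |= PQ_HOT;
	}
#endif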
572 
573 /* ---------------------------------------- */
574 
575 #if defined(CLOCKPRO_DEBUG)
576 static void
577 check_sanity(void)
578 {
579 }
580 #else /* defined(CLOCKPRO_DEBUG) */
581 #define	check_sanity()	/* nothing */
582 #endif /* defined(CLOCKPRO_DEBUG) */
583 
584 static void
585 clockpro_reinit(void)
586 {
587 
588 	KASSERT(mutex_owned(&clockpro.lock));
589 
590 	clockpro_hashinit(uvmexp.npages);
591 }
592 
593 static void
594 clockpro_init(void)
595 {
596 	struct clockpro_state *s = &clockpro;
597 	int i;
598 
599 	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
600 	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
601 		pageq_init(&s->s_q[i]);
602 	}
603 	s->s_newqlenmax = 1;
604 	s->s_coldtarget = 1;
605 	uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
606 }
607 
608 static void
609 clockpro_tune(void)
610 {
611 	struct clockpro_state *s = &clockpro;
612 	int coldtarget;
613 
614 	KASSERT(mutex_owned(&s->lock));
615 
616 #if defined(ADAPTIVE)
617 	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
618 	int coldmin = 1;
619 
620 	coldtarget = s->s_coldtarget;
621 	if (coldtarget + coldadj < coldmin) {
622 		coldadj = coldmin - coldtarget;
623 	} else if (coldtarget + coldadj > coldmax) {
624 		coldadj = coldmax - coldtarget;
625 	}
626 	coldtarget += coldadj;
627 #else /* defined(ADAPTIVE) */
628 	coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
629 	if (coldtarget < 1) {
630 		coldtarget = 1;
631 	}
632 #endif /* defined(ADAPTIVE) */
633 
634 	s->s_coldtarget = coldtarget;
635 	s->s_newqlenmax = coldtarget / 4;
636 	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
637 		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
638 	}
639 }
640 
641 static void
642 clockpro_movereferencebit(struct vm_page *pg, bool locked)
643 {
644 	kmutex_t *lock;
645 	bool referenced;
646 
647 	KASSERT(mutex_owned(&clockpro.lock));
648 	KASSERT(!locked || uvm_page_owner_locked_p(pg, false));
649 	if (!locked) {
650 		/*
651 		 * acquire interlock to stabilize page identity.
652 		 * if we have caught the page in a state of flux
653 		 * and it should be dequeued, abort.  it will be
654 		 * dequeued later.
655 		 */
656 		mutex_enter(&pg->interlock);
657 	        if ((pg->uobject == NULL && pg->uanon == NULL) ||
658 	            pg->wire_count > 0) {
659 	            	mutex_exit(&pg->interlock);
660 			PDPOL_EVCNT_INCR(lockfail);
661 			return;
662 		}
663 		mutex_exit(&clockpro.lock);	/* XXX */
664 		lock = uvmpd_trylockowner(pg);
665 		/* pg->interlock now dropped */
666 		mutex_enter(&clockpro.lock);	/* XXX */
667 		if (lock == NULL) {
668 			/*
669 			 * XXXuvmplock
670 			 */
671 			PDPOL_EVCNT_INCR(lockfail);
672 			return;
673 		}
674 		PDPOL_EVCNT_INCR(locksuccess);
675 	}
676 	referenced = pmap_clear_reference(pg);
677 	if (!locked) {
678 		mutex_exit(lock);
679 	}
680 	if (referenced) {
681 		pg->pqflags |= PQ_REFERENCED;
682 	}
683 }
684 
685 static void
686 clockpro_clearreferencebit(struct vm_page *pg, bool locked)
687 {
688 
689 	KASSERT(mutex_owned(&clockpro.lock));
690 
691 	clockpro_movereferencebit(pg, locked);
692 	pg->pqflags &= ~PQ_REFERENCED;
693 }
694 
695 static void
696 clockpro___newqrotate(int len)
697 {
698 	struct clockpro_state * const s = &clockpro;
699 	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
700 	struct vm_page *pg;
701 
702 	KASSERT(mutex_owned(&s->lock));
703 
704 	while (pageq_len(newq) > len) {
705 		pg = pageq_remove_head(newq);
706 		KASSERT(pg != NULL);
707 		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
708 		if ((pg->pqflags & PQ_INITIALREF) != 0) {
709 			clockpro_clearreferencebit(pg, false);
710 			pg->pqflags &= ~PQ_INITIALREF;
711 		}
712 		/* place at the list head */
713 		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
714 	}
715 }
716 
717 static void
718 clockpro_newqrotate(void)
719 {
720 	struct clockpro_state * const s = &clockpro;
721 
722 	KASSERT(mutex_owned(&s->lock));
723 
724 	check_sanity();
725 	clockpro___newqrotate(s->s_newqlenmax);
726 	check_sanity();
727 }
728 
729 static void
730 clockpro_newqflush(int n)
731 {
732 
733 	KASSERT(mutex_owned(&clockpro.lock));
734 
735 	check_sanity();
736 	clockpro___newqrotate(n);
737 	check_sanity();
738 }
739 
740 static void
741 clockpro_newqflushone(void)
742 {
743 	struct clockpro_state * const s = &clockpro;
744 
745 	KASSERT(mutex_owned(&s->lock));
746 
747 	clockpro_newqflush(
748 	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
749 }
750 
751 /*
752  * our "tail" is called "list-head" in the paper.
753  */
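/*
 * i.e. both hands scan from the head of each TAILQ (pageq_first,
 * pageq_remove_head) while newly (re)queued pages are appended at the
 * tail by clockpro_insert_tail().
 */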
754 
755 static void
756 clockpro___enqueuetail(struct vm_page *pg)
757 {
758 	struct clockpro_state * const s = &clockpro;
759 
760 	KASSERT(mutex_owned(&s->lock));
761 	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
762 
763 	check_sanity();
764 #if !defined(USEONCE2)
765 	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
766 	clockpro_newqrotate();
767 #else /* !defined(USEONCE2) */
768 #if defined(LISTQ)
769 	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
770 #endif /* defined(LISTQ) */
771 	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
772 #endif /* !defined(USEONCE2) */
773 	check_sanity();
774 }
775 
776 static void
777 clockpro_pageenqueue(struct vm_page *pg)
778 {
779 	struct clockpro_state * const s = &clockpro;
780 	bool hot;
781 	bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */
782 
783 	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
784 	KASSERT(mutex_owned(&s->lock));
785 	check_sanity();
786 	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
787 	s->s_npages++;
788 	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
789 	if (speculative) {
790 		hot = false;
791 		PDPOL_EVCNT_INCR(speculativeenqueue);
792 	} else {
793 		hot = nonresident_pagelookupremove(pg);
794 		if (hot) {
795 			COLDTARGET_ADJ(1);
796 		}
797 	}
798 
799 	/*
800 	 * consider mmap'ed file:
801 	 *
802 	 * - read-ahead enqueues a page.
803 	 *
804 	 * - on the following read-ahead hit, the fault handler activates it.
805 	 *
806 	 * - finally, the userland code which caused the above fault
807 	 *   actually accesses the page, which sets its reference bit.
808 	 *
809 	 * we want to count the above as a single access, rather than
810 	 * three accesses with short reuse distances.
811 	 */
812 
813 #if defined(USEONCE2)
814 	pg->pqflags &= ~PQ_INITIALREF;
815 	if (hot) {
816 		pg->pqflags |= PQ_TEST;
817 	}
818 	s->s_ncold++;
819 	clockpro_clearreferencebit(pg, false);
820 	clockpro___enqueuetail(pg);
821 #else /* defined(USEONCE2) */
822 	if (speculative) {
823 		s->s_ncold++;
824 	} else if (hot) {
825 		pg->pqflags |= PQ_HOT;
826 	} else {
827 		pg->pqflags |= PQ_TEST;
828 		s->s_ncold++;
829 	}
830 	clockpro___enqueuetail(pg);
831 #endif /* defined(USEONCE2) */
832 	KASSERT(s->s_ncold <= s->s_npages);
833 }
834 
835 static pageq_t *
836 clockpro_pagequeue(struct vm_page *pg)
837 {
838 	struct clockpro_state * const s = &clockpro;
839 	int qidx;
840 
841 	KASSERT(mutex_owned(&s->lock));
842 
843 	qidx = clockpro_getq(pg);
844 	KASSERT(qidx != CLOCKPRO_NOQUEUE);
845 
846 	return clockpro_queue(s, qidx);
847 }
848 
849 static void
850 clockpro_pagedequeue(struct vm_page *pg)
851 {
852 	struct clockpro_state * const s = &clockpro;
853 	pageq_t *q;
854 
855 	KASSERT(mutex_owned(&s->lock));
856 
857 	KASSERT(s->s_npages > 0);
858 	check_sanity();
859 	q = clockpro_pagequeue(pg);
860 	pageq_remove(q, pg);
861 	check_sanity();
862 	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
863 	if ((pg->pqflags & PQ_HOT) == 0) {
864 		KASSERT(s->s_ncold > 0);
865 		s->s_ncold--;
866 	}
867 	KASSERT(s->s_npages > 0);
868 	s->s_npages--;
869 	check_sanity();
870 }
871 
872 static void
873 clockpro_pagerequeue(struct vm_page *pg)
874 {
875 	struct clockpro_state * const s = &clockpro;
876 	int qidx;
877 
878 	KASSERT(mutex_owned(&s->lock));
879 
880 	qidx = clockpro_getq(pg);
881 	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
882 	pageq_remove(clockpro_queue(s, qidx), pg);
883 	check_sanity();
884 	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
885 
886 	clockpro___enqueuetail(pg);
887 }
888 
889 static void
890 handhot_endtest(struct vm_page *pg)
891 {
892 
893 	KASSERT(mutex_owned(&clockpro.lock));
894 
895 	KASSERT((pg->pqflags & PQ_HOT) == 0);
896 	if ((pg->pqflags & PQ_TEST) != 0) {
897 		PDPOL_EVCNT_INCR(hhotcoldtest);
898 		COLDTARGET_ADJ(-1);
899 		pg->pqflags &= ~PQ_TEST;
900 	} else {
901 		PDPOL_EVCNT_INCR(hhotcold);
902 	}
903 }
904 
905 static void
906 handhot_advance(void)
907 {
908 	struct clockpro_state * const s = &clockpro;
909 	struct vm_page *pg;
910 	pageq_t *hotq;
911 	int hotqlen;
912 
913 	KASSERT(mutex_owned(&s->lock));
914 
915 	clockpro_tune();
916 
917 	dump("hot called");
918 	if (s->s_ncold >= s->s_coldtarget) {
919 		return;
920 	}
921 	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
922 again:
923 	pg = pageq_first(hotq);
924 	if (pg == NULL) {
925 		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
926 		dump("hhottakeover");
927 		PDPOL_EVCNT_INCR(hhottakeover);
928 #if defined(LISTQ)
929 		while (/* CONSTCOND */ 1) {
930 			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
931 
932 			pg = pageq_first(coldq);
933 			if (pg == NULL) {
934 				clockpro_newqflushone();
935 				pg = pageq_first(coldq);
936 				if (pg == NULL) {
937 					WARN("hhot: no page?\n");
938 					return;
939 				}
940 			}
941 			KASSERT(clockpro_pagequeue(pg) == coldq);
942 			pageq_remove(coldq, pg);
943 			check_sanity();
944 			if ((pg->pqflags & PQ_HOT) == 0) {
945 				handhot_endtest(pg);
946 				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
947 			} else {
948 				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
949 				break;
950 			}
951 		}
952 #else /* defined(LISTQ) */
953 		clockpro_newqflush(0); /* XXX XXX */
954 		clockpro_switchqueue();
955 		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
956 		goto again;
957 #endif /* defined(LISTQ) */
958 	}
959 
960 	KASSERT(clockpro_pagequeue(pg) == hotq);
961 
962 	/*
963 	 * terminate test period of nonresident pages by cycling them.
964 	 */
965 
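	/*
	 * cycle_target advances by BUCKETSIZE/hotqlen per step of the hot
	 * hand, i.e. by about one whole bucket per full rotation of the hot
	 * queue; nonresident_rotate() expires one cookie slot per tick.
	 */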
966 	cycle_target_frac += BUCKETSIZE;
967 	hotqlen = pageq_len(hotq);
968 	while (cycle_target_frac >= hotqlen) {
969 		cycle_target++;
970 		cycle_target_frac -= hotqlen;
971 	}
972 
973 	if ((pg->pqflags & PQ_HOT) == 0) {
974 #if defined(LISTQ)
975 		panic("cold page in hotq: %p", pg);
976 #else /* defined(LISTQ) */
977 		handhot_endtest(pg);
978 		goto next;
979 #endif /* defined(LISTQ) */
980 	}
981 	KASSERT((pg->pqflags & PQ_TEST) == 0);
982 	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
983 	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
984 
985 	/*
986 	 * once we have met our target,
987 	 * stop at a hot page so that no cold page in its test period
988 	 * has larger recency than any hot page.
989 	 */
990 
991 	if (s->s_ncold >= s->s_coldtarget) {
992 		dump("hot done");
993 		return;
994 	}
995 	clockpro_movereferencebit(pg, false);
996 	if ((pg->pqflags & PQ_REFERENCED) == 0) {
997 		PDPOL_EVCNT_INCR(hhotunref);
998 		uvmexp.pddeact++;
999 		pg->pqflags &= ~PQ_HOT;
1000 		clockpro.s_ncold++;
1001 		KASSERT(s->s_ncold <= s->s_npages);
1002 	} else {
1003 		PDPOL_EVCNT_INCR(hhotref);
1004 	}
1005 	pg->pqflags &= ~PQ_REFERENCED;
1006 #if !defined(LISTQ)
1007 next:
1008 #endif /* !defined(LISTQ) */
1009 	clockpro_pagerequeue(pg);
1010 	dump("hot");
1011 	goto again;
1012 }
1013 
1014 static struct vm_page *
1015 handcold_advance(void)
1016 {
1017 	struct clockpro_state * const s = &clockpro;
1018 	struct vm_page *pg;
1019 
1020 	KASSERT(mutex_owned(&s->lock));
1021 
1022 	for (;;) {
1023 #if defined(LISTQ)
1024 		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
1025 #endif /* defined(LISTQ) */
1026 		pageq_t *coldq;
1027 
1028 		clockpro_newqrotate();
1029 		handhot_advance();
1030 #if defined(LISTQ)
1031 		pg = pageq_first(listq);
1032 		if (pg != NULL) {
1033 			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
1034 			KASSERT((pg->pqflags & PQ_TEST) == 0);
1035 			KASSERT((pg->pqflags & PQ_HOT) == 0);
1036 			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
1037 			pageq_remove(listq, pg);
1038 			check_sanity();
1039 			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
1040 			goto gotcold;
1041 		}
1042 #endif /* defined(LISTQ) */
1043 		check_sanity();
1044 		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
1045 		pg = pageq_first(coldq);
1046 		if (pg == NULL) {
1047 			clockpro_newqflushone();
1048 			pg = pageq_first(coldq);
1049 		}
1050 		if (pg == NULL) {
1051 			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
1052 			dump("hcoldtakeover");
1053 			PDPOL_EVCNT_INCR(hcoldtakeover);
1054 			KASSERT(
1055 			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
1056 #if defined(LISTQ)
1057 			KASSERT(
1058 			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
1059 #else /* defined(LISTQ) */
1060 			clockpro_switchqueue();
1061 			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
1062 			pg = pageq_first(coldq);
1063 #endif /* defined(LISTQ) */
1064 		}
1065 		if (pg == NULL) {
1066 			WARN("hcold: no page?\n");
1067 			return NULL;
1068 		}
1069 		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
1070 		if ((pg->pqflags & PQ_HOT) != 0) {
1071 			PDPOL_EVCNT_INCR(hcoldhot);
1072 			pageq_remove(coldq, pg);
1073 			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
1074 			check_sanity();
1075 			KASSERT((pg->pqflags & PQ_TEST) == 0);
1076 			uvmexp.pdscans++;
1077 			continue;
1078 		}
1079 #if defined(LISTQ)
1080 gotcold:
1081 #endif /* defined(LISTQ) */
1082 		KASSERT((pg->pqflags & PQ_HOT) == 0);
1083 		uvmexp.pdscans++;
1084 		clockpro_movereferencebit(pg, false);
1085 		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
1086 			KASSERT((pg->pqflags & PQ_TEST) == 0);
1087 			if ((pg->pqflags & PQ_REFERENCED) != 0) {
1088 				PDPOL_EVCNT_INCR(speculativehit2);
1089 				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
1090 				clockpro_pagedequeue(pg);
1091 				clockpro_pageenqueue(pg);
1092 				continue;
1093 			}
1094 			PDPOL_EVCNT_INCR(speculativemiss);
1095 		}
1096 		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
1097 		case PQ_TEST:
1098 			PDPOL_EVCNT_INCR(hcoldunreftest);
1099 			nonresident_pagerecord(pg);
1100 			goto gotit;
1101 		case 0:
1102 			PDPOL_EVCNT_INCR(hcoldunref);
1103 gotit:
1104 			KASSERT(s->s_ncold > 0);
1105 			clockpro_pagerequeue(pg); /* XXX */
1106 			dump("cold done");
1107 			/* XXX "pg" is still in queue */
1108 			handhot_advance();
1109 			goto done;
1110 
1111 		case PQ_REFERENCED|PQ_TEST:
1112 			PDPOL_EVCNT_INCR(hcoldreftest);
1113 			s->s_ncold--;
1114 			COLDTARGET_ADJ(1);
1115 			pg->pqflags |= PQ_HOT;
1116 			pg->pqflags &= ~PQ_TEST;
1117 			break;
1118 
1119 		case PQ_REFERENCED:
1120 			PDPOL_EVCNT_INCR(hcoldref);
1121 			pg->pqflags |= PQ_TEST;
1122 			break;
1123 		}
1124 		pg->pqflags &= ~PQ_REFERENCED;
1125 		uvmexp.pdreact++;
1126 		/* move to the list head */
1127 		clockpro_pagerequeue(pg);
1128 		dump("cold");
1129 	}
1130 done:;
1131 	return pg;
1132 }
1133 
1134 static void
1135 uvmpdpol_pageactivate_locked(struct vm_page *pg)
1136 {
1137 
1138 	if (!uvmpdpol_pageisqueued_p(pg)) {
1139 		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
1140 		pg->pqflags |= PQ_INITIALREF;
1141 		clockpro_pageenqueue(pg);
1142 	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
1143 		PDPOL_EVCNT_INCR(speculativehit1);
1144 		pg->pqflags &= ~PQ_SPECULATIVE;
1145 		pg->pqflags |= PQ_INITIALREF;
1146 		clockpro_pagedequeue(pg);
1147 		clockpro_pageenqueue(pg);
1148 	}
1149 	pg->pqflags |= PQ_REFERENCED;
1150 }
1151 
1152 void
1153 uvmpdpol_pageactivate(struct vm_page *pg)
1154 {
1155 
1156 	uvmpdpol_set_intent(pg, PQ_INTENT_A);
1157 }
1158 
1159 static void
1160 uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
1161 {
1162 
1163 	clockpro_clearreferencebit(pg, true);
1164 }
1165 
1166 void
1167 uvmpdpol_pagedeactivate(struct vm_page *pg)
1168 {
1169 
1170 	uvmpdpol_set_intent(pg, PQ_INTENT_I);
1171 }
1172 
1173 static void
1174 uvmpdpol_pagedequeue_locked(struct vm_page *pg)
1175 {
1176 
1177 	if (!uvmpdpol_pageisqueued_p(pg)) {
1178 		return;
1179 	}
1180 	clockpro_pagedequeue(pg);
1181 	pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
1182 }
1183 
1184 void
1185 uvmpdpol_pagedequeue(struct vm_page *pg)
1186 {
1187 
1188 	uvmpdpol_set_intent(pg, PQ_INTENT_D);
1189 }
1190 
1191 static void
1192 uvmpdpol_pageenqueue_locked(struct vm_page *pg)
1193 {
1194 
1195 #if 1
1196 	if (uvmpdpol_pageisqueued_p(pg)) {
1197 		return;
1198 	}
1199 	clockpro_clearreferencebit(pg, true);
1200 	pg->pqflags |= PQ_SPECULATIVE;
1201 	clockpro_pageenqueue(pg);
1202 #else
1203 	uvmpdpol_pageactivate_locked(pg);
1204 #endif
1205 }
1206 
1207 void
1208 uvmpdpol_pageenqueue(struct vm_page *pg)
1209 {
1210 
1211 	uvmpdpol_set_intent(pg, PQ_INTENT_D);
1212 }
1213 
1214 static bool
1215 uvmpdpol_pagerealize_locked(struct vm_page *pg)
1216 {
1217 	uint32_t pqflags;
1218 
1219 	KASSERT(mutex_owned(&clockpro.lock));
1220 	KASSERT(mutex_owned(&pg->interlock));
1221 
1222 	/* XXX this needs to be called from elsewhere, like uvmpdpol_clock. */
1223 
1224 	pqflags = pg->pqflags;
1225 	pg->pqflags &= ~(PQ_INTENT_SET | PQ_INTENT_QUEUED);
1226 	switch (pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
1227 	case PQ_INTENT_A | PQ_INTENT_SET:
1228 		uvmpdpol_pageactivate_locked(pg);
1229 		return true;
1230 	case PQ_INTENT_E | PQ_INTENT_SET:
1231 		uvmpdpol_pageenqueue_locked(pg);
1232 		return true;
1233 	case PQ_INTENT_I | PQ_INTENT_SET:
1234 		uvmpdpol_pagedeactivate_locked(pg);
1235 		return true;
1236 	case PQ_INTENT_D | PQ_INTENT_SET:
1237 		uvmpdpol_pagedequeue_locked(pg);
1238 		return true;
1239 	default:
1240 		return false;
1241 	}
1242 }
1243 
1244 void
1245 uvmpdpol_pagerealize(struct vm_page *pg)
1246 {
1247 	struct clockpro_state * const s = &clockpro;
1248 
1249 	mutex_enter(&s->lock);
1250 	uvmpdpol_pagerealize_locked(pg);
1251 	mutex_exit(&s->lock);
1252 }
1253 
1254 void
1255 uvmpdpol_anfree(struct vm_anon *an)
1256 {
1257 	struct clockpro_state * const s = &clockpro;
1258 
1259 	KASSERT(an->an_page == NULL);
1260 	mutex_enter(&s->lock);
1261 	if (nonresident_lookupremove((objid_t)an, 0)) {
1262 		PDPOL_EVCNT_INCR(nresanonfree);
1263 	}
1264 	mutex_exit(&s->lock);
1265 }
1266 
1267 void
1268 uvmpdpol_init(void)
1269 {
1270 
1271 	clockpro_init();
1272 }
1273 
1274 void
1275 uvmpdpol_reinit(void)
1276 {
1277 	struct clockpro_state * const s = &clockpro;
1278 
1279 	mutex_enter(&s->lock);
1280 	clockpro_reinit();
1281 	mutex_exit(&s->lock);
1282 }
1283 
1284 void
1285 uvmpdpol_estimatepageable(int *active, int *inactive)
1286 {
1287 	struct clockpro_state * const s = &clockpro;
1288 
1289 	/*
1290 	 * Don't take any locks here.  This can be called from DDB, and in
1291 	 * any case the numbers are stale the instant the lock is dropped,
1292 	 * so it just doesn't matter.
1293 	 */
1294 	if (active) {
1295 		*active = s->s_npages - s->s_ncold;
1296 	}
1297 	if (inactive) {
1298 		*inactive = s->s_ncold;
1299 	}
1300 }
1301 
1302 bool
1303 uvmpdpol_pageisqueued_p(struct vm_page *pg)
1304 {
1305 
1306 	/* Unlocked check OK due to page lifecycle. */
1307 	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
1308 }
1309 
1310 bool
1311 uvmpdpol_pageactivate_p(struct vm_page *pg)
1312 {
1313 
1314 	/* For now, no heuristic, always receive activations. */
1315 	return true;
1316 }
1317 
1318 void
1319 uvmpdpol_scaninit(void)
1320 {
1321 	struct clockpro_state * const s = &clockpro;
1322 	struct clockpro_scanstate * const ss = &scanstate;
1323 
1324 	mutex_enter(&s->lock);
1325 	ss->ss_nscanned = 0;
1326 	mutex_exit(&s->lock);
1327 }
1328 
1329 void
1330 uvmpdpol_scanfini(void)
1331 {
1332 
1333 }
1334 
1335 struct vm_page *
1336 uvmpdpol_selectvictim(kmutex_t **plock)
1337 {
1338 	struct clockpro_state * const s = &clockpro;
1339 	struct clockpro_scanstate * const ss = &scanstate;
1340 	struct vm_page *pg;
1341 	kmutex_t *lock = NULL;
1342 
1343 	do {
1344 		mutex_enter(&s->lock);
1345 		if (ss->ss_nscanned > s->s_npages) {
1346 			DPRINTF("scan too much\n");
1347 			mutex_exit(&s->lock);
1348 			return NULL;
1349 		}
1350 		pg = handcold_advance();
1351 		if (pg == NULL) {
1352 			mutex_exit(&s->lock);
1353 			break;
1354 		}
1355 		ss->ss_nscanned++;
1356 		/*
1357 		 * acquire interlock to stabilize page identity.
1358 		 * if we have caught the page in a state of flux
1359 		 * and it should be dequeued, do it now and then
1360 		 * move on to the next.
1361 		 */
1362 		mutex_enter(&pg->interlock);
1363 	        if ((pg->uobject == NULL && pg->uanon == NULL) ||
1364 	            pg->wire_count > 0) {
1365 	            	mutex_exit(&pg->interlock);
1366 			clockpro_pagedequeue(pg);
1367 			pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
1368 	            	continue;
1369 		}
1370 		mutex_exit(&s->lock);
1371 		lock = uvmpd_trylockowner(pg);
1372 		/* pg->interlock now dropped */
1373 	} while (lock == NULL);
1374 	*plock = lock;
1375 	return pg;
1376 }
1377 
1378 static void
1379 clockpro_dropswap(pageq_t *q, int *todo)
1380 {
1381 	struct vm_page *pg;
1382 	kmutex_t *lock;
1383 
1384 	KASSERT(mutex_owned(&clockpro.lock));
1385 
1386 	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pdqueue) {
1387 		if (*todo <= 0) {
1388 			break;
1389 		}
1390 		if ((pg->pqflags & PQ_HOT) == 0) {
1391 			continue;
1392 		}
1393 		mutex_enter(&pg->interlock);
1394 		if ((pg->flags & PG_SWAPBACKED) == 0) {
1395 			mutex_exit(&pg->interlock);
1396 			continue;
1397 		}
1398 
1399 		/*
1400 		 * try to lock the object that owns the page.
1401 	         */
1402 	        mutex_exit(&clockpro.lock);
1403         	lock = uvmpd_trylockowner(pg);
1404         	/* pg->interlock now released */
1405         	mutex_enter(&clockpro.lock);
1406 		if (lock == NULL) {
1407 			/* didn't get it - try the next page. */
1408 			/* XXXAD lost position in queue */
1409 			continue;
1410 		}
1411 
1412 		/*
1413 		 * if there's a shortage of swap slots, try to free it.
1414 		 */
1415 		if ((pg->flags & PG_SWAPBACKED) != 0 &&
1416 		    (pg->flags & PG_BUSY) == 0) {
1417 			if (uvmpd_dropswap(pg)) {
1418 				(*todo)--;
1419 			}
1420 		}
1421 		mutex_exit(lock);
1422 	}
1423 }
1424 
1425 void
1426 uvmpdpol_balancequeue(int swap_shortage)
1427 {
1428 	struct clockpro_state * const s = &clockpro;
1429 	int todo = swap_shortage;
1430 
1431 	if (todo == 0) {
1432 		return;
1433 	}
1434 
1435 	/*
1436 	 * reclaim swap slots from hot pages
1437 	 */
1438 
1439 	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);
1440 
1441 	mutex_enter(&s->lock);
1442 	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
1443 	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
1444 	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);
1445 	mutex_exit(&s->lock);
1446 
1447 	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
1448 }
1449 
1450 bool
1451 uvmpdpol_needsscan_p(void)
1452 {
1453 	struct clockpro_state * const s = &clockpro;
1454 
1455 	/* This must be an unlocked check: can be called from interrupt. */
1456 	return s->s_ncold < s->s_coldtarget;
1457 }
1458 
1459 void
1460 uvmpdpol_tune(void)
1461 {
1462 	struct clockpro_state * const s = &clockpro;
1463 
1464 	mutex_enter(&s->lock);
1465 	clockpro_tune();
1466 	mutex_exit(&s->lock);
1467 }
1468 
1469 void
1470 uvmpdpol_idle(void)
1471 {
1472 
1473 }
1474 
1475 #if !defined(PDSIM)
1476 
1477 #include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */
1478 
1479 void
1480 uvmpdpol_sysctlsetup(void)
1481 {
1482 #if !defined(ADAPTIVE)
1483 	struct clockpro_state * const s = &clockpro;
1484 
1485 	uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
1486 	    SYSCTL_DESCR("Percentage cold target queue of the entire queue"));
1487 #endif /* !defined(ADAPTIVE) */
1488 }
1489 
1490 #endif /* !defined(PDSIM) */
1491 
1492 #if defined(DDB)
1493 
1494 #if 0 /* XXXuvmplock */
1495 #define	_pmap_is_referenced(pg)	pmap_is_referenced(pg)
1496 #else
1497 #define	_pmap_is_referenced(pg)	false
1498 #endif
1499 
1500 void clockpro_dump(void);
1501 
1502 void
1503 clockpro_dump(void)
1504 {
1505 	struct clockpro_state * const s = &clockpro;
1506 
1507 	struct vm_page *pg;
1508 	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
1509 	int newqlen, coldqlen, hotqlen, listqlen;
1510 
1511 	newqlen = coldqlen = hotqlen = listqlen = 0;
1512 	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
1513 	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);
1514 
1515 #define	INITCOUNT()	\
1516 	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0
1517 
1518 #define	COUNT(pg)	\
1519 	if ((pg->pqflags & PQ_HOT) != 0) { \
1520 		nhot++; \
1521 	} else { \
1522 		ncold++; \
1523 		if ((pg->pqflags & PQ_TEST) != 0) { \
1524 			ntest++; \
1525 		} \
1526 		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
1527 			nspeculative++; \
1528 		} \
1529 		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
1530 			ninitialref++; \
1531 		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
1532 		    _pmap_is_referenced(pg)) { \
1533 			nref++; \
1534 		} \
1535 	}
1536 
1537 #define	PRINTCOUNT(name)	\
1538 	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
1539 	    "nref=%d\n", \
1540 	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)
1541 
1542 	INITCOUNT();
1543 	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pdqueue) {
1544 		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
1545 			printf("newq corrupt %p\n", pg);
1546 		}
1547 		COUNT(pg)
1548 		newqlen++;
1549 	}
1550 	PRINTCOUNT("newq");
1551 
1552 	INITCOUNT();
1553 	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pdqueue) {
1554 		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
1555 			printf("coldq corrupt %p\n", pg);
1556 		}
1557 		COUNT(pg)
1558 		coldqlen++;
1559 	}
1560 	PRINTCOUNT("coldq");
1561 
1562 	INITCOUNT();
1563 	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pdqueue) {
1564 		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
1565 			printf("hotq corrupt %p\n", pg);
1566 		}
1567 #if defined(LISTQ)
1568 		if ((pg->pqflags & PQ_HOT) == 0) {
1569 			printf("cold page in hotq: %p\n", pg);
1570 		}
1571 #endif /* defined(LISTQ) */
1572 		COUNT(pg)
1573 		hotqlen++;
1574 	}
1575 	PRINTCOUNT("hotq");
1576 
1577 	INITCOUNT();
1578 	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pdqueue) {
1579 #if !defined(LISTQ)
1580 		printf("listq %p\n", pg);
1581 #endif /* !defined(LISTQ) */
1582 		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
1583 			printf("listq corrupt %p\n", pg);
1584 		}
1585 		COUNT(pg)
1586 		listqlen++;
1587 	}
1588 	PRINTCOUNT("listq");
1589 
1590 	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
1591 	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
1592 	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
1593 	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
1594 	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
1595 }
1596 
1597 #endif /* defined(DDB) */
1598 
1599 #if defined(PDSIM)
1600 #if defined(DEBUG)
1601 static void
1602 pdsim_dumpq(int qidx)
1603 {
1604 	struct clockpro_state * const s = &clockpro;
1605 	pageq_t *q = clockpro_queue(s, qidx);
1606 	struct vm_page *pg;
1607 
1608 	TAILQ_FOREACH(pg, &q->q_q, pdqueue) {
1609 		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
1610 		    pg->offset >> PAGE_SHIFT,
1611 		    (pg->pqflags & PQ_HOT) ? "H" : "",
1612 		    (pg->pqflags & PQ_TEST) ? "T" : "",
1613 		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
1614 		    _pmap_is_referenced(pg) ? "r" : "",
1615 		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
1616 		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
1617 		    );
1618 	}
1619 }
1620 #endif /* defined(DEBUG) */
1621 
1622 void
1623 pdsim_dump(const char *id)
1624 {
1625 #if defined(DEBUG)
1626 	struct clockpro_state * const s = &clockpro;
1627 
1628 	DPRINTF("  %s L(", id);
1629 	pdsim_dumpq(CLOCKPRO_LISTQ);
1630 	DPRINTF(" ) H(");
1631 	pdsim_dumpq(CLOCKPRO_HOTQ);
1632 	DPRINTF(" ) C(");
1633 	pdsim_dumpq(CLOCKPRO_COLDQ);
1634 	DPRINTF(" ) N(");
1635 	pdsim_dumpq(CLOCKPRO_NEWQ);
1636 	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
1637 	    s->s_ncold, s->s_coldtarget, coldadj);
1638 #endif /* defined(DEBUG) */
1639 }
1640 #endif /* defined(PDSIM) */
1641