1 /* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.2 2006/09/15 15:51:13 yamt Exp $ */ 2 3 /*- 4 * Copyright (c)2005, 2006 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * CLOCK-Pro replacement policy: 31 * http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html 32 * 33 * approximation of the list of non-resident pages using hash: 34 * http://linux-mm.org/ClockProApproximation 35 */ 36 37 /* #define CLOCKPRO_DEBUG */ 38 39 #if defined(PDSIM) 40 41 #include "pdsim.h" 42 43 #else /* defined(PDSIM) */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.2 2006/09/15 15:51:13 yamt Exp $"); 47 48 #include "opt_ddb.h" 49 50 #include <sys/param.h> 51 #include <sys/proc.h> 52 #include <sys/systm.h> 53 #include <sys/kernel.h> 54 #include <sys/vnode.h> 55 #include <sys/hash.h> 56 57 #include <uvm/uvm.h> 58 #include <uvm/uvm_pdpolicy.h> 59 #include <uvm/uvm_pdpolicy_impl.h> 60 61 #if ((__STDC_VERSION__ - 0) >= 199901L) 62 #define DPRINTF(...) /* nothing */ 63 #define WARN(...) printf(__VA_ARGS__) 64 #else /* ((__STDC_VERSION__ - 0) >= 199901L) */ 65 #define DPRINTF(a...) /* nothing */ /* GCC */ 66 #define WARN(a...) printf(a) 67 #endif /* ((__STDC_VERSION__ - 0) >= 199901L) */ 68 69 #define dump(a) /* nothing */ 70 71 #undef USEONCE2 72 #define LISTQ 73 #undef ADAPTIVE 74 75 #endif /* defined(PDSIM) */ 76 77 #if !defined(CLOCKPRO_COLDPCT) 78 #define CLOCKPRO_COLDPCT 10 79 #endif /* !defined(CLOCKPRO_COLDPCT) */ 80 81 #define CLOCKPRO_COLDPCTMAX 90 82 83 #if !defined(CLOCKPRO_HASHFACTOR) 84 #define CLOCKPRO_HASHFACTOR 2 85 #endif /* !defined(CLOCKPRO_HASHFACTOR) */ 86 87 #define CLOCKPRO_NEWQMIN ((1024 * 1024) >> PAGE_SHIFT) /* XXX */ 88 89 int clockpro_hashfactor = CLOCKPRO_HASHFACTOR; 90 91 PDPOL_EVCNT_DEFINE(nresrecordobj) 92 PDPOL_EVCNT_DEFINE(nresrecordanon) 93 PDPOL_EVCNT_DEFINE(nreslookup) 94 PDPOL_EVCNT_DEFINE(nresfoundobj) 95 PDPOL_EVCNT_DEFINE(nresfoundanon) 96 PDPOL_EVCNT_DEFINE(nresanonfree) 97 PDPOL_EVCNT_DEFINE(nresconflict) 98 PDPOL_EVCNT_DEFINE(nresoverwritten) 99 PDPOL_EVCNT_DEFINE(nreshandhot) 100 101 PDPOL_EVCNT_DEFINE(hhottakeover) 102 PDPOL_EVCNT_DEFINE(hhotref) 103 PDPOL_EVCNT_DEFINE(hhotunref) 104 PDPOL_EVCNT_DEFINE(hhotcold) 105 PDPOL_EVCNT_DEFINE(hhotcoldtest) 106 107 PDPOL_EVCNT_DEFINE(hcoldtakeover) 108 PDPOL_EVCNT_DEFINE(hcoldref) 109 PDPOL_EVCNT_DEFINE(hcoldunref) 110 PDPOL_EVCNT_DEFINE(hcoldreftest) 111 PDPOL_EVCNT_DEFINE(hcoldunreftest) 112 PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative) 113 PDPOL_EVCNT_DEFINE(hcoldhot) 114 115 PDPOL_EVCNT_DEFINE(speculativeenqueue) 116 PDPOL_EVCNT_DEFINE(speculativehit1) 117 PDPOL_EVCNT_DEFINE(speculativehit2) 118 PDPOL_EVCNT_DEFINE(speculativemiss) 119 120 #define PQ_REFERENCED PQ_PRIVATE1 121 #define PQ_HOT PQ_PRIVATE2 122 #define PQ_TEST PQ_PRIVATE3 123 #define PQ_INITIALREF PQ_PRIVATE4 124 #if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2 125 #error PQ_PRIVATE 126 #endif 127 #define PQ_QMASK (PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7) 128 #define PQ_QFACTOR PQ_PRIVATE5 129 #define PQ_SPECULATIVE PQ_PRIVATE8 130 131 #define CLOCKPRO_NOQUEUE 0 132 #define CLOCKPRO_NEWQ 1 /* small queue to clear initial ref. */ 133 #if defined(LISTQ) 134 #define CLOCKPRO_COLDQ 2 135 #define CLOCKPRO_HOTQ 3 136 #else /* defined(LISTQ) */ 137 #define CLOCKPRO_COLDQ (2 + coldqidx) /* XXX */ 138 #define CLOCKPRO_HOTQ (3 - coldqidx) /* XXX */ 139 #endif /* defined(LISTQ) */ 140 #define CLOCKPRO_LISTQ 4 141 #define CLOCKPRO_NQUEUE 4 142 143 static inline void 144 clockpro_setq(struct vm_page *pg, int qidx) 145 { 146 KASSERT(qidx >= CLOCKPRO_NOQUEUE); 147 KASSERT(qidx <= CLOCKPRO_NQUEUE); 148 149 pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR); 150 } 151 152 static inline int 153 clockpro_getq(struct vm_page *pg) 154 { 155 int qidx; 156 157 qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR; 158 KASSERT(qidx >= CLOCKPRO_NOQUEUE); 159 KASSERT(qidx <= CLOCKPRO_NQUEUE); 160 return qidx; 161 } 162 163 typedef struct { 164 struct pglist q_q; 165 int q_len; 166 } pageq_t; 167 168 struct clockpro_state { 169 int s_npages; 170 int s_coldtarget; 171 int s_ncold; 172 173 int s_newqlenmax; 174 pageq_t s_q[CLOCKPRO_NQUEUE]; 175 176 struct uvm_pctparam s_coldtargetpct; 177 }; 178 179 static pageq_t * 180 clockpro_queue(struct clockpro_state *s, int qidx) 181 { 182 183 KASSERT(CLOCKPRO_NOQUEUE < qidx); 184 KASSERT(qidx <= CLOCKPRO_NQUEUE); 185 186 return &s->s_q[qidx - 1]; 187 } 188 189 #if !defined(LISTQ) 190 191 static int coldqidx; 192 193 static void 194 clockpro_switchqueue(void) 195 { 196 197 coldqidx = 1 - coldqidx; 198 } 199 200 #endif /* !defined(LISTQ) */ 201 202 static struct clockpro_state clockpro; 203 static struct clockpro_scanstate { 204 int ss_nscanned; 205 } scanstate; 206 207 /* ---------------------------------------- */ 208 209 static void 210 pageq_init(pageq_t *q) 211 { 212 213 TAILQ_INIT(&q->q_q); 214 q->q_len = 0; 215 } 216 217 static int 218 pageq_len(const pageq_t *q) 219 { 220 221 return q->q_len; 222 } 223 224 static struct vm_page * 225 pageq_first(const pageq_t *q) 226 { 227 228 return TAILQ_FIRST(&q->q_q); 229 } 230 231 static void 232 pageq_insert_tail(pageq_t *q, struct vm_page *pg) 233 { 234 235 TAILQ_INSERT_TAIL(&q->q_q, pg, pageq); 236 q->q_len++; 237 } 238 239 static void 240 pageq_insert_head(pageq_t *q, struct vm_page *pg) 241 { 242 243 TAILQ_INSERT_HEAD(&q->q_q, pg, pageq); 244 q->q_len++; 245 } 246 247 static void 248 pageq_remove(pageq_t *q, struct vm_page *pg) 249 { 250 251 #if 1 252 KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q); 253 #endif 254 KASSERT(q->q_len > 0); 255 TAILQ_REMOVE(&q->q_q, pg, pageq); 256 q->q_len--; 257 } 258 259 static struct vm_page * 260 pageq_remove_head(pageq_t *q) 261 { 262 struct vm_page *pg; 263 264 pg = TAILQ_FIRST(&q->q_q); 265 if (pg == NULL) { 266 KASSERT(q->q_len == 0); 267 return NULL; 268 } 269 pageq_remove(q, pg); 270 return pg; 271 } 272 273 /* ---------------------------------------- */ 274 275 static void 276 clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg) 277 { 278 pageq_t *q = clockpro_queue(s, qidx); 279 280 clockpro_setq(pg, qidx); 281 pageq_insert_tail(q, pg); 282 } 283 284 static void 285 clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg) 286 { 287 pageq_t *q = clockpro_queue(s, qidx); 288 289 clockpro_setq(pg, qidx); 290 pageq_insert_head(q, pg); 291 } 292 293 /* ---------------------------------------- */ 294 295 typedef uint32_t nonres_cookie_t; 296 #define NONRES_COOKIE_INVAL 0 297 298 typedef uintptr_t objid_t; 299 300 /* 301 * XXX maybe these hash functions need reconsideration, 302 * given that hash distribution is critical here. 303 */ 304 305 static uint32_t 306 pageidentityhash1(objid_t obj, off_t idx) 307 { 308 uint32_t hash = HASH32_BUF_INIT; 309 310 #if 1 311 hash = hash32_buf(&idx, sizeof(idx), hash); 312 hash = hash32_buf(&obj, sizeof(obj), hash); 313 #else 314 hash = hash32_buf(&obj, sizeof(obj), hash); 315 hash = hash32_buf(&idx, sizeof(idx), hash); 316 #endif 317 return hash; 318 } 319 320 static uint32_t 321 pageidentityhash2(objid_t obj, off_t idx) 322 { 323 uint32_t hash = HASH32_BUF_INIT; 324 325 hash = hash32_buf(&obj, sizeof(obj), hash); 326 hash = hash32_buf(&idx, sizeof(idx), hash); 327 return hash; 328 } 329 330 static nonres_cookie_t 331 calccookie(objid_t obj, off_t idx) 332 { 333 uint32_t hash = pageidentityhash2(obj, idx); 334 nonres_cookie_t cookie = hash; 335 336 if (__predict_false(cookie == NONRES_COOKIE_INVAL)) { 337 cookie++; /* XXX */ 338 } 339 return cookie; 340 } 341 342 #define BUCKETSIZE 14 343 struct bucket { 344 int cycle; 345 int cur; 346 nonres_cookie_t pages[BUCKETSIZE]; 347 }; 348 static int cycle_target; 349 static int cycle_target_frac; 350 351 static struct bucket static_bucket; 352 static struct bucket *buckets = &static_bucket; 353 static size_t hashsize = 1; 354 355 static int coldadj; 356 #define COLDTARGET_ADJ(d) coldadj += (d) 357 358 #if defined(PDSIM) 359 360 static void * 361 clockpro_hashalloc(int n) 362 { 363 size_t allocsz = sizeof(*buckets) * n; 364 365 return malloc(allocsz); 366 } 367 368 static void 369 clockpro_hashfree(void *p, int n) 370 { 371 372 free(p); 373 } 374 375 #else /* defined(PDSIM) */ 376 377 static void * 378 clockpro_hashalloc(int n) 379 { 380 size_t allocsz = round_page(sizeof(*buckets) * n); 381 382 return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED); 383 } 384 385 static void 386 clockpro_hashfree(void *p, int n) 387 { 388 size_t allocsz = round_page(sizeof(*buckets) * n); 389 390 uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED); 391 } 392 393 #endif /* defined(PDSIM) */ 394 395 static void 396 clockpro_hashinit(uint64_t n) 397 { 398 struct bucket *newbuckets; 399 struct bucket *oldbuckets; 400 size_t sz; 401 size_t oldsz; 402 int i; 403 404 sz = howmany(n, BUCKETSIZE); 405 sz *= clockpro_hashfactor; 406 newbuckets = clockpro_hashalloc(sz); 407 if (newbuckets == NULL) { 408 panic("%s: allocation failure", __func__); 409 } 410 for (i = 0; i < sz; i++) { 411 struct bucket *b = &newbuckets[i]; 412 int j; 413 414 b->cycle = cycle_target; 415 b->cur = 0; 416 for (j = 0; j < BUCKETSIZE; j++) { 417 b->pages[j] = NONRES_COOKIE_INVAL; 418 } 419 } 420 /* XXX lock */ 421 oldbuckets = buckets; 422 oldsz = hashsize; 423 buckets = newbuckets; 424 hashsize = sz; 425 /* XXX unlock */ 426 if (oldbuckets != &static_bucket) { 427 clockpro_hashfree(oldbuckets, oldsz); 428 } 429 } 430 431 static struct bucket * 432 nonresident_getbucket(objid_t obj, off_t idx) 433 { 434 uint32_t hash; 435 436 hash = pageidentityhash1(obj, idx); 437 return &buckets[hash % hashsize]; 438 } 439 440 static void 441 nonresident_rotate(struct bucket *b) 442 { 443 444 while (b->cycle - cycle_target < 0) { 445 if (b->pages[b->cur] != NONRES_COOKIE_INVAL) { 446 PDPOL_EVCNT_INCR(nreshandhot); 447 COLDTARGET_ADJ(-1); 448 } 449 b->pages[b->cur] = NONRES_COOKIE_INVAL; 450 b->cur = (b->cur + 1) % BUCKETSIZE; 451 b->cycle++; 452 } 453 } 454 455 static boolean_t 456 nonresident_lookupremove(objid_t obj, off_t idx) 457 { 458 struct bucket *b = nonresident_getbucket(obj, idx); 459 nonres_cookie_t cookie = calccookie(obj, idx); 460 int i; 461 462 nonresident_rotate(b); 463 for (i = 0; i < BUCKETSIZE; i++) { 464 if (b->pages[i] == cookie) { 465 b->pages[i] = NONRES_COOKIE_INVAL; 466 return TRUE; 467 } 468 } 469 return FALSE; 470 } 471 472 static objid_t 473 pageobj(struct vm_page *pg) 474 { 475 const void *obj; 476 477 /* 478 * XXX object pointer is often freed and reused for unrelated object. 479 * for vnodes, it would be better to use something like 480 * a hash of fsid/fileid/generation. 481 */ 482 483 obj = pg->uobject; 484 if (obj == NULL) { 485 obj = pg->uanon; 486 KASSERT(obj != NULL); 487 KASSERT(pg->offset == 0); 488 } 489 490 return (objid_t)obj; 491 } 492 493 static off_t 494 pageidx(struct vm_page *pg) 495 { 496 497 KASSERT((pg->offset & PAGE_MASK) == 0); 498 return pg->offset >> PAGE_SHIFT; 499 } 500 501 static boolean_t 502 nonresident_pagelookupremove(struct vm_page *pg) 503 { 504 boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg)); 505 506 PDPOL_EVCNT_INCR(nreslookup); 507 if (found) { 508 if (pg->uobject) { 509 PDPOL_EVCNT_INCR(nresfoundobj); 510 } else { 511 PDPOL_EVCNT_INCR(nresfoundanon); 512 } 513 } 514 return found; 515 } 516 517 static void 518 nonresident_pagerecord(struct vm_page *pg) 519 { 520 objid_t obj = pageobj(pg); 521 off_t idx = pageidx(pg); 522 struct bucket *b = nonresident_getbucket(obj, idx); 523 nonres_cookie_t cookie = calccookie(obj, idx); 524 525 #if defined(DEBUG) 526 int i; 527 528 for (i = 0; i < BUCKETSIZE; i++) { 529 if (b->pages[i] == cookie) { 530 PDPOL_EVCNT_INCR(nresconflict); 531 } 532 } 533 #endif /* defined(DEBUG) */ 534 535 if (pg->uobject) { 536 PDPOL_EVCNT_INCR(nresrecordobj); 537 } else { 538 PDPOL_EVCNT_INCR(nresrecordanon); 539 } 540 nonresident_rotate(b); 541 if (b->pages[b->cur] != NONRES_COOKIE_INVAL) { 542 PDPOL_EVCNT_INCR(nresoverwritten); 543 COLDTARGET_ADJ(-1); 544 } 545 b->pages[b->cur] = cookie; 546 b->cur = (b->cur + 1) % BUCKETSIZE; 547 } 548 549 /* ---------------------------------------- */ 550 551 #if defined(CLOCKPRO_DEBUG) 552 static void 553 check_sanity(void) 554 { 555 } 556 #else /* defined(CLOCKPRO_DEBUG) */ 557 #define check_sanity() /* nothing */ 558 #endif /* defined(CLOCKPRO_DEBUG) */ 559 560 static void 561 clockpro_reinit(void) 562 { 563 564 clockpro_hashinit(uvmexp.npages); 565 } 566 567 static void 568 clockpro_init(void) 569 { 570 struct clockpro_state *s = &clockpro; 571 int i; 572 573 for (i = 0; i < CLOCKPRO_NQUEUE; i++) { 574 pageq_init(&s->s_q[i]); 575 } 576 s->s_newqlenmax = 1; 577 s->s_coldtarget = 1; 578 uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL); 579 } 580 581 static void 582 clockpro_tune(void) 583 { 584 struct clockpro_state *s = &clockpro; 585 int coldtarget; 586 587 #if defined(ADAPTIVE) 588 int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100; 589 int coldmin = 1; 590 591 coldtarget = s->s_coldtarget; 592 if (coldtarget + coldadj < coldmin) { 593 coldadj = coldmin - coldtarget; 594 } else if (coldtarget + coldadj > coldmax) { 595 coldadj = coldmax - coldtarget; 596 } 597 coldtarget += coldadj; 598 #else /* defined(ADAPTIVE) */ 599 coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages); 600 if (coldtarget < 1) { 601 coldtarget = 1; 602 } 603 #endif /* defined(ADAPTIVE) */ 604 605 s->s_coldtarget = coldtarget; 606 s->s_newqlenmax = coldtarget / 4; 607 if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) { 608 s->s_newqlenmax = CLOCKPRO_NEWQMIN; 609 } 610 } 611 612 static void 613 clockpro_movereferencebit(struct vm_page *pg) 614 { 615 boolean_t referenced; 616 617 referenced = pmap_clear_reference(pg); 618 if (referenced) { 619 pg->pqflags |= PQ_REFERENCED; 620 } 621 } 622 623 static void 624 clockpro_clearreferencebit(struct vm_page *pg) 625 { 626 627 clockpro_movereferencebit(pg); 628 pg->pqflags &= ~PQ_REFERENCED; 629 } 630 631 static void 632 clockpro___newqrotate(int len) 633 { 634 struct clockpro_state * const s = &clockpro; 635 pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ); 636 struct vm_page *pg; 637 638 while (pageq_len(newq) > len) { 639 pg = pageq_remove_head(newq); 640 KASSERT(pg != NULL); 641 KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ); 642 if ((pg->pqflags & PQ_INITIALREF) != 0) { 643 clockpro_clearreferencebit(pg); 644 pg->pqflags &= ~PQ_INITIALREF; 645 } 646 /* place at the list head */ 647 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg); 648 } 649 } 650 651 static void 652 clockpro_newqrotate(void) 653 { 654 struct clockpro_state * const s = &clockpro; 655 656 check_sanity(); 657 clockpro___newqrotate(s->s_newqlenmax); 658 check_sanity(); 659 } 660 661 static void 662 clockpro_newqflush(int n) 663 { 664 665 check_sanity(); 666 clockpro___newqrotate(n); 667 check_sanity(); 668 } 669 670 static void 671 clockpro_newqflushone(void) 672 { 673 struct clockpro_state * const s = &clockpro; 674 675 clockpro_newqflush( 676 MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0)); 677 } 678 679 /* 680 * our "tail" is called "list-head" in the paper. 681 */ 682 683 static void 684 clockpro___enqueuetail(struct vm_page *pg) 685 { 686 struct clockpro_state * const s = &clockpro; 687 688 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE); 689 690 check_sanity(); 691 #if !defined(USEONCE2) 692 clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg); 693 clockpro_newqrotate(); 694 #else /* !defined(USEONCE2) */ 695 #if defined(LISTQ) 696 KASSERT((pg->pqflags & PQ_REFERENCED) == 0); 697 #endif /* defined(LISTQ) */ 698 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg); 699 #endif /* !defined(USEONCE2) */ 700 check_sanity(); 701 } 702 703 static void 704 clockpro_pageenqueue(struct vm_page *pg) 705 { 706 struct clockpro_state * const s = &clockpro; 707 boolean_t hot; 708 boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */ 709 710 KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0); 711 UVM_LOCK_ASSERT_PAGEQ(); 712 check_sanity(); 713 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE); 714 s->s_npages++; 715 pg->pqflags &= ~(PQ_HOT|PQ_TEST); 716 if (speculative) { 717 hot = FALSE; 718 PDPOL_EVCNT_INCR(speculativeenqueue); 719 } else { 720 hot = nonresident_pagelookupremove(pg); 721 if (hot) { 722 COLDTARGET_ADJ(1); 723 } 724 } 725 726 /* 727 * consider mmap'ed file: 728 * 729 * - read-ahead enqueues a page. 730 * 731 * - on the following read-ahead hit, the fault handler activates it. 732 * 733 * - finally, the userland code which caused the above fault 734 * actually accesses the page. it makes its reference bit set. 735 * 736 * we want to count the above as a single access, rather than 737 * three accesses with short reuse distances. 738 */ 739 740 #if defined(USEONCE2) 741 pg->pqflags &= ~PQ_INITIALREF; 742 if (hot) { 743 pg->pqflags |= PQ_TEST; 744 } 745 s->s_ncold++; 746 clockpro_clearreferencebit(pg); 747 clockpro___enqueuetail(pg); 748 #else /* defined(USEONCE2) */ 749 if (speculative) { 750 s->s_ncold++; 751 } else if (hot) { 752 pg->pqflags |= PQ_HOT; 753 } else { 754 pg->pqflags |= PQ_TEST; 755 s->s_ncold++; 756 } 757 clockpro___enqueuetail(pg); 758 #endif /* defined(USEONCE2) */ 759 KASSERT(s->s_ncold <= s->s_npages); 760 } 761 762 static pageq_t * 763 clockpro_pagequeue(struct vm_page *pg) 764 { 765 struct clockpro_state * const s = &clockpro; 766 int qidx; 767 768 qidx = clockpro_getq(pg); 769 KASSERT(qidx != CLOCKPRO_NOQUEUE); 770 771 return clockpro_queue(s, qidx); 772 } 773 774 static void 775 clockpro_pagedequeue(struct vm_page *pg) 776 { 777 struct clockpro_state * const s = &clockpro; 778 pageq_t *q; 779 780 KASSERT(s->s_npages > 0); 781 check_sanity(); 782 q = clockpro_pagequeue(pg); 783 pageq_remove(q, pg); 784 check_sanity(); 785 clockpro_setq(pg, CLOCKPRO_NOQUEUE); 786 if ((pg->pqflags & PQ_HOT) == 0) { 787 KASSERT(s->s_ncold > 0); 788 s->s_ncold--; 789 } 790 KASSERT(s->s_npages > 0); 791 s->s_npages--; 792 check_sanity(); 793 } 794 795 static void 796 clockpro_pagerequeue(struct vm_page *pg) 797 { 798 struct clockpro_state * const s = &clockpro; 799 int qidx; 800 801 qidx = clockpro_getq(pg); 802 KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ); 803 pageq_remove(clockpro_queue(s, qidx), pg); 804 check_sanity(); 805 clockpro_setq(pg, CLOCKPRO_NOQUEUE); 806 807 clockpro___enqueuetail(pg); 808 } 809 810 static void 811 handhot_endtest(struct vm_page *pg) 812 { 813 814 KASSERT((pg->pqflags & PQ_HOT) == 0); 815 if ((pg->pqflags & PQ_TEST) != 0) { 816 PDPOL_EVCNT_INCR(hhotcoldtest); 817 COLDTARGET_ADJ(-1); 818 pg->pqflags &= ~PQ_TEST; 819 } else { 820 PDPOL_EVCNT_INCR(hhotcold); 821 } 822 } 823 824 static void 825 handhot_advance(void) 826 { 827 struct clockpro_state * const s = &clockpro; 828 struct vm_page *pg; 829 pageq_t *hotq; 830 int hotqlen; 831 832 clockpro_tune(); 833 834 dump("hot called"); 835 if (s->s_ncold >= s->s_coldtarget) { 836 return; 837 } 838 hotq = clockpro_queue(s, CLOCKPRO_HOTQ); 839 again: 840 pg = pageq_first(hotq); 841 if (pg == NULL) { 842 DPRINTF("%s: HHOT TAKEOVER\n", __func__); 843 dump("hhottakeover"); 844 PDPOL_EVCNT_INCR(hhottakeover); 845 #if defined(LISTQ) 846 while (/* CONSTCOND */ 1) { 847 pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ); 848 849 pg = pageq_first(coldq); 850 if (pg == NULL) { 851 clockpro_newqflushone(); 852 pg = pageq_first(coldq); 853 if (pg == NULL) { 854 WARN("hhot: no page?\n"); 855 return; 856 } 857 } 858 KASSERT(clockpro_pagequeue(pg) == coldq); 859 pageq_remove(coldq, pg); 860 check_sanity(); 861 if ((pg->pqflags & PQ_HOT) == 0) { 862 handhot_endtest(pg); 863 clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg); 864 } else { 865 clockpro_insert_head(s, CLOCKPRO_HOTQ, pg); 866 break; 867 } 868 } 869 #else /* defined(LISTQ) */ 870 clockpro_newqflush(0); /* XXX XXX */ 871 clockpro_switchqueue(); 872 hotq = clockpro_queue(s, CLOCKPRO_HOTQ); 873 goto again; 874 #endif /* defined(LISTQ) */ 875 } 876 877 KASSERT(clockpro_pagequeue(pg) == hotq); 878 879 /* 880 * terminate test period of nonresident pages by cycling them. 881 */ 882 883 cycle_target_frac += BUCKETSIZE; 884 hotqlen = pageq_len(hotq); 885 while (cycle_target_frac >= hotqlen) { 886 cycle_target++; 887 cycle_target_frac -= hotqlen; 888 } 889 890 if ((pg->pqflags & PQ_HOT) == 0) { 891 #if defined(LISTQ) 892 panic("cold page in hotq: %p", pg); 893 #else /* defined(LISTQ) */ 894 handhot_endtest(pg); 895 goto next; 896 #endif /* defined(LISTQ) */ 897 } 898 KASSERT((pg->pqflags & PQ_TEST) == 0); 899 KASSERT((pg->pqflags & PQ_INITIALREF) == 0); 900 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0); 901 902 /* 903 * once we met our target, 904 * stop at a hot page so that no cold pages in test period 905 * have larger recency than any hot pages. 906 */ 907 908 if (s->s_ncold >= s->s_coldtarget) { 909 dump("hot done"); 910 return; 911 } 912 clockpro_movereferencebit(pg); 913 if ((pg->pqflags & PQ_REFERENCED) == 0) { 914 PDPOL_EVCNT_INCR(hhotunref); 915 uvmexp.pddeact++; 916 pg->pqflags &= ~PQ_HOT; 917 clockpro.s_ncold++; 918 KASSERT(s->s_ncold <= s->s_npages); 919 } else { 920 PDPOL_EVCNT_INCR(hhotref); 921 } 922 pg->pqflags &= ~PQ_REFERENCED; 923 #if !defined(LISTQ) 924 next: 925 #endif /* !defined(LISTQ) */ 926 clockpro_pagerequeue(pg); 927 dump("hot"); 928 goto again; 929 } 930 931 static struct vm_page * 932 handcold_advance(void) 933 { 934 struct clockpro_state * const s = &clockpro; 935 struct vm_page *pg; 936 937 for (;;) { 938 pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ); 939 pageq_t *coldq; 940 941 clockpro_newqrotate(); 942 handhot_advance(); 943 #if defined(LISTQ) 944 pg = pageq_first(listq); 945 if (pg != NULL) { 946 KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ); 947 KASSERT((pg->pqflags & PQ_TEST) == 0); 948 KASSERT((pg->pqflags & PQ_HOT) == 0); 949 KASSERT((pg->pqflags & PQ_INITIALREF) == 0); 950 pageq_remove(listq, pg); 951 check_sanity(); 952 clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */ 953 goto gotcold; 954 } 955 #endif /* defined(LISTQ) */ 956 check_sanity(); 957 coldq = clockpro_queue(s, CLOCKPRO_COLDQ); 958 pg = pageq_first(coldq); 959 if (pg == NULL) { 960 clockpro_newqflushone(); 961 pg = pageq_first(coldq); 962 } 963 if (pg == NULL) { 964 DPRINTF("%s: HCOLD TAKEOVER\n", __func__); 965 dump("hcoldtakeover"); 966 PDPOL_EVCNT_INCR(hcoldtakeover); 967 KASSERT( 968 pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0); 969 #if defined(LISTQ) 970 KASSERT( 971 pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0); 972 #else /* defined(LISTQ) */ 973 clockpro_switchqueue(); 974 coldq = clockpro_queue(s, CLOCKPRO_COLDQ); 975 pg = pageq_first(coldq); 976 #endif /* defined(LISTQ) */ 977 } 978 if (pg == NULL) { 979 WARN("hcold: no page?\n"); 980 return NULL; 981 } 982 KASSERT((pg->pqflags & PQ_INITIALREF) == 0); 983 if ((pg->pqflags & PQ_HOT) != 0) { 984 PDPOL_EVCNT_INCR(hcoldhot); 985 pageq_remove(coldq, pg); 986 clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg); 987 check_sanity(); 988 KASSERT((pg->pqflags & PQ_TEST) == 0); 989 uvmexp.pdscans++; 990 continue; 991 } 992 #if defined(LISTQ) 993 gotcold: 994 #endif /* defined(LISTQ) */ 995 KASSERT((pg->pqflags & PQ_HOT) == 0); 996 uvmexp.pdscans++; 997 clockpro_movereferencebit(pg); 998 if ((pg->pqflags & PQ_SPECULATIVE) != 0) { 999 KASSERT((pg->pqflags & PQ_TEST) == 0); 1000 if ((pg->pqflags & PQ_REFERENCED) != 0) { 1001 PDPOL_EVCNT_INCR(speculativehit2); 1002 pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED); 1003 clockpro_pagedequeue(pg); 1004 clockpro_pageenqueue(pg); 1005 continue; 1006 } 1007 PDPOL_EVCNT_INCR(speculativemiss); 1008 } 1009 switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) { 1010 case PQ_TEST: 1011 PDPOL_EVCNT_INCR(hcoldunreftest); 1012 nonresident_pagerecord(pg); 1013 goto gotit; 1014 case 0: 1015 PDPOL_EVCNT_INCR(hcoldunref); 1016 gotit: 1017 KASSERT(s->s_ncold > 0); 1018 clockpro_pagerequeue(pg); /* XXX */ 1019 dump("cold done"); 1020 /* XXX "pg" is still in queue */ 1021 handhot_advance(); 1022 goto done; 1023 1024 case PQ_REFERENCED|PQ_TEST: 1025 PDPOL_EVCNT_INCR(hcoldreftest); 1026 s->s_ncold--; 1027 COLDTARGET_ADJ(1); 1028 pg->pqflags |= PQ_HOT; 1029 pg->pqflags &= ~PQ_TEST; 1030 break; 1031 1032 case PQ_REFERENCED: 1033 PDPOL_EVCNT_INCR(hcoldref); 1034 pg->pqflags |= PQ_TEST; 1035 break; 1036 } 1037 pg->pqflags &= ~PQ_REFERENCED; 1038 uvmexp.pdreact++; 1039 /* move to the list head */ 1040 clockpro_pagerequeue(pg); 1041 dump("cold"); 1042 } 1043 done:; 1044 return pg; 1045 } 1046 1047 void 1048 uvmpdpol_pageactivate(struct vm_page *pg) 1049 { 1050 1051 if (!uvmpdpol_pageisqueued_p(pg)) { 1052 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0); 1053 pg->pqflags |= PQ_INITIALREF; 1054 clockpro_pageenqueue(pg); 1055 } else if ((pg->pqflags & PQ_SPECULATIVE)) { 1056 PDPOL_EVCNT_INCR(speculativehit1); 1057 pg->pqflags &= ~PQ_SPECULATIVE; 1058 pg->pqflags |= PQ_INITIALREF; 1059 clockpro_pagedequeue(pg); 1060 clockpro_pageenqueue(pg); 1061 } 1062 pg->pqflags |= PQ_REFERENCED; 1063 } 1064 1065 void 1066 uvmpdpol_pagedeactivate(struct vm_page *pg) 1067 { 1068 1069 pg->pqflags &= ~PQ_REFERENCED; 1070 } 1071 1072 void 1073 uvmpdpol_pagedequeue(struct vm_page *pg) 1074 { 1075 1076 if (!uvmpdpol_pageisqueued_p(pg)) { 1077 return; 1078 } 1079 clockpro_pagedequeue(pg); 1080 pg->pqflags &= ~PQ_SPECULATIVE; 1081 } 1082 1083 void 1084 uvmpdpol_pageenqueue(struct vm_page *pg) 1085 { 1086 1087 #if 1 1088 if (uvmpdpol_pageisqueued_p(pg)) { 1089 return; 1090 } 1091 clockpro_clearreferencebit(pg); 1092 pg->pqflags |= PQ_SPECULATIVE; 1093 clockpro_pageenqueue(pg); 1094 #else 1095 uvmpdpol_pageactivate(pg); 1096 #endif 1097 } 1098 1099 void 1100 uvmpdpol_anfree(struct vm_anon *an) 1101 { 1102 1103 KASSERT(an->an_page == NULL); 1104 if (nonresident_lookupremove((objid_t)an, 0)) { 1105 PDPOL_EVCNT_INCR(nresanonfree); 1106 } 1107 } 1108 1109 void 1110 uvmpdpol_init(void) 1111 { 1112 1113 clockpro_init(); 1114 } 1115 1116 void 1117 uvmpdpol_reinit(void) 1118 { 1119 1120 clockpro_reinit(); 1121 } 1122 1123 void 1124 uvmpdpol_estimatepageable(int *active, int *inactive) 1125 { 1126 struct clockpro_state * const s = &clockpro; 1127 1128 if (active) { 1129 *active = s->s_npages - s->s_ncold; 1130 } 1131 if (inactive) { 1132 *inactive = s->s_ncold; 1133 } 1134 } 1135 1136 boolean_t 1137 uvmpdpol_pageisqueued_p(struct vm_page *pg) 1138 { 1139 1140 return clockpro_getq(pg) != CLOCKPRO_NOQUEUE; 1141 } 1142 1143 void 1144 uvmpdpol_scaninit(void) 1145 { 1146 struct clockpro_scanstate * const ss = &scanstate; 1147 1148 ss->ss_nscanned = 0; 1149 } 1150 1151 struct vm_page * 1152 uvmpdpol_selectvictim(void) 1153 { 1154 struct clockpro_state * const s = &clockpro; 1155 struct clockpro_scanstate * const ss = &scanstate; 1156 struct vm_page *pg; 1157 1158 if (ss->ss_nscanned > s->s_npages) { 1159 DPRINTF("scan too much\n"); 1160 return NULL; 1161 } 1162 pg = handcold_advance(); 1163 ss->ss_nscanned++; 1164 return pg; 1165 } 1166 1167 static void 1168 clockpro_dropswap(pageq_t *q, int *todo) 1169 { 1170 struct vm_page *pg; 1171 1172 TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) { 1173 if (*todo <= 0) { 1174 break; 1175 } 1176 if ((pg->pqflags & PQ_HOT) == 0) { 1177 continue; 1178 } 1179 if ((pg->pqflags & PQ_SWAPBACKED) == 0) { 1180 continue; 1181 } 1182 if (uvmpd_trydropswap(pg)) { 1183 (*todo)--; 1184 } 1185 } 1186 } 1187 1188 void 1189 uvmpdpol_balancequeue(int swap_shortage) 1190 { 1191 struct clockpro_state * const s = &clockpro; 1192 int todo = swap_shortage; 1193 1194 if (todo == 0) { 1195 return; 1196 } 1197 1198 /* 1199 * reclaim swap slots from hot pages 1200 */ 1201 1202 DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage); 1203 1204 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo); 1205 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo); 1206 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo); 1207 1208 DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo); 1209 } 1210 1211 boolean_t 1212 uvmpdpol_needsscan_p(void) 1213 { 1214 struct clockpro_state * const s = &clockpro; 1215 1216 if (s->s_ncold < s->s_coldtarget) { 1217 return TRUE; 1218 } 1219 return FALSE; 1220 } 1221 1222 void 1223 uvmpdpol_tune(void) 1224 { 1225 1226 clockpro_tune(); 1227 } 1228 1229 #if !defined(PDSIM) 1230 1231 #include <sys/sysctl.h> /* XXX SYSCTL_DESCR */ 1232 1233 void 1234 uvmpdpol_sysctlsetup(void) 1235 { 1236 #if !defined(ADAPTIVE) 1237 struct clockpro_state * const s = &clockpro; 1238 1239 uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct", 1240 SYSCTL_DESCR("Percentage cold target queue of the entire queue")); 1241 #endif /* !defined(ADAPTIVE) */ 1242 } 1243 1244 #endif /* !defined(PDSIM) */ 1245 1246 #if defined(DDB) 1247 1248 void clockpro_dump(void); 1249 1250 void 1251 clockpro_dump(void) 1252 { 1253 struct clockpro_state * const s = &clockpro; 1254 1255 struct vm_page *pg; 1256 int ncold, nhot, ntest, nspeculative, ninitialref, nref; 1257 int newqlen, coldqlen, hotqlen, listqlen; 1258 1259 newqlen = coldqlen = hotqlen = listqlen = 0; 1260 printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n", 1261 s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax); 1262 1263 #define INITCOUNT() \ 1264 ncold = nhot = ntest = nspeculative = ninitialref = nref = 0 1265 1266 #define COUNT(pg) \ 1267 if ((pg->pqflags & PQ_HOT) != 0) { \ 1268 nhot++; \ 1269 } else { \ 1270 ncold++; \ 1271 if ((pg->pqflags & PQ_TEST) != 0) { \ 1272 ntest++; \ 1273 } \ 1274 if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \ 1275 nspeculative++; \ 1276 } \ 1277 if ((pg->pqflags & PQ_INITIALREF) != 0) { \ 1278 ninitialref++; \ 1279 } else if ((pg->pqflags & PQ_REFERENCED) != 0 || \ 1280 pmap_is_referenced(pg)) { \ 1281 nref++; \ 1282 } \ 1283 } 1284 1285 #define PRINTCOUNT(name) \ 1286 printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \ 1287 "nref=%d\n", \ 1288 (name), nhot, ncold, ntest, nspeculative, ninitialref, nref) 1289 1290 INITCOUNT(); 1291 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) { 1292 if (clockpro_getq(pg) != CLOCKPRO_NEWQ) { 1293 printf("newq corrupt %p\n", pg); 1294 } 1295 COUNT(pg) 1296 newqlen++; 1297 } 1298 PRINTCOUNT("newq"); 1299 1300 INITCOUNT(); 1301 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) { 1302 if (clockpro_getq(pg) != CLOCKPRO_COLDQ) { 1303 printf("coldq corrupt %p\n", pg); 1304 } 1305 COUNT(pg) 1306 coldqlen++; 1307 } 1308 PRINTCOUNT("coldq"); 1309 1310 INITCOUNT(); 1311 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) { 1312 if (clockpro_getq(pg) != CLOCKPRO_HOTQ) { 1313 printf("hotq corrupt %p\n", pg); 1314 } 1315 #if defined(LISTQ) 1316 if ((pg->pqflags & PQ_HOT) == 0) { 1317 printf("cold page in hotq: %p\n", pg); 1318 } 1319 #endif /* defined(LISTQ) */ 1320 COUNT(pg) 1321 hotqlen++; 1322 } 1323 PRINTCOUNT("hotq"); 1324 1325 INITCOUNT(); 1326 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) { 1327 #if !defined(LISTQ) 1328 printf("listq %p\n"); 1329 #endif /* !defined(LISTQ) */ 1330 if (clockpro_getq(pg) != CLOCKPRO_LISTQ) { 1331 printf("listq corrupt %p\n", pg); 1332 } 1333 COUNT(pg) 1334 listqlen++; 1335 } 1336 PRINTCOUNT("listq"); 1337 1338 printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n", 1339 newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)), 1340 coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)), 1341 hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)), 1342 listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ))); 1343 } 1344 1345 #endif /* defined(DDB) */ 1346 1347 #if defined(PDSIM) 1348 static void 1349 pdsim_dumpq(int qidx) 1350 { 1351 struct clockpro_state * const s = &clockpro; 1352 pageq_t *q = clockpro_queue(s, qidx); 1353 struct vm_page *pg; 1354 1355 TAILQ_FOREACH(pg, &q->q_q, pageq) { 1356 DPRINTF(" %" PRIu64 "%s%s%s%s%s%s", 1357 pg->offset >> PAGE_SHIFT, 1358 (pg->pqflags & PQ_HOT) ? "H" : "", 1359 (pg->pqflags & PQ_TEST) ? "T" : "", 1360 (pg->pqflags & PQ_REFERENCED) ? "R" : "", 1361 pmap_is_referenced(pg) ? "r" : "", 1362 (pg->pqflags & PQ_INITIALREF) ? "I" : "", 1363 (pg->pqflags & PQ_SPECULATIVE) ? "S" : "" 1364 ); 1365 } 1366 } 1367 1368 void 1369 pdsim_dump(const char *id) 1370 { 1371 #if defined(DEBUG) 1372 struct clockpro_state * const s = &clockpro; 1373 1374 DPRINTF(" %s L(", id); 1375 pdsim_dumpq(CLOCKPRO_LISTQ); 1376 DPRINTF(" ) H("); 1377 pdsim_dumpq(CLOCKPRO_HOTQ); 1378 DPRINTF(" ) C("); 1379 pdsim_dumpq(CLOCKPRO_COLDQ); 1380 DPRINTF(" ) N("); 1381 pdsim_dumpq(CLOCKPRO_NEWQ); 1382 DPRINTF(" ) ncold=%d/%d, coldadj=%d\n", 1383 s->s_ncold, s->s_coldtarget, coldadj); 1384 #endif /* defined(DEBUG) */ 1385 } 1386 #endif /* defined(PDSIM) */ 1387