/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.9 2007/08/01 14:49:55 yamt Exp $	*/

/*-
 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CLOCK-Pro replacement policy:
 *	http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 *
 * approximation of the list of non-resident pages using hash:
 *	http://linux-mm.org/ClockProApproximation
 */

/* #define	CLOCKPRO_DEBUG */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.9 2007/08/01 14:49:55 yamt Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/hash.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>

#if ((__STDC_VERSION__ - 0) >= 199901L)
#define	DPRINTF(...)	/* nothing */
#define	WARN(...)	printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define	DPRINTF(a...)	/* nothing */	/* GCC */
#define	WARN(a...)	printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */
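/*
 * Note: the knobs below select compile-time variants of this policy.
 * LISTQ, enabled by default here, keeps the hot, cold and list queues
 * separate as in the CLOCK-Pro paper; USEONCE2 and ADAPTIVE appear to be
 * experimental alternatives (a direct-to-cold enqueue path and a
 * self-adjusting cold target, respectively) and are left disabled.
 */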
#define	dump(a)		/* nothing */

#undef	USEONCE2
#define	LISTQ
#undef	ADAPTIVE

#endif /* defined(PDSIM) */

#if !defined(CLOCKPRO_COLDPCT)
#define	CLOCKPRO_COLDPCT	10
#endif /* !defined(CLOCKPRO_COLDPCT) */

#define	CLOCKPRO_COLDPCTMAX	90

#if !defined(CLOCKPRO_HASHFACTOR)
#define	CLOCKPRO_HASHFACTOR	2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;

PDPOL_EVCNT_DEFINE(nresrecordobj)
PDPOL_EVCNT_DEFINE(nresrecordanon)
PDPOL_EVCNT_DEFINE(nreslookupobj)
PDPOL_EVCNT_DEFINE(nreslookupanon)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
PDPOL_EVCNT_DEFINE(hcoldhot)

PDPOL_EVCNT_DEFINE(speculativeenqueue)
PDPOL_EVCNT_DEFINE(speculativehit1)
PDPOL_EVCNT_DEFINE(speculativehit2)
PDPOL_EVCNT_DEFINE(speculativemiss)

#define	PQ_REFERENCED	PQ_PRIVATE1
#define	PQ_HOT		PQ_PRIVATE2
#define	PQ_TEST		PQ_PRIVATE3
#define	PQ_INITIALREF	PQ_PRIVATE4
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define	PQ_QMASK	(PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define	PQ_QFACTOR	PQ_PRIVATE5
#define	PQ_SPECULATIVE	PQ_PRIVATE8

#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
#if defined(LISTQ)
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* defined(LISTQ) */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* defined(LISTQ) */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4
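/*
 * Note: a page's queue index is packed into pg->pqflags by scaling it
 * with PQ_QFACTOR (the lowest of the three PQ_PRIVATE5..7 bits), so the
 * PQ_QMASK field holds 0..7 and the index is recovered by division in
 * clockpro_getq() below.  For example, if PQ_PRIVATE5 were 0x0100
 * (illustrative value only; the real bits are defined in uvm_page.h):
 *
 *	qidx 3 (CLOCKPRO_HOTQ)	->  (pqflags & PQ_QMASK) == 0x0300
 *	0x0300 / PQ_QFACTOR	->  3
 */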
static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
}

static inline int
clockpro_getq(struct vm_page *pg)
{
	int qidx;

	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);
	return qidx;
}

typedef struct {
	struct pglist q_q;
	int q_len;
} pageq_t;

struct clockpro_state {
	int s_npages;
	int s_coldtarget;
	int s_ncold;

	int s_newqlenmax;
	pageq_t s_q[CLOCKPRO_NQUEUE];

	struct uvm_pctparam s_coldtargetpct;
};

static pageq_t *
clockpro_queue(struct clockpro_state *s, int qidx)
{

	KASSERT(CLOCKPRO_NOQUEUE < qidx);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	return &s->s_q[qidx - 1];
}

#if !defined(LISTQ)

static int coldqidx;

static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */

static struct clockpro_state clockpro;
static struct clockpro_scanstate {
	int ss_nscanned;
} scanstate;

/* ---------------------------------------- */

static void
pageq_init(pageq_t *q)
{

	TAILQ_INIT(&q->q_q);
	q->q_len = 0;
}

static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}

static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}

static void
pageq_insert_tail(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_remove(pageq_t *q, struct vm_page *pg)
{

#if 1
	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
#endif
	KASSERT(q->q_len > 0);
	TAILQ_REMOVE(&q->q_q, pg, pageq);
	q->q_len--;
}

static struct vm_page *
pageq_remove_head(pageq_t *q)
{
	struct vm_page *pg;

	pg = TAILQ_FIRST(&q->q_q);
	if (pg == NULL) {
		KASSERT(q->q_len == 0);
		return NULL;
	}
	pageq_remove(q, pg);
	return pg;
}

/* ---------------------------------------- */

static void
clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_tail(q, pg);
}

static void
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(q, pg);
}

/* ---------------------------------------- */

typedef uint32_t nonres_cookie_t;
#define	NONRES_COOKIE_INVAL	0

typedef uintptr_t objid_t;

/*
 * XXX maybe these hash functions need reconsideration,
 * given that hash distribution is critical here.
 */

static uint32_t
pageidentityhash1(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

#if 1
	hash = hash32_buf(&idx, sizeof(idx), hash);
	hash = hash32_buf(&obj, sizeof(obj), hash);
#else
	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
#endif
	return hash;
}

static uint32_t
pageidentityhash2(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
	return hash;
}

static nonres_cookie_t
calccookie(objid_t obj, off_t idx)
{
	uint32_t hash = pageidentityhash2(obj, idx);
	nonres_cookie_t cookie = hash;

	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
		cookie++; /* XXX */
	}
	return cookie;
}

#define	BUCKETSIZE	14
struct bucket {
	int cycle;
	int cur;
	nonres_cookie_t pages[BUCKETSIZE];
};
static int cycle_target;
static int cycle_target_frac;

static struct bucket static_bucket;
static struct bucket *buckets = &static_bucket;
static size_t hashsize = 1;

static int coldadj;
#define	COLDTARGET_ADJ(d)	coldadj += (d)

#if defined(PDSIM)

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = sizeof(*buckets) * n;

	return malloc(allocsz);
}

static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}

#else /* defined(PDSIM) */

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
}

static void
clockpro_hashfree(void *p, int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
}

#endif /* defined(PDSIM) */

static void
clockpro_hashinit(uint64_t n)
{
	struct bucket *newbuckets;
	struct bucket *oldbuckets;
	size_t sz;
	size_t oldsz;
	int i;

	sz = howmany(n, BUCKETSIZE);
	sz *= clockpro_hashfactor;
	newbuckets = clockpro_hashalloc(sz);
	if (newbuckets == NULL) {
		panic("%s: allocation failure", __func__);
	}
	for (i = 0; i < sz; i++) {
		struct bucket *b = &newbuckets[i];
		int j;

		b->cycle = cycle_target;
		b->cur = 0;
		for (j = 0; j < BUCKETSIZE; j++) {
			b->pages[j] = NONRES_COOKIE_INVAL;
		}
	}
	/* XXX lock */
	oldbuckets = buckets;
	oldsz = hashsize;
	buckets = newbuckets;
	hashsize = sz;
	/* XXX unlock */
	if (oldbuckets != &static_bucket) {
		clockpro_hashfree(oldbuckets, oldsz);
	}
}

static struct bucket *
nonresident_getbucket(objid_t obj, off_t idx)
{
	uint32_t hash;

	hash = pageidentityhash1(obj, idx);
	return &buckets[hash % hashsize];
}

static void
nonresident_rotate(struct bucket *b)
{

	while (b->cycle - cycle_target < 0) {
		if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
			PDPOL_EVCNT_INCR(nreshandhot);
			COLDTARGET_ADJ(-1);
		}
		b->pages[b->cur] = NONRES_COOKIE_INVAL;
		b->cur = (b->cur + 1) % BUCKETSIZE;
		b->cycle++;
	}
}
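/*
 * Note: nonresident_rotate() above is, in effect, the clock hand for the
 * non-resident (ghost) entries.  cycle_target is a global tick count that
 * handhot_advance() pushes forward; each bucket lazily catches up with it
 * here, invalidating one cookie slot per tick.  A cookie that is still
 * present when its slot is recycled is a non-resident page whose test
 * period ended without a re-reference ("nreshandhot"), which is taken as
 * a hint to shrink the cold target.
 */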
static bool
nonresident_lookupremove(objid_t obj, off_t idx)
{
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);
	int i;

	nonresident_rotate(b);
	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			b->pages[i] = NONRES_COOKIE_INVAL;
			return true;
		}
	}
	return false;
}

static objid_t
pageobj(struct vm_page *pg)
{
	const void *obj;

	/*
	 * XXX object pointer is often freed and reused for unrelated object.
	 * for vnodes, it would be better to use something like
	 * a hash of fsid/fileid/generation.
	 */

	obj = pg->uobject;
	if (obj == NULL) {
		obj = pg->uanon;
		KASSERT(obj != NULL);
		KASSERT(pg->offset == 0);
	}

	return (objid_t)obj;
}

static off_t
pageidx(struct vm_page *pg)
{

	KASSERT((pg->offset & PAGE_MASK) == 0);
	return pg->offset >> PAGE_SHIFT;
}

static bool
nonresident_pagelookupremove(struct vm_page *pg)
{
	bool found = nonresident_lookupremove(pageobj(pg), pageidx(pg));

	if (pg->uobject) {
		PDPOL_EVCNT_INCR(nreslookupobj);
	} else {
		PDPOL_EVCNT_INCR(nreslookupanon);
	}
	if (found) {
		if (pg->uobject) {
			PDPOL_EVCNT_INCR(nresfoundobj);
		} else {
			PDPOL_EVCNT_INCR(nresfoundanon);
		}
	}
	return found;
}

static void
nonresident_pagerecord(struct vm_page *pg)
{
	objid_t obj = pageobj(pg);
	off_t idx = pageidx(pg);
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);

#if defined(DEBUG)
	int i;

	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			PDPOL_EVCNT_INCR(nresconflict);
		}
	}
#endif /* defined(DEBUG) */

	if (pg->uobject) {
		PDPOL_EVCNT_INCR(nresrecordobj);
	} else {
		PDPOL_EVCNT_INCR(nresrecordanon);
	}
	nonresident_rotate(b);
	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
		PDPOL_EVCNT_INCR(nresoverwritten);
		COLDTARGET_ADJ(-1);
	}
	b->pages[b->cur] = cookie;
	b->cur = (b->cur + 1) % BUCKETSIZE;
}

/* ---------------------------------------- */

#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define	check_sanity()	/* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */

static void
clockpro_reinit(void)
{

	clockpro_hashinit(uvmexp.npages);
}

static void
clockpro_init(void)
{
	struct clockpro_state *s = &clockpro;
	int i;

	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
		pageq_init(&s->s_q[i]);
	}
	s->s_newqlenmax = 1;
	s->s_coldtarget = 1;
	uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
}

static void
clockpro_tune(void)
{
	struct clockpro_state *s = &clockpro;
	int coldtarget;

#if defined(ADAPTIVE)
	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
	int coldmin = 1;

	coldtarget = s->s_coldtarget;
	if (coldtarget + coldadj < coldmin) {
		coldadj = coldmin - coldtarget;
	} else if (coldtarget + coldadj > coldmax) {
		coldadj = coldmax - coldtarget;
	}
	coldtarget += coldadj;
#else /* defined(ADAPTIVE) */
	coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
	if (coldtarget < 1) {
		coldtarget = 1;
	}
#endif /* defined(ADAPTIVE) */

	s->s_coldtarget = coldtarget;
	s->s_newqlenmax = coldtarget / 4;
	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
	}
}
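/*
 * Note: a rough worked example of the non-ADAPTIVE tuning above,
 * assuming 4 KB pages and 131072 managed pages (512 MB):
 *
 *	coldtarget = 10% of 131072		= 13107 pages
 *	newqlenmax = coldtarget / 4		= 3276 pages
 *	CLOCKPRO_NEWQMIN = 1 MB worth of pages	= 256 pages (no effect here)
 *
 * i.e. by default roughly a tenth of the managed pages are kept cold,
 * and up to a quarter of that number may sit in the new-page queue.
 */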
static void
clockpro_movereferencebit(struct vm_page *pg)
{
	bool referenced;

	referenced = pmap_clear_reference(pg);
	if (referenced) {
		pg->pqflags |= PQ_REFERENCED;
	}
}

static void
clockpro_clearreferencebit(struct vm_page *pg)
{

	clockpro_movereferencebit(pg);
	pg->pqflags &= ~PQ_REFERENCED;
}

static void
clockpro___newqrotate(int len)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
	struct vm_page *pg;

	while (pageq_len(newq) > len) {
		pg = pageq_remove_head(newq);
		KASSERT(pg != NULL);
		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
		if ((pg->pqflags & PQ_INITIALREF) != 0) {
			clockpro_clearreferencebit(pg);
			pg->pqflags &= ~PQ_INITIALREF;
		}
		/* place at the list head */
		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
	}
}

static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}

static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}

static void
clockpro_newqflushone(void)
{
	struct clockpro_state * const s = &clockpro;

	clockpro_newqflush(
	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
}

/*
 * our "tail" is called "list-head" in the paper.
 */

static void
clockpro___enqueuetail(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);

	check_sanity();
#if !defined(USEONCE2)
	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
	clockpro_newqrotate();
#else /* !defined(USEONCE2) */
#if defined(LISTQ)
	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
#endif /* defined(LISTQ) */
	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
#endif /* !defined(USEONCE2) */
	check_sanity();
}

static void
clockpro_pageenqueue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	bool hot;
	bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */

	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
	UVM_LOCK_ASSERT_PAGEQ();
	check_sanity();
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
	s->s_npages++;
	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
	if (speculative) {
		hot = false;
		PDPOL_EVCNT_INCR(speculativeenqueue);
	} else {
		hot = nonresident_pagelookupremove(pg);
		if (hot) {
			COLDTARGET_ADJ(1);
		}
	}

	/*
	 * consider mmap'ed file:
	 *
	 * - read-ahead enqueues a page.
	 *
	 * - on the following read-ahead hit, the fault handler activates it.
	 *
	 * - finally, the userland code which caused the above fault
	 *   actually accesses the page.  it makes its reference bit set.
	 *
	 * we want to count the above as a single access, rather than
	 * three accesses with short reuse distances.
	 */

#if defined(USEONCE2)
	pg->pqflags &= ~PQ_INITIALREF;
	if (hot) {
		pg->pqflags |= PQ_TEST;
	}
	s->s_ncold++;
	clockpro_clearreferencebit(pg);
	clockpro___enqueuetail(pg);
#else /* defined(USEONCE2) */
	if (speculative) {
		s->s_ncold++;
	} else if (hot) {
		pg->pqflags |= PQ_HOT;
	} else {
		pg->pqflags |= PQ_TEST;
		s->s_ncold++;
	}
	clockpro___enqueuetail(pg);
#endif /* defined(USEONCE2) */
	KASSERT(s->s_ncold <= s->s_npages);
}

static pageq_t *
clockpro_pagequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx != CLOCKPRO_NOQUEUE);

	return clockpro_queue(s, qidx);
}

static void
clockpro_pagedequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q;

	KASSERT(s->s_npages > 0);
	check_sanity();
	q = clockpro_pagequeue(pg);
	pageq_remove(q, pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
	if ((pg->pqflags & PQ_HOT) == 0) {
		KASSERT(s->s_ncold > 0);
		s->s_ncold--;
	}
	KASSERT(s->s_npages > 0);
	s->s_npages--;
	check_sanity();
}

static void
clockpro_pagerequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
	pageq_remove(clockpro_queue(s, qidx), pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);

	clockpro___enqueuetail(pg);
}

static void
handhot_endtest(struct vm_page *pg)
{

	KASSERT((pg->pqflags & PQ_HOT) == 0);
	if ((pg->pqflags & PQ_TEST) != 0) {
		PDPOL_EVCNT_INCR(hhotcoldtest);
		COLDTARGET_ADJ(-1);
		pg->pqflags &= ~PQ_TEST;
	} else {
		PDPOL_EVCNT_INCR(hhotcold);
	}
}

static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	clockpro_tune();

	dump("hot called");
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

	/*
	 * terminate test period of nonresident pages by cycling them.
	 */

	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);

	/*
	 * once we met our target,
	 * stop at a hot page so that no cold pages in test period
	 * have larger recency than any hot pages.
	 */

	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}

static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	for (;;) {
#if defined(LISTQ)
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
#endif /* defined(LISTQ) */
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			PDPOL_EVCNT_INCR(hcoldhot);
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
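		/*
		 * Note: the cold hand now acts on the page according to its
		 * (referenced, test) state:
		 *
		 *	unreferenced, in test	-> record in the non-resident
		 *				   hash and offer as a victim
		 *	unreferenced, no test	-> offer as a victim
		 *	referenced, in test	-> promote to hot and grow the
		 *				   cold target
		 *	referenced, no test	-> stay cold, start test period
		 */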
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
gotit:
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;

		case PQ_REFERENCED|PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;

		case PQ_REFERENCED:
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pageenqueue(pg);
	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
		PDPOL_EVCNT_INCR(speculativehit1);
		pg->pqflags &= ~PQ_SPECULATIVE;
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pagedequeue(pg);
		clockpro_pageenqueue(pg);
	}
	pg->pqflags |= PQ_REFERENCED;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	pg->pqflags &= ~PQ_REFERENCED;
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_pagedequeue(pg);
	pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

#if 1
	if (uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_clearreferencebit(pg);
	pg->pqflags |= PQ_SPECULATIVE;
	clockpro_pageenqueue(pg);
#else
	uvmpdpol_pageactivate(pg);
#endif
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	KASSERT(an->an_page == NULL);
	if (nonresident_lookupremove((objid_t)an, 0)) {
		PDPOL_EVCNT_INCR(nresanonfree);
	}
}

void
uvmpdpol_init(void)
{

	clockpro_init();
}

void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct clockpro_state * const s = &clockpro;

	if (active) {
		*active = s->s_npages - s->s_ncold;
	}
	if (inactive) {
		*inactive = s->s_ncold;
	}
}

bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}

void
uvmpdpol_scaninit(void)
{
	struct clockpro_scanstate * const ss = &scanstate;

	ss->ss_nscanned = 0;
}

struct vm_page *
uvmpdpol_selectvictim(void)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;
	struct vm_page *pg;

	if (ss->ss_nscanned > s->s_npages) {
		DPRINTF("scan too much\n");
		return NULL;
	}
	pg = handcold_advance();
	ss->ss_nscanned++;
	return pg;
}
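/*
 * Note: clockpro_dropswap() below walks a queue from its tail (the most
 * recently re-queued end) and releases the swap slots of hot, swap-backed
 * pages until the caller's "todo" budget is used up, presumably because
 * hot pages are the ones least likely to need their swap space again
 * soon.  It does not reorder the queue.
 */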
static void
clockpro_dropswap(pageq_t *q, int *todo)
{
	struct vm_page *pg;

	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
		if (*todo <= 0) {
			break;
		}
		if ((pg->pqflags & PQ_HOT) == 0) {
			continue;
		}
		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
			continue;
		}
		if (uvmpd_trydropswap(pg)) {
			(*todo)--;
		}
	}
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct clockpro_state * const s = &clockpro;
	int todo = swap_shortage;

	if (todo == 0) {
		return;
	}

	/*
	 * reclaim swap slots from hot pages
	 */

	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);

	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);

	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
}

bool
uvmpdpol_needsscan_p(void)
{
	struct clockpro_state * const s = &clockpro;

	if (s->s_ncold < s->s_coldtarget) {
		return true;
	}
	return false;
}

void
uvmpdpol_tune(void)
{

	clockpro_tune();
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
#if !defined(ADAPTIVE)
	struct clockpro_state * const s = &clockpro;

	uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
	    SYSCTL_DESCR("Percentage cold target queue of the entire queue"));
#endif /* !defined(ADAPTIVE) */
}

#endif /* !defined(PDSIM) */

#if defined(DDB)

void clockpro_dump(void);

void
clockpro_dump(void)
{
	struct clockpro_state * const s = &clockpro;

	struct vm_page *pg;
	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
	int newqlen, coldqlen, hotqlen, listqlen;

	newqlen = coldqlen = hotqlen = listqlen = 0;
	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);

#define	INITCOUNT()	\
	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0

#define	COUNT(pg)	\
	if ((pg->pqflags & PQ_HOT) != 0) { \
		nhot++; \
	} else { \
		ncold++; \
		if ((pg->pqflags & PQ_TEST) != 0) { \
			ntest++; \
		} \
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
			nspeculative++; \
		} \
		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
			ninitialref++; \
		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
		    pmap_is_referenced(pg)) { \
			nref++; \
		} \
	}

#define	PRINTCOUNT(name)	\
	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
	    "nref=%d\n", \
	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
			printf("newq corrupt %p\n", pg);
		}
		COUNT(pg)
		newqlen++;
	}
	PRINTCOUNT("newq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
			printf("coldq corrupt %p\n", pg);
		}
		COUNT(pg)
		coldqlen++;
	}
	PRINTCOUNT("coldq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
			printf("hotq corrupt %p\n", pg);
		}
#if defined(LISTQ)
		if ((pg->pqflags & PQ_HOT) == 0) {
			printf("cold page in hotq: %p\n", pg);
		}
#endif /* defined(LISTQ) */
		COUNT(pg)
		hotqlen++;
	}
	PRINTCOUNT("hotq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
#if !defined(LISTQ)
		printf("listq %p\n", pg);
#endif /* !defined(LISTQ) */
		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
			printf("listq corrupt %p\n", pg);
		}
		COUNT(pg)
		listqlen++;
	}
	PRINTCOUNT("listq");

	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
}

#endif /* defined(DDB) */

#if defined(PDSIM)
#if defined(DEBUG)
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pageq) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		    );
	}
}
#endif /* defined(DEBUG) */

void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */