/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.4 2006/10/12 10:14:43 yamt Exp $	*/

/*-
 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CLOCK-Pro replacement policy:
 *	http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 *
 * approximation of the list of non-resident pages using hash:
 *	http://linux-mm.org/ClockProApproximation
 */

/* #define	CLOCKPRO_DEBUG */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.4 2006/10/12 10:14:43 yamt Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/hash.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>

#if ((__STDC_VERSION__ - 0) >= 199901L)
#define	DPRINTF(...)	/* nothing */
#define	WARN(...)	printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define	DPRINTF(a...)	/* nothing */	/* GCC */
#define	WARN(a...)	printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

#define	dump(a)		/* nothing */

#undef	USEONCE2
#define	LISTQ
#undef	ADAPTIVE

#endif /* defined(PDSIM) */

#if !defined(CLOCKPRO_COLDPCT)
#define	CLOCKPRO_COLDPCT	10
#endif /* !defined(CLOCKPRO_COLDPCT) */

#define	CLOCKPRO_COLDPCTMAX	90

#if !defined(CLOCKPRO_HASHFACTOR)
#define	CLOCKPRO_HASHFACTOR	2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;

PDPOL_EVCNT_DEFINE(nresrecordobj)
PDPOL_EVCNT_DEFINE(nresrecordanon)
PDPOL_EVCNT_DEFINE(nreslookup)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
PDPOL_EVCNT_DEFINE(hcoldhot)

PDPOL_EVCNT_DEFINE(speculativeenqueue)
PDPOL_EVCNT_DEFINE(speculativehit1)
PDPOL_EVCNT_DEFINE(speculativehit2)
PDPOL_EVCNT_DEFINE(speculativemiss)

#define	PQ_REFERENCED	PQ_PRIVATE1
#define	PQ_HOT		PQ_PRIVATE2
#define	PQ_TEST		PQ_PRIVATE3
#define	PQ_INITIALREF	PQ_PRIVATE4
#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2
#error PQ_PRIVATE
#endif
#define	PQ_QMASK	(PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define	PQ_QFACTOR	PQ_PRIVATE5
#define	PQ_SPECULATIVE	PQ_PRIVATE8

#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
#if defined(LISTQ)
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* defined(LISTQ) */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* defined(LISTQ) */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4

static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
}

static inline int
clockpro_getq(struct vm_page *pg)
{
	int qidx;

	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);
	return qidx;
}
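
/*
 * Illustrative note (an addition): the queue index is packed into the
 * three contiguous PQ_PRIVATE5..PQ_PRIVATE7 bits of pg->pqflags, with
 * PQ_QFACTOR (== PQ_PRIVATE5) as the unit.  Assuming, for example, that
 * PQ_PRIVATE5 is 0x0100:
 *
 *	clockpro_setq(pg, CLOCKPRO_HOTQ);   stores 3 * 0x0100 == 0x0300
 *	clockpro_getq(pg)                   yields 0x0300 / 0x0100 == 3
 *
 * The #error check above guarantees that the three bits really are
 * consecutive, so the multiply and divide by PQ_QFACTOR behave like
 * shifts and the round trip is exact.
 */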

typedef struct {
	struct pglist q_q;
	int q_len;
} pageq_t;

struct clockpro_state {
	int s_npages;
	int s_coldtarget;
	int s_ncold;

	int s_newqlenmax;
	pageq_t s_q[CLOCKPRO_NQUEUE];

	struct uvm_pctparam s_coldtargetpct;
};

static pageq_t *
clockpro_queue(struct clockpro_state *s, int qidx)
{

	KASSERT(CLOCKPRO_NOQUEUE < qidx);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	return &s->s_q[qidx - 1];
}

#if !defined(LISTQ)

static int coldqidx;

static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */

static struct clockpro_state clockpro;
static struct clockpro_scanstate {
	int ss_nscanned;
} scanstate;

/* ---------------------------------------- */

static void
pageq_init(pageq_t *q)
{

	TAILQ_INIT(&q->q_q);
	q->q_len = 0;
}

static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}

static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}

static void
pageq_insert_tail(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_TAIL(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_HEAD(&q->q_q, pg, pageq);
	q->q_len++;
}

static void
pageq_remove(pageq_t *q, struct vm_page *pg)
{

#if 1
	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
#endif
	KASSERT(q->q_len > 0);
	TAILQ_REMOVE(&q->q_q, pg, pageq);
	q->q_len--;
}

static struct vm_page *
pageq_remove_head(pageq_t *q)
{
	struct vm_page *pg;

	pg = TAILQ_FIRST(&q->q_q);
	if (pg == NULL) {
		KASSERT(q->q_len == 0);
		return NULL;
	}
	pageq_remove(q, pg);
	return pg;
}

/* ---------------------------------------- */

static void
clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_tail(q, pg);
}

static void __unused
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(q, pg);
}

/* ---------------------------------------- */

typedef uint32_t nonres_cookie_t;
#define	NONRES_COOKIE_INVAL	0

typedef uintptr_t objid_t;

/*
 * XXX maybe these hash functions need reconsideration,
 * given that hash distribution is critical here.
 */

static uint32_t
pageidentityhash1(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

#if 1
	hash = hash32_buf(&idx, sizeof(idx), hash);
	hash = hash32_buf(&obj, sizeof(obj), hash);
#else
	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
#endif
	return hash;
}

static uint32_t
pageidentityhash2(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
	return hash;
}

static nonres_cookie_t
calccookie(objid_t obj, off_t idx)
{
	uint32_t hash = pageidentityhash2(obj, idx);
	nonres_cookie_t cookie = hash;

	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
		cookie++; /* XXX */
	}
	return cookie;
}

#define	BUCKETSIZE	14
struct bucket {
	int cycle;
	int cur;
	nonres_cookie_t pages[BUCKETSIZE];
};
static int cycle_target;
static int cycle_target_frac;

static struct bucket static_bucket;
static struct bucket *buckets = &static_bucket;
static size_t hashsize = 1;

static int coldadj;
#define	COLDTARGET_ADJ(d)	coldadj += (d)
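
/*
 * Illustrative sketch (an addition): how a page's identity becomes
 * non-resident history.  When a cold page in its test period is evicted,
 * its (object, offset) pair is reduced to a 32-bit cookie and dropped
 * into one bucket of the hash above, roughly:
 *
 *	b = &buckets[pageidentityhash1(obj, idx) % hashsize];
 *	b->pages[b->cur] = calccookie(obj, idx);
 *	b->cur = (b->cur + 1) % BUCKETSIZE;
 *
 * A later fault on the same (obj, idx) recomputes the cookie and scans
 * the bucket's BUCKETSIZE slots; a hit means "this page was resident
 * recently", which is the signal CLOCK-Pro uses to enqueue the page as
 * hot.  Because only a small cookie is kept, unrelated pages can share a
 * cookie; such collisions are simply tolerated (the DEBUG-only
 * nresconflict counter watches for them) as the price of a bounded,
 * O(1)-per-page memory overhead.
 */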

#if defined(PDSIM)

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = sizeof(*buckets) * n;

	return malloc(allocsz);
}

static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}

#else /* defined(PDSIM) */

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
}

static void
clockpro_hashfree(void *p, int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
}

#endif /* defined(PDSIM) */

static void
clockpro_hashinit(uint64_t n)
{
	struct bucket *newbuckets;
	struct bucket *oldbuckets;
	size_t sz;
	size_t oldsz;
	int i;

	sz = howmany(n, BUCKETSIZE);
	sz *= clockpro_hashfactor;
	newbuckets = clockpro_hashalloc(sz);
	if (newbuckets == NULL) {
		panic("%s: allocation failure", __func__);
	}
	for (i = 0; i < sz; i++) {
		struct bucket *b = &newbuckets[i];
		int j;

		b->cycle = cycle_target;
		b->cur = 0;
		for (j = 0; j < BUCKETSIZE; j++) {
			b->pages[j] = NONRES_COOKIE_INVAL;
		}
	}
	/* XXX lock */
	oldbuckets = buckets;
	oldsz = hashsize;
	buckets = newbuckets;
	hashsize = sz;
	/* XXX unlock */
	if (oldbuckets != &static_bucket) {
		clockpro_hashfree(oldbuckets, oldsz);
	}
}

static struct bucket *
nonresident_getbucket(objid_t obj, off_t idx)
{
	uint32_t hash;

	hash = pageidentityhash1(obj, idx);
	return &buckets[hash % hashsize];
}

static void
nonresident_rotate(struct bucket *b)
{

	while (b->cycle - cycle_target < 0) {
		if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
			PDPOL_EVCNT_INCR(nreshandhot);
			COLDTARGET_ADJ(-1);
		}
		b->pages[b->cur] = NONRES_COOKIE_INVAL;
		b->cur = (b->cur + 1) % BUCKETSIZE;
		b->cycle++;
	}
}
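
/*
 * Worked example (an addition; the figure is only an assumption about a
 * particular machine): with 131072 resident pages, clockpro_hashinit()
 * allocates howmany(131072, 14) * clockpro_hashfactor == 9363 * 2 ==
 * 18726 buckets, i.e. about 2 * npages cookie slots, so the remembered
 * non-resident history can span roughly twice the size of RAM.
 * nonresident_rotate() expires that history lazily: each bucket records
 * the cycle_target value it has caught up to and invalidates one slot per
 * missed cycle the next time it is touched, so stale cookies cost nothing
 * until their bucket is actually looked at again.
 */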

static boolean_t
nonresident_lookupremove(objid_t obj, off_t idx)
{
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);
	int i;

	nonresident_rotate(b);
	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			b->pages[i] = NONRES_COOKIE_INVAL;
			return TRUE;
		}
	}
	return FALSE;
}

static objid_t
pageobj(struct vm_page *pg)
{
	const void *obj;

	/*
	 * XXX object pointer is often freed and reused for unrelated object.
	 * for vnodes, it would be better to use something like
	 * a hash of fsid/fileid/generation.
	 */

	obj = pg->uobject;
	if (obj == NULL) {
		obj = pg->uanon;
		KASSERT(obj != NULL);
		KASSERT(pg->offset == 0);
	}

	return (objid_t)obj;
}

static off_t
pageidx(struct vm_page *pg)
{

	KASSERT((pg->offset & PAGE_MASK) == 0);
	return pg->offset >> PAGE_SHIFT;
}

static boolean_t
nonresident_pagelookupremove(struct vm_page *pg)
{
	boolean_t found = nonresident_lookupremove(pageobj(pg), pageidx(pg));

	PDPOL_EVCNT_INCR(nreslookup);
	if (found) {
		if (pg->uobject) {
			PDPOL_EVCNT_INCR(nresfoundobj);
		} else {
			PDPOL_EVCNT_INCR(nresfoundanon);
		}
	}
	return found;
}

static void
nonresident_pagerecord(struct vm_page *pg)
{
	objid_t obj = pageobj(pg);
	off_t idx = pageidx(pg);
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);

#if defined(DEBUG)
	int i;

	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			PDPOL_EVCNT_INCR(nresconflict);
		}
	}
#endif /* defined(DEBUG) */

	if (pg->uobject) {
		PDPOL_EVCNT_INCR(nresrecordobj);
	} else {
		PDPOL_EVCNT_INCR(nresrecordanon);
	}
	nonresident_rotate(b);
	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
		PDPOL_EVCNT_INCR(nresoverwritten);
		COLDTARGET_ADJ(-1);
	}
	b->pages[b->cur] = cookie;
	b->cur = (b->cur + 1) % BUCKETSIZE;
}

/* ---------------------------------------- */

#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define	check_sanity()	/* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */

static void
clockpro_reinit(void)
{

	clockpro_hashinit(uvmexp.npages);
}

static void
clockpro_init(void)
{
	struct clockpro_state *s = &clockpro;
	int i;

	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
		pageq_init(&s->s_q[i]);
	}
	s->s_newqlenmax = 1;
	s->s_coldtarget = 1;
	uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
}

static void
clockpro_tune(void)
{
	struct clockpro_state *s = &clockpro;
	int coldtarget;

#if defined(ADAPTIVE)
	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
	int coldmin = 1;

	coldtarget = s->s_coldtarget;
	if (coldtarget + coldadj < coldmin) {
		coldadj = coldmin - coldtarget;
	} else if (coldtarget + coldadj > coldmax) {
		coldadj = coldmax - coldtarget;
	}
	coldtarget += coldadj;
#else /* defined(ADAPTIVE) */
	coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
	if (coldtarget < 1) {
		coldtarget = 1;
	}
#endif /* defined(ADAPTIVE) */

	s->s_coldtarget = coldtarget;
	s->s_newqlenmax = coldtarget / 4;
	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
	}
}
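
/*
 * Worked example (an addition, assuming 4KB pages and the default
 * CLOCKPRO_COLDPCT of 10): with s_npages == 131072 (512MB of managed
 * memory), the non-ADAPTIVE path above sets s_coldtarget to 10% of that,
 * 13107 pages, and s_newqlenmax to coldtarget / 4 == 3276 pages.  On a
 * machine small enough that coldtarget / 4 falls below CLOCKPRO_NEWQMIN
 * ((1024 * 1024) >> PAGE_SHIFT == 256 pages, i.e. 1MB), the new queue
 * length is clamped to that minimum instead.
 */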

static void
clockpro_movereferencebit(struct vm_page *pg)
{
	boolean_t referenced;

	referenced = pmap_clear_reference(pg);
	if (referenced) {
		pg->pqflags |= PQ_REFERENCED;
	}
}

static void
clockpro_clearreferencebit(struct vm_page *pg)
{

	clockpro_movereferencebit(pg);
	pg->pqflags &= ~PQ_REFERENCED;
}

static void
clockpro___newqrotate(int len)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
	struct vm_page *pg;

	while (pageq_len(newq) > len) {
		pg = pageq_remove_head(newq);
		KASSERT(pg != NULL);
		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
		if ((pg->pqflags & PQ_INITIALREF) != 0) {
			clockpro_clearreferencebit(pg);
			pg->pqflags &= ~PQ_INITIALREF;
		}
		/* place at the list head */
		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
	}
}

static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}

static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}

static void
clockpro_newqflushone(void)
{
	struct clockpro_state * const s = &clockpro;

	clockpro_newqflush(
	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
}

/*
 * our "tail" is called "list-head" in the paper.
 */

static void
clockpro___enqueuetail(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);

	check_sanity();
#if !defined(USEONCE2)
	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
	clockpro_newqrotate();
#else /* !defined(USEONCE2) */
#if defined(LISTQ)
	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
#endif /* defined(LISTQ) */
	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
#endif /* !defined(USEONCE2) */
	check_sanity();
}

static void
clockpro_pageenqueue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	boolean_t hot;
	boolean_t speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */

	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
	UVM_LOCK_ASSERT_PAGEQ();
	check_sanity();
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
	s->s_npages++;
	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
	if (speculative) {
		hot = FALSE;
		PDPOL_EVCNT_INCR(speculativeenqueue);
	} else {
		hot = nonresident_pagelookupremove(pg);
		if (hot) {
			COLDTARGET_ADJ(1);
		}
	}

	/*
	 * consider mmap'ed file:
	 *
	 * - read-ahead enqueues a page.
	 *
	 * - on the following read-ahead hit, the fault handler activates it.
	 *
	 * - finally, the userland code which caused the above fault
	 *   actually accesses the page.  this sets its reference bit.
	 *
	 * we want to count the above as a single access, rather than
	 * three accesses with short reuse distances.
	 */

#if defined(USEONCE2)
	pg->pqflags &= ~PQ_INITIALREF;
	if (hot) {
		pg->pqflags |= PQ_TEST;
	}
	s->s_ncold++;
	clockpro_clearreferencebit(pg);
	clockpro___enqueuetail(pg);
#else /* defined(USEONCE2) */
	if (speculative) {
		s->s_ncold++;
	} else if (hot) {
		pg->pqflags |= PQ_HOT;
	} else {
		pg->pqflags |= PQ_TEST;
		s->s_ncold++;
	}
	clockpro___enqueuetail(pg);
#endif /* defined(USEONCE2) */
	KASSERT(s->s_ncold <= s->s_npages);
}
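
/*
 * Illustrative timeline (an explanatory addition, not the original
 * author's text).  For an mmap'ed file page, the sequence described in
 * the comment above typically looks like this:
 *
 *   1. the page is brought in speculatively (e.g. by read-ahead) and
 *      enters the queues via uvmpdpol_pageenqueue(), with PQ_SPECULATIVE
 *      set and counted as cold.
 *
 *   2. the fault handler later maps it for real and calls
 *      uvmpdpol_pageactivate(); PQ_SPECULATIVE is traded for
 *      PQ_INITIALREF (the speculativehit1 event) and the page is
 *      re-enqueued through the new queue.
 *
 *   3. the faulting program finally touches the page, which sets the
 *      pmap-level reference bit.
 *
 * The new queue exists so that steps 2 and 3 count as part of that single
 * first use: the initial reference is stripped when the page falls off
 * the new queue, instead of being read as evidence of reuse.
 */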

static pageq_t *
clockpro_pagequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx != CLOCKPRO_NOQUEUE);

	return clockpro_queue(s, qidx);
}

static void
clockpro_pagedequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q;

	KASSERT(s->s_npages > 0);
	check_sanity();
	q = clockpro_pagequeue(pg);
	pageq_remove(q, pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
	if ((pg->pqflags & PQ_HOT) == 0) {
		KASSERT(s->s_ncold > 0);
		s->s_ncold--;
	}
	KASSERT(s->s_npages > 0);
	s->s_npages--;
	check_sanity();
}

static void
clockpro_pagerequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	qidx = clockpro_getq(pg);
	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
	pageq_remove(clockpro_queue(s, qidx), pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);

	clockpro___enqueuetail(pg);
}

static void
handhot_endtest(struct vm_page *pg)
{

	KASSERT((pg->pqflags & PQ_HOT) == 0);
	if ((pg->pqflags & PQ_TEST) != 0) {
		PDPOL_EVCNT_INCR(hhotcoldtest);
		COLDTARGET_ADJ(-1);
		pg->pqflags &= ~PQ_TEST;
	} else {
		PDPOL_EVCNT_INCR(hhotcold);
	}
}

static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	clockpro_tune();

	dump("hot called");
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

	/*
	 * terminate test period of nonresident pages by cycling them.
	 */

	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}
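
	/*
	 * Worked example (an addition): each step of the hot hand adds
	 * BUCKETSIZE to cycle_target_frac, so with, say, 700 pages on the
	 * hot queue, cycle_target advances by one roughly every
	 * 700 / 14 == 50 steps.  Over one full lap of the hot hand
	 * (hotqlen steps) cycle_target therefore grows by BUCKETSIZE, and
	 * the next nonresident_rotate() on a bucket untouched since before
	 * that lap clears all of its slots.  In other words, a non-resident
	 * page's test period lasts about one rotation of the hot hand.
	 */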

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);

	/*
	 * once we met our target,
	 * stop at a hot page so that no cold pages in test period
	 * have larger recency than any hot pages.
	 */

	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}

static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	for (;;) {
#if defined(LISTQ)
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
#endif /* defined(LISTQ) */
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			PDPOL_EVCNT_INCR(hcoldhot);
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
 gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
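
		/*
		 * Decision table for the switch below (an explanatory
		 * addition):
		 *
		 *   referenced  test period  action
		 *   ----------  -----------  -------------------------------
		 *   no          no           evict (hcoldunref)
		 *   no          yes          evict, but remember the page in
		 *                            the non-resident hash so that a
		 *                            prompt re-fault makes it hot
		 *                            (hcoldunreftest)
		 *   yes         no           first reuse seen: start a test
		 *                            period (hcoldref)
		 *   yes         yes          reused within its test period:
		 *                            promote to hot (hcoldreftest)
		 */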
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
 gotit:
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;

		case PQ_REFERENCED|PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;

		case PQ_REFERENCED:
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pageenqueue(pg);
	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
		PDPOL_EVCNT_INCR(speculativehit1);
		pg->pqflags &= ~PQ_SPECULATIVE;
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pagedequeue(pg);
		clockpro_pageenqueue(pg);
	}
	pg->pqflags |= PQ_REFERENCED;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	pg->pqflags &= ~PQ_REFERENCED;
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_pagedequeue(pg);
	pg->pqflags &= ~PQ_SPECULATIVE;
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

#if 1
	if (uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_clearreferencebit(pg);
	pg->pqflags |= PQ_SPECULATIVE;
	clockpro_pageenqueue(pg);
#else
	uvmpdpol_pageactivate(pg);
#endif
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	KASSERT(an->an_page == NULL);
	if (nonresident_lookupremove((objid_t)an, 0)) {
		PDPOL_EVCNT_INCR(nresanonfree);
	}
}

void
uvmpdpol_init(void)
{

	clockpro_init();
}

void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct clockpro_state * const s = &clockpro;

	if (active) {
		*active = s->s_npages - s->s_ncold;
	}
	if (inactive) {
		*inactive = s->s_ncold;
	}
}

boolean_t
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}

void
uvmpdpol_scaninit(void)
{
	struct clockpro_scanstate * const ss = &scanstate;

	ss->ss_nscanned = 0;
}

struct vm_page *
uvmpdpol_selectvictim(void)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;
	struct vm_page *pg;

	if (ss->ss_nscanned > s->s_npages) {
		DPRINTF("scan too much\n");
		return NULL;
	}
	pg = handcold_advance();
	ss->ss_nscanned++;
	return pg;
}
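
/*
 * Illustrative sketch (an addition, never compiled): roughly how a
 * pagedaemon-style caller is expected to drive the entry points above.
 * uvmpd_reclaim() is a hypothetical stand-in for whatever the caller does
 * with each victim; locking is omitted, although the real caller is
 * expected to hold the page queue lock around these calls.
 * uvmpdpol_needsscan_p() (below) additionally tells such a caller whether
 * the policy wants to be scanned even without a free-page shortage.
 */
#if 0
static void
example_scan_loop(int shortage)
{
	struct vm_page *victim;

	uvmpdpol_tune();
	uvmpdpol_scaninit();
	while (shortage > 0) {
		victim = uvmpdpol_selectvictim();
		if (victim == NULL)
			break;
		if (uvmpd_reclaim(victim))	/* hypothetical helper */
			shortage--;
	}
}
#endif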

static void
clockpro_dropswap(pageq_t *q, int *todo)
{
	struct vm_page *pg;

	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq) {
		if (*todo <= 0) {
			break;
		}
		if ((pg->pqflags & PQ_HOT) == 0) {
			continue;
		}
		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
			continue;
		}
		if (uvmpd_trydropswap(pg)) {
			(*todo)--;
		}
	}
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct clockpro_state * const s = &clockpro;
	int todo = swap_shortage;

	if (todo == 0) {
		return;
	}

	/*
	 * reclaim swap slots from hot pages
	 */

	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);

	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);

	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
}

boolean_t
uvmpdpol_needsscan_p(void)
{
	struct clockpro_state * const s = &clockpro;

	if (s->s_ncold < s->s_coldtarget) {
		return TRUE;
	}
	return FALSE;
}

void
uvmpdpol_tune(void)
{

	clockpro_tune();
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
#if !defined(ADAPTIVE)
	struct clockpro_state * const s = &clockpro;

	uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
	    SYSCTL_DESCR("Percentage cold target queue of the entire queue"));
#endif /* !defined(ADAPTIVE) */
}

#endif /* !defined(PDSIM) */

#if defined(DDB)

void clockpro_dump(void);

void
clockpro_dump(void)
{
	struct clockpro_state * const s = &clockpro;

	struct vm_page *pg;
	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
	int newqlen, coldqlen, hotqlen, listqlen;

	newqlen = coldqlen = hotqlen = listqlen = 0;
	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);

#define	INITCOUNT()	\
	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0

#define	COUNT(pg)	\
	if ((pg->pqflags & PQ_HOT) != 0) { \
		nhot++; \
	} else { \
		ncold++; \
		if ((pg->pqflags & PQ_TEST) != 0) { \
			ntest++; \
		} \
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
			nspeculative++; \
		} \
		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
			ninitialref++; \
		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
		    pmap_is_referenced(pg)) { \
			nref++; \
		} \
	}

#define	PRINTCOUNT(name)	\
	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
	    "nref=%d\n", \
	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
			printf("newq corrupt %p\n", pg);
		}
		COUNT(pg)
		newqlen++;
	}
	PRINTCOUNT("newq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
			printf("coldq corrupt %p\n", pg);
		}
		COUNT(pg)
		coldqlen++;
	}
	PRINTCOUNT("coldq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq) {
		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
			printf("hotq corrupt %p\n", pg);
		}
#if defined(LISTQ)
		if ((pg->pqflags & PQ_HOT) == 0) {
			printf("cold page in hotq: %p\n", pg);
		}
#endif /* defined(LISTQ) */
		COUNT(pg)
		hotqlen++;
	}
	PRINTCOUNT("hotq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq) {
#if !defined(LISTQ)
		printf("listq %p\n", pg);
#endif /* !defined(LISTQ) */
		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
			printf("listq corrupt %p\n", pg);
		}
		COUNT(pg)
		listqlen++;
	}
	PRINTCOUNT("listq");

	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
}

#endif /* defined(DDB) */

#if defined(PDSIM)
#if defined(DEBUG)
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pageq) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		    );
	}
}
#endif /* defined(DEBUG) */

void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */