/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.39 2020/06/11 22:21:05 ad Exp $	*/
/*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp	*/

/*-
 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.39 2020/06/11 22:21:05 ad Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>
#include <uvm/uvm_stat.h>

#endif /* defined(PDSIM) */

/*
 * per-CPU queue of pending page status changes.  128 entries makes for a
 * 1kB queue on _LP64 and has been found to be a reasonable compromise that
 * keeps lock contention events and wait times low, while not using too much
 * memory nor allowing global state to fall too far behind.
 */
#if !defined(CLOCK_PDQ_SIZE)
#define	CLOCK_PDQ_SIZE	128
#endif /* !defined(CLOCK_PDQ_SIZE) */
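
/*
 * Illustrative note, not part of the original source: the queue is an array
 * of CLOCK_PDQ_SIZE page pointers (see uvmpdpol_init_cpu() below), so the
 * default works out to
 *
 *	128 * sizeof(struct vm_page *) = 128 * 8 = 1024 bytes
 *
 * per CPU on an _LP64 platform, which is the 1kB figure quoted above.
 */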

#define	PQ_INACTIVE	0x00000010	/* page is in inactive list */
#define	PQ_ACTIVE	0x00000020	/* page is in active list */

#if !defined(CLOCK_INACTIVEPCT)
#define	CLOCK_INACTIVEPCT	33
#endif /* !defined(CLOCK_INACTIVEPCT) */

struct uvmpdpol_globalstate {
	kmutex_t lock;			/* lock on state */
					/* <= compiler pads here */
	struct pglist s_activeq		/* allocated pages, in use */
	    __aligned(COHERENCY_UNIT);
	struct pglist s_inactiveq;	/* pages between the clock hands */
	int s_active;
	int s_inactive;
	int s_inactarg;
	struct uvm_pctparam s_anonmin;
	struct uvm_pctparam s_filemin;
	struct uvm_pctparam s_execmin;
	struct uvm_pctparam s_anonmax;
	struct uvm_pctparam s_filemax;
	struct uvm_pctparam s_execmax;
	struct uvm_pctparam s_inactivepct;
};

struct uvmpdpol_scanstate {
	bool ss_anonreact, ss_filereact, ss_execreact;
	struct vm_page ss_marker;
};

static void	uvmpdpol_pageactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedeactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedequeue_locked(struct vm_page *);
static bool	uvmpdpol_pagerealize_locked(struct vm_page *);
static struct uvm_cpu *uvmpdpol_flush(void);

static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
static struct uvmpdpol_scanstate pdpol_scanstate;

PDPOL_EVCNT_DEFINE(reactexec)
PDPOL_EVCNT_DEFINE(reactfile)
PDPOL_EVCNT_DEFINE(reactanon)

static void
clock_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
	    s->s_active + s->s_inactive);
	if (s->s_inactarg <= uvmexp.freetarg) {
		s->s_inactarg = uvmexp.freetarg + 1;
	}
}
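
/*
 * Worked example, not part of the original source: with CLOCK_INACTIVEPCT
 * at its default of 33 and, say, 90000 active plus 30000 inactive pages,
 * clock_tune() computes
 *
 *	s_inactarg = 33% of (90000 + 30000) = 39600 pages,
 *
 * then raises it to uvmexp.freetarg + 1 if that is larger, so the inactive
 * target always sits above the free target.
 */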

void
uvmpdpol_scaninit(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	int t;
	bool anonunder, fileunder, execunder;
	bool anonover, fileover, execover;
	bool anonreact, filereact, execreact;
	int64_t freepg, anonpg, filepg, execpg;

	/*
	 * decide which types of pages we want to reactivate instead of freeing
	 * to keep usage within the minimum and maximum usage limits.
	 * uvm_availmem() will sync the counters.
	 */

	freepg = uvm_availmem(false);
	anonpg = cpu_count_get(CPU_COUNT_ANONCLEAN) +
	    cpu_count_get(CPU_COUNT_ANONDIRTY) +
	    cpu_count_get(CPU_COUNT_ANONUNKNOWN);
	execpg = cpu_count_get(CPU_COUNT_EXECPAGES);
	filepg = cpu_count_get(CPU_COUNT_FILECLEAN) +
	    cpu_count_get(CPU_COUNT_FILEDIRTY) +
	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) -
	    execpg;

	mutex_enter(&s->lock);
	t = s->s_active + s->s_inactive + freepg;
	anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
	fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
	execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
	anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
	fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
	execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
	anonreact = anonunder || (!anonover && (fileover || execover));
	filereact = fileunder || (!fileover && (anonover || execover));
	execreact = execunder || (!execover && (anonover || fileover));
	if (filereact && execreact && (anonreact || uvm_swapisfull())) {
		anonreact = filereact = execreact = false;
	}
	ss->ss_anonreact = anonreact;
	ss->ss_filereact = filereact;
	ss->ss_execreact = execreact;
	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
	ss->ss_marker.flags = PG_MARKER;
	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

void
uvmpdpol_scanfini(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;

	mutex_enter(&s->lock);
	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

struct vm_page *
uvmpdpol_selectvictim(krwlock_t **plock)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	struct vm_page *pg;
	krwlock_t *lock;

	mutex_enter(&s->lock);
	while (/* CONSTCOND */ 1) {
		struct vm_anon *anon;
		struct uvm_object *uobj;

		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
		if (pg == NULL) {
			break;
		}
		KASSERT((pg->flags & PG_MARKER) == 0);
		uvmexp.pdscans++;

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&pg->interlock);
		if (uvmpdpol_pagerealize_locked(pg)) {
			mutex_exit(&pg->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
		    pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
		    &ss->ss_marker, pdqueue);

		/*
		 * enforce the minimum thresholds on different
		 * types of memory usage.  if reusing the current
		 * page would reduce that type of usage below its
		 * minimum, reactivate the page instead and move
		 * on to the next page.
		 */
		anon = pg->uanon;
		uobj = pg->uobject;
		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactexec);
			continue;
		}
		if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
		    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactfile);
			continue;
		}
		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactanon);
			continue;
		}

		/*
		 * try to lock the object that owns the page.
		 *
		 * with the page interlock held, we can drop s->lock, which
		 * could otherwise serve as a barrier to us getting the
		 * object locked, because the owner of the object's lock may
		 * be blocked on s->lock (i.e. a deadlock).
		 *
		 * whatever happens, uvmpd_trylockowner() will release the
		 * interlock.  with the interlock dropped we can then
		 * re-acquire our own lock.  the order is:
		 *
		 *	object -> pdpol -> interlock.
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(pg);
		/* pg->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * move referenced pages back to active queue and skip to
		 * next page.
		 */
		if (pmap_is_referenced(pg)) {
			mutex_enter(&pg->interlock);
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			uvmexp.pdreact++;
			rw_exit(lock);
			continue;
		}

		/* we have a potential victim. */
		*plock = lock;
		break;
	}
	mutex_exit(&s->lock);
	return pg;
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int inactive_shortage;
	struct vm_page *p, marker;
	krwlock_t *lock;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */

	memset(&marker, 0, sizeof(marker));
	marker.flags = PG_MARKER;

	mutex_enter(&s->lock);
	TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
	for (;;) {
		inactive_shortage =
		    pdpol_state.s_inactarg - pdpol_state.s_inactive;
		if (inactive_shortage <= 0 && swap_shortage <= 0) {
			break;
		}
		p = TAILQ_NEXT(&marker, pdqueue);
		if (p == NULL) {
			break;
		}
		KASSERT((p->flags & PG_MARKER) == 0);

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&p->interlock);
		if (uvmpdpol_pagerealize_locked(p)) {
			mutex_exit(&p->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
		    pdqueue);

		/*
		 * try to lock the object that owns the page.  see comments
		 * in uvmpdpol_selectvictim().
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(p);
		/* p->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * if there's a shortage of swap slots, try to free this
		 * page's swap slot.
		 */
		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
		    (p->flags & PG_BUSY) == 0) {
			if (uvmpd_dropswap(p)) {
				swap_shortage--;
			}
		}

		/*
		 * if there's a shortage of inactive pages, deactivate.
		 */
		if (inactive_shortage > 0) {
			pmap_clear_reference(p);
			mutex_enter(&p->interlock);
			uvmpdpol_pagedeactivate_locked(p);
			mutex_exit(&p->interlock);
			uvmexp.pddeact++;
			inactive_shortage--;
		}
		rw_exit(lock);
	}
	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
	mutex_exit(&s->lock);
}

static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	}
	if ((pg->pqflags & PQ_INACTIVE) == 0) {
		KASSERT(pg->wire_count == 0);
		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
		pdpol_state.s_inactive++;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_INACTIVE;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	/*
	 * we have to clear the reference bit now, as when it comes time to
	 * realize the intent we won't have the object locked any more.
	 */
	pmap_clear_reference(pg);
	uvmpdpol_set_intent(pg, PQ_INTENT_I);
}

static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	uvmpdpol_pagedequeue_locked(pg);
	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
	pdpol_state.s_active++;
	pg->pqflags &= ~(PQ_INACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_ACTIVE;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_A);
}

static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	} else if (pg->pqflags & PQ_INACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
		KASSERT(pdpol_state.s_inactive > 0);
		pdpol_state.s_inactive--;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_SET);
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_D);
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_E);
}
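
/*
 * Summary added for clarity, not part of the original source: the wrappers
 * above (uvmpdpol_pageactivate() and friends) only record an intent
 * (PQ_INTENT_A/I/D/E plus PQ_INTENT_SET) on the page.  As this file is
 * structured, the intent is made real later: uvmpdpol_pagerealize() stashes
 * the page on a per-CPU queue, and uvmpdpol_flush() or uvmpdpol_idle()
 * eventually drains that queue under the global lock by calling
 * uvmpdpol_pagerealize_locked(), which applies the intent to the global
 * active/inactive queues.  Roughly, for a single page:
 *
 *	uvmpdpol_pageactivate(pg);		sets PQ_INTENT_A | PQ_INTENT_SET
 *	uvmpdpol_pagerealize(pg);		pushes pg onto ucpu->pdq
 *	...
 *	uvmpdpol_flush() / uvmpdpol_idle();	pops pg, clears PQ_INTENT_QUEUED,
 *	uvmpdpol_pagerealize_locked(pg);	moves pg to s_activeq
 */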

void
uvmpdpol_anfree(struct vm_anon *an)
{
}

bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/*
	 * if there's an intent set, we have to consider it.  otherwise,
	 * return the actual state.  we may be called unlocked for the
	 * purpose of assertions, which is safe due to the page lifecycle.
	 */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
	} else {
		return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
	}
}

bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/* consider intent in preference to actual state. */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		pqflags &= PQ_INTENT_MASK;
		return pqflags != PQ_INTENT_A && pqflags != PQ_INTENT_E;
	} else {
		/*
		 * TODO: Enabling this may be too much of a big hammer,
		 * since we do get useful information from activations.
		 * Think about it more and maybe come up with a heuristic
		 * or something.
		 *
		 * return (pqflags & PQ_ACTIVE) == 0;
		 */
		return true;
	}
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	/*
	 * Don't take any locks here.  This can be called from DDB, and in
	 * any case the numbers are stale the instant the lock is dropped,
	 * so it just doesn't matter.
	 */
	if (active) {
		*active = s->s_active;
	}
	if (inactive) {
		*inactive = s->s_inactive;
	}
}

#if !defined(PDSIM)
static int
min_check(struct uvm_pctparam *pct, int t)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int total = t;

	if (pct != &s->s_anonmin) {
		total += uvm_pctparam_get(&s->s_anonmin);
	}
	if (pct != &s->s_filemin) {
		total += uvm_pctparam_get(&s->s_filemin);
	}
	if (pct != &s->s_execmin) {
		total += uvm_pctparam_get(&s->s_execmin);
	}
	if (total > 95) {
		return EINVAL;
	}
	return 0;
}
#endif /* !defined(PDSIM) */

void
uvmpdpol_init(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&s->s_activeq);
	TAILQ_INIT(&s->s_inactiveq);
	uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
	uvm_pctparam_init(&s->s_anonmin, 10, min_check);
	uvm_pctparam_init(&s->s_filemin, 10, min_check);
	uvm_pctparam_init(&s->s_execmin, 5, min_check);
	uvm_pctparam_init(&s->s_anonmax, 80, NULL);
	uvm_pctparam_init(&s->s_filemax, 50, NULL);
	uvm_pctparam_init(&s->s_execmax, 30, NULL);
}
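
/*
 * Illustrative note, not part of the original source: min_check() above
 * keeps the three reserved minimums (anonmin, filemin and execmin, given
 * defaults of 10, 10 and 5 in uvmpdpol_init() and exposed as sysctl knobs
 * in uvmpdpol_sysctlsetup() below) from summing past 95% of memory.  For
 * example, with filemin=10 and execmin=5, a request to set anonmin to 85
 * makes the total 85 + 10 + 5 = 100 > 95 and min_check() returns EINVAL,
 * while 80 keeps the total at 95 and passes.
 */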

void
uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
{

	ucpu->pdq =
	    kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
	ucpu->pdqhead = CLOCK_PDQ_SIZE;
	ucpu->pdqtail = CLOCK_PDQ_SIZE;
}

void
uvmpdpol_reinit(void)
{
}

bool
uvmpdpol_needsscan_p(void)
{

	/*
	 * this must be an unlocked check: can be called from interrupt.
	 */
	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}

void
uvmpdpol_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_enter(&s->lock);
	clock_tune();
	mutex_exit(&s->lock);
}

/*
 * uvmpdpol_pagerealize_locked: take the intended state set on a page and
 * make it real.  return true if any work was done.
 */
static bool
uvmpdpol_pagerealize_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
	case PQ_INTENT_A | PQ_INTENT_SET:
	case PQ_INTENT_E | PQ_INTENT_SET:
		uvmpdpol_pageactivate_locked(pg);
		return true;
	case PQ_INTENT_I | PQ_INTENT_SET:
		uvmpdpol_pagedeactivate_locked(pg);
		return true;
	case PQ_INTENT_D | PQ_INTENT_SET:
		uvmpdpol_pagedequeue_locked(pg);
		return true;
	default:
		return false;
	}
}

/*
 * uvmpdpol_flush: return the current uvm_cpu with all of its pending
 * updates flushed to the global queues.  this routine may block, and
 * so can switch cpu.  the idea is to empty the queue on whatever cpu
 * we finally end up on.
 */
static struct uvm_cpu *
uvmpdpol_flush(void)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	mutex_enter(&s->lock);
	for (;;) {
		/*
		 * prefer scanning forwards (even though mutex_enter() is
		 * serializing) so as to not defeat any prefetch logic in
		 * the CPU.  that means enqueuing backwards elsewhere, like
		 * a stack, but it matters less there since pages are added
		 * singly.
		 *
		 * prefetch the next "struct vm_page" while working on the
		 * current one.  this has a measurable and very positive
		 * effect in reducing the amount of time spent here under
		 * the global lock.
		 */
		ucpu = curcpu()->ci_data.cpu_uvm;
		KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
		if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
			break;
		}
		pg = ucpu->pdq[ucpu->pdqhead++];
		if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
			__builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
		}
		mutex_enter(&pg->interlock);
		pg->pqflags &= ~PQ_INTENT_QUEUED;
		(void)uvmpdpol_pagerealize_locked(pg);
		mutex_exit(&pg->interlock);
	}
	mutex_exit(&s->lock);
	return ucpu;
}

/*
 * uvmpdpol_pagerealize: realize any intent set on the page.  in this
 * implementation, that means putting the page on a per-CPU queue to be
 * dealt with later.
 */
void
uvmpdpol_pagerealize(struct vm_page *pg)
{
	struct uvm_cpu *ucpu;

	/*
	 * drain the per-CPU queue if full, then enter the page.
	 */
	kpreempt_disable();
	ucpu = curcpu()->ci_data.cpu_uvm;
	if (__predict_false(ucpu->pdqhead == 0)) {
		ucpu = uvmpdpol_flush();
	}
	ucpu->pdq[--(ucpu->pdqhead)] = pg;
	kpreempt_enable();
}

/*
 * uvmpdpol_idle: called from the system idle loop.  periodically purge any
 * pending updates back to the global queues.
 */
void
uvmpdpol_idle(struct uvm_cpu *ucpu)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	/*
	 * if there are no pages in the queue, we have nothing to do.
	 */
	if (ucpu->pdqhead == ucpu->pdqtail) {
		ucpu->pdqtime = getticks();
		return;
	}

	/*
	 * don't do this more than ~8 times a second as it would needlessly
	 * exert pressure.
	 */
	if (getticks() - ucpu->pdqtime < (hz >> 3)) {
		return;
	}

	/*
	 * the idle LWP can't block, so we have to try for the lock.  if we
	 * get it, purge the per-CPU pending update queue.  continually
	 * check for a pending resched: in that case exit immediately.
	 */
	if (mutex_tryenter(&s->lock)) {
		while (ucpu->pdqhead != ucpu->pdqtail) {
			pg = ucpu->pdq[ucpu->pdqhead];
			if (!mutex_tryenter(&pg->interlock)) {
				break;
			}
			ucpu->pdqhead++;
			pg->pqflags &= ~PQ_INTENT_QUEUED;
			(void)uvmpdpol_pagerealize_locked(pg);
			mutex_exit(&pg->interlock);
			if (curcpu()->ci_want_resched) {
				break;
			}
		}
		if (ucpu->pdqhead == ucpu->pdqtail) {
			ucpu->pdqtime = getticks();
		}
		mutex_exit(&s->lock);
	}
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached executable data"));

	uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for "
	    "anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "executable data"));

	uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
	    SYSCTL_DESCR("Percentage of inactive queue of "
	    "the entire (active + inactive) queue"));
}

#endif /* !defined(PDSIM) */

#if defined(PDSIM)
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */