/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.36 2020/04/02 16:29:30 maxv Exp $	*/
/*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/

/*-
 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.36 2020/04/02 16:29:30 maxv Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>
#include <uvm/uvm_stat.h>

#endif /* defined(PDSIM) */

/*
 * per-CPU queue of pending page status changes.  128 entries makes for a
 * 1kB queue on _LP64 and has been found to be a reasonable compromise that
 * keeps lock contention events and wait times low, while not using too much
 * memory nor allowing global state to fall too far behind.
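 *
 * entries are pushed onto a queue from the top of the array downwards
 * (see uvmpdpol_pagerealize() below) and drained in ascending order by
 * uvmpdpol_flush() when the queue fills, or opportunistically from the
 * idle loop via uvmpdpol_idle().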
 */
#if !defined(CLOCK_PDQ_SIZE)
#define	CLOCK_PDQ_SIZE	128
#endif /* !defined(CLOCK_PDQ_SIZE) */

#define	PQ_INACTIVE	0x00000010	/* page is in inactive list */
#define	PQ_ACTIVE	0x00000020	/* page is in active list */

#if !defined(CLOCK_INACTIVEPCT)
#define	CLOCK_INACTIVEPCT	33
#endif /* !defined(CLOCK_INACTIVEPCT) */

struct uvmpdpol_globalstate {
	kmutex_t lock;			/* lock on state */
					/* <= compiler pads here */
	struct pglist s_activeq		/* allocated pages, in use */
	    __aligned(COHERENCY_UNIT);
	struct pglist s_inactiveq;	/* pages between the clock hands */
	int s_active;
	int s_inactive;
	int s_inactarg;
	struct uvm_pctparam s_anonmin;
	struct uvm_pctparam s_filemin;
	struct uvm_pctparam s_execmin;
	struct uvm_pctparam s_anonmax;
	struct uvm_pctparam s_filemax;
	struct uvm_pctparam s_execmax;
	struct uvm_pctparam s_inactivepct;
};

struct uvmpdpol_scanstate {
	bool ss_anonreact, ss_filereact, ss_execreact;
	struct vm_page ss_marker;
};

static void	uvmpdpol_pageactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedeactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedequeue_locked(struct vm_page *);
static bool	uvmpdpol_pagerealize_locked(struct vm_page *);
static struct uvm_cpu *uvmpdpol_flush(void);

static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
static struct uvmpdpol_scanstate pdpol_scanstate;

PDPOL_EVCNT_DEFINE(reactexec)
PDPOL_EVCNT_DEFINE(reactfile)
PDPOL_EVCNT_DEFINE(reactanon)

static void
clock_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
	    s->s_active + s->s_inactive);
	if (s->s_inactarg <= uvmexp.freetarg) {
		s->s_inactarg = uvmexp.freetarg + 1;
	}
}

void
uvmpdpol_scaninit(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	int t;
	bool anonunder, fileunder, execunder;
	bool anonover, fileover, execover;
	bool anonreact, filereact, execreact;
	int64_t freepg, anonpg, filepg, execpg;

	/*
	 * decide which types of pages we want to reactivate instead of freeing
	 * to keep usage within the minimum and maximum usage limits.
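	 *
	 * a type is reactivated if it is under its own minimum, or if it
	 * is not over its maximum while some other type is over its own.
	 * if that would leave nothing worth freeing (all three types
	 * reactivated, or file and exec reactivated while swap is full),
	 * reactivate nothing at all.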
	 */

	cpu_count_sync_all();
	freepg = uvm_availmem();
	anonpg = cpu_count_get(CPU_COUNT_ANONPAGES);
	filepg = cpu_count_get(CPU_COUNT_FILEPAGES);
	execpg = cpu_count_get(CPU_COUNT_EXECPAGES);

	mutex_enter(&s->lock);
	t = s->s_active + s->s_inactive + freepg;
	anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
	fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
	execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
	anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
	fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
	execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
	anonreact = anonunder || (!anonover && (fileover || execover));
	filereact = fileunder || (!fileover && (anonover || execover));
	execreact = execunder || (!execover && (anonover || fileover));
	if (filereact && execreact && (anonreact || uvm_swapisfull())) {
		anonreact = filereact = execreact = false;
	}
	ss->ss_anonreact = anonreact;
	ss->ss_filereact = filereact;
	ss->ss_execreact = execreact;
	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
	ss->ss_marker.flags = PG_MARKER;
	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

void
uvmpdpol_scanfini(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;

	mutex_enter(&s->lock);
	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

struct vm_page *
uvmpdpol_selectvictim(krwlock_t **plock)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	struct vm_page *pg;
	krwlock_t *lock;

	mutex_enter(&s->lock);
	while (/* CONSTCOND */ 1) {
		struct vm_anon *anon;
		struct uvm_object *uobj;

		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
		if (pg == NULL) {
			break;
		}
		KASSERT((pg->flags & PG_MARKER) == 0);
		uvmexp.pdscans++;

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&pg->interlock);
		if (uvmpdpol_pagerealize_locked(pg)) {
			mutex_exit(&pg->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
		    pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
		    &ss->ss_marker, pdqueue);

		/*
		 * enforce the minimum thresholds on different
		 * types of memory usage.  if reusing the current
		 * page would reduce that type of usage below its
		 * minimum, reactivate the page instead and move
		 * on to the next page.
		 */
		anon = pg->uanon;
		uobj = pg->uobject;
		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactexec);
			continue;
		}
		if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
		    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactfile);
			continue;
		}
		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactanon);
			continue;
		}

		/*
		 * try to lock the object that owns the page.
308 * 309 * with the page interlock held, we can drop s->lock, which 310 * could otherwise serve as a barrier to us getting the 311 * object locked, because the owner of the object's lock may 312 * be blocked on s->lock (i.e. a deadlock). 313 * 314 * whatever happens, uvmpd_trylockowner() will release the 315 * interlock. with the interlock dropped we can then 316 * re-acquire our own lock. the order is: 317 * 318 * object -> pdpol -> interlock. 319 */ 320 mutex_exit(&s->lock); 321 lock = uvmpd_trylockowner(pg); 322 /* pg->interlock now released */ 323 mutex_enter(&s->lock); 324 if (lock == NULL) { 325 /* didn't get it - try the next page. */ 326 continue; 327 } 328 329 /* 330 * move referenced pages back to active queue and skip to 331 * next page. 332 */ 333 if (pmap_is_referenced(pg)) { 334 mutex_enter(&pg->interlock); 335 uvmpdpol_pageactivate_locked(pg); 336 mutex_exit(&pg->interlock); 337 uvmexp.pdreact++; 338 rw_exit(lock); 339 continue; 340 } 341 342 /* we have a potential victim. */ 343 *plock = lock; 344 break; 345 } 346 mutex_exit(&s->lock); 347 return pg; 348 } 349 350 void 351 uvmpdpol_balancequeue(int swap_shortage) 352 { 353 struct uvmpdpol_globalstate *s = &pdpol_state; 354 int inactive_shortage; 355 struct vm_page *p, marker; 356 krwlock_t *lock; 357 358 /* 359 * we have done the scan to get free pages. now we work on meeting 360 * our inactive target. 361 */ 362 363 memset(&marker, 0, sizeof(marker)); 364 marker.flags = PG_MARKER; 365 366 mutex_enter(&s->lock); 367 TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue); 368 for (;;) { 369 inactive_shortage = 370 pdpol_state.s_inactarg - pdpol_state.s_inactive; 371 if (inactive_shortage <= 0 && swap_shortage <= 0) { 372 break; 373 } 374 p = TAILQ_NEXT(&marker, pdqueue); 375 if (p == NULL) { 376 break; 377 } 378 KASSERT((p->flags & PG_MARKER) == 0); 379 380 /* 381 * acquire interlock to stablize page identity. 382 * if we have caught the page in a state of flux 383 * deal with it and retry. 384 */ 385 mutex_enter(&p->interlock); 386 if (uvmpdpol_pagerealize_locked(p)) { 387 mutex_exit(&p->interlock); 388 continue; 389 } 390 391 /* 392 * now prepare to move on to the next page. 393 */ 394 TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue); 395 TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker, 396 pdqueue); 397 398 /* 399 * try to lock the object that owns the page. see comments 400 * in uvmpdol_selectvictim(). 401 */ 402 mutex_exit(&s->lock); 403 lock = uvmpd_trylockowner(p); 404 /* p->interlock now released */ 405 mutex_enter(&s->lock); 406 if (lock == NULL) { 407 /* didn't get it - try the next page. */ 408 continue; 409 } 410 411 /* 412 * if there's a shortage of swap slots, try to free it. 413 */ 414 if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 && 415 (p->flags & PG_BUSY) == 0) { 416 if (uvmpd_dropswap(p)) { 417 swap_shortage--; 418 } 419 } 420 421 /* 422 * if there's a shortage of inactive pages, deactivate. 
		 */
		if (inactive_shortage > 0) {
			pmap_clear_reference(p);
			mutex_enter(&p->interlock);
			uvmpdpol_pagedeactivate_locked(p);
			mutex_exit(&p->interlock);
			uvmexp.pddeact++;
			inactive_shortage--;
		}
		rw_exit(lock);
	}
	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
	mutex_exit(&s->lock);
}

static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	}
	if ((pg->pqflags & PQ_INACTIVE) == 0) {
		KASSERT(pg->wire_count == 0);
		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
		pdpol_state.s_inactive++;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_INACTIVE;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	/*
	 * we have to clear the reference bit now, as when it comes time to
	 * realize the intent we won't have the object locked any more.
	 */
	pmap_clear_reference(pg);
	uvmpdpol_set_intent(pg, PQ_INTENT_I);
}

static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	uvmpdpol_pagedequeue_locked(pg);
	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
	pdpol_state.s_active++;
	pg->pqflags &= ~(PQ_INACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_ACTIVE;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_A);
}

static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	} else if (pg->pqflags & PQ_INACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
		KASSERT(pdpol_state.s_inactive > 0);
		pdpol_state.s_inactive--;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_SET);
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_D);
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_E);
}

void
uvmpdpol_anfree(struct vm_anon *an)
{
}

bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/*
	 * if there's an intent set, we have to consider it.  otherwise,
	 * return the actual state.  we may be called unlocked for the
	 * purpose of assertions, which is safe due to the page lifecycle.
	 */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
	} else {
		return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
	}
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	/*
	 * Don't take any locks here.  This can be called from DDB, and in
	 * any case the numbers are stale the instant the lock is dropped,
	 * so it just doesn't matter.
	 */
	if (active) {
		*active = s->s_active;
	}
	if (inactive) {
		*inactive = s->s_inactive;
	}
}

#if !defined(PDSIM)
static int
min_check(struct uvm_pctparam *pct, int t)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int total = t;

	if (pct != &s->s_anonmin) {
		total += uvm_pctparam_get(&s->s_anonmin);
	}
	if (pct != &s->s_filemin) {
		total += uvm_pctparam_get(&s->s_filemin);
	}
	if (pct != &s->s_execmin) {
		total += uvm_pctparam_get(&s->s_execmin);
	}
	if (total > 95) {
		return EINVAL;
	}
	return 0;
}
#endif /* !defined(PDSIM) */

void
uvmpdpol_init(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&s->s_activeq);
	TAILQ_INIT(&s->s_inactiveq);
	uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
	uvm_pctparam_init(&s->s_anonmin, 10, min_check);
	uvm_pctparam_init(&s->s_filemin, 10, min_check);
	uvm_pctparam_init(&s->s_execmin, 5, min_check);
	uvm_pctparam_init(&s->s_anonmax, 80, NULL);
	uvm_pctparam_init(&s->s_filemax, 50, NULL);
	uvm_pctparam_init(&s->s_execmax, 30, NULL);
}

void
uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
{

	ucpu->pdq =
	    kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
	ucpu->pdqhead = CLOCK_PDQ_SIZE;
	ucpu->pdqtail = CLOCK_PDQ_SIZE;
}

void
uvmpdpol_reinit(void)
{
}

bool
uvmpdpol_needsscan_p(void)
{

	/*
	 * this must be an unlocked check: can be called from interrupt.
	 */
	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}

void
uvmpdpol_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_enter(&s->lock);
	clock_tune();
	mutex_exit(&s->lock);
}

/*
 * uvmpdpol_pagerealize_locked: take the intended state set on a page and
 * make it real.  return true if any work was done.
 */
static bool
uvmpdpol_pagerealize_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
	case PQ_INTENT_A | PQ_INTENT_SET:
	case PQ_INTENT_E | PQ_INTENT_SET:
		uvmpdpol_pageactivate_locked(pg);
		return true;
	case PQ_INTENT_I | PQ_INTENT_SET:
		uvmpdpol_pagedeactivate_locked(pg);
		return true;
	case PQ_INTENT_D | PQ_INTENT_SET:
		uvmpdpol_pagedequeue_locked(pg);
		return true;
	default:
		return false;
	}
}

/*
 * uvmpdpol_flush: return the current uvm_cpu with all of its pending
 * updates flushed to the global queues.  this routine may block, and
 * so can switch cpu.  the idea is to empty the queue on whatever cpu
 * we finally end up on.
 */
static struct uvm_cpu *
uvmpdpol_flush(void)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	mutex_enter(&s->lock);
	for (;;) {
		/*
		 * prefer scanning forwards (even though mutex_enter() is
		 * serializing) so as to not defeat any prefetch logic in
		 * the CPU.  that means elsewhere enqueuing backwards, like
		 * a stack, but it's not so important there as pages are
		 * being added singly.
		 *
		 * prefetch the next "struct vm_page" while working on the
		 * current one.  this has a measurable and very positive
		 * effect in reducing the amount of time spent here under
		 * the global lock.
		 */
		ucpu = curcpu()->ci_data.cpu_uvm;
		KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
		if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
			break;
		}
		pg = ucpu->pdq[ucpu->pdqhead++];
		if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
			__builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
		}
		mutex_enter(&pg->interlock);
		pg->pqflags &= ~PQ_INTENT_QUEUED;
		(void)uvmpdpol_pagerealize_locked(pg);
		mutex_exit(&pg->interlock);
	}
	mutex_exit(&s->lock);
	return ucpu;
}

/*
 * uvmpdpol_pagerealize: realize any intent set on the page.  in this
 * implementation, that means putting the page on a per-CPU queue to be
 * dealt with later.
 */
void
uvmpdpol_pagerealize(struct vm_page *pg)
{
	struct uvm_cpu *ucpu;

	/*
	 * drain the per-CPU queue if full, then enter the page.
	 */
	kpreempt_disable();
	ucpu = curcpu()->ci_data.cpu_uvm;
	if (__predict_false(ucpu->pdqhead == 0)) {
		ucpu = uvmpdpol_flush();
	}
	ucpu->pdq[--(ucpu->pdqhead)] = pg;
	kpreempt_enable();
}

/*
 * uvmpdpol_idle: called from the system idle loop.  periodically purge any
 * pending updates back to the global queues.
 */
void
uvmpdpol_idle(struct uvm_cpu *ucpu)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	/*
	 * if no pages in the queue, we have nothing to do.
	 */
	if (ucpu->pdqhead == ucpu->pdqtail) {
		ucpu->pdqtime = getticks();
		return;
	}

	/*
	 * don't do this more than ~8 times a second as it would needlessly
	 * exert pressure.
	 */
	if (getticks() - ucpu->pdqtime < (hz >> 3)) {
		return;
	}

	/*
	 * the idle LWP can't block, so we have to try for the lock.  if we
	 * get it, purge the per-CPU pending update queue.  continually
	 * check for a pending resched: in that case exit immediately.
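	 * mutex_tryenter() is used on the global lock and on each page's
	 * interlock so that the idle LWP never sleeps here.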
	 */
	if (mutex_tryenter(&s->lock)) {
		while (ucpu->pdqhead != ucpu->pdqtail) {
			pg = ucpu->pdq[ucpu->pdqhead];
			if (!mutex_tryenter(&pg->interlock)) {
				break;
			}
			ucpu->pdqhead++;
			pg->pqflags &= ~PQ_INTENT_QUEUED;
			(void)uvmpdpol_pagerealize_locked(pg);
			mutex_exit(&pg->interlock);
			if (curcpu()->ci_want_resched) {
				break;
			}
		}
		if (ucpu->pdqhead == ucpu->pdqtail) {
			ucpu->pdqtime = getticks();
		}
		mutex_exit(&s->lock);
	}
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached executable data"));

	uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for "
	    "anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "executable data"));

	uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
	    SYSCTL_DESCR("Percentage of inactive queue of "
	    "the entire (active + inactive) queue"));
}

#endif /* !defined(PDSIM) */

#if defined(PDSIM)
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */