/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vmmeter.h	8.2 (Berkeley) 7/10/94
 * $FreeBSD: src/sys/sys/vmmeter.h,v 1.21.2.2 2002/10/10 19:28:21 dillon Exp $
 */

#ifndef _VM_VM_PAGE2_H_
#define _VM_VM_PAGE2_H_

#ifdef _KERNEL

#ifndef _SYS_VMMETER_H_
#include <sys/vmmeter.h>
#endif
#ifndef _SYS_QUEUE_H_
#include <sys/queue.h>
#endif
#ifndef _VM_VM_PAGE_H_
#include <vm/vm_page.h>
#endif
#ifndef _SYS_SPINLOCK_H_
#include <sys/spinlock.h>
#endif
#ifndef _SYS_SPINLOCK2_H_
#include <sys/spinlock2.h>
#endif

/*
 * SMP NOTE
 *
 * VM fault rates are highly dependent on SMP locking conflicts and, on
 * multi-socket systems, cache mastership changes for globals due to atomic
 * ops (even simple atomic_add_*() calls).  Cache mastership changes can
 * limit the aggregate fault rate.
 *
 * For this reason we go through some hoops to access VM statistics for
 * low-memory handling, pageout, and other triggers.  Each cpu collects
 * adjustments in gd->gd_vmstats_adj.  These get rolled up into the global
 * vmstats structure.  The global vmstats structure is then pulled into
 * gd->gd_vmstats by each cpu when it needs it.  Critical path checks always
 * use the pcpu gd->gd_vmstats structure.
 */
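/*
 * Illustrative sketch (not part of the original header): per the SMP note
 * above, critical-path threshold checks read the per-cpu snapshot in
 * gd->gd_vmstats rather than touching the global vmstats structure.  The
 * helper name below is hypothetical; it only reads fields that the inlines
 * in this file already use, and mirrors the "free + cache" availability sum
 * those inlines test against.
 *
 *	static __inline long
 *	vm_paging_avail_sketch(void)
 *	{
 *		globaldata_t gd = mycpu;
 *
 *		return ((long)gd->gd_vmstats.v_free_count +
 *			(long)gd->gd_vmstats.v_cache_count);
 *	}
 */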
/*
 * Return TRUE if we are under our severe low-free-pages threshold.
 *
 * This causes user processes to stall to avoid exhausting memory that
 * the kernel might need.
 *
 * reserved < severe < minimum < wait < start < target1 < target2
 */
static __inline
int
vm_paging_severe(void)
{
        globaldata_t gd = mycpu;

        if (__predict_false(gd->gd_vmstats.v_free_severe >
                            gd->gd_vmstats.v_free_count +
                            gd->gd_vmstats.v_cache_count))
        {
                return 1;
        }
        if (__predict_false(gd->gd_vmstats.v_free_reserved >
                            gd->gd_vmstats.v_free_count))
        {
                return 1;
        }
        return 0;
}

/*
 * Return TRUE if we are under our minimum low-free-pages threshold.  We
 * will not count (donotcount) free pages as being free (used mainly for
 * hysteresis tests).
 *
 * This will cause most normal page faults to block and activate the
 * pageout daemon.
 *
 * The pageout daemon should already be active due to vm_paging_start(n)
 * and will typically continue running until it hits target2.
 *
 * reserved < severe < minimum < wait < start < target1 < target2
 */
static __inline
int
vm_paging_min_dnc(long donotcount)
{
        globaldata_t gd = mycpu;

        if (__predict_false(gd->gd_vmstats.v_free_min + donotcount >
                            (gd->gd_vmstats.v_free_count +
                             gd->gd_vmstats.v_cache_count)))
        {
                return 1;
        }
        if (__predict_false(gd->gd_vmstats.v_free_reserved >
                            gd->gd_vmstats.v_free_count))
        {
                return 1;
        }
        return 0;
}

static __inline
int
vm_paging_min(void)
{
        return vm_paging_min_dnc(0);
}

/*
 * Return TRUE if nominal userland / VM-system allocations should slow
 * down (but not stop) due to low free pages in the system.  This threshold
 * is typically halfway between min and start.
 *
 * reserved < severe < minimum < wait < start < target1 < target2
 */
static __inline
int
vm_paging_wait(void)
{
        globaldata_t gd = mycpu;

        if (__predict_false(gd->gd_vmstats.v_paging_wait >
                            (gd->gd_vmstats.v_free_count +
                             gd->gd_vmstats.v_cache_count)))
        {
                return 1;
        }
        if (__predict_false(gd->gd_vmstats.v_free_reserved >
                            gd->gd_vmstats.v_free_count))
        {
                return 1;
        }
        return 0;
}

/*
 * Return TRUE if the pageout daemon should be started up or continue
 * running.  Available pages have dropped to a level where we need to
 * think about freeing some up.
 *
 * Also handles edge cases for required 'actually-free' pages.
 *
 * reserved < severe < minimum < wait < start < target1 < target2
 */
static __inline
int
vm_paging_start(int adj)
{
        globaldata_t gd = mycpu;

        if (__predict_false(gd->gd_vmstats.v_paging_start >
                            (gd->gd_vmstats.v_free_count +
                             gd->gd_vmstats.v_cache_count + adj)))
        {
                return 1;
        }
        if (__predict_false(gd->gd_vmstats.v_free_min >
                            gd->gd_vmstats.v_free_count + adj))
        {
                return 1;
        }
        if (__predict_false(gd->gd_vmstats.v_free_reserved >
                            gd->gd_vmstats.v_free_count))
        {
                return 1;
        }
        return 0;
}
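/*
 * Illustrative sketch (not part of the original header): a hedged example
 * of how a caller might layer the threshold tests above, from most severe
 * to mildest.  The name and return convention are hypothetical: 3 means
 * stall hard (kernel reserve at risk), 2 means block normal page faults,
 * 1 means the pageout daemon should be running, 0 means no pressure.
 * Real callers would wake the pageout daemon and/or sleep at the
 * appropriate points rather than just return a code.
 *
 *	static __inline int
 *	vm_paging_pressure_sketch(void)
 *	{
 *		if (vm_paging_severe())
 *			return 3;
 *		if (vm_paging_min())
 *			return 2;
 *		if (vm_paging_start(0))
 *			return 1;
 *		return 0;
 *	}
 */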
/*
 * Return TRUE if the pageout daemon has not yet reached its initial target.
 * The pageout daemon works hard to reach target1.
 *
 * reserved < severe < minimum < wait < start < target1 < target2
 */
static __inline
int
vm_paging_target1(void)
{
        globaldata_t gd = mycpu;

        if (__predict_false(gd->gd_vmstats.v_paging_target1 >
                            (gd->gd_vmstats.v_free_count +
                             gd->gd_vmstats.v_cache_count)))
        {
                return 1;
        }
        if (__predict_false(gd->gd_vmstats.v_free_reserved >
                            gd->gd_vmstats.v_free_count))
        {
                return 1;
        }
        return 0;
}

static __inline
long
vm_paging_target1_count(void)
{
        globaldata_t gd = mycpu;
        long delta;

        delta = gd->gd_vmstats.v_paging_target1 -
                (gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count);
        return delta;
}

/*
 * Return TRUE if the pageout daemon has not yet reached its final target.
 * The pageout daemon takes it easy on its way between target1 and target2.
 *
 * reserved < severe < minimum < wait < start < target1 < target2
 */
static __inline
int
vm_paging_target2(void)
{
        globaldata_t gd = mycpu;

        if (__predict_false(gd->gd_vmstats.v_paging_target2 >
                            (gd->gd_vmstats.v_free_count +
                             gd->gd_vmstats.v_cache_count)))
        {
                return 1;
        }
        if (__predict_false(gd->gd_vmstats.v_free_reserved >
                            gd->gd_vmstats.v_free_count))
        {
                return 1;
        }
        return 0;
}

static __inline
long
vm_paging_target2_count(void)
{
        globaldata_t gd = mycpu;
        long delta;

        delta = gd->gd_vmstats.v_paging_target2 -
                (gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count);
        return delta;
}

/*
 * Returns TRUE if additional pages must be deactivated, either during a
 * pageout operation or during the page stats scan.
 *
 * Inactive tests are used in two places.  During heavy paging the
 * inactive_target is used to refill the inactive queue in stages.
 * Those pages are then ultimately flushed and moved to the cache or free
 * queues.
 *
 * The inactive queue is also used to manage scans to update page stats
 * (m->act_count).  The page stats scan occurs lazily in small batches to
 * update m->act_count for pages in the active queue and to move pages
 * (limited by inactive_target) to the inactive queue.  Page stats scanning
 * and active deactivations only run while the inactive queue is below target.
 * After this, additional page stats scanning just to update m->act_count
 * (but not do further deactivations) continues to run for a limited period
 * of time after any pageout daemon activity.
 */
static __inline
int
vm_paging_inactive(void)
{
        globaldata_t gd = mycpu;

        if (__predict_false((gd->gd_vmstats.v_free_count +
                             gd->gd_vmstats.v_cache_count +
                             gd->gd_vmstats.v_inactive_count) <
                            (gd->gd_vmstats.v_free_min +
                             gd->gd_vmstats.v_inactive_target)))
        {
                return 1;
        }
        return 0;
}
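/*
 * Illustrative sketch (not part of the original header): the boolean test
 * above and vm_paging_inactive_count() below are two views of the same
 * comparison, so a hedged caller could size a deactivation batch as shown.
 * The wrapper name and the clamping policy are hypothetical.
 *
 *	static __inline long
 *	vm_paging_deact_batch_sketch(long maxbatch)
 *	{
 *		long n = vm_paging_inactive_count();
 *
 *		if (n <= 0)
 *			return 0;
 *		return (n > maxbatch ? maxbatch : n);
 *	}
 */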
/*
 * Return the number of pages that need to be deactivated to achieve the
 * inactive target as a positive number.  A negative number indicates that
 * there are already a sufficient number of inactive pages.
 */
static __inline
long
vm_paging_inactive_count(void)
{
        globaldata_t gd = mycpu;
        long delta;

        delta = (gd->gd_vmstats.v_free_min +
                 gd->gd_vmstats.v_inactive_target) -
                (gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count +
                 gd->gd_vmstats.v_inactive_count);

        return delta;
}

/*
 * Clear dirty bits in the VM page but truncate the
 * end to a DEV_BSIZE'd boundary.
 *
 * Used when reading data in, typically via getpages.
 * The partial device block at the end of the truncation
 * range should not lose its dirty bit.
 *
 * NOTE: This function does not clear the pmap modified bit.
 */
static __inline
void
vm_page_clear_dirty_end_nonincl(vm_page_t m, int base, int size)
{
        size = (base + size) & ~DEV_BMASK;
        if (base < size)
                vm_page_clear_dirty(m, base, size - base);
}

/*
 * Clear dirty bits in the VM page but truncate the
 * beginning to a DEV_BSIZE'd boundary.
 *
 * Used when truncating a buffer.  The partial device
 * block at the beginning of the truncation range
 * should not lose its dirty bit.
 *
 * NOTE: This function does not clear the pmap modified bit.
 */
static __inline
void
vm_page_clear_dirty_beg_nonincl(vm_page_t m, int base, int size)
{
        size += base;
        base = (base + DEV_BMASK) & ~DEV_BMASK;
        if (base < size)
                vm_page_clear_dirty(m, base, size - base);
}

static __inline
void
vm_page_spin_lock(vm_page_t m)
{
        spin_lock(&m->spin);
}

static __inline
void
vm_page_spin_unlock(vm_page_t m)
{
        spin_unlock(&m->spin);
}

/*
 * Wire a vm_page that is already wired.  Does not require a busied
 * page.
 */
static __inline
void
vm_page_wire_quick(vm_page_t m)
{
        if (atomic_fetchadd_int(&m->wire_count, 1) == 0)
                panic("vm_page_wire_quick: wire_count was 0");
}

/*
 * Unwire a vm_page quickly; does not require a busied page.
 *
 * This routine refuses to drop the wire_count to 0 and will return
 * TRUE if it would have had to (instead of decrementing it to 0).
 * The caller can then busy the page and deal with it.
 */
static __inline
int
vm_page_unwire_quick(vm_page_t m)
{
        KKASSERT(m->wire_count > 0);
        for (;;) {
                u_int wire_count = m->wire_count;

                cpu_ccfence();
                if (wire_count == 1)
                        return TRUE;
                if (atomic_cmpset_int(&m->wire_count, wire_count,
                                      wire_count - 1))
                        return FALSE;
        }
}

/*
 * Functions implemented as macros
 */

static __inline void
vm_page_flag_set(vm_page_t m, unsigned int bits)
{
        atomic_set_int(&(m)->flags, bits);
}

static __inline void
vm_page_flag_clear(vm_page_t m, unsigned int bits)
{
        atomic_clear_int(&(m)->flags, bits);
}

/*
 * Wakeup anyone waiting for the page after potentially unbusying
 * (hard or soft) or doing other work on a page that might make a
 * waiter ready.  The setting of PBUSY_WANTED is integrated into the
 * related flags and it can't be set once the flags are already
 * clear, so there should be no races here.
 */
static __inline void
vm_page_flash(vm_page_t m)
{
        if (m->busy_count & PBUSY_WANTED) {
                atomic_clear_int(&m->busy_count, PBUSY_WANTED);
                wakeup(m);
        }
}
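/*
 * Illustrative sketch (not part of the original header): the soft-busy
 * hold/drop inlines below are used as a bracketing pair, and the drop side
 * issues the integrated wakeup described below once no busy references
 * remain.  Whether a caller may take a new soft-busy hold depends on
 * context not shown here; the wrapper name and the do_something_with_page()
 * placeholder are hypothetical.
 *
 *	static __inline void
 *	vm_page_sbusy_bracket_sketch(vm_page_t m)
 *	{
 *		vm_page_sbusy_hold(m);
 *		do_something_with_page(m);
 *		vm_page_sbusy_drop(m);
 *	}
 */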
/*
 * Adjust the soft-busy count on a page.  The drop code will issue an
 * integrated wakeup if busy_count becomes 0.
 */
static __inline void
vm_page_sbusy_hold(vm_page_t m)
{
        atomic_add_int(&m->busy_count, 1);
}

static __inline void
vm_page_sbusy_drop(vm_page_t m)
{
        uint32_t ocount;

        ocount = atomic_fetchadd_int(&m->busy_count, -1);
        if (ocount - 1 == PBUSY_WANTED) {
                /* WANTED and no longer BUSY or SBUSY */
                atomic_clear_int(&m->busy_count, PBUSY_WANTED);
                wakeup(m);
        }
}

/*
 * Reduce the protection of a page.  This routine never raises the
 * protection and therefore can be safely called if the page is already
 * at VM_PROT_NONE (it will effectively be a NOP).
 *
 * VM_PROT_NONE will remove all user mappings of a page.  This is often
 * necessary when a page changes state (for example, turns into a copy-on-write
 * page or needs to be frozen for write I/O) in order to force a fault, or
 * to force a page's dirty bits to be synchronized and avoid hardware
 * (modified/accessed) bit update races with pmap changes.
 *
 * Since 'prot' is usually a constant, this inline usually winds up optimizing
 * out the primary conditional.
 *
 * Must be called with (m) hard-busied.
 *
 * WARNING: VM_PROT_NONE can block, but will loop until all mappings have
 *	    been cleared.  Callers should be aware that other page related
 *	    elements might have changed, however.
 */
static __inline void
vm_page_protect(vm_page_t m, int prot)
{
        KKASSERT(m->busy_count & PBUSY_LOCKED);
        if (prot == VM_PROT_NONE) {
                if (pmap_mapped_sync(m) & (PG_MAPPED | PG_WRITEABLE)) {
                        pmap_page_protect(m, VM_PROT_NONE);
                        /* PG_WRITEABLE & PG_MAPPED cleared by call */
                }
        } else if ((prot == VM_PROT_READ) &&
                   (m->flags & PG_WRITEABLE) &&
                   (pmap_mapped_sync(m) & PG_WRITEABLE)) {
                pmap_page_protect(m, VM_PROT_READ);
                /* PG_WRITEABLE cleared by call */
        }
}

/*
 * Zero-fill the specified page.  The entire contents of the page will be
 * zero'd out.
 */
static __inline boolean_t
vm_page_zero_fill(vm_page_t m)
{
        pmap_zero_page(VM_PAGE_TO_PHYS(m));
        return (TRUE);
}

/*
 * Copy the contents of src_m to dest_m.  The pages must be stable but spl
 * and other protections depend on context.
 */
static __inline void
vm_page_copy(vm_page_t src_m, vm_page_t dest_m)
{
        pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
        dest_m->valid = VM_PAGE_BITS_ALL;
        dest_m->dirty = VM_PAGE_BITS_ALL;
}

/*
 * Free a page.  The page must be marked BUSY.
 */
static __inline void
vm_page_free(vm_page_t m)
{
        vm_page_free_toq(m);
}

/*
 * Free a page to the zeroed-pages queue.  The caller must ensure that the
 * page has been zeroed.
 */
static __inline void
vm_page_free_zero(vm_page_t m)
{
#ifdef PMAP_DEBUG
#ifdef PHYS_TO_DMAP
        char *p = (char *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
        int i;

        for (i = 0; i < PAGE_SIZE; i++) {
                if (p[i] != 0) {
                        panic("non-zero page in vm_page_free_zero()");
                }
        }
#endif
#endif
        vm_page_free_toq(m);
}

/*
 * Set the page to not be dirty.  Note: does not clear pmap modify bits.
 */
static __inline void
vm_page_undirty(vm_page_t m)
{
        m->dirty = 0;
}

#endif	/* _KERNEL */
#endif	/* _VM_VM_PAGE2_H_ */