1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28*0Sstevel@tonic-gate /* All Rights Reserved */ 29*0Sstevel@tonic-gate 30*0Sstevel@tonic-gate /* 31*0Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 32*0Sstevel@tonic-gate * The Regents of the University of California 33*0Sstevel@tonic-gate * All Rights Reserved 34*0Sstevel@tonic-gate * 35*0Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 36*0Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 37*0Sstevel@tonic-gate * contributors. 38*0Sstevel@tonic-gate */ 39*0Sstevel@tonic-gate 40*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 41*0Sstevel@tonic-gate 42*0Sstevel@tonic-gate #include <sys/types.h> 43*0Sstevel@tonic-gate #include <sys/t_lock.h> 44*0Sstevel@tonic-gate #include <sys/param.h> 45*0Sstevel@tonic-gate #include <sys/buf.h> 46*0Sstevel@tonic-gate #include <sys/uio.h> 47*0Sstevel@tonic-gate #include <sys/proc.h> 48*0Sstevel@tonic-gate #include <sys/systm.h> 49*0Sstevel@tonic-gate #include <sys/mman.h> 50*0Sstevel@tonic-gate #include <sys/cred.h> 51*0Sstevel@tonic-gate #include <sys/vnode.h> 52*0Sstevel@tonic-gate #include <sys/vm.h> 53*0Sstevel@tonic-gate #include <sys/vmparam.h> 54*0Sstevel@tonic-gate #include <sys/vtrace.h> 55*0Sstevel@tonic-gate #include <sys/cmn_err.h> 56*0Sstevel@tonic-gate #include <sys/cpuvar.h> 57*0Sstevel@tonic-gate #include <sys/user.h> 58*0Sstevel@tonic-gate #include <sys/kmem.h> 59*0Sstevel@tonic-gate #include <sys/debug.h> 60*0Sstevel@tonic-gate #include <sys/callb.h> 61*0Sstevel@tonic-gate #include <sys/tnf_probe.h> 62*0Sstevel@tonic-gate #include <sys/mem_cage.h> 63*0Sstevel@tonic-gate #include <sys/time.h> 64*0Sstevel@tonic-gate 65*0Sstevel@tonic-gate #include <vm/hat.h> 66*0Sstevel@tonic-gate #include <vm/as.h> 67*0Sstevel@tonic-gate #include <vm/seg.h> 68*0Sstevel@tonic-gate #include <vm/page.h> 69*0Sstevel@tonic-gate #include <vm/pvn.h> 70*0Sstevel@tonic-gate #include <vm/seg_kmem.h> 71*0Sstevel@tonic-gate 72*0Sstevel@tonic-gate static int checkpage(page_t *, int); 73*0Sstevel@tonic-gate 74*0Sstevel@tonic-gate /* 75*0Sstevel@tonic-gate * The following parameters control operation of the page replacement 76*0Sstevel@tonic-gate * algorithm. They are initialized to 0, and then computed at boot time 77*0Sstevel@tonic-gate * based on the size of the system. If they are patched non-zero in 78*0Sstevel@tonic-gate * a loaded vmunix they are left alone and may thus be changed per system 79*0Sstevel@tonic-gate * using adb on the loaded system. 80*0Sstevel@tonic-gate */ 81*0Sstevel@tonic-gate pgcnt_t slowscan = 0; 82*0Sstevel@tonic-gate pgcnt_t fastscan = 0; 83*0Sstevel@tonic-gate 84*0Sstevel@tonic-gate static pgcnt_t handspreadpages = 0; 85*0Sstevel@tonic-gate static int loopfraction = 2; 86*0Sstevel@tonic-gate static pgcnt_t looppages; 87*0Sstevel@tonic-gate static int min_percent_cpu = 4; 88*0Sstevel@tonic-gate static int max_percent_cpu = 80; 89*0Sstevel@tonic-gate static pgcnt_t maxfastscan = 0; 90*0Sstevel@tonic-gate static pgcnt_t maxslowscan = 100; 91*0Sstevel@tonic-gate 92*0Sstevel@tonic-gate pgcnt_t maxpgio = 0; 93*0Sstevel@tonic-gate pgcnt_t minfree = 0; 94*0Sstevel@tonic-gate pgcnt_t desfree = 0; 95*0Sstevel@tonic-gate pgcnt_t lotsfree = 0; 96*0Sstevel@tonic-gate pgcnt_t needfree = 0; 97*0Sstevel@tonic-gate pgcnt_t throttlefree = 0; 98*0Sstevel@tonic-gate pgcnt_t pageout_reserve = 0; 99*0Sstevel@tonic-gate 100*0Sstevel@tonic-gate pgcnt_t deficit; 101*0Sstevel@tonic-gate pgcnt_t nscan; 102*0Sstevel@tonic-gate pgcnt_t desscan; 103*0Sstevel@tonic-gate 104*0Sstevel@tonic-gate /* 105*0Sstevel@tonic-gate * Values for min_pageout_ticks, max_pageout_ticks and pageout_ticks 106*0Sstevel@tonic-gate * are the number of ticks in each wakeup cycle that gives the 107*0Sstevel@tonic-gate * equivalent of some underlying %CPU duty cycle. 108*0Sstevel@tonic-gate * When RATETOSCHEDPAGING is 4, and hz is 100, pageout_scanner is 109*0Sstevel@tonic-gate * awakened every 25 clock ticks. So, converting from %CPU to ticks 110*0Sstevel@tonic-gate * per wakeup cycle would be x% of 25, that is (x * 100) / 25. 111*0Sstevel@tonic-gate * So, for example, 4% == 1 tick and 80% == 20 ticks. 112*0Sstevel@tonic-gate * 113*0Sstevel@tonic-gate * min_pageout_ticks: 114*0Sstevel@tonic-gate * ticks/wakeup equivalent of min_percent_cpu. 115*0Sstevel@tonic-gate * 116*0Sstevel@tonic-gate * max_pageout_ticks: 117*0Sstevel@tonic-gate * ticks/wakeup equivalent of max_percent_cpu. 118*0Sstevel@tonic-gate * 119*0Sstevel@tonic-gate * pageout_ticks: 120*0Sstevel@tonic-gate * Number of clock ticks budgeted for each wakeup cycle. 121*0Sstevel@tonic-gate * Computed each time around by schedpaging(). 122*0Sstevel@tonic-gate * Varies between min_pageout_ticks .. max_pageout_ticks, 123*0Sstevel@tonic-gate * depending on memory pressure. 124*0Sstevel@tonic-gate * 125*0Sstevel@tonic-gate * pageout_lbolt: 126*0Sstevel@tonic-gate * Timestamp of the last time pageout_scanner woke up and started 127*0Sstevel@tonic-gate * (or resumed) scanning for not recently referenced pages. 128*0Sstevel@tonic-gate */ 129*0Sstevel@tonic-gate 130*0Sstevel@tonic-gate static clock_t min_pageout_ticks; 131*0Sstevel@tonic-gate static clock_t max_pageout_ticks; 132*0Sstevel@tonic-gate static clock_t pageout_ticks; 133*0Sstevel@tonic-gate static clock_t pageout_lbolt; 134*0Sstevel@tonic-gate 135*0Sstevel@tonic-gate static uint_t reset_hands; 136*0Sstevel@tonic-gate 137*0Sstevel@tonic-gate #define PAGES_POLL_MASK 1023 138*0Sstevel@tonic-gate 139*0Sstevel@tonic-gate /* 140*0Sstevel@tonic-gate * pageout_sample_lim: 141*0Sstevel@tonic-gate * The limit on the number of samples needed to establish a value 142*0Sstevel@tonic-gate * for new pageout parameters, fastscan, slowscan, and handspreadpages. 143*0Sstevel@tonic-gate * 144*0Sstevel@tonic-gate * pageout_sample_cnt: 145*0Sstevel@tonic-gate * Current sample number. Once the sample gets large enough, 146*0Sstevel@tonic-gate * set new values for handspreadpages, fastscan and slowscan. 147*0Sstevel@tonic-gate * 148*0Sstevel@tonic-gate * pageout_sample_pages: 149*0Sstevel@tonic-gate * The accumulated number of pages scanned during sampling. 150*0Sstevel@tonic-gate * 151*0Sstevel@tonic-gate * pageout_sample_ticks: 152*0Sstevel@tonic-gate * The accumulated clock ticks for the sample. 153*0Sstevel@tonic-gate * 154*0Sstevel@tonic-gate * pageout_rate: 155*0Sstevel@tonic-gate * Rate in pages/nanosecond, computed at the end of sampling. 156*0Sstevel@tonic-gate * 157*0Sstevel@tonic-gate * pageout_new_spread: 158*0Sstevel@tonic-gate * The new value to use for fastscan and handspreadpages. 159*0Sstevel@tonic-gate * Calculated after enough samples have been taken. 160*0Sstevel@tonic-gate */ 161*0Sstevel@tonic-gate 162*0Sstevel@tonic-gate typedef hrtime_t hrrate_t; 163*0Sstevel@tonic-gate 164*0Sstevel@tonic-gate static uint64_t pageout_sample_lim = 4; 165*0Sstevel@tonic-gate static uint64_t pageout_sample_cnt = 0; 166*0Sstevel@tonic-gate static pgcnt_t pageout_sample_pages = 0; 167*0Sstevel@tonic-gate static hrrate_t pageout_rate = 0; 168*0Sstevel@tonic-gate static pgcnt_t pageout_new_spread = 0; 169*0Sstevel@tonic-gate 170*0Sstevel@tonic-gate static clock_t pageout_cycle_ticks; 171*0Sstevel@tonic-gate static hrtime_t sample_start, sample_end; 172*0Sstevel@tonic-gate static hrtime_t pageout_sample_etime = 0; 173*0Sstevel@tonic-gate 174*0Sstevel@tonic-gate /* 175*0Sstevel@tonic-gate * Record number of times a pageout_scanner wakeup cycle finished because it 176*0Sstevel@tonic-gate * timed out (exceeded its CPU budget), rather than because it visited 177*0Sstevel@tonic-gate * its budgeted number of pages. 178*0Sstevel@tonic-gate */ 179*0Sstevel@tonic-gate uint64_t pageout_timeouts = 0; 180*0Sstevel@tonic-gate 181*0Sstevel@tonic-gate #ifdef VM_STATS 182*0Sstevel@tonic-gate static struct pageoutvmstats_str { 183*0Sstevel@tonic-gate ulong_t checkpage[3]; 184*0Sstevel@tonic-gate } pageoutvmstats; 185*0Sstevel@tonic-gate #endif /* VM_STATS */ 186*0Sstevel@tonic-gate 187*0Sstevel@tonic-gate /* 188*0Sstevel@tonic-gate * Threads waiting for free memory use this condition variable and lock until 189*0Sstevel@tonic-gate * memory becomes available. 190*0Sstevel@tonic-gate */ 191*0Sstevel@tonic-gate kmutex_t memavail_lock; 192*0Sstevel@tonic-gate kcondvar_t memavail_cv; 193*0Sstevel@tonic-gate 194*0Sstevel@tonic-gate /* 195*0Sstevel@tonic-gate * The size of the clock loop. 196*0Sstevel@tonic-gate */ 197*0Sstevel@tonic-gate #define LOOPPAGES total_pages 198*0Sstevel@tonic-gate 199*0Sstevel@tonic-gate /* 200*0Sstevel@tonic-gate * Set up the paging constants for the clock algorithm. 201*0Sstevel@tonic-gate * Called after the system is initialized and the amount of memory 202*0Sstevel@tonic-gate * and number of paging devices is known. 203*0Sstevel@tonic-gate * 204*0Sstevel@tonic-gate * lotsfree is 1/64 of memory, but at least 512K. 205*0Sstevel@tonic-gate * desfree is 1/2 of lotsfree. 206*0Sstevel@tonic-gate * minfree is 1/2 of desfree. 207*0Sstevel@tonic-gate * 208*0Sstevel@tonic-gate * Note: to revert to the paging algorithm of Solaris 2.4/2.5, set: 209*0Sstevel@tonic-gate * 210*0Sstevel@tonic-gate * lotsfree = btop(512K) 211*0Sstevel@tonic-gate * desfree = btop(200K) 212*0Sstevel@tonic-gate * minfree = btop(100K) 213*0Sstevel@tonic-gate * throttlefree = INT_MIN 214*0Sstevel@tonic-gate * max_percent_cpu = 4 215*0Sstevel@tonic-gate */ 216*0Sstevel@tonic-gate void 217*0Sstevel@tonic-gate setupclock(int recalc) 218*0Sstevel@tonic-gate { 219*0Sstevel@tonic-gate 220*0Sstevel@tonic-gate static spgcnt_t init_lfree, init_dfree, init_mfree; 221*0Sstevel@tonic-gate static spgcnt_t init_tfree, init_preserve, init_mpgio; 222*0Sstevel@tonic-gate static spgcnt_t init_mfscan, init_fscan, init_sscan, init_hspages; 223*0Sstevel@tonic-gate 224*0Sstevel@tonic-gate looppages = LOOPPAGES; 225*0Sstevel@tonic-gate 226*0Sstevel@tonic-gate /* 227*0Sstevel@tonic-gate * setupclock can now be called to recalculate the paging 228*0Sstevel@tonic-gate * parameters in the case of dynamic addition of memory. 229*0Sstevel@tonic-gate * So to make sure we make the proper calculations, if such a 230*0Sstevel@tonic-gate * situation should arise, we save away the initial values 231*0Sstevel@tonic-gate * of each parameter so we can recall them when needed. This 232*0Sstevel@tonic-gate * way we don't lose the settings an admin might have made 233*0Sstevel@tonic-gate * through the /etc/system file. 234*0Sstevel@tonic-gate */ 235*0Sstevel@tonic-gate 236*0Sstevel@tonic-gate if (!recalc) { 237*0Sstevel@tonic-gate init_lfree = lotsfree; 238*0Sstevel@tonic-gate init_dfree = desfree; 239*0Sstevel@tonic-gate init_mfree = minfree; 240*0Sstevel@tonic-gate init_tfree = throttlefree; 241*0Sstevel@tonic-gate init_preserve = pageout_reserve; 242*0Sstevel@tonic-gate init_mpgio = maxpgio; 243*0Sstevel@tonic-gate init_mfscan = maxfastscan; 244*0Sstevel@tonic-gate init_fscan = fastscan; 245*0Sstevel@tonic-gate init_sscan = slowscan; 246*0Sstevel@tonic-gate init_hspages = handspreadpages; 247*0Sstevel@tonic-gate } 248*0Sstevel@tonic-gate 249*0Sstevel@tonic-gate /* 250*0Sstevel@tonic-gate * Set up thresholds for paging: 251*0Sstevel@tonic-gate */ 252*0Sstevel@tonic-gate 253*0Sstevel@tonic-gate /* 254*0Sstevel@tonic-gate * Lotsfree is threshold where paging daemon turns on. 255*0Sstevel@tonic-gate */ 256*0Sstevel@tonic-gate if (init_lfree == 0 || init_lfree >= looppages) 257*0Sstevel@tonic-gate lotsfree = MAX(looppages / 64, btop(512 * 1024)); 258*0Sstevel@tonic-gate else 259*0Sstevel@tonic-gate lotsfree = init_lfree; 260*0Sstevel@tonic-gate 261*0Sstevel@tonic-gate /* 262*0Sstevel@tonic-gate * Desfree is amount of memory desired free. 263*0Sstevel@tonic-gate * If less than this for extended period, start swapping. 264*0Sstevel@tonic-gate */ 265*0Sstevel@tonic-gate if (init_dfree == 0 || init_dfree >= lotsfree) 266*0Sstevel@tonic-gate desfree = lotsfree / 2; 267*0Sstevel@tonic-gate else 268*0Sstevel@tonic-gate desfree = init_dfree; 269*0Sstevel@tonic-gate 270*0Sstevel@tonic-gate /* 271*0Sstevel@tonic-gate * Minfree is minimal amount of free memory which is tolerable. 272*0Sstevel@tonic-gate */ 273*0Sstevel@tonic-gate if (init_mfree == 0 || init_mfree >= desfree) 274*0Sstevel@tonic-gate minfree = desfree / 2; 275*0Sstevel@tonic-gate else 276*0Sstevel@tonic-gate minfree = init_mfree; 277*0Sstevel@tonic-gate 278*0Sstevel@tonic-gate /* 279*0Sstevel@tonic-gate * Throttlefree is the point at which we start throttling 280*0Sstevel@tonic-gate * PG_WAIT requests until enough memory becomes available. 281*0Sstevel@tonic-gate */ 282*0Sstevel@tonic-gate if (init_tfree == 0 || init_tfree >= desfree) 283*0Sstevel@tonic-gate throttlefree = minfree; 284*0Sstevel@tonic-gate else 285*0Sstevel@tonic-gate throttlefree = init_tfree; 286*0Sstevel@tonic-gate 287*0Sstevel@tonic-gate /* 288*0Sstevel@tonic-gate * Pageout_reserve is the number of pages that we keep in 289*0Sstevel@tonic-gate * stock for pageout's own use. Having a few such pages 290*0Sstevel@tonic-gate * provides insurance against system deadlock due to 291*0Sstevel@tonic-gate * pageout needing pages. When freemem < pageout_reserve, 292*0Sstevel@tonic-gate * non-blocking allocations are denied to any threads 293*0Sstevel@tonic-gate * other than pageout and sched. (At some point we might 294*0Sstevel@tonic-gate * want to consider a per-thread flag like T_PUSHING_PAGES 295*0Sstevel@tonic-gate * to indicate that a thread is part of the page-pushing 296*0Sstevel@tonic-gate * dance (e.g. an interrupt thread) and thus is entitled 297*0Sstevel@tonic-gate * to the same special dispensation we accord pageout.) 298*0Sstevel@tonic-gate */ 299*0Sstevel@tonic-gate if (init_preserve == 0 || init_preserve >= throttlefree) 300*0Sstevel@tonic-gate pageout_reserve = throttlefree / 2; 301*0Sstevel@tonic-gate else 302*0Sstevel@tonic-gate pageout_reserve = init_preserve; 303*0Sstevel@tonic-gate 304*0Sstevel@tonic-gate /* 305*0Sstevel@tonic-gate * Maxpgio thresholds how much paging is acceptable. 306*0Sstevel@tonic-gate * This figures that 2/3 busy on an arm is all that is 307*0Sstevel@tonic-gate * tolerable for paging. We assume one operation per disk rev. 308*0Sstevel@tonic-gate * 309*0Sstevel@tonic-gate * XXX - Does not account for multiple swap devices. 310*0Sstevel@tonic-gate */ 311*0Sstevel@tonic-gate if (init_mpgio == 0) 312*0Sstevel@tonic-gate maxpgio = (DISKRPM * 2) / 3; 313*0Sstevel@tonic-gate else 314*0Sstevel@tonic-gate maxpgio = init_mpgio; 315*0Sstevel@tonic-gate 316*0Sstevel@tonic-gate /* 317*0Sstevel@tonic-gate * The clock scan rate varies between fastscan and slowscan 318*0Sstevel@tonic-gate * based on the amount of free memory available. Fastscan 319*0Sstevel@tonic-gate * rate should be set based on the number pages that can be 320*0Sstevel@tonic-gate * scanned per sec using ~10% of processor time. Since this 321*0Sstevel@tonic-gate * value depends on the processor, MMU, Mhz etc., it is 322*0Sstevel@tonic-gate * difficult to determine it in a generic manner for all 323*0Sstevel@tonic-gate * architectures. 324*0Sstevel@tonic-gate * 325*0Sstevel@tonic-gate * Instead of trying to determine the number of pages scanned 326*0Sstevel@tonic-gate * per sec for every processor, fastscan is set to be the smaller 327*0Sstevel@tonic-gate * of 1/2 of memory or MAXHANDSPREADPAGES and the sampling 328*0Sstevel@tonic-gate * time is limited to ~4% of processor time. 329*0Sstevel@tonic-gate * 330*0Sstevel@tonic-gate * Setting fastscan to be 1/2 of memory allows pageout to scan 331*0Sstevel@tonic-gate * all of memory in ~2 secs. This implies that user pages not 332*0Sstevel@tonic-gate * accessed within 1 sec (assuming, handspreadpages == fastscan) 333*0Sstevel@tonic-gate * can be reclaimed when free memory is very low. Stealing pages 334*0Sstevel@tonic-gate * not accessed within 1 sec seems reasonable and ensures that 335*0Sstevel@tonic-gate * active user processes don't thrash. 336*0Sstevel@tonic-gate * 337*0Sstevel@tonic-gate * Smaller values of fastscan result in scanning fewer pages 338*0Sstevel@tonic-gate * every second and consequently pageout may not be able to free 339*0Sstevel@tonic-gate * sufficient memory to maintain the minimum threshold. Larger 340*0Sstevel@tonic-gate * values of fastscan result in scanning a lot more pages which 341*0Sstevel@tonic-gate * could lead to thrashing and higher CPU usage. 342*0Sstevel@tonic-gate * 343*0Sstevel@tonic-gate * Fastscan needs to be limited to a maximum value and should not 344*0Sstevel@tonic-gate * scale with memory to prevent pageout from consuming too much 345*0Sstevel@tonic-gate * time for scanning on slow CPU's and avoid thrashing, as a 346*0Sstevel@tonic-gate * result of scanning too many pages, on faster CPU's. 347*0Sstevel@tonic-gate * The value of 64 Meg was chosen for MAXHANDSPREADPAGES 348*0Sstevel@tonic-gate * (the upper bound for fastscan) based on the average number 349*0Sstevel@tonic-gate * of pages that can potentially be scanned in ~1 sec (using ~4% 350*0Sstevel@tonic-gate * of the CPU) on some of the following machines that currently 351*0Sstevel@tonic-gate * run Solaris 2.x: 352*0Sstevel@tonic-gate * 353*0Sstevel@tonic-gate * average memory scanned in ~1 sec 354*0Sstevel@tonic-gate * 355*0Sstevel@tonic-gate * 25 Mhz SS1+: 23 Meg 356*0Sstevel@tonic-gate * LX: 37 Meg 357*0Sstevel@tonic-gate * 50 Mhz SC2000: 68 Meg 358*0Sstevel@tonic-gate * 359*0Sstevel@tonic-gate * 40 Mhz 486: 26 Meg 360*0Sstevel@tonic-gate * 66 Mhz 486: 42 Meg 361*0Sstevel@tonic-gate * 362*0Sstevel@tonic-gate * When free memory falls just below lotsfree, the scan rate 363*0Sstevel@tonic-gate * goes from 0 to slowscan (i.e., pageout starts running). This 364*0Sstevel@tonic-gate * transition needs to be smooth and is achieved by ensuring that 365*0Sstevel@tonic-gate * pageout scans a small number of pages to satisfy the transient 366*0Sstevel@tonic-gate * memory demand. This is set to not exceed 100 pages/sec (25 per 367*0Sstevel@tonic-gate * wakeup) since scanning that many pages has no noticible impact 368*0Sstevel@tonic-gate * on system performance. 369*0Sstevel@tonic-gate * 370*0Sstevel@tonic-gate * In addition to setting fastscan and slowscan, pageout is 371*0Sstevel@tonic-gate * limited to using ~4% of the CPU. This results in increasing 372*0Sstevel@tonic-gate * the time taken to scan all of memory, which in turn means that 373*0Sstevel@tonic-gate * user processes have a better opportunity of preventing their 374*0Sstevel@tonic-gate * pages from being stolen. This has a positive effect on 375*0Sstevel@tonic-gate * interactive and overall system performance when memory demand 376*0Sstevel@tonic-gate * is high. 377*0Sstevel@tonic-gate * 378*0Sstevel@tonic-gate * Thus, the rate at which pages are scanned for replacement will 379*0Sstevel@tonic-gate * vary linearly between slowscan and the number of pages that 380*0Sstevel@tonic-gate * can be scanned using ~4% of processor time instead of varying 381*0Sstevel@tonic-gate * linearly between slowscan and fastscan. 382*0Sstevel@tonic-gate * 383*0Sstevel@tonic-gate * Also, the processor time used by pageout will vary from ~1% 384*0Sstevel@tonic-gate * at slowscan to ~4% at fastscan instead of varying between 385*0Sstevel@tonic-gate * ~1% at slowscan and ~10% at fastscan. 386*0Sstevel@tonic-gate * 387*0Sstevel@tonic-gate * The values chosen for the various VM parameters (fastscan, 388*0Sstevel@tonic-gate * handspreadpages, etc) are not universally true for all machines, 389*0Sstevel@tonic-gate * but appear to be a good rule of thumb for the machines we've 390*0Sstevel@tonic-gate * tested. They have the following ranges: 391*0Sstevel@tonic-gate * 392*0Sstevel@tonic-gate * cpu speed: 20 to 70 Mhz 393*0Sstevel@tonic-gate * page size: 4K to 8K 394*0Sstevel@tonic-gate * memory size: 16M to 5G 395*0Sstevel@tonic-gate * page scan rate: 4000 - 17400 4K pages per sec 396*0Sstevel@tonic-gate * 397*0Sstevel@tonic-gate * The values need to be re-examined for machines which don't 398*0Sstevel@tonic-gate * fall into the various ranges (e.g., slower or faster CPUs, 399*0Sstevel@tonic-gate * smaller or larger pagesizes etc) shown above. 400*0Sstevel@tonic-gate * 401*0Sstevel@tonic-gate * On an MP machine, pageout is often unable to maintain the 402*0Sstevel@tonic-gate * minimum paging thresholds under heavy load. This is due to 403*0Sstevel@tonic-gate * the fact that user processes running on other CPU's can be 404*0Sstevel@tonic-gate * dirtying memory at a much faster pace than pageout can find 405*0Sstevel@tonic-gate * pages to free. The memory demands could be met by enabling 406*0Sstevel@tonic-gate * more than one CPU to run the clock algorithm in such a manner 407*0Sstevel@tonic-gate * that the various clock hands don't overlap. This also makes 408*0Sstevel@tonic-gate * it more difficult to determine the values for fastscan, slowscan 409*0Sstevel@tonic-gate * and handspreadpages. 410*0Sstevel@tonic-gate * 411*0Sstevel@tonic-gate * The swapper is currently used to free up memory when pageout 412*0Sstevel@tonic-gate * is unable to meet memory demands by swapping out processes. 413*0Sstevel@tonic-gate * In addition to freeing up memory, swapping also reduces the 414*0Sstevel@tonic-gate * demand for memory by preventing user processes from running 415*0Sstevel@tonic-gate * and thereby consuming memory. 416*0Sstevel@tonic-gate */ 417*0Sstevel@tonic-gate if (init_mfscan == 0) { 418*0Sstevel@tonic-gate if (pageout_new_spread != 0) 419*0Sstevel@tonic-gate maxfastscan = pageout_new_spread; 420*0Sstevel@tonic-gate else 421*0Sstevel@tonic-gate maxfastscan = MAXHANDSPREADPAGES; 422*0Sstevel@tonic-gate } else { 423*0Sstevel@tonic-gate maxfastscan = init_mfscan; 424*0Sstevel@tonic-gate } 425*0Sstevel@tonic-gate if (init_fscan == 0) 426*0Sstevel@tonic-gate fastscan = MIN(looppages / loopfraction, maxfastscan); 427*0Sstevel@tonic-gate else 428*0Sstevel@tonic-gate fastscan = init_fscan; 429*0Sstevel@tonic-gate if (fastscan > looppages / loopfraction) 430*0Sstevel@tonic-gate fastscan = looppages / loopfraction; 431*0Sstevel@tonic-gate 432*0Sstevel@tonic-gate /* 433*0Sstevel@tonic-gate * Set slow scan time to 1/10 the fast scan time, but 434*0Sstevel@tonic-gate * not to exceed maxslowscan. 435*0Sstevel@tonic-gate */ 436*0Sstevel@tonic-gate if (init_sscan == 0) 437*0Sstevel@tonic-gate slowscan = MIN(fastscan / 10, maxslowscan); 438*0Sstevel@tonic-gate else 439*0Sstevel@tonic-gate slowscan = init_sscan; 440*0Sstevel@tonic-gate if (slowscan > fastscan / 2) 441*0Sstevel@tonic-gate slowscan = fastscan / 2; 442*0Sstevel@tonic-gate 443*0Sstevel@tonic-gate /* 444*0Sstevel@tonic-gate * Handspreadpages is distance (in pages) between front and back 445*0Sstevel@tonic-gate * pageout daemon hands. The amount of time to reclaim a page 446*0Sstevel@tonic-gate * once pageout examines it increases with this distance and 447*0Sstevel@tonic-gate * decreases as the scan rate rises. It must be < the amount 448*0Sstevel@tonic-gate * of pageable memory. 449*0Sstevel@tonic-gate * 450*0Sstevel@tonic-gate * Since pageout is limited to ~4% of the CPU, setting handspreadpages 451*0Sstevel@tonic-gate * to be "fastscan" results in the front hand being a few secs 452*0Sstevel@tonic-gate * (varies based on the processor speed) ahead of the back hand 453*0Sstevel@tonic-gate * at fastscan rates. This distance can be further reduced, if 454*0Sstevel@tonic-gate * necessary, by increasing the processor time used by pageout 455*0Sstevel@tonic-gate * to be more than ~4% and preferrably not more than ~10%. 456*0Sstevel@tonic-gate * 457*0Sstevel@tonic-gate * As a result, user processes have a much better chance of 458*0Sstevel@tonic-gate * referencing their pages before the back hand examines them. 459*0Sstevel@tonic-gate * This also significantly lowers the number of reclaims from 460*0Sstevel@tonic-gate * the freelist since pageout does not end up freeing pages which 461*0Sstevel@tonic-gate * may be referenced a sec later. 462*0Sstevel@tonic-gate */ 463*0Sstevel@tonic-gate if (init_hspages == 0) 464*0Sstevel@tonic-gate handspreadpages = fastscan; 465*0Sstevel@tonic-gate else 466*0Sstevel@tonic-gate handspreadpages = init_hspages; 467*0Sstevel@tonic-gate 468*0Sstevel@tonic-gate /* 469*0Sstevel@tonic-gate * Make sure that back hand follows front hand by at least 470*0Sstevel@tonic-gate * 1/RATETOSCHEDPAGING seconds. Without this test, it is possible 471*0Sstevel@tonic-gate * for the back hand to look at a page during the same wakeup of 472*0Sstevel@tonic-gate * the pageout daemon in which the front hand cleared its ref bit. 473*0Sstevel@tonic-gate */ 474*0Sstevel@tonic-gate if (handspreadpages >= looppages) 475*0Sstevel@tonic-gate handspreadpages = looppages - 1; 476*0Sstevel@tonic-gate 477*0Sstevel@tonic-gate /* 478*0Sstevel@tonic-gate * If we have been called to recalculate the parameters, 479*0Sstevel@tonic-gate * set a flag to re-evaluate the clock hand pointers. 480*0Sstevel@tonic-gate */ 481*0Sstevel@tonic-gate if (recalc) 482*0Sstevel@tonic-gate reset_hands = 1; 483*0Sstevel@tonic-gate } 484*0Sstevel@tonic-gate 485*0Sstevel@tonic-gate /* 486*0Sstevel@tonic-gate * Pageout scheduling. 487*0Sstevel@tonic-gate * 488*0Sstevel@tonic-gate * Schedpaging controls the rate at which the page out daemon runs by 489*0Sstevel@tonic-gate * setting the global variables nscan and desscan RATETOSCHEDPAGING 490*0Sstevel@tonic-gate * times a second. Nscan records the number of pages pageout has examined 491*0Sstevel@tonic-gate * in its current pass; schedpaging resets this value to zero each time 492*0Sstevel@tonic-gate * it runs. Desscan records the number of pages pageout should examine 493*0Sstevel@tonic-gate * in its next pass; schedpaging sets this value based on the amount of 494*0Sstevel@tonic-gate * currently available memory. 495*0Sstevel@tonic-gate */ 496*0Sstevel@tonic-gate 497*0Sstevel@tonic-gate #define RATETOSCHEDPAGING 4 /* hz that is */ 498*0Sstevel@tonic-gate 499*0Sstevel@tonic-gate static kmutex_t pageout_mutex; /* held while pageout or schedpaging running */ 500*0Sstevel@tonic-gate 501*0Sstevel@tonic-gate /* 502*0Sstevel@tonic-gate * Pool of available async pageout putpage requests. 503*0Sstevel@tonic-gate */ 504*0Sstevel@tonic-gate static struct async_reqs *push_req; 505*0Sstevel@tonic-gate static struct async_reqs *req_freelist; /* available req structs */ 506*0Sstevel@tonic-gate static struct async_reqs *push_list; /* pending reqs */ 507*0Sstevel@tonic-gate static kmutex_t push_lock; /* protects req pool */ 508*0Sstevel@tonic-gate static kcondvar_t push_cv; 509*0Sstevel@tonic-gate 510*0Sstevel@tonic-gate static int async_list_size = 256; /* number of async request structs */ 511*0Sstevel@tonic-gate 512*0Sstevel@tonic-gate static void pageout_scanner(void); 513*0Sstevel@tonic-gate 514*0Sstevel@tonic-gate /* 515*0Sstevel@tonic-gate * If a page is being shared more than "po_share" times 516*0Sstevel@tonic-gate * then leave it alone- don't page it out. 517*0Sstevel@tonic-gate */ 518*0Sstevel@tonic-gate #define MIN_PO_SHARE (8) 519*0Sstevel@tonic-gate #define MAX_PO_SHARE ((MIN_PO_SHARE) << 24) 520*0Sstevel@tonic-gate ulong_t po_share = MIN_PO_SHARE; 521*0Sstevel@tonic-gate 522*0Sstevel@tonic-gate /* 523*0Sstevel@tonic-gate * Schedule rate for paging. 524*0Sstevel@tonic-gate * Rate is linear interpolation between 525*0Sstevel@tonic-gate * slowscan with lotsfree and fastscan when out of memory. 526*0Sstevel@tonic-gate */ 527*0Sstevel@tonic-gate static void 528*0Sstevel@tonic-gate schedpaging(void *arg) 529*0Sstevel@tonic-gate { 530*0Sstevel@tonic-gate spgcnt_t vavail; 531*0Sstevel@tonic-gate 532*0Sstevel@tonic-gate if (freemem < lotsfree + needfree + kmem_reapahead) 533*0Sstevel@tonic-gate kmem_reap(); 534*0Sstevel@tonic-gate 535*0Sstevel@tonic-gate if (freemem < lotsfree + needfree + seg_preapahead) 536*0Sstevel@tonic-gate seg_preap(); 537*0Sstevel@tonic-gate 538*0Sstevel@tonic-gate if (kcage_on && (kcage_freemem < kcage_desfree || kcage_needfree)) 539*0Sstevel@tonic-gate kcage_cageout_wakeup(); 540*0Sstevel@tonic-gate 541*0Sstevel@tonic-gate if (mutex_tryenter(&pageout_mutex)) { 542*0Sstevel@tonic-gate /* pageout() not running */ 543*0Sstevel@tonic-gate nscan = 0; 544*0Sstevel@tonic-gate vavail = freemem - deficit; 545*0Sstevel@tonic-gate if (vavail < 0) 546*0Sstevel@tonic-gate vavail = 0; 547*0Sstevel@tonic-gate if (vavail > lotsfree) 548*0Sstevel@tonic-gate vavail = lotsfree; 549*0Sstevel@tonic-gate 550*0Sstevel@tonic-gate /* 551*0Sstevel@tonic-gate * Fix for 1161438 (CRS SPR# 73922). All variables 552*0Sstevel@tonic-gate * in the original calculation for desscan were 32 bit signed 553*0Sstevel@tonic-gate * ints. As freemem approaches 0x0 on a system with 1 Gig or 554*0Sstevel@tonic-gate * more of memory, the calculation can overflow. When this 555*0Sstevel@tonic-gate * happens, desscan becomes negative and pageout_scanner() 556*0Sstevel@tonic-gate * stops paging out. 557*0Sstevel@tonic-gate */ 558*0Sstevel@tonic-gate if (needfree) { 559*0Sstevel@tonic-gate desscan = fastscan / RATETOSCHEDPAGING; 560*0Sstevel@tonic-gate } else { 561*0Sstevel@tonic-gate spgcnt_t faststmp, slowstmp, result; 562*0Sstevel@tonic-gate 563*0Sstevel@tonic-gate slowstmp = slowscan * vavail; 564*0Sstevel@tonic-gate faststmp = fastscan * (lotsfree - vavail); 565*0Sstevel@tonic-gate result = (slowstmp + faststmp) / 566*0Sstevel@tonic-gate nz(lotsfree) / RATETOSCHEDPAGING; 567*0Sstevel@tonic-gate desscan = (pgcnt_t)result; 568*0Sstevel@tonic-gate } 569*0Sstevel@tonic-gate 570*0Sstevel@tonic-gate pageout_ticks = min_pageout_ticks + (lotsfree - vavail) * 571*0Sstevel@tonic-gate (max_pageout_ticks - min_pageout_ticks) / nz(lotsfree); 572*0Sstevel@tonic-gate 573*0Sstevel@tonic-gate if (freemem < lotsfree + needfree || 574*0Sstevel@tonic-gate pageout_sample_cnt < pageout_sample_lim) { 575*0Sstevel@tonic-gate TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL, 576*0Sstevel@tonic-gate "pageout_cv_signal:freemem %ld", freemem); 577*0Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 578*0Sstevel@tonic-gate } else { 579*0Sstevel@tonic-gate /* 580*0Sstevel@tonic-gate * There are enough free pages, no need to 581*0Sstevel@tonic-gate * kick the scanner thread. And next time 582*0Sstevel@tonic-gate * around, keep more of the `highly shared' 583*0Sstevel@tonic-gate * pages. 584*0Sstevel@tonic-gate */ 585*0Sstevel@tonic-gate cv_signal_pageout(); 586*0Sstevel@tonic-gate if (po_share > MIN_PO_SHARE) { 587*0Sstevel@tonic-gate po_share >>= 1; 588*0Sstevel@tonic-gate } 589*0Sstevel@tonic-gate } 590*0Sstevel@tonic-gate mutex_exit(&pageout_mutex); 591*0Sstevel@tonic-gate } 592*0Sstevel@tonic-gate 593*0Sstevel@tonic-gate /* 594*0Sstevel@tonic-gate * Signal threads waiting for available memory. 595*0Sstevel@tonic-gate * NOTE: usually we need to grab memavail_lock before cv_broadcast, but 596*0Sstevel@tonic-gate * in this case it is not needed - the waiters will be waken up during 597*0Sstevel@tonic-gate * the next invocation of this function. 598*0Sstevel@tonic-gate */ 599*0Sstevel@tonic-gate if (kmem_avail() > 0) 600*0Sstevel@tonic-gate cv_broadcast(&memavail_cv); 601*0Sstevel@tonic-gate 602*0Sstevel@tonic-gate (void) timeout(schedpaging, arg, hz / RATETOSCHEDPAGING); 603*0Sstevel@tonic-gate } 604*0Sstevel@tonic-gate 605*0Sstevel@tonic-gate pgcnt_t pushes; 606*0Sstevel@tonic-gate ulong_t push_list_size; /* # of requests on pageout queue */ 607*0Sstevel@tonic-gate 608*0Sstevel@tonic-gate #define FRONT 1 609*0Sstevel@tonic-gate #define BACK 2 610*0Sstevel@tonic-gate 611*0Sstevel@tonic-gate int dopageout = 1; /* must be non-zero to turn page stealing on */ 612*0Sstevel@tonic-gate 613*0Sstevel@tonic-gate /* 614*0Sstevel@tonic-gate * The page out daemon, which runs as process 2. 615*0Sstevel@tonic-gate * 616*0Sstevel@tonic-gate * As long as there are at least lotsfree pages, 617*0Sstevel@tonic-gate * this process is not run. When the number of free 618*0Sstevel@tonic-gate * pages stays in the range desfree to lotsfree, 619*0Sstevel@tonic-gate * this daemon runs through the pages in the loop 620*0Sstevel@tonic-gate * at a rate determined in schedpaging(). Pageout manages 621*0Sstevel@tonic-gate * two hands on the clock. The front hand moves through 622*0Sstevel@tonic-gate * memory, clearing the reference bit, 623*0Sstevel@tonic-gate * and stealing pages from procs that are over maxrss. 624*0Sstevel@tonic-gate * The back hand travels a distance behind the front hand, 625*0Sstevel@tonic-gate * freeing the pages that have not been referenced in the time 626*0Sstevel@tonic-gate * since the front hand passed. If modified, they are pushed to 627*0Sstevel@tonic-gate * swap before being freed. 628*0Sstevel@tonic-gate * 629*0Sstevel@tonic-gate * There are 2 threads that act on behalf of the pageout process. 630*0Sstevel@tonic-gate * One thread scans pages (pageout_scanner) and frees them up if 631*0Sstevel@tonic-gate * they don't require any VOP_PUTPAGE operation. If a page must be 632*0Sstevel@tonic-gate * written back to its backing store, the request is put on a list 633*0Sstevel@tonic-gate * and the other (pageout) thread is signaled. The pageout thread 634*0Sstevel@tonic-gate * grabs VOP_PUTPAGE requests from the list, and processes them. 635*0Sstevel@tonic-gate * Some filesystems may require resources for the VOP_PUTPAGE 636*0Sstevel@tonic-gate * operations (like memory) and hence can block the pageout 637*0Sstevel@tonic-gate * thread, but the scanner thread can still operate. There is still 638*0Sstevel@tonic-gate * no gaurentee that memory deadlocks cannot occur. 639*0Sstevel@tonic-gate * 640*0Sstevel@tonic-gate * For now, this thing is in very rough form. 641*0Sstevel@tonic-gate */ 642*0Sstevel@tonic-gate void 643*0Sstevel@tonic-gate pageout() 644*0Sstevel@tonic-gate { 645*0Sstevel@tonic-gate struct async_reqs *arg; 646*0Sstevel@tonic-gate pri_t pageout_pri; 647*0Sstevel@tonic-gate int i; 648*0Sstevel@tonic-gate pgcnt_t max_pushes; 649*0Sstevel@tonic-gate callb_cpr_t cprinfo; 650*0Sstevel@tonic-gate 651*0Sstevel@tonic-gate proc_pageout = ttoproc(curthread); 652*0Sstevel@tonic-gate proc_pageout->p_cstime = 0; 653*0Sstevel@tonic-gate proc_pageout->p_stime = 0; 654*0Sstevel@tonic-gate proc_pageout->p_cutime = 0; 655*0Sstevel@tonic-gate proc_pageout->p_utime = 0; 656*0Sstevel@tonic-gate bcopy("pageout", u.u_psargs, 8); 657*0Sstevel@tonic-gate bcopy("pageout", u.u_comm, 7); 658*0Sstevel@tonic-gate 659*0Sstevel@tonic-gate /* 660*0Sstevel@tonic-gate * Create pageout scanner thread 661*0Sstevel@tonic-gate */ 662*0Sstevel@tonic-gate mutex_init(&pageout_mutex, NULL, MUTEX_DEFAULT, NULL); 663*0Sstevel@tonic-gate mutex_init(&push_lock, NULL, MUTEX_DEFAULT, NULL); 664*0Sstevel@tonic-gate 665*0Sstevel@tonic-gate /* 666*0Sstevel@tonic-gate * Allocate and initialize the async request structures 667*0Sstevel@tonic-gate * for pageout. 668*0Sstevel@tonic-gate */ 669*0Sstevel@tonic-gate push_req = (struct async_reqs *) 670*0Sstevel@tonic-gate kmem_zalloc(async_list_size * sizeof (struct async_reqs), KM_SLEEP); 671*0Sstevel@tonic-gate 672*0Sstevel@tonic-gate req_freelist = push_req; 673*0Sstevel@tonic-gate for (i = 0; i < async_list_size - 1; i++) 674*0Sstevel@tonic-gate push_req[i].a_next = &push_req[i + 1]; 675*0Sstevel@tonic-gate 676*0Sstevel@tonic-gate pageout_pri = curthread->t_pri; 677*0Sstevel@tonic-gate pageout_init(pageout_scanner, proc_pageout, pageout_pri - 1); 678*0Sstevel@tonic-gate 679*0Sstevel@tonic-gate /* 680*0Sstevel@tonic-gate * kick off pageout scheduler. 681*0Sstevel@tonic-gate */ 682*0Sstevel@tonic-gate schedpaging(NULL); 683*0Sstevel@tonic-gate 684*0Sstevel@tonic-gate /* 685*0Sstevel@tonic-gate * Create kernel cage thread. 686*0Sstevel@tonic-gate * The kernel cage thread is started under the pageout process 687*0Sstevel@tonic-gate * to take advantage of the less restricted page allocation 688*0Sstevel@tonic-gate * in page_create_throttle(). 689*0Sstevel@tonic-gate */ 690*0Sstevel@tonic-gate kcage_cageout_init(); 691*0Sstevel@tonic-gate 692*0Sstevel@tonic-gate /* 693*0Sstevel@tonic-gate * Limit pushes to avoid saturating pageout devices. 694*0Sstevel@tonic-gate */ 695*0Sstevel@tonic-gate max_pushes = maxpgio / RATETOSCHEDPAGING; 696*0Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &push_lock, callb_generic_cpr, "pageout"); 697*0Sstevel@tonic-gate 698*0Sstevel@tonic-gate for (;;) { 699*0Sstevel@tonic-gate mutex_enter(&push_lock); 700*0Sstevel@tonic-gate 701*0Sstevel@tonic-gate while ((arg = push_list) == NULL || pushes > max_pushes) { 702*0Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 703*0Sstevel@tonic-gate cv_wait(&push_cv, &push_lock); 704*0Sstevel@tonic-gate pushes = 0; 705*0Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &push_lock); 706*0Sstevel@tonic-gate } 707*0Sstevel@tonic-gate push_list = arg->a_next; 708*0Sstevel@tonic-gate arg->a_next = NULL; 709*0Sstevel@tonic-gate mutex_exit(&push_lock); 710*0Sstevel@tonic-gate 711*0Sstevel@tonic-gate if (VOP_PUTPAGE(arg->a_vp, (offset_t)arg->a_off, 712*0Sstevel@tonic-gate arg->a_len, arg->a_flags, 713*0Sstevel@tonic-gate arg->a_cred) == 0) { 714*0Sstevel@tonic-gate pushes++; 715*0Sstevel@tonic-gate } 716*0Sstevel@tonic-gate 717*0Sstevel@tonic-gate /* vp held by checkpage() */ 718*0Sstevel@tonic-gate VN_RELE(arg->a_vp); 719*0Sstevel@tonic-gate 720*0Sstevel@tonic-gate mutex_enter(&push_lock); 721*0Sstevel@tonic-gate arg->a_next = req_freelist; /* back on freelist */ 722*0Sstevel@tonic-gate req_freelist = arg; 723*0Sstevel@tonic-gate push_list_size--; 724*0Sstevel@tonic-gate mutex_exit(&push_lock); 725*0Sstevel@tonic-gate } 726*0Sstevel@tonic-gate } 727*0Sstevel@tonic-gate 728*0Sstevel@tonic-gate /* 729*0Sstevel@tonic-gate * Kernel thread that scans pages looking for ones to free 730*0Sstevel@tonic-gate */ 731*0Sstevel@tonic-gate static void 732*0Sstevel@tonic-gate pageout_scanner(void) 733*0Sstevel@tonic-gate { 734*0Sstevel@tonic-gate struct page *fronthand, *backhand; 735*0Sstevel@tonic-gate uint_t count; 736*0Sstevel@tonic-gate callb_cpr_t cprinfo; 737*0Sstevel@tonic-gate pgcnt_t nscan_limit; 738*0Sstevel@tonic-gate pgcnt_t pcount; 739*0Sstevel@tonic-gate 740*0Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &pageout_mutex, callb_generic_cpr, "poscan"); 741*0Sstevel@tonic-gate mutex_enter(&pageout_mutex); 742*0Sstevel@tonic-gate 743*0Sstevel@tonic-gate /* 744*0Sstevel@tonic-gate * The restart case does not attempt to point the hands at roughly 745*0Sstevel@tonic-gate * the right point on the assumption that after one circuit things 746*0Sstevel@tonic-gate * will have settled down - and restarts shouldn't be that often. 747*0Sstevel@tonic-gate */ 748*0Sstevel@tonic-gate 749*0Sstevel@tonic-gate /* 750*0Sstevel@tonic-gate * Set the two clock hands to be separated by a reasonable amount, 751*0Sstevel@tonic-gate * but no more than 360 degrees apart. 752*0Sstevel@tonic-gate */ 753*0Sstevel@tonic-gate backhand = page_first(); 754*0Sstevel@tonic-gate if (handspreadpages >= total_pages) 755*0Sstevel@tonic-gate fronthand = page_nextn(backhand, total_pages - 1); 756*0Sstevel@tonic-gate else 757*0Sstevel@tonic-gate fronthand = page_nextn(backhand, handspreadpages); 758*0Sstevel@tonic-gate 759*0Sstevel@tonic-gate min_pageout_ticks = MAX(1, 760*0Sstevel@tonic-gate ((hz * min_percent_cpu) / 100) / RATETOSCHEDPAGING); 761*0Sstevel@tonic-gate max_pageout_ticks = MAX(min_pageout_ticks, 762*0Sstevel@tonic-gate ((hz * max_percent_cpu) / 100) / RATETOSCHEDPAGING); 763*0Sstevel@tonic-gate 764*0Sstevel@tonic-gate loop: 765*0Sstevel@tonic-gate cv_signal_pageout(); 766*0Sstevel@tonic-gate 767*0Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 768*0Sstevel@tonic-gate cv_wait(&proc_pageout->p_cv, &pageout_mutex); 769*0Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &pageout_mutex); 770*0Sstevel@tonic-gate 771*0Sstevel@tonic-gate if (!dopageout) 772*0Sstevel@tonic-gate goto loop; 773*0Sstevel@tonic-gate 774*0Sstevel@tonic-gate if (reset_hands) { 775*0Sstevel@tonic-gate reset_hands = 0; 776*0Sstevel@tonic-gate 777*0Sstevel@tonic-gate backhand = page_first(); 778*0Sstevel@tonic-gate if (handspreadpages >= total_pages) 779*0Sstevel@tonic-gate fronthand = page_nextn(backhand, total_pages - 1); 780*0Sstevel@tonic-gate else 781*0Sstevel@tonic-gate fronthand = page_nextn(backhand, handspreadpages); 782*0Sstevel@tonic-gate } 783*0Sstevel@tonic-gate 784*0Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, vm, pgrrun, 1); 785*0Sstevel@tonic-gate count = 0; 786*0Sstevel@tonic-gate 787*0Sstevel@tonic-gate TRACE_4(TR_FAC_VM, TR_PAGEOUT_START, 788*0Sstevel@tonic-gate "pageout_start:freemem %ld lotsfree %ld nscan %ld desscan %ld", 789*0Sstevel@tonic-gate freemem, lotsfree, nscan, desscan); 790*0Sstevel@tonic-gate 791*0Sstevel@tonic-gate /* Kernel probe */ 792*0Sstevel@tonic-gate TNF_PROBE_2(pageout_scan_start, "vm pagedaemon", /* CSTYLED */, 793*0Sstevel@tonic-gate tnf_ulong, pages_free, freemem, 794*0Sstevel@tonic-gate tnf_ulong, pages_needed, needfree); 795*0Sstevel@tonic-gate 796*0Sstevel@tonic-gate pcount = 0; 797*0Sstevel@tonic-gate if (pageout_sample_cnt < pageout_sample_lim) { 798*0Sstevel@tonic-gate nscan_limit = total_pages; 799*0Sstevel@tonic-gate } else { 800*0Sstevel@tonic-gate nscan_limit = desscan; 801*0Sstevel@tonic-gate } 802*0Sstevel@tonic-gate pageout_lbolt = lbolt; 803*0Sstevel@tonic-gate sample_start = gethrtime(); 804*0Sstevel@tonic-gate 805*0Sstevel@tonic-gate /* 806*0Sstevel@tonic-gate * Scan the appropriate number of pages for a single duty cycle. 807*0Sstevel@tonic-gate * However, stop scanning as soon as there is enough free memory. 808*0Sstevel@tonic-gate * For a short while, we will be sampling the performance of the 809*0Sstevel@tonic-gate * scanner and need to keep running just to get sample data, in 810*0Sstevel@tonic-gate * which case we keep going and don't pay attention to whether 811*0Sstevel@tonic-gate * or not there is enough free memory. 812*0Sstevel@tonic-gate */ 813*0Sstevel@tonic-gate 814*0Sstevel@tonic-gate while (nscan < nscan_limit && (freemem < lotsfree + needfree || 815*0Sstevel@tonic-gate pageout_sample_cnt < pageout_sample_lim)) { 816*0Sstevel@tonic-gate int rvfront, rvback; 817*0Sstevel@tonic-gate 818*0Sstevel@tonic-gate /* 819*0Sstevel@tonic-gate * Check to see if we have exceeded our %CPU budget 820*0Sstevel@tonic-gate * for this wakeup, but not on every single page visited, 821*0Sstevel@tonic-gate * just every once in a while. 822*0Sstevel@tonic-gate */ 823*0Sstevel@tonic-gate if ((pcount & PAGES_POLL_MASK) == PAGES_POLL_MASK) { 824*0Sstevel@tonic-gate pageout_cycle_ticks = lbolt - pageout_lbolt; 825*0Sstevel@tonic-gate if (pageout_cycle_ticks >= pageout_ticks) { 826*0Sstevel@tonic-gate ++pageout_timeouts; 827*0Sstevel@tonic-gate break; 828*0Sstevel@tonic-gate } 829*0Sstevel@tonic-gate } 830*0Sstevel@tonic-gate 831*0Sstevel@tonic-gate /* 832*0Sstevel@tonic-gate * If checkpage manages to add a page to the free list, 833*0Sstevel@tonic-gate * we give ourselves another couple of trips around the loop. 834*0Sstevel@tonic-gate */ 835*0Sstevel@tonic-gate if ((rvfront = checkpage(fronthand, FRONT)) == 1) 836*0Sstevel@tonic-gate count = 0; 837*0Sstevel@tonic-gate if ((rvback = checkpage(backhand, BACK)) == 1) 838*0Sstevel@tonic-gate count = 0; 839*0Sstevel@tonic-gate 840*0Sstevel@tonic-gate ++pcount; 841*0Sstevel@tonic-gate 842*0Sstevel@tonic-gate /* 843*0Sstevel@tonic-gate * protected by pageout_mutex instead of cpu_stat_lock 844*0Sstevel@tonic-gate */ 845*0Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, vm, scan, 1); 846*0Sstevel@tonic-gate 847*0Sstevel@tonic-gate /* 848*0Sstevel@tonic-gate * Don't include ineligible pages in the number scanned. 849*0Sstevel@tonic-gate */ 850*0Sstevel@tonic-gate if (rvfront != -1 || rvback != -1) 851*0Sstevel@tonic-gate nscan++; 852*0Sstevel@tonic-gate 853*0Sstevel@tonic-gate backhand = page_next(backhand); 854*0Sstevel@tonic-gate 855*0Sstevel@tonic-gate /* 856*0Sstevel@tonic-gate * backhand update and wraparound check are done separately 857*0Sstevel@tonic-gate * because lint barks when it finds an empty "if" body 858*0Sstevel@tonic-gate */ 859*0Sstevel@tonic-gate 860*0Sstevel@tonic-gate if ((fronthand = page_next(fronthand)) == page_first()) { 861*0Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGEOUT_HAND_WRAP, 862*0Sstevel@tonic-gate "pageout_hand_wrap:freemem %ld whichhand %d", 863*0Sstevel@tonic-gate freemem, FRONT); 864*0Sstevel@tonic-gate 865*0Sstevel@tonic-gate /* 866*0Sstevel@tonic-gate * protected by pageout_mutex instead of cpu_stat_lock 867*0Sstevel@tonic-gate */ 868*0Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, vm, rev, 1); 869*0Sstevel@tonic-gate if (++count > 1) { 870*0Sstevel@tonic-gate /* 871*0Sstevel@tonic-gate * Extremely unlikely, but it happens. 872*0Sstevel@tonic-gate * We went around the loop at least once 873*0Sstevel@tonic-gate * and didn't get far enough. 874*0Sstevel@tonic-gate * If we are still skipping `highly shared' 875*0Sstevel@tonic-gate * pages, skip fewer of them. Otherwise, 876*0Sstevel@tonic-gate * give up till the next clock tick. 877*0Sstevel@tonic-gate */ 878*0Sstevel@tonic-gate if (po_share < MAX_PO_SHARE) { 879*0Sstevel@tonic-gate po_share <<= 1; 880*0Sstevel@tonic-gate } else { 881*0Sstevel@tonic-gate /* 882*0Sstevel@tonic-gate * Really a "goto loop", but 883*0Sstevel@tonic-gate * if someone is TRACing or 884*0Sstevel@tonic-gate * TNF_PROBE_ing, at least 885*0Sstevel@tonic-gate * make records to show 886*0Sstevel@tonic-gate * where we are. 887*0Sstevel@tonic-gate */ 888*0Sstevel@tonic-gate break; 889*0Sstevel@tonic-gate } 890*0Sstevel@tonic-gate } 891*0Sstevel@tonic-gate } 892*0Sstevel@tonic-gate } 893*0Sstevel@tonic-gate 894*0Sstevel@tonic-gate sample_end = gethrtime(); 895*0Sstevel@tonic-gate 896*0Sstevel@tonic-gate TRACE_5(TR_FAC_VM, TR_PAGEOUT_END, 897*0Sstevel@tonic-gate "pageout_end:freemem %ld lots %ld nscan %ld des %ld count %u", 898*0Sstevel@tonic-gate freemem, lotsfree, nscan, desscan, count); 899*0Sstevel@tonic-gate 900*0Sstevel@tonic-gate /* Kernel probe */ 901*0Sstevel@tonic-gate TNF_PROBE_2(pageout_scan_end, "vm pagedaemon", /* CSTYLED */, 902*0Sstevel@tonic-gate tnf_ulong, pages_scanned, nscan, 903*0Sstevel@tonic-gate tnf_ulong, pages_free, freemem); 904*0Sstevel@tonic-gate 905*0Sstevel@tonic-gate if (pageout_sample_cnt < pageout_sample_lim) { 906*0Sstevel@tonic-gate pageout_sample_pages += pcount; 907*0Sstevel@tonic-gate pageout_sample_etime += sample_end - sample_start; 908*0Sstevel@tonic-gate ++pageout_sample_cnt; 909*0Sstevel@tonic-gate } 910*0Sstevel@tonic-gate if (pageout_sample_cnt >= pageout_sample_lim && 911*0Sstevel@tonic-gate pageout_new_spread == 0) { 912*0Sstevel@tonic-gate pageout_rate = (hrrate_t)pageout_sample_pages * 913*0Sstevel@tonic-gate (hrrate_t)(NANOSEC) / pageout_sample_etime; 914*0Sstevel@tonic-gate pageout_new_spread = pageout_rate / 10; 915*0Sstevel@tonic-gate setupclock(1); 916*0Sstevel@tonic-gate } 917*0Sstevel@tonic-gate 918*0Sstevel@tonic-gate goto loop; 919*0Sstevel@tonic-gate } 920*0Sstevel@tonic-gate 921*0Sstevel@tonic-gate /* 922*0Sstevel@tonic-gate * Look at the page at hand. If it is locked (e.g., for physical i/o), 923*0Sstevel@tonic-gate * system (u., page table) or free, then leave it alone. Otherwise, 924*0Sstevel@tonic-gate * if we are running the front hand, turn off the page's reference bit. 925*0Sstevel@tonic-gate * If the proc is over maxrss, we take it. If running the back hand, 926*0Sstevel@tonic-gate * check whether the page has been reclaimed. If not, free the page, 927*0Sstevel@tonic-gate * pushing it to disk first if necessary. 928*0Sstevel@tonic-gate * 929*0Sstevel@tonic-gate * Return values: 930*0Sstevel@tonic-gate * -1 if the page is not a candidate at all, 931*0Sstevel@tonic-gate * 0 if not freed, or 932*0Sstevel@tonic-gate * 1 if we freed it. 933*0Sstevel@tonic-gate */ 934*0Sstevel@tonic-gate static int 935*0Sstevel@tonic-gate checkpage(struct page *pp, int whichhand) 936*0Sstevel@tonic-gate { 937*0Sstevel@tonic-gate int ppattr; 938*0Sstevel@tonic-gate int isfs = 0; 939*0Sstevel@tonic-gate int isexec = 0; 940*0Sstevel@tonic-gate int pagesync_flag; 941*0Sstevel@tonic-gate 942*0Sstevel@tonic-gate /* 943*0Sstevel@tonic-gate * Skip pages: 944*0Sstevel@tonic-gate * - associated with the kernel vnode since 945*0Sstevel@tonic-gate * they are always "exclusively" locked. 946*0Sstevel@tonic-gate * - that are free 947*0Sstevel@tonic-gate * - that are shared more than po_share'd times 948*0Sstevel@tonic-gate * - its already locked 949*0Sstevel@tonic-gate * 950*0Sstevel@tonic-gate * NOTE: These optimizations assume that reads are atomic. 951*0Sstevel@tonic-gate */ 952*0Sstevel@tonic-gate top: 953*0Sstevel@tonic-gate if ((pp->p_vnode == &kvp) || 954*0Sstevel@tonic-gate (PP_ISFREE(pp)) || 955*0Sstevel@tonic-gate (hat_page_getshare(pp) > po_share) || PAGE_LOCKED(pp)) { 956*0Sstevel@tonic-gate return (-1); 957*0Sstevel@tonic-gate } 958*0Sstevel@tonic-gate 959*0Sstevel@tonic-gate if (!page_trylock(pp, SE_EXCL)) { 960*0Sstevel@tonic-gate /* 961*0Sstevel@tonic-gate * Skip the page if we can't acquire the "exclusive" lock. 962*0Sstevel@tonic-gate */ 963*0Sstevel@tonic-gate return (-1); 964*0Sstevel@tonic-gate } else if (PP_ISFREE(pp)) { 965*0Sstevel@tonic-gate /* 966*0Sstevel@tonic-gate * It became free between the above check and our actually 967*0Sstevel@tonic-gate * locking the page. Oh, well there will be other pages. 968*0Sstevel@tonic-gate */ 969*0Sstevel@tonic-gate page_unlock(pp); 970*0Sstevel@tonic-gate return (-1); 971*0Sstevel@tonic-gate } 972*0Sstevel@tonic-gate 973*0Sstevel@tonic-gate /* 974*0Sstevel@tonic-gate * Reject pages that cannot be freed. The page_struct_lock 975*0Sstevel@tonic-gate * need not be acquired to examine these 976*0Sstevel@tonic-gate * fields since the page has an "exclusive" lock. 977*0Sstevel@tonic-gate */ 978*0Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 979*0Sstevel@tonic-gate page_unlock(pp); 980*0Sstevel@tonic-gate return (-1); 981*0Sstevel@tonic-gate } 982*0Sstevel@tonic-gate 983*0Sstevel@tonic-gate /* 984*0Sstevel@tonic-gate * Maintain statistics for what we are freeing 985*0Sstevel@tonic-gate */ 986*0Sstevel@tonic-gate 987*0Sstevel@tonic-gate if (pp->p_vnode != NULL) { 988*0Sstevel@tonic-gate if (pp->p_vnode->v_flag & VVMEXEC) 989*0Sstevel@tonic-gate isexec = 1; 990*0Sstevel@tonic-gate 991*0Sstevel@tonic-gate if (!IS_SWAPFSVP(pp->p_vnode)) 992*0Sstevel@tonic-gate isfs = 1; 993*0Sstevel@tonic-gate } 994*0Sstevel@tonic-gate 995*0Sstevel@tonic-gate /* 996*0Sstevel@tonic-gate * Turn off REF and MOD bits with the front hand. 997*0Sstevel@tonic-gate * The back hand examines the REF bit and always considers 998*0Sstevel@tonic-gate * SHARED pages as referenced. 999*0Sstevel@tonic-gate */ 1000*0Sstevel@tonic-gate if (whichhand == FRONT) 1001*0Sstevel@tonic-gate pagesync_flag = HAT_SYNC_ZERORM; 1002*0Sstevel@tonic-gate else 1003*0Sstevel@tonic-gate pagesync_flag = HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_REF | 1004*0Sstevel@tonic-gate HAT_SYNC_STOPON_SHARED; 1005*0Sstevel@tonic-gate 1006*0Sstevel@tonic-gate ppattr = hat_pagesync(pp, pagesync_flag); 1007*0Sstevel@tonic-gate 1008*0Sstevel@tonic-gate recheck: 1009*0Sstevel@tonic-gate /* 1010*0Sstevel@tonic-gate * If page is referenced; make unreferenced but reclaimable. 1011*0Sstevel@tonic-gate * If this page is not referenced, then it must be reclaimable 1012*0Sstevel@tonic-gate * and we can add it to the free list. 1013*0Sstevel@tonic-gate */ 1014*0Sstevel@tonic-gate if (ppattr & P_REF) { 1015*0Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGEOUT_ISREF, 1016*0Sstevel@tonic-gate "pageout_isref:pp %p whichhand %d", pp, whichhand); 1017*0Sstevel@tonic-gate if (whichhand == FRONT) { 1018*0Sstevel@tonic-gate /* 1019*0Sstevel@tonic-gate * Checking of rss or madvise flags needed here... 1020*0Sstevel@tonic-gate * 1021*0Sstevel@tonic-gate * If not "well-behaved", fall through into the code 1022*0Sstevel@tonic-gate * for not referenced. 1023*0Sstevel@tonic-gate */ 1024*0Sstevel@tonic-gate hat_clrref(pp); 1025*0Sstevel@tonic-gate } 1026*0Sstevel@tonic-gate /* 1027*0Sstevel@tonic-gate * Somebody referenced the page since the front 1028*0Sstevel@tonic-gate * hand went by, so it's not a candidate for 1029*0Sstevel@tonic-gate * freeing up. 1030*0Sstevel@tonic-gate */ 1031*0Sstevel@tonic-gate page_unlock(pp); 1032*0Sstevel@tonic-gate return (0); 1033*0Sstevel@tonic-gate } 1034*0Sstevel@tonic-gate 1035*0Sstevel@tonic-gate VM_STAT_ADD(pageoutvmstats.checkpage[0]); 1036*0Sstevel@tonic-gate 1037*0Sstevel@tonic-gate /* 1038*0Sstevel@tonic-gate * If large page, attempt to demote it. If successfully demoted, 1039*0Sstevel@tonic-gate * retry the checkpage. 1040*0Sstevel@tonic-gate */ 1041*0Sstevel@tonic-gate if (pp->p_szc != 0) { 1042*0Sstevel@tonic-gate if (!page_try_demote_pages(pp)) { 1043*0Sstevel@tonic-gate VM_STAT_ADD(pageoutvmstats.checkpage[1]); 1044*0Sstevel@tonic-gate page_unlock(pp); 1045*0Sstevel@tonic-gate return (-1); 1046*0Sstevel@tonic-gate } 1047*0Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 1048*0Sstevel@tonic-gate VM_STAT_ADD(pageoutvmstats.checkpage[2]); 1049*0Sstevel@tonic-gate /* 1050*0Sstevel@tonic-gate * since page_try_demote_pages() could have unloaded some 1051*0Sstevel@tonic-gate * mappings it makes sense to reload ppattr. 1052*0Sstevel@tonic-gate */ 1053*0Sstevel@tonic-gate ppattr = hat_page_getattr(pp, P_MOD | P_REF); 1054*0Sstevel@tonic-gate } 1055*0Sstevel@tonic-gate 1056*0Sstevel@tonic-gate /* 1057*0Sstevel@tonic-gate * If the page is currently dirty, we have to arrange 1058*0Sstevel@tonic-gate * to have it cleaned before it can be freed. 1059*0Sstevel@tonic-gate * 1060*0Sstevel@tonic-gate * XXX - ASSERT(pp->p_vnode != NULL); 1061*0Sstevel@tonic-gate */ 1062*0Sstevel@tonic-gate if ((ppattr & P_MOD) && pp->p_vnode) { 1063*0Sstevel@tonic-gate struct vnode *vp = pp->p_vnode; 1064*0Sstevel@tonic-gate u_offset_t offset = pp->p_offset; 1065*0Sstevel@tonic-gate 1066*0Sstevel@tonic-gate /* 1067*0Sstevel@tonic-gate * XXX - Test for process being swapped out or about to exit? 1068*0Sstevel@tonic-gate * [Can't get back to process(es) using the page.] 1069*0Sstevel@tonic-gate */ 1070*0Sstevel@tonic-gate 1071*0Sstevel@tonic-gate /* 1072*0Sstevel@tonic-gate * Hold the vnode before releasing the page lock to 1073*0Sstevel@tonic-gate * prevent it from being freed and re-used by some 1074*0Sstevel@tonic-gate * other thread. 1075*0Sstevel@tonic-gate */ 1076*0Sstevel@tonic-gate VN_HOLD(vp); 1077*0Sstevel@tonic-gate page_unlock(pp); 1078*0Sstevel@tonic-gate 1079*0Sstevel@tonic-gate /* 1080*0Sstevel@tonic-gate * Queue i/o request for the pageout thread. 1081*0Sstevel@tonic-gate */ 1082*0Sstevel@tonic-gate if (!queue_io_request(vp, offset)) { 1083*0Sstevel@tonic-gate VN_RELE(vp); 1084*0Sstevel@tonic-gate return (0); 1085*0Sstevel@tonic-gate } 1086*0Sstevel@tonic-gate return (1); 1087*0Sstevel@tonic-gate } 1088*0Sstevel@tonic-gate 1089*0Sstevel@tonic-gate /* 1090*0Sstevel@tonic-gate * Now we unload all the translations, 1091*0Sstevel@tonic-gate * and put the page back on to the free list. 1092*0Sstevel@tonic-gate * If the page was used (referenced or modified) after 1093*0Sstevel@tonic-gate * the pagesync but before it was unloaded we catch it 1094*0Sstevel@tonic-gate * and handle the page properly. 1095*0Sstevel@tonic-gate */ 1096*0Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGEOUT_FREE, 1097*0Sstevel@tonic-gate "pageout_free:pp %p whichhand %d", pp, whichhand); 1098*0Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 1099*0Sstevel@tonic-gate ppattr = hat_page_getattr(pp, P_MOD | P_REF); 1100*0Sstevel@tonic-gate if ((ppattr & P_REF) || ((ppattr & P_MOD) && pp->p_vnode)) 1101*0Sstevel@tonic-gate goto recheck; 1102*0Sstevel@tonic-gate 1103*0Sstevel@tonic-gate /*LINTED: constant in conditional context*/ 1104*0Sstevel@tonic-gate VN_DISPOSE(pp, B_FREE, 0, kcred); 1105*0Sstevel@tonic-gate 1106*0Sstevel@tonic-gate CPU_STATS_ADD_K(vm, dfree, 1); 1107*0Sstevel@tonic-gate 1108*0Sstevel@tonic-gate if (isfs) { 1109*0Sstevel@tonic-gate if (isexec) { 1110*0Sstevel@tonic-gate CPU_STATS_ADD_K(vm, execfree, 1); 1111*0Sstevel@tonic-gate } else { 1112*0Sstevel@tonic-gate CPU_STATS_ADD_K(vm, fsfree, 1); 1113*0Sstevel@tonic-gate } 1114*0Sstevel@tonic-gate } else { 1115*0Sstevel@tonic-gate CPU_STATS_ADD_K(vm, anonfree, 1); 1116*0Sstevel@tonic-gate } 1117*0Sstevel@tonic-gate 1118*0Sstevel@tonic-gate return (1); /* freed a page! */ 1119*0Sstevel@tonic-gate } 1120*0Sstevel@tonic-gate 1121*0Sstevel@tonic-gate /* 1122*0Sstevel@tonic-gate * Queue async i/o request from pageout_scanner and segment swapout 1123*0Sstevel@tonic-gate * routines on one common list. This ensures that pageout devices (swap) 1124*0Sstevel@tonic-gate * are not saturated by pageout_scanner or swapout requests. 1125*0Sstevel@tonic-gate * The pageout thread empties this list by initiating i/o operations. 1126*0Sstevel@tonic-gate */ 1127*0Sstevel@tonic-gate int 1128*0Sstevel@tonic-gate queue_io_request(vnode_t *vp, u_offset_t off) 1129*0Sstevel@tonic-gate { 1130*0Sstevel@tonic-gate struct async_reqs *arg; 1131*0Sstevel@tonic-gate 1132*0Sstevel@tonic-gate /* 1133*0Sstevel@tonic-gate * If we cannot allocate an async request struct, 1134*0Sstevel@tonic-gate * skip this page. 1135*0Sstevel@tonic-gate */ 1136*0Sstevel@tonic-gate mutex_enter(&push_lock); 1137*0Sstevel@tonic-gate if ((arg = req_freelist) == NULL) { 1138*0Sstevel@tonic-gate mutex_exit(&push_lock); 1139*0Sstevel@tonic-gate return (0); 1140*0Sstevel@tonic-gate } 1141*0Sstevel@tonic-gate req_freelist = arg->a_next; /* adjust freelist */ 1142*0Sstevel@tonic-gate push_list_size++; 1143*0Sstevel@tonic-gate 1144*0Sstevel@tonic-gate arg->a_vp = vp; 1145*0Sstevel@tonic-gate arg->a_off = off; 1146*0Sstevel@tonic-gate arg->a_len = PAGESIZE; 1147*0Sstevel@tonic-gate arg->a_flags = B_ASYNC | B_FREE; 1148*0Sstevel@tonic-gate arg->a_cred = kcred; /* always held */ 1149*0Sstevel@tonic-gate 1150*0Sstevel@tonic-gate /* 1151*0Sstevel@tonic-gate * Add to list of pending write requests. 1152*0Sstevel@tonic-gate */ 1153*0Sstevel@tonic-gate arg->a_next = push_list; 1154*0Sstevel@tonic-gate push_list = arg; 1155*0Sstevel@tonic-gate 1156*0Sstevel@tonic-gate if (req_freelist == NULL) { 1157*0Sstevel@tonic-gate /* 1158*0Sstevel@tonic-gate * No free async requests left. The lock is held so we 1159*0Sstevel@tonic-gate * might as well signal the pusher thread now. 1160*0Sstevel@tonic-gate */ 1161*0Sstevel@tonic-gate cv_signal(&push_cv); 1162*0Sstevel@tonic-gate } 1163*0Sstevel@tonic-gate mutex_exit(&push_lock); 1164*0Sstevel@tonic-gate return (1); 1165*0Sstevel@tonic-gate } 1166*0Sstevel@tonic-gate 1167*0Sstevel@tonic-gate /* 1168*0Sstevel@tonic-gate * Wakeup pageout to initiate i/o if push_list is not empty. 1169*0Sstevel@tonic-gate */ 1170*0Sstevel@tonic-gate void 1171*0Sstevel@tonic-gate cv_signal_pageout() 1172*0Sstevel@tonic-gate { 1173*0Sstevel@tonic-gate if (push_list != NULL) { 1174*0Sstevel@tonic-gate mutex_enter(&push_lock); 1175*0Sstevel@tonic-gate cv_signal(&push_cv); 1176*0Sstevel@tonic-gate mutex_exit(&push_lock); 1177*0Sstevel@tonic-gate } 1178*0Sstevel@tonic-gate } 1179