/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/cpuvar.h>
#include <sys/var.h>
#include <sys/tuneable.h>
#include <sys/cmn_err.h>
#include <sys/buf.h>
#include <sys/disp.h>
#include <sys/vmsystm.h>
#include <sys/vmparam.h>
#include <sys/class.h>
#include <sys/vtrace.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/procfs.h>

#include <vm/seg.h>
#include <vm/seg_kp.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg_kmem.h>
#include <sys/callb.h>

/*
 * The swapper sleeps on runout when there is no one to swap in.
 * It sleeps on runin when it could not find space to swap someone
 * in or after swapping someone in.
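 * In both cases the clock thread is responsible for setting the
 * swapper running again (see the wake_sched flags below).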
 */
char	runout;
char	runin;
char	wake_sched;	/* flag tells clock to wake swapper on next tick */
char	wake_sched_sec;	/* flag tells clock to wake swapper after a second */

/*
 * The swapper swaps processes to reduce memory demand and runs
 * when avefree < desfree. It first resorts to SOFTSWAP, which
 * swaps out all processes sleeping for more than maxslp seconds.
 * HARDSWAP occurs when the system is on the verge of thrashing
 * and results in swapping out runnable threads or threads
 * sleeping for less than maxslp secs.
 *
 * The swapper runs through all the active processes in the system
 * and invokes the scheduling class specific swapin/swapout routine
 * for every thread in the process to obtain an effective priority
 * for the process. A priority of -1 implies that the thread isn't
 * swappable. This effective priority is used to find the most
 * eligible process to swap out or swap in.
 *
 * NOTE:	Threads which have been swapped are not linked on any
 *		queue and their dispatcher lock points at the "swapped_lock".
 *
 * Processes containing threads with the TS_DONT_SWAP flag set cannot be
 * swapped out immediately by the swapper. This is due to the fact that
 * such threads may be holding locks which may be needed by the swapper
 * to push its pages out. The TS_SWAPENQ flag is set on such threads
 * to prevent them running in user mode. When such threads reach a
 * safe point (i.e., are not holding any locks - CL_TRAPRET), they
 * queue themselves onto the swap queue which is processed by the
 * swapper. This results in reducing memory demand when the system
 * is desperate for memory as the thread can't run in user mode.
 *
 * The swap queue consists of threads, linked via t_link, which have
 * not been swapped, are runnable, but are not on the run queue. The
 * swap queue is protected by the "swapped_lock". The dispatcher
 * lock (t_lockp) of all threads on the swap queue points at the
 * "swapped_lock". Thus, the entire queue and/or threads on the
 * queue can be locked by acquiring "swapped_lock".
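 *
 * Lock ordering below: "pidlock" is acquired before a process's
 * "p_lock", which in turn is acquired before a thread's dispatcher
 * lock.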
 */
static kthread_t *tswap_queue;
extern disp_lock_t swapped_lock; /* protects swap queue and threads on it */

int	maxslp = 0;
pgcnt_t	avefree;	/* 5 sec moving average of free memory */
pgcnt_t	avefree30;	/* 30 sec moving average of free memory */

/*
 * Minimum size used to decide if sufficient memory is available
 * before a process is swapped in. This is necessary since in most
 * cases the actual size of a process (p_swrss) being swapped in
 * is usually 2 pages (kernel stack pages). This is due to the fact
 * that almost all user pages of a process are stolen by pageout
 * before the swapper decides to swap it out.
 */
int	min_procsize = 12;

static int	swapin(proc_t *);
static int	swapout(proc_t *, uint_t *, int);
static void	process_swap_queue(void);

#ifdef __sparc
extern void lwp_swapin(kthread_t *);
#endif /* __sparc */

/*
 * Counters to keep track of the number of swapins or swapouts.
 */
uint_t tot_swapped_in, tot_swapped_out;
uint_t softswap, hardswap, swapqswap;

/*
 * Macro to determine if a process is eligible to be swapped.
 */
#define	not_swappable(p)					\
	(((p)->p_flag & SSYS) || (p)->p_stat == SIDL ||		\
	    (p)->p_stat == SZOMB || (p)->p_as == NULL ||	\
	    (p)->p_as == &kas)

/*
 * Memory scheduler.
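 *
 * sched() is the swapper's main loop. Each pass first drains the
 * swap queue, then picks the most deserving process to swap in and,
 * when memory is tight, the most eligible victim to swap out, and
 * finally blocks until the clock thread wakes the swapper again.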
 */
void
sched()
{
	kthread_id_t	t;
	pri_t		proc_pri;
	pri_t		thread_pri;
	pri_t		swapin_pri;
	int		desperate;
	pgcnt_t		needs;
	int		divisor;
	proc_t		*prp;
	proc_t		*swapout_prp;
	proc_t		*swapin_prp;
	spgcnt_t	avail;
	int		chosen_pri;
	time_t		swapout_time;
	time_t		swapin_proc_time;
	callb_cpr_t	cprinfo;
	kmutex_t	swap_cpr_lock;

	mutex_init(&swap_cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cprinfo, &swap_cpr_lock, callb_generic_cpr, "sched");
	if (maxslp == 0)
		maxslp = MAXSLP;
loop:
	needs = 0;
	desperate = 0;

	swapin_pri = v.v_nglobpris;
	swapin_prp = NULL;
	chosen_pri = -1;

	process_swap_queue();

	/*
	 * Set desperate if
	 *	1.  At least 2 runnable processes (on average).
	 *	2.  Both the short (5 sec) and longer (30 sec) averages
	 *	    of free memory are less than desfree.
	 *	3.  The pagein + pageout rate is excessive, or the short
	 *	    term average of free memory is less than minfree.
	 */
	if (avenrun[0] >= 2 * FSCALE &&
	    (MAX(avefree, avefree30) < desfree) &&
	    (pginrate + pgoutrate > maxpgio || avefree < minfree)) {
		TRACE_4(TR_FAC_SCHED, TR_DESPERATE,
		    "desp:avefree: %d, avefree30: %d, freemem: %d"
		    " pginrate: %d\n", avefree, avefree30, freemem, pginrate);
		desperate = 1;
		goto unload;
	}

	/*
	 * Search list of processes to swapin and swapout deadwood.
	 */
	swapin_proc_time = 0;
top:
	mutex_enter(&pidlock);
	for (prp = practive; prp != NULL; prp = prp->p_next) {
		if (not_swappable(prp))
			continue;

		/*
		 * Look at processes with at least one swapped lwp.
		 */
		if (prp->p_swapcnt) {
			time_t proc_time;

			/*
			 * Higher priority processes are good candidates
			 * to swap in.
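			 * A process's effective priority is the maximum
			 * CL_SWAPIN() priority over its unloaded lwps;
			 * proc_time records the most recent swap time
			 * (t_stime) among those lwps.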
			 */
			mutex_enter(&prp->p_lock);
			proc_pri = -1;
			t = prp->p_tlist;
			proc_time = 0;
			do {
				if (t->t_schedflag & TS_LOAD)
					continue;

				thread_lock(t);
				thread_pri = CL_SWAPIN(t, 0);
				thread_unlock(t);

				if (t->t_stime - proc_time > 0)
					proc_time = t->t_stime;
				if (thread_pri > proc_pri)
					proc_pri = thread_pri;
			} while ((t = t->t_forw) != prp->p_tlist);
			mutex_exit(&prp->p_lock);

			if (proc_pri == -1)
				continue;

			TRACE_3(TR_FAC_SCHED, TR_CHOOSE_SWAPIN,
			    "prp %p epri %d proc_time %d",
			    prp, proc_pri, proc_time);

			/*
			 * Swap in processes with a high effective priority.
			 */
			if (swapin_prp == NULL || proc_pri > chosen_pri) {
				swapin_prp = prp;
				chosen_pri = proc_pri;
				swapin_pri = proc_pri;
				swapin_proc_time = proc_time;
			}
		} else {
			/*
			 * No need to soft swap if we have sufficient
			 * memory.
			 */
			if (avefree > desfree ||
			    (avefree < desfree && freemem > desfree))
				continue;

			/*
			 * Skip processes which are exiting. This is
			 * determined by checking p_lwpcnt since SZOMB is
			 * set after the address space is released.
			 */
			mutex_enter(&prp->p_lock);
			if (prp->p_lwpcnt == 0 ||
			    (prp->p_flag & SEXITLWPS) ||
			    (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) {
				mutex_exit(&prp->p_lock);
				continue;
			}

			/*
			 * Softswapping to kick out deadwood.
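			 * Only loaded threads (TS_LOAD set) which are
			 * neither marked to be swapped (TS_SWAPENQ) nor
			 * already on the swap queue (TS_ON_SWAPQ) are
			 * considered.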
			 */
			proc_pri = -1;
			t = prp->p_tlist;
			do {
				if ((t->t_schedflag & (TS_SWAPENQ |
				    TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD)
					continue;

				thread_lock(t);
				thread_pri = CL_SWAPOUT(t, SOFTSWAP);
				thread_unlock(t);
				if (thread_pri > proc_pri)
					proc_pri = thread_pri;
			} while ((t = t->t_forw) != prp->p_tlist);

			if (proc_pri != -1) {
				uint_t swrss;

				mutex_exit(&pidlock);

				TRACE_1(TR_FAC_SCHED, TR_SOFTSWAP,
				    "softswap:prp %p", prp);

				(void) swapout(prp, &swrss, SOFTSWAP);
				softswap++;
				prp->p_swrss += swrss;
				mutex_exit(&prp->p_lock);
				goto top;
			}
			mutex_exit(&prp->p_lock);
		}
	}
	if (swapin_prp != NULL)
		mutex_enter(&swapin_prp->p_lock);
	mutex_exit(&pidlock);

	if (swapin_prp == NULL) {
		TRACE_3(TR_FAC_SCHED, TR_RUNOUT,
		    "schedrunout:runout nswapped: %d, avefree: %ld freemem: %ld",
		    nswapped, avefree, freemem);

		t = curthread;
		thread_lock(t);
		runout++;
		t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART);
		t->t_whystop = PR_SUSPENDED;
		t->t_whatstop = SUSPEND_NORMAL;
		(void) new_mstate(t, LMS_SLEEP);
		mutex_enter(&swap_cpr_lock);
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		mutex_exit(&swap_cpr_lock);
		thread_stop(t);		/* change state and drop lock */
		swtch();
		mutex_enter(&swap_cpr_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock);
		mutex_exit(&swap_cpr_lock);
		goto loop;
	}

	/*
	 * Decide how deserving this process is to be brought in.
	 * Needs is an estimate of how much core the process will
	 * need. If the process has been out for a while, then we
	 * will bring it in with 1/2 the core needed, otherwise
	 * we are conservative.
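	 *
	 * That is, needs = MAX(MIN(p_swrss, lotsfree), min_procsize) /
	 * divisor, where divisor is 2 if the process has been out for
	 * longer than maxslp / 2 seconds and 1 otherwise.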
	 */
	divisor = 1;
	swapout_time = (lbolt - swapin_proc_time) / hz;
	if (swapout_time > maxslp / 2)
		divisor = 2;

	needs = MIN(swapin_prp->p_swrss, lotsfree);
	needs = MAX(needs, min_procsize);
	needs = needs / divisor;

	/*
	 * Use freemem, since we want processes to be swapped
	 * in quickly.
	 */
	avail = freemem - deficit;
	if (avail > (spgcnt_t)needs) {
		deficit += needs;

		TRACE_2(TR_FAC_SCHED, TR_SWAPIN_VALUES,
		    "swapin_values: prp %p needs %lu", swapin_prp, needs);

		if (swapin(swapin_prp)) {
			mutex_exit(&swapin_prp->p_lock);
			goto loop;
		}
		deficit -= MIN(needs, deficit);
		mutex_exit(&swapin_prp->p_lock);
	} else {
		mutex_exit(&swapin_prp->p_lock);
		/*
		 * If deficit is high, too many processes have been
		 * swapped in so wait a sec before attempting to
		 * swap in more.
		 */
		if (freemem > needs) {
			TRACE_2(TR_FAC_SCHED, TR_HIGH_DEFICIT,
			    "deficit: prp %p needs %lu", swapin_prp, needs);
			goto block;
		}
	}

	TRACE_2(TR_FAC_SCHED, TR_UNLOAD,
	    "unload: prp %p needs %lu", swapin_prp, needs);

unload:
	/*
	 * Unload all unloadable modules, free all other memory
	 * resources we can find, then look for a thread to hardswap.
	 */
	modreap();
	segkp_cache_free();

	swapout_prp = NULL;
	mutex_enter(&pidlock);
	for (prp = practive; prp != NULL; prp = prp->p_next) {
		if (not_swappable(prp))
			continue;

		/*
		 * No need to hard swap if we have sufficient
		 * memory.
		 */
		if (avefree > minfree ||
		    (avefree < minfree && freemem > desfree)) {
			swapout_prp = NULL;
			break;
		}

		/*
		 * Skip processes which are exiting. This is determined
		 * by checking p_lwpcnt since SZOMB is set after the
		 * address space is released.
		 */
		mutex_enter(&prp->p_lock);
		if (prp->p_lwpcnt == 0 ||
		    (prp->p_flag & SEXITLWPS) ||
		    (prp->p_as != NULL && AS_ISPGLCK(prp->p_as))) {
			mutex_exit(&prp->p_lock);
			continue;
		}

		proc_pri = -1;
		t = prp->p_tlist;
		do {
			if ((t->t_schedflag & (TS_SWAPENQ |
			    TS_ON_SWAPQ | TS_LOAD)) != TS_LOAD)
				continue;

			thread_lock(t);
			thread_pri = CL_SWAPOUT(t, HARDSWAP);
			thread_unlock(t);
			if (thread_pri > proc_pri)
				proc_pri = thread_pri;
		} while ((t = t->t_forw) != prp->p_tlist);

		mutex_exit(&prp->p_lock);
		if (proc_pri == -1)
			continue;

		/*
		 * Swap out processes sleeping with a lower priority
		 * than the one currently being swapped in, if any.
		 */
		if (swapin_prp == NULL || swapin_pri > proc_pri) {
			TRACE_2(TR_FAC_SCHED, TR_CHOOSE_SWAPOUT,
			    "hardswap: prp %p needs %lu", prp, needs);

			if (swapout_prp == NULL || proc_pri < chosen_pri) {
				swapout_prp = prp;
				chosen_pri = proc_pri;
			}
		}
	}

	/*
	 * Acquire the "p_lock" before dropping "pidlock"
	 * to prevent the proc structure from being freed
	 * if the process exits before swapout completes.
	 */
	if (swapout_prp != NULL)
		mutex_enter(&swapout_prp->p_lock);
	mutex_exit(&pidlock);

	if ((prp = swapout_prp) != NULL) {
		uint_t swrss = 0;
		int swapped;

		swapped = swapout(prp, &swrss, HARDSWAP);
		if (swapped) {
			/*
			 * If desperate, we want to give the space obtained
			 * by swapping this process out to processes in core,
			 * so we give them a chance by increasing deficit.
			 */
			prp->p_swrss += swrss;
			if (desperate)
				deficit += MIN(prp->p_swrss, lotsfree);
			hardswap++;
		}
		mutex_exit(&swapout_prp->p_lock);

		if (swapped)
			goto loop;
	}

	/*
	 * Delay for 1 second and look again later.
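	 * The "runin" count below is the handshake with the clock
	 * thread, which is expected to set the swapper running again
	 * about a second later.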
	 */
	TRACE_3(TR_FAC_SCHED, TR_RUNIN,
	    "schedrunin:runin nswapped: %d, avefree: %ld freemem: %ld",
	    nswapped, avefree, freemem);

block:
	t = curthread;
	thread_lock(t);
	runin++;
	t->t_schedflag |= (TS_ALLSTART & ~TS_CSTART);
	t->t_whystop = PR_SUSPENDED;
	t->t_whatstop = SUSPEND_NORMAL;
	(void) new_mstate(t, LMS_SLEEP);
	mutex_enter(&swap_cpr_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	mutex_exit(&swap_cpr_lock);
	thread_stop(t);		/* change to stop state and drop lock */
	swtch();
	mutex_enter(&swap_cpr_lock);
	CALLB_CPR_SAFE_END(&cprinfo, &swap_cpr_lock);
	mutex_exit(&swap_cpr_lock);
	goto loop;
}

/*
 * Remove the specified thread from the swap queue.
 */
static void
swapdeq(kthread_id_t tp)
{
	kthread_id_t *tpp;

	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT(tp->t_schedflag & TS_ON_SWAPQ);

	tpp = &tswap_queue;
	for (;;) {
		ASSERT(*tpp != NULL);
		if (*tpp == tp)
			break;
		tpp = &(*tpp)->t_link;
	}
	*tpp = tp->t_link;
	tp->t_schedflag &= ~TS_ON_SWAPQ;
}

/*
 * Swap in lwps. Returns nonzero on success (i.e., if at least one lwp is
 * swapped in) and 0 on failure.
 */
static int
swapin(proc_t *pp)
{
	kthread_id_t tp;
	int err;
	int num_swapped_in = 0;
	struct cpu *cpup = CPU;
	pri_t thread_pri;

	ASSERT(MUTEX_HELD(&pp->p_lock));
	ASSERT(pp->p_swapcnt);

top:
	tp = pp->p_tlist;
	do {
		/*
		 * Only swap in eligible lwps (specified by the scheduling
		 * class) which are unloaded and ready to run.
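		 * An lwp qualifies when CL_SWAPIN() returns a priority
		 * other than -1, it is runnable (TS_RUN), and its stack
		 * is not loaded (TS_LOAD clear).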
		 */
		thread_lock(tp);
		thread_pri = CL_SWAPIN(tp, 0);
		if (thread_pri != -1 && tp->t_state == TS_RUN &&
		    (tp->t_schedflag & TS_LOAD) == 0) {
			size_t stack_size;
			pgcnt_t stack_pages;

			ASSERT((tp->t_schedflag & TS_ON_SWAPQ) == 0);

			thread_unlock(tp);
			/*
			 * Now drop the p_lock since the stack needs
			 * to be brought in.
			 */
			mutex_exit(&pp->p_lock);

			stack_size = swapsize(tp->t_swap);
			stack_pages = btopr(stack_size);
			/* Kernel probe */
			TNF_PROBE_4(swapin_lwp, "vm swap swapin", /* CSTYLED */,
			    tnf_pid, pid, pp->p_pid,
			    tnf_lwpid, lwpid, tp->t_tid,
			    tnf_kthread_id, tid, tp,
			    tnf_ulong, page_count, stack_pages);

			rw_enter(&kas.a_lock, RW_READER);
			err = segkp_fault(segkp->s_as->a_hat, segkp,
			    tp->t_swap, stack_size, F_SOFTLOCK, S_OTHER);
			rw_exit(&kas.a_lock);

#ifdef __sparc
			lwp_swapin(tp);
#endif /* __sparc */

			/*
			 * Re-acquire the p_lock.
			 */
			mutex_enter(&pp->p_lock);
			if (err) {
				num_swapped_in = 0;
				break;
			} else {
				CPU_STATS_ADDQ(cpup, vm, swapin, 1);
				CPU_STATS_ADDQ(cpup, vm, pgswapin,
				    stack_pages);

				pp->p_swapcnt--;
				pp->p_swrss -= stack_pages;

				thread_lock(tp);
				tp->t_schedflag |= TS_LOAD;
				dq_sruninc(tp);

				tp->t_stime = lbolt;	/* set swapin time */
				thread_unlock(tp);

				nswapped--;
				tot_swapped_in++;
				num_swapped_in++;

				TRACE_2(TR_FAC_SCHED, TR_SWAPIN,
				    "swapin: pp %p stack_pages %lu",
				    pp, stack_pages);
				goto top;
			}
		}
		thread_unlock(tp);
	} while ((tp = tp->t_forw) != pp->p_tlist);
	return (num_swapped_in);
}

/*
 * Swap out lwps. Returns nonzero on success (i.e., if at least one lwp is
 * swapped out) and 0 on failure.
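 * On return, *swrss holds the number of pages pushed out by this call
 * (stack pages, plus address space pages once every lwp is out).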
 */
static int
swapout(proc_t *pp, uint_t *swrss, int swapflags)
{
	kthread_id_t tp;
	pgcnt_t ws_pages = 0;
	int err;
	int swapped_lwps = 0;
	struct as *as = pp->p_as;
	struct cpu *cpup = CPU;
	pri_t thread_pri;

	ASSERT(MUTEX_HELD(&pp->p_lock));

	if (pp->p_lwpcnt == 0 || (pp->p_flag & SEXITLWPS))
		return (0);

top:
	tp = pp->p_tlist;
	do {
		klwp_t *lwp = ttolwp(tp);

		/*
		 * Swap out eligible lwps (specified by the scheduling
		 * class) which don't have TS_DONT_SWAP set. Set the
		 * "intent to swap" flag (TS_SWAPENQ) on threads
		 * which have TS_DONT_SWAP set so that they can be
		 * swapped if and when they reach a safe point.
		 */
		thread_lock(tp);
		thread_pri = CL_SWAPOUT(tp, swapflags);
		if (thread_pri != -1) {
			if (tp->t_schedflag & TS_DONT_SWAP) {
				tp->t_schedflag |= TS_SWAPENQ;
				tp->t_trapret = 1;
				aston(tp);
			} else {
				pgcnt_t stack_pages;
				size_t stack_size;

				ASSERT((tp->t_schedflag &
				    (TS_DONT_SWAP | TS_LOAD)) == TS_LOAD);

				if (lock_try(&tp->t_lock)) {
					/*
					 * Remove thread from the swap_queue.
					 */
					if (tp->t_schedflag & TS_ON_SWAPQ) {
						ASSERT(!(tp->t_schedflag &
						    TS_SWAPENQ));
						swapdeq(tp);
					} else if (tp->t_state == TS_RUN)
						dq_srundec(tp);

					tp->t_schedflag &=
					    ~(TS_LOAD | TS_SWAPENQ);
					lock_clear(&tp->t_lock);

					/*
					 * Set swapout time if the thread isn't
					 * sleeping.
					 */
					if (tp->t_state != TS_SLEEP)
						tp->t_stime = lbolt;
					thread_unlock(tp);

					nswapped++;
					tot_swapped_out++;

					lwp->lwp_ru.nswap++;

					/*
					 * Now drop the p_lock since the
					 * stack needs to be pushed out.
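					 * The F_SOFTUNLOCK fault below
					 * releases the lock on the segkp
					 * stack pages so that they can
					 * be paged out.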
					 */
					mutex_exit(&pp->p_lock);

					stack_size = swapsize(tp->t_swap);
					stack_pages = btopr(stack_size);
					ws_pages += stack_pages;
					/* Kernel probe */
					TNF_PROBE_4(swapout_lwp,
					    "vm swap swapout",
					    /* CSTYLED */,
					    tnf_pid, pid, pp->p_pid,
					    tnf_lwpid, lwpid, tp->t_tid,
					    tnf_kthread_id, tid, tp,
					    tnf_ulong, page_count,
					    stack_pages);

					rw_enter(&kas.a_lock, RW_READER);
					err = segkp_fault(segkp->s_as->a_hat,
					    segkp, tp->t_swap, stack_size,
					    F_SOFTUNLOCK, S_WRITE);
					rw_exit(&kas.a_lock);

					if (err) {
						cmn_err(CE_PANIC,
						    "swapout: segkp_fault "
						    "failed err: %d", err);
					}
					CPU_STATS_ADDQ(cpup,
					    vm, pgswapout, stack_pages);

					mutex_enter(&pp->p_lock);
					pp->p_swapcnt++;
					swapped_lwps++;
					goto top;
				}
			}
		}
		thread_unlock(tp);
	} while ((tp = tp->t_forw) != pp->p_tlist);

	/*
	 * Unload address space when all lwps are swapped out.
	 */
	if (pp->p_swapcnt == pp->p_lwpcnt) {
		size_t as_size = 0;

		/*
		 * Avoid invoking as_swapout() if the process has
		 * no MMU resources since pageout will eventually
		 * steal pages belonging to this address space. This
		 * saves CPU cycles as the number of pages that are
		 * potentially freed or pushed out by the segment
		 * swapout operation is very small.
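		 * (rm_asrss() returns the resident set size of the
		 * address space, so zero means no resident pages.)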
		 */
		if (rm_asrss(pp->p_as) != 0)
			as_size = as_swapout(as);

		CPU_STATS_ADDQ(cpup, vm, pgswapout, btop(as_size));
		CPU_STATS_ADDQ(cpup, vm, swapout, 1);
		ws_pages += btop(as_size);

		TRACE_2(TR_FAC_SCHED, TR_SWAPOUT,
		    "swapout: pp %p pages_pushed %lu", pp, ws_pages);
		/* Kernel probe */
		TNF_PROBE_2(swapout_process, "vm swap swapout", /* CSTYLED */,
		    tnf_pid, pid, pp->p_pid,
		    tnf_ulong, page_count, ws_pages);
	}
	*swrss = ws_pages;
	return (swapped_lwps);
}

void
swapout_lwp(klwp_t *lwp)
{
	kthread_id_t tp = curthread;

	ASSERT(curthread == lwptot(lwp));

	/*
	 * Don't insert the thread onto the swap queue if
	 * sufficient memory is available.
	 */
	if (avefree > desfree || (avefree < desfree && freemem > desfree)) {
		thread_lock(tp);
		tp->t_schedflag &= ~TS_SWAPENQ;
		thread_unlock(tp);
		return;
	}

	/*
	 * Lock the thread, then move it to the swapped queue from the
	 * onproc queue and set its state to be TS_RUN.
	 */
	thread_lock(tp);
	ASSERT(tp->t_state == TS_ONPROC);
	if (tp->t_schedflag & TS_SWAPENQ) {
		tp->t_schedflag &= ~TS_SWAPENQ;

		/*
		 * Set the state of this thread to be runnable
		 * and move it from the onproc queue to the swap queue.
		 */
		disp_swapped_enq(tp);

		/*
		 * Insert the thread onto the swap queue.
		 */
		tp->t_link = tswap_queue;
		tswap_queue = tp;
		tp->t_schedflag |= TS_ON_SWAPQ;

		thread_unlock_nopreempt(tp);

		TRACE_1(TR_FAC_SCHED, TR_SWAPOUT_LWP, "swapout_lwp:%x", lwp);

		swtch();
	} else {
		thread_unlock(tp);
	}
}

/*
 * Swap all threads on the swap queue.
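 * Threads place themselves on this queue from the trap-return path
 * (see swapout_lwp() above); the swapper drains it at the start of
 * each pass through sched().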
 */
static void
process_swap_queue(void)
{
	kthread_id_t tp;
	uint_t ws_pages;
	proc_t *pp;
	struct cpu *cpup = CPU;
	klwp_t *lwp;
	int err;

	if (tswap_queue == NULL)
		return;

	/*
	 * Acquire the "swapped_lock" which locks the swap queue,
	 * and unload the stacks of all threads on it.
	 */
	disp_lock_enter(&swapped_lock);
	while ((tp = tswap_queue) != NULL) {
		pgcnt_t stack_pages;
		size_t stack_size;

		tswap_queue = tp->t_link;
		tp->t_link = NULL;

		/*
		 * Drop the "dispatcher lock" before acquiring "t_lock"
		 * to avoid spinning on it since the thread at the front
		 * of the swap queue could be pinned before giving up
		 * its "t_lock" in resume.
		 */
		disp_lock_exit(&swapped_lock);
		lock_set(&tp->t_lock);

		/*
		 * Now, re-acquire the "swapped_lock". Acquiring this lock
		 * results in locking the thread since its dispatcher lock
		 * (t_lockp) is the "swapped_lock".
		 */
		disp_lock_enter(&swapped_lock);
		ASSERT(tp->t_state == TS_RUN);
		ASSERT(tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ));

		tp->t_schedflag &= ~(TS_LOAD | TS_ON_SWAPQ);
		tp->t_stime = lbolt;		/* swapout time */
		disp_lock_exit(&swapped_lock);
		lock_clear(&tp->t_lock);

		lwp = ttolwp(tp);
		lwp->lwp_ru.nswap++;

		pp = ttoproc(tp);
		stack_size = swapsize(tp->t_swap);
		stack_pages = btopr(stack_size);

		/* Kernel probe */
		TNF_PROBE_4(swapout_lwp, "vm swap swapout", /* CSTYLED */,
		    tnf_pid, pid, pp->p_pid,
		    tnf_lwpid, lwpid, tp->t_tid,
		    tnf_kthread_id, tid, tp,
		    tnf_ulong, page_count, stack_pages);

		rw_enter(&kas.a_lock, RW_READER);
		err = segkp_fault(segkp->s_as->a_hat, segkp, tp->t_swap,
		    stack_size, F_SOFTUNLOCK, S_WRITE);
		rw_exit(&kas.a_lock);

		if (err) {
			cmn_err(CE_PANIC,
			    "process_swap_list: segkp_fault failed err: %d",
			    err);
		}
		CPU_STATS_ADDQ(cpup, vm, pgswapout, stack_pages);

		nswapped++;
		tot_swapped_out++;
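		/*
		 * swapqswap counts lwps swapped out via the swap queue,
		 * as distinct from the soft/hard swap paths in sched().
		 */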
		swapqswap++;

		/*
		 * Don't need p_lock since the swapper is the only
		 * thread which increments/decrements p_swapcnt and p_swrss.
		 */
		ws_pages = stack_pages;
		pp->p_swapcnt++;

		TRACE_1(TR_FAC_SCHED, TR_SWAPQ_LWP, "swaplist: pp %p", pp);

		/*
		 * Unload address space when all lwps are swapped out.
		 */
		if (pp->p_swapcnt == pp->p_lwpcnt) {
			size_t as_size = 0;

			if (rm_asrss(pp->p_as) != 0)
				as_size = as_swapout(pp->p_as);

			CPU_STATS_ADDQ(cpup, vm, pgswapout,
			    btop(as_size));
			CPU_STATS_ADDQ(cpup, vm, swapout, 1);

			ws_pages += btop(as_size);

			TRACE_2(TR_FAC_SCHED, TR_SWAPQ_PROC,
			    "swaplist_proc: pp %p pages_pushed: %lu",
			    pp, ws_pages);
			/* Kernel probe */
			TNF_PROBE_2(swapout_process, "vm swap swapout",
			    /* CSTYLED */,
			    tnf_pid, pid, pp->p_pid,
			    tnf_ulong, page_count, ws_pages);
		}
		pp->p_swrss += ws_pages;
		disp_lock_enter(&swapped_lock);
	}
	disp_lock_exit(&swapped_lock);
}