xref: /csrg-svn/sys/vm/vm_meter.c (revision 37729)
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)vm_meter.c	7.5 (Berkeley) 05/09/89
 */

#include "param.h"
#include "systm.h"
#include "user.h"
#include "proc.h"
#include "text.h"
#include "vm.h"
#include "kernel.h"
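/*
 * maxslp bounds how long a resident process may sleep before sched()
 * below treats it as deadwood and pushes it out.  saferss is not used
 * in this file; it serves as a fallback resident-set figure elsewhere
 * in the paging code.
 */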

int	maxslp = MAXSLP;
int	saferss = SAFERSS;

/*
 * The following parameters control operation of the page replacement
 * algorithm.  They are initialized to 0, and then computed at boot time
 * based on the size of the system.  If they are patched non-zero in
 * a loaded vmunix they are left alone and may thus be changed per system
 * using adb on the loaded system.
 */
int	maxpgio = 0;
int	minfree = 0;
int	desfree = 0;
int	lotsfree = 0;
int	slowscan = 0;
int	fastscan = 0;
int	klin = KLIN;
int	klseql = KLSEQL;
int	klsdist = KLSDIST;
int	kltxt = KLTXT;
int	klout = KLOUT;
int	multprog = -1;		/* so we don't count process 2 */

double	avenrun[3];		/* load average, of runnable procs */

/*
 * The main loop of the scheduling (swapping) process.
 *
 * The basic idea is:
 *	see if anyone wants to be swapped in;
 *	swap out processes until there is room;
 *	swap him in;
 *	repeat.
 * If the paging rate is too high, or the average free memory
 * is very low, then we do not consider swapping anyone in,
 * but rather look for someone to swap out.
 *
 * The runout flag is set whenever someone is swapped out.
 * Sched sleeps on it awaiting work.
 *
 * Sched sleeps on runin whenever it cannot find enough
 * core (by swapping out or otherwise) to fit the
 * selected swapped process.  It is awakened when the
 * core situation changes and in any case once per second.
 *
 * sched DOESN'T ACCOUNT FOR PAGE TABLE SIZE IN CALCULATIONS.
 */
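/*
 * runout, runin and wantin are globals shared with the rest of the
 * kernel; vmmeter() below issues the wakeups that get sched() moving
 * again when memory runs short or the swapper has been idle too long.
 */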

#define	swappable(p) \
	(((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT|SPHYSIO))==SLOAD)
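/*
 * That is, a process may be swapped only when it is resident (SLOAD)
 * and none of the other flags are set: roughly, it is not a system
 * process, not locked in core, not in the middle of a pagein, not
 * marked to be kept, not exiting, and not doing raw I/O.
 */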

/* insure non-zero */
#define	nz(x)	(x != 0 ? x : 1)
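/*
 * nz() protects the divisions below: tunables such as maxpgio and
 * lotsfree start out zero and only get real values at boot time.
 */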

#define	NBIG	4
#define	MAXNBIG	10
int	nbig = NBIG;
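/*
 * bplist heads a small list, built from the bigp array, of the nbig
 * largest resident jobs found by the hard-swap scan in sched().  It is
 * kept sorted by bp_pri (resident size) in ascending order, so the
 * smallest candidate sits at the head and is the first to be displaced
 * when a bigger job turns up.
 */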

struct bigp {
	struct	proc *bp_proc;
	int	bp_pri;
	struct	bigp *bp_link;
} bigp[MAXNBIG], bplist;

sched()
{
	register struct proc *rp, *p, *inp;
	int outpri, inpri, rppri;
	int sleeper, desperate, deservin, needs, divisor;
	register struct bigp *bp, *nbp;
	int biggot, gives;

loop:
	wantin = 0;
	deservin = 0;
	sleeper = 0;
	p = 0;
	/*
	 * See if paging system is overloaded; if so swap someone out.
	 * Conditions for hard outswap are:
	 *	if need kernel map (mix it up).
	 * or
	 *	1. if there are at least 2 runnable processes (on the average)
	 * and	2. the paging rate is excessive or memory is now VERY low.
	 * and	3. the short (5-second) and longer (30-second) average
	 *	   memory is less than desirable.
	 */
	if (kmapwnt ||
	    (avenrun[0] >= 2 && imax(avefree, avefree30) < desfree &&
	    (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) {
		desperate = 1;
		goto hardswap;
	}
	desperate = 0;
	/*
	 * Not desperate for core,
	 * look for someone who deserves to be brought in.
	 */
	outpri = -20000;
	for (rp = allproc; rp != NULL; rp = rp->p_nxt) switch(rp->p_stat) {
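	/*
	 * A runnable process that is swapped out competes for swap-in on
	 * rppri: time spent swapped out plus time asleep, less a nice
	 * bias and a penalty scaling its swapped resident size against
	 * the pagein rate (maxpgio) and klustering factor.  Resident
	 * sleepers and stopped processes idle longer than maxslp are
	 * kicked out below to reclaim their memory.
	 */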

	case SRUN:
		if ((rp->p_flag&SLOAD) == 0) {
			rppri = rp->p_time -
			    rp->p_swrss / nz((maxpgio/2) * (klin * CLSIZE)) +
			    rp->p_slptime - (rp->p_nice-NZERO)*8;
			if (rppri > outpri) {
				if (rp->p_poip)
					continue;
				if (rp->p_textp && rp->p_textp->x_poip)
					continue;
				p = rp;
				outpri = rppri;
			}
		}
		continue;

	case SSLEEP:
	case SSTOP:
		if ((freemem < desfree || rp->p_rssize == 0) &&
		    rp->p_slptime > maxslp &&
		    (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) &&
		    swappable(rp)) {
			/*
			 * Kick out deadwood.
			 */
			rp->p_flag &= ~SLOAD;
			(void) swapout(rp, rp->p_dsize, rp->p_ssize);
		}
		continue;
	}

	/*
	 * If something came ready after we checked it,
	 * wantin will be set.  Otherwise,
	 * no one wants in, so nothing to do.
	 */
	if (outpri == -20000) {
		(void) splhigh();
		if (wantin == 0) {
			runout++;
			sleep((caddr_t)&runout, PSWP);
		}
		(void) spl0();
		goto loop;
	}
	/*
	 * Decide how deserving this guy is.  If he is deserving
	 * we will be willing to work harder to bring him in.
	 * Needs is an estimate of how much core he will need.
	 * If he has been out for a while, then we will
	 * bring him in with 1/2 the core he will need, otherwise
	 * we are conservative.
	 */
	deservin = 0;
	divisor = 1;
	if (outpri > maxslp/2) {
		deservin = 1;
		divisor = 2;
	}
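	/*
	 * needs estimates how much memory the process will want once it
	 * is back in core.  The deficit is charged with that amount before
	 * the swap-in is attempted so the paging code plans around the
	 * incoming memory; if the swap-in fails the charge is taken back.
	 */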
	needs = p->p_swrss;
	if (p->p_textp && p->p_textp->x_ccount == 0)
		needs += p->p_textp->x_swrss;
	needs = imin(needs, lotsfree);
	if (freemem - deficit > needs / divisor) {
		deficit += needs;
		if (swapin(p))
			goto loop;
		deficit -= imin(needs, deficit);
	}

hardswap:
	/*
	 * Need resources (kernel map or memory), swap someone out.
	 * Select the nbig largest jobs, then the oldest of these
	 * is ``most likely to get booted.''
	 */
	inp = p;
	sleeper = 0;
	if (nbig > MAXNBIG)
		nbig = MAXNBIG;
	if (nbig < 1)
		nbig = 1;
	biggot = 0;
	bplist.bp_link = 0;
	for (rp = allproc; rp != NULL; rp = rp->p_nxt) {
		if (!swappable(rp))
			continue;
		if (rp == inp)
			continue;
		if (rp->p_textp && rp->p_textp->x_flag&XLOCK)
			continue;
		if (rp->p_slptime > maxslp &&
		    (rp->p_stat==SSLEEP&&rp->p_pri>PZERO||rp->p_stat==SSTOP)) {
			if (sleeper < rp->p_slptime) {
				p = rp;
				sleeper = rp->p_slptime;
			}
		} else if (!sleeper && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) {
			rppri = rp->p_rssize;
			if (rp->p_textp)
				rppri += rp->p_textp->x_rssize/rp->p_textp->x_ccount;
			if (biggot < nbig)
				nbp = &bigp[biggot++];
			else {
				nbp = bplist.bp_link;
				if (nbp->bp_pri > rppri)
					continue;
				bplist.bp_link = nbp->bp_link;
			}
			for (bp = &bplist; bp->bp_link; bp = bp->bp_link)
				if (rppri < bp->bp_link->bp_pri)
					break;
			nbp->bp_link = bp->bp_link;
			bp->bp_link = nbp;
			nbp->bp_pri = rppri;
			nbp->bp_proc = rp;
		}
	}
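	/*
	 * If no long-time sleeper was found, fall back to the big jobs
	 * collected above and prefer the one that has been in core the
	 * longest, nudged by its nice value.
	 */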
	if (!sleeper) {
		p = NULL;
		inpri = -1000;
		for (bp = bplist.bp_link; bp; bp = bp->bp_link) {
			rp = bp->bp_proc;
			rppri = rp->p_time+rp->p_nice-NZERO;
			if (rppri >= inpri) {
				p = rp;
				inpri = rppri;
			}
		}
	}
	/*
	 * If we found a long-time sleeper, or we are desperate and
	 * found anyone to swap out, or if someone deserves to come
	 * in and we didn't find a sleeper, but found someone who
	 * has been in core for a reasonable length of time, then
	 * we kick the poor luser out.
	 */
	if (sleeper || desperate && p || deservin && inpri > maxslp) {
		(void) splhigh();
		p->p_flag &= ~SLOAD;
		if (p->p_stat == SRUN)
			remrq(p);
		(void) spl0();
		if (desperate) {
			/*
			 * Want to give this space to the rest of
			 * the processes in core so give them a chance
			 * by increasing the deficit.
			 */
			gives = p->p_rssize;
			if (p->p_textp)
				gives += p->p_textp->x_rssize / p->p_textp->x_ccount;
			gives = imin(gives, lotsfree);
			deficit += gives;
		} else
			gives = 0;	/* someone else taketh away */
		if (swapout(p, p->p_dsize, p->p_ssize) == 0)
			deficit -= imin(gives, deficit);
		else
			goto loop;
	}
	/*
	 * Want to swap someone in, but can't
	 * so wait on runin.
	 */
	(void) splhigh();
	runin++;
	sleep((caddr_t)&runin, PSWP);
	(void) spl0();
	goto loop;
}

vmmeter()
{
	register unsigned *cp, *rp, *sp;
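	/*
	 * Each pass, decay the swap-in deficit by the larger of ten
	 * percent or a fixed allowance tied to the pagein klustering
	 * factor and maxpgio, but never drive it below zero.
	 */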

	deficit -= imin(deficit,
	    imax(deficit / 10, ((klin * CLSIZE) / 2) * maxpgio / 2));
	ave(avefree, freemem, 5);
	ave(avefree30, freemem, 30);
	/* v_pgin is maintained by clock.c */
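	/*
	 * Walk the cnt, rate and sum structures in parallel, field by
	 * field from v_first through v_last: rate keeps a smoothed
	 * average over the last five samples, sum accumulates the running
	 * total, and the per-interval count is then cleared.
	 */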
	cp = &cnt.v_first; rp = &rate.v_first; sp = &sum.v_first;
	while (cp <= &cnt.v_last) {
		ave(*rp, *cp, 5);
		*sp += *cp;
		*cp = 0;
		rp++, cp++, sp++;
	}
	if (time.tv_sec % 5 == 0) {
		vmtotal();
		rate.v_swpin = cnt.v_swpin;
		sum.v_swpin += cnt.v_swpin;
		cnt.v_swpin = 0;
		rate.v_swpout = cnt.v_swpout;
		sum.v_swpout += cnt.v_swpout;
		cnt.v_swpout = 0;
	}
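	/*
	 * If free memory is critically short while the swapper is waiting
	 * for outswap work, or the swapper (proc[0]) has itself been asleep
	 * more than maxslp/2 seconds, wake both the runin and runout
	 * sleepers so sched() reevaluates the situation.
	 */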
	if (avefree < minfree && runout || proc[0].p_slptime > maxslp/2) {
		runout = 0;
		runin = 0;
		wakeup((caddr_t)&runin);
		wakeup((caddr_t)&runout);
	}
}

/*
 * Schedule rate for paging.
 * Rate is linear interpolation between
 * slowscan with lotsfree and fastscan when out of memory.
 */
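/*
 * Concretely: with vavail = freemem - deficit clamped at zero, desscan
 * runs linearly from slowscan when vavail == lotsfree to fastscan when
 * vavail == 0, and is divided by RATETOSCHEDPAGING because schedpaging()
 * reschedules itself that many times per second.  The wakeup prods the
 * sleeper on &proc[2], the pageout daemon, whenever free memory has
 * dropped below lotsfree.
 */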
schedpaging()
{
	register int vavail;

	nscan = desscan = 0;
	vavail = freemem - deficit;
	if (vavail < 0)
		vavail = 0;
	if (freemem < lotsfree) {
		desscan = (slowscan * vavail + fastscan * (lotsfree - vavail)) /
			nz(lotsfree) / RATETOSCHEDPAGING;
		wakeup((caddr_t)&proc[2]);
	}
	timeout(schedpaging, (caddr_t)0, hz / RATETOSCHEDPAGING);
}

vmtotal()
{
	register struct proc *p;
	register struct text *xp;
	int nrun = 0;
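	/*
	 * First pass: total the virtual and resident sizes of all shared
	 * text segments.  A text is counted as active if any process
	 * attached to it is runnable, or is sleeping or stopped but has
	 * been so for less than maxslp seconds.
	 */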

	total.t_vmtxt = 0;
	total.t_avmtxt = 0;
	total.t_rmtxt = 0;
	total.t_armtxt = 0;
	for (xp = text; xp < textNTEXT; xp++)
		if (xp->x_vptr) {
			total.t_vmtxt += xp->x_size;
			total.t_rmtxt += xp->x_rssize;
			for (p = xp->x_caddr; p; p = p->p_xlink)
			switch (p->p_stat) {

			case SSTOP:
			case SSLEEP:
				if (p->p_slptime >= maxslp)
					continue;
				/* fall into... */

			case SRUN:
			case SIDL:
				total.t_avmtxt += xp->x_size;
				total.t_armtxt += xp->x_rssize;
				goto next;
			}
next:
			;
		}
	total.t_vm = 0;
	total.t_avm = 0;
	total.t_rm = 0;
	total.t_arm = 0;
	total.t_rq = 0;
	total.t_dw = 0;
	total.t_pw = 0;
	total.t_sl = 0;
	total.t_sw = 0;
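	/*
	 * Second pass: classify every user process.  t_rq counts runnable
	 * jobs in core, t_pw jobs in page wait, t_dw residents sleeping at
	 * or below PZERO (disk wait), t_sl other residents sleeping less
	 * than maxslp, and t_sw runnable or recently active jobs that are
	 * swapped out.  Virtual and resident totals go to t_vm and t_rm,
	 * the active subset to t_avm and t_arm, and nrun feeds the load
	 * average.
	 */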
	for (p = allproc; p != NULL; p = p->p_nxt) {
		if (p->p_flag & SSYS)
			continue;
		if (p->p_stat) {
			if (p->p_stat != SZOMB) {
				total.t_vm += p->p_dsize + p->p_ssize;
				total.t_rm += p->p_rssize;
			}
			switch (p->p_stat) {

			case SSLEEP:
			case SSTOP:
				if (p->p_pri <= PZERO && p->p_stat == SSLEEP)
					nrun++;
				if (p->p_flag & SPAGE)
					total.t_pw++;
				else if (p->p_flag & SLOAD) {
					if (p->p_pri <= PZERO)
						total.t_dw++;
					else if (p->p_slptime < maxslp)
						total.t_sl++;
				} else if (p->p_slptime < maxslp)
					total.t_sw++;
				if (p->p_slptime < maxslp)
					goto active;
				break;

			case SRUN:
			case SIDL:
				nrun++;
				if (p->p_flag & SLOAD)
					total.t_rq++;
				else
					total.t_sw++;
active:
				total.t_avm += p->p_dsize + p->p_ssize;
				total.t_arm += p->p_rssize;
				break;
			}
		}
	}
	total.t_vm += total.t_vmtxt;
	total.t_avm += total.t_avmtxt;
	total.t_rm += total.t_rmtxt;
	total.t_arm += total.t_armtxt;
	total.t_free = avefree;
	loadav(avenrun, nrun);
}

/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 */
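/*
 * With one sample every 5 seconds the decay factor for a T-second
 * average is exp(-5/T): exp(-5/60) = exp(-1/12), exp(-5/300) = exp(-1/60),
 * and exp(-5/900) = exp(-1/180).  Each step of loadav() below computes
 * avg = decay * avg + n * (1 - decay).
 */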
double	cexp[3] = {
	0.9200444146293232,	/* exp(-1/12) */
	0.9834714538216174,	/* exp(-1/60) */
	0.9944598480048967,	/* exp(-1/180) */
};

/*
 * Compute a tenex style load average of a quantity on
 * 1, 5 and 15 minute intervals.
 */
loadav(avg, n)
	register double *avg;
	int n;
{
	register int i;

	for (i = 0; i < 3; i++)
		avg[i] = cexp[i] * avg[i] + n * (1.0 - cexp[i]);
}
468