xref: /csrg-svn/sys/vm/vm_meter.c (revision 11833)
1 /*	vm_meter.c	4.20	83/04/04	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/seg.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/proc.h"
9 #include "../h/text.h"
10 #include "../h/vm.h"
11 #include "../h/cmap.h"
12 #include "../h/kernel.h"
13 
14 int	maxslp = MAXSLP;
15 int	saferss = SAFERSS;
16 
17 /*
18  * The following parameters control operation of the page replacement
19  * algorithm.  They are initialized to 0, and then computed at boot time
20  * based on the size of the system.  If they are patched non-zero in
21  * a loaded vmunix they are left alone and may thus be changed per system
22  * using adb on the loaded system.
23  */
24 int	maxpgio = 0;
25 int	minfree = 0;
26 int	desfree = 0;
27 int	lotsfree = 0;
28 int	slowscan = 0;
29 int	fastscan = 0;
30 int	klin = KLIN;
31 int	klseql = KLSEQL;
32 int	klsdist = KLSDIST;
33 int	kltxt = KLTXT;
34 int	klout = KLOUT;
35 int	multprog = -1;		/* so we don't count process 2 */
36 
37 double	avenrun[3];		/* load average, of runnable procs */
38 
39 /*
40  * Setup the paging constants for the clock algorithm.
41  * Called after the system is initialized and the amount of memory
42  * and number of paging devices is known.
43  */
44 setupclock()
45 {
46 
47 	/*
48 	 * Setup thresholds for paging:
49 	 *	lotsfree	is threshold where paging daemon turns on
50 	 *	desfree		is amount of memory desired free.  if less
51 	 *			than this for extended period, do swapping
52 	 *	minfree		is minimal amount of free memory which is
53 	 *			tolerable.
54 	 *
55 	 * Strategy of 4/22/81:
56 	 *	lotsfree is 1/4 of memory free.
57 	 *	desfree is 200k bytes, but at most 1/8 of memory
58 	 *	minfree is 64k bytes, but at most 1/2 of desfree
59 	 */
60 	if (lotsfree == 0)
61 		lotsfree = LOOPPAGES / 4;
62 	if (desfree == 0) {
63 		desfree = (200*1024) / NBPG;
64 		if (desfree > LOOPPAGES / 8)
65 			desfree = LOOPPAGES / 8;
66 	}
67 	if (minfree == 0) {
68 		minfree = (64*1024) / NBPG;
69 		if (minfree > desfree/2)
70 			minfree = desfree / 2;
71 	}
72 
73 	/*
74 	 * Maxpgio thresholds how much paging is acceptable.
75 	 * This figures that 2/3 busy on an arm is all that is
76 	 * tolerable for paging.  We assume one operation per disk rev.
77 	 */
78 	if (maxpgio == 0)
79 		maxpgio = (DISKRPM * 2) / 3;
80 
81 	/*
82 	 * Clock to scan using max of ~~10% of processor time for sampling,
83 	 *     this estimated to allow maximum of 200 samples per second.
84 	 * This yields a ``fastscan'' of roughly (with CLSIZE=2):
85 	 *	<=1m	2m	3m	4m	8m
86 	 * 	5s	10s	15s	20s	40s
87 	 */
88 	if (nswdev == 1 && physmem*NBPG > 2*1024*(1024-16))
89 		printf("WARNING: should run interleaved swap with >= 2Mb\n");
90 	if (fastscan == 0)
91 		fastscan = (LOOPPAGES/CLSIZE) / 200;
92 	if (fastscan < 5)
93 		fastscan = 5;
94 	if (nswdev >= 2)
95 		maxpgio = (maxpgio * 3) / 2;
96 
97 	/*
98 	 * Set slow scan time to 1/2 the fast scan time.
99 	 */
100 	if (slowscan == 0)
101 		slowscan = 2 * fastscan;
102 #ifdef notdef
103 	printf("slowscan %d, fastscan %d, maxpgio %d\n",
104 	    slowscan, fastscan, maxpgio);
105 	printf("lotsfree %d, desfree %d, minfree %d\n",
106 	    lotsfree, desfree, minfree);
107 #endif
108 }
109 
110 /*
111  * The main loop of the scheduling (swapping) process.
112  *
113  * The basic idea is:
114  *	see if anyone wants to be swapped in;
115  *	swap out processes until there is room;
116  *	swap him in;
117  *	repeat.
118  * If the paging rate is too high, or the average free memory
119  * is very low, then we do not consider swapping anyone in,
120  * but rather look for someone to swap out.
121  *
122  * The runout flag is set whenever someone is swapped out.
123  * Sched sleeps on it awaiting work.
124  *
125  * Sched sleeps on runin whenever it cannot find enough
126  * core (by swapping out or otherwise) to fit the
127  * selected swapped process.  It is awakened when the
128  * core situation changes and in any case once per second.
129  *
130  * sched DOESN'T ACCOUNT FOR PAGE TABLE SIZE IN CALCULATIONS.
131  */
132 
133 #define	swappable(p) \
134 	(((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT|SPHYSIO))==SLOAD)
135 
136 /* insure non-zero */
137 #define	nz(x)	(x != 0 ? x : 1)
138 
139 #define	NBIG	4
140 #define	MAXNBIG	10
141 int	nbig = NBIG;
142 
143 struct bigp {
144 	struct	proc *bp_proc;
145 	int	bp_pri;
146 	struct	bigp *bp_link;
147 } bigp[MAXNBIG], bplist;
148 
149 sched()
150 {
151 	register struct proc *rp, *p, *inp;
152 	int outpri, inpri, rppri;
153 	int sleeper, desperate, deservin, needs, divisor;
154 	register struct bigp *bp, *nbp;
155 	int biggot, gives;
156 
157 loop:
158 	wantin = 0;
159 	deservin = 0;
160 	sleeper = 0;
161 	p = 0;
162 	/*
163 	 * See if paging system is overloaded; if so swap someone out.
164 	 * Conditions for hard outswap are:
165 	 *	if need kernel map (mix it up).
166 	 * or
167 	 *	1. if there are at least 2 runnable processes (on the average)
168 	 * and	2. the paging rate is excessive or memory is now VERY low.
169 	 * and	3. the short (5-second) and longer (30-second) average
170 	 *	   memory is less than desirable.
171 	 */
172 	if (kmapwnt || (avenrun[0] >= 2 && imax(avefree, avefree30) < desfree &&
173 	    (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) {
174 		desperate = 1;
175 		goto hardswap;
176 	}
177 	desperate = 0;
178 	/*
179 	 * Not desperate for core,
180 	 * look for someone who deserves to be brought in.
181 	 */
182 	outpri = -20000;
183 	for (rp = proc; rp < procNPROC; rp++) switch(rp->p_stat) {
184 
185 	case SRUN:
186 		if ((rp->p_flag&SLOAD) == 0) {
187 			rppri = rp->p_time -
188 			    rp->p_swrss / nz((maxpgio/2) * (klin * CLSIZE)) +
189 			    rp->p_slptime - (rp->p_nice-NZERO)*8;
190 			if (rppri > outpri) {
191 				if (rp->p_poip)
192 					continue;
193 				if (rp->p_textp && rp->p_textp->x_poip)
194 					continue;
195 				p = rp;
196 				outpri = rppri;
197 			}
198 		}
199 		continue;
200 
201 	case SSLEEP:
202 	case SSTOP:
203 		if ((freemem < desfree || rp->p_rssize == 0) &&
204 		    rp->p_slptime > maxslp &&
205 		    (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) &&
206 		    swappable(rp)) {
207 			/*
208 			 * Kick out deadwood.
209 			 */
210 			(void) spl6();
211 			rp->p_flag &= ~SLOAD;
212 			if (rp->p_stat == SRUN)
213 				remrq(rp);
214 			(void) spl0();
215 			(void) swapout(rp, rp->p_dsize, rp->p_ssize);
216 			goto loop;
217 		}
218 		continue;
219 	}
220 
221 	/*
222 	 * No one wants in, so nothing to do.
223 	 */
224 	if (outpri == -20000) {
225 		(void) spl6();
226 		if (wantin) {
227 			wantin = 0;
228 			sleep((caddr_t)&lbolt, PSWP);
229 		} else {
230 			runout++;
231 			sleep((caddr_t)&runout, PSWP);
232 		}
233 		(void) spl0();
234 		goto loop;
235 	}
236 	/*
237 	 * Decide how deserving this guy is.  If he is deserving
238 	 * we will be willing to work harder to bring him in.
239 	 * Needs is an estimate of how much core he will need.
240 	 * If he has been out for a while, then we will
241 	 * bring him in with 1/2 the core he will need, otherwise
242 	 * we are conservative.
243 	 */
244 	deservin = 0;
245 	divisor = 1;
246 	if (outpri > maxslp/2) {
247 		deservin = 1;
248 		divisor = 2;
249 	}
250 	needs = p->p_swrss;
251 	if (p->p_textp && p->p_textp->x_ccount == 0)
252 		needs += p->p_textp->x_swrss;
253 	needs = imin(needs, lotsfree);
254 	if (freemem - deficit > needs / divisor) {
255 		deficit += needs;
256 		if (swapin(p))
257 			goto loop;
258 		deficit -= imin(needs, deficit);
259 	}
260 
261 hardswap:
262 	/*
263 	 * Need resources (kernel map or memory), swap someone out.
264 	 * Select the nbig largest jobs, then the oldest of these
265 	 * is ``most likely to get booted.''
266 	 */
267 	inp = p;
268 	sleeper = 0;
269 	if (nbig > MAXNBIG)
270 		nbig = MAXNBIG;
271 	if (nbig < 1)
272 		nbig = 1;
273 	biggot = 0;
274 	bplist.bp_link = 0;
275 	for (rp = proc; rp < procNPROC; rp++) {
276 		if (!swappable(rp))
277 			continue;
278 		if (rp->p_stat==SZOMB)
279 			continue;
280 		if (rp == inp)
281 			continue;
282 		if (rp->p_textp && rp->p_textp->x_flag&XLOCK)
283 			continue;
284 		if (rp->p_slptime > maxslp &&
285 		    (rp->p_stat==SSLEEP&&rp->p_pri>PZERO||rp->p_stat==SSTOP)) {
286 			if (sleeper < rp->p_slptime) {
287 				p = rp;
288 				sleeper = rp->p_slptime;
289 			}
290 		} else if (!sleeper && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) {
291 			rppri = rp->p_rssize;
292 			if (rp->p_textp)
293 				rppri += rp->p_textp->x_rssize/rp->p_textp->x_ccount;
294 			if (biggot < nbig)
295 				nbp = &bigp[biggot++];
296 			else {
297 				nbp = bplist.bp_link;
298 				if (nbp->bp_pri > rppri)
299 					continue;
300 				bplist.bp_link = nbp->bp_link;
301 			}
302 			for (bp = &bplist; bp->bp_link; bp = bp->bp_link)
303 				if (rppri < bp->bp_link->bp_pri)
304 					break;
305 			nbp->bp_link = bp->bp_link;
306 			bp->bp_link = nbp;
307 			nbp->bp_pri = rppri;
308 			nbp->bp_proc = rp;
309 		}
310 	}
311 	if (!sleeper) {
312 		p = NULL;
313 		inpri = -1000;
314 		for (bp = bplist.bp_link; bp; bp = bp->bp_link) {
315 			rp = bp->bp_proc;
316 			rppri = rp->p_time+rp->p_nice-NZERO;
317 			if (rppri >= inpri) {
318 				p = rp;
319 				inpri = rppri;
320 			}
321 		}
322 	}
323 	/*
324 	 * If we found a long-time sleeper, or we are desperate and
325 	 * found anyone to swap out, or if someone deserves to come
326 	 * in and we didn't find a sleeper, but found someone who
327 	 * has been in core for a reasonable length of time, then
328 	 * we kick the poor luser out.
329 	 */
330 	if (sleeper || desperate && p || deservin && inpri > maxslp) {
331 		(void) spl6();
332 		p->p_flag &= ~SLOAD;
333 		if (p->p_stat == SRUN)
334 			remrq(p);
335 		(void) spl0();
336 		if (desperate) {
337 			/*
338 			 * Want to give this space to the rest of
339 			 * the processes in core so give them a chance
340 			 * by increasing the deficit.
341 			 */
342 			gives = p->p_rssize;
343 			if (p->p_textp)
344 				gives += p->p_textp->x_rssize / p->p_textp->x_ccount;
345 			gives = imin(gives, lotsfree);
346 			deficit += gives;
347 		} else
348 			gives = 0;	/* someone else taketh away */
349 		if (swapout(p, p->p_dsize, p->p_ssize) == 0)
350 			deficit -= imin(gives, deficit);
351 		goto loop;
352 	}
353 	/*
354 	 * Want to swap someone in, but can't
355 	 * so wait on runin.
356 	 */
357 	(void) spl6();
358 	runin++;
359 	sleep((caddr_t)&runin, PSWP);
360 	(void) spl0();
361 	goto loop;
362 }
363 
364 vmmeter()
365 {
366 	register unsigned *cp, *rp, *sp;
367 
368 	deficit -= imin(deficit,
369 	    imax(deficit / 10, ((klin * CLSIZE) / 2) * maxpgio / 2));
370 	ave(avefree, freemem, 5);
371 	ave(avefree30, freemem, 30);
372 	/* v_pgin is maintained by clock.c */
373 	cp = &cnt.v_first; rp = &rate.v_first; sp = &sum.v_first;
374 	while (cp <= &cnt.v_last) {
375 		ave(*rp, *cp, 5);
376 		*sp += *cp;
377 		*cp = 0;
378 		rp++, cp++, sp++;
379 	}
380 	if (time.tv_sec % 5 == 0) {
381 		vmtotal();
382 		rate.v_swpin = cnt.v_swpin;
383 		sum.v_swpin += cnt.v_swpin;
384 		cnt.v_swpin = 0;
385 		rate.v_swpout = cnt.v_swpout;
386 		sum.v_swpout += cnt.v_swpout;
387 		cnt.v_swpout = 0;
388 	}
389 	if (avefree < minfree && runout || proc[0].p_slptime > maxslp/2) {
390 		runout = 0;
391 		runin = 0;
392 		wakeup((caddr_t)&runin);
393 		wakeup((caddr_t)&runout);
394 	}
395 }
396 
397 #define	RATETOSCHEDPAGING	4		/* hz that is */
398 
399 /*
400  * Schedule rate for paging.
401  * Rate is linear interpolation between
402  * slowscan with lotsfree and fastscan when out of memory.
403  */
404 schedpaging()
405 {
406 	register int vavail, scanrate;
407 
408 	nscan = desscan = 0;
409 	vavail = freemem - deficit;
410 	if (vavail < 0)
411 		vavail = 0;
412 	if (freemem < lotsfree) {
413 		scanrate =
414 			(slowscan * vavail + fastscan * (lotsfree - vavail)) /
415 				nz(lotsfree);
416 		desscan = ((LOOPPAGES / CLSIZE) / nz(scanrate)) /
417 				RATETOSCHEDPAGING;
418 		wakeup((caddr_t)&proc[2]);
419 	}
420 	timeout(schedpaging, (caddr_t)0, hz / RATETOSCHEDPAGING);
421 }
422 
423 vmtotal()
424 {
425 	register struct proc *p;
426 	register struct text *xp;
427 	int nrun = 0;
428 
429 	total.t_vmtxt = 0;
430 	total.t_avmtxt = 0;
431 	total.t_rmtxt = 0;
432 	total.t_armtxt = 0;
433 	for (xp = text; xp < textNTEXT; xp++)
434 		if (xp->x_iptr) {
435 			total.t_vmtxt += xp->x_size;
436 			total.t_rmtxt += xp->x_rssize;
437 			for (p = xp->x_caddr; p; p = p->p_xlink)
438 			switch (p->p_stat) {
439 
440 			case SSTOP:
441 			case SSLEEP:
442 				if (p->p_slptime >= maxslp)
443 					continue;
444 				/* fall into... */
445 
446 			case SRUN:
447 			case SIDL:
448 				total.t_avmtxt += xp->x_size;
449 				total.t_armtxt += xp->x_rssize;
450 				goto next;
451 			}
452 next:
453 			;
454 		}
455 	total.t_vm = 0;
456 	total.t_avm = 0;
457 	total.t_rm = 0;
458 	total.t_arm = 0;
459 	total.t_rq = 0;
460 	total.t_dw = 0;
461 	total.t_pw = 0;
462 	total.t_sl = 0;
463 	total.t_sw = 0;
464 	for (p = proc; p < procNPROC; p++) {
465 		if (p->p_flag & SSYS)
466 			continue;
467 		if (p->p_stat) {
468 			total.t_vm += p->p_dsize + p->p_ssize;
469 			total.t_rm += p->p_rssize;
470 			switch (p->p_stat) {
471 
472 			case SSLEEP:
473 			case SSTOP:
474 				if (p->p_pri <= PZERO)
475 					nrun++;
476 				if (p->p_flag & SPAGE)
477 					total.t_pw++;
478 				else if (p->p_flag & SLOAD) {
479 					if (p->p_pri <= PZERO)
480 						total.t_dw++;
481 					else if (p->p_slptime < maxslp)
482 						total.t_sl++;
483 				} else if (p->p_slptime < maxslp)
484 					total.t_sw++;
485 				if (p->p_slptime < maxslp)
486 					goto active;
487 				break;
488 
489 			case SRUN:
490 			case SIDL:
491 				nrun++;
492 				if (p->p_flag & SLOAD)
493 					total.t_rq++;
494 				else
495 					total.t_sw++;
496 active:
497 				total.t_avm += p->p_dsize + p->p_ssize;
498 				total.t_arm += p->p_rssize;
499 				break;
500 			}
501 		}
502 	}
503 	total.t_vm += total.t_vmtxt;
504 	total.t_avm += total.t_avmtxt;
505 	total.t_rm += total.t_rmtxt;
506 	total.t_arm += total.t_armtxt;
507 	total.t_free = avefree;
508 	loadav(avenrun, nrun);
509 }
510 
511 /*
512  * Constants for averages over 1, 5, and 15 minutes
513  * when sampling at 5 second intervals.
514  */
515 double	cexp[3] = {
516 	0.9200444146293232,	/* exp(-1/12) */
517 	0.9834714538216174,	/* exp(-1/60) */
518 	0.9944598480048967,	/* exp(-1/180) */
519 };
520 
521 /*
522  * Compute a tenex style load average of a quantity on
523  * 1, 5 and 15 minute intervals.
524  */
525 loadav(avg, n)
526 	register double *avg;
527 	int n;
528 {
529 	register int i;
530 
531 	for (i = 0; i < 3; i++)
532 		avg[i] = cexp[i] * avg[i] + n * (1.0 - cexp[i]);
533 }
534