xref: /csrg-svn/sys/kern/kern_clock.c (revision 5253)
/*	kern_clock.c	4.29	81/12/12	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dk.h"
#include "../h/callout.h"
#include "../h/seg.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/proc.h"
#include "../h/reg.h"
#include "../h/psl.h"
#include "../h/vm.h"
#include "../h/buf.h"
#include "../h/text.h"
#include "../h/vlimit.h"
#include "../h/mtpr.h"
#include "../h/clock.h"
#include "../h/cpu.h"
#include "../h/protosw.h"

#include "bk.h"
#include "dh.h"
#include "dz.h"

/*
 * Hardclock is called straight from
 * the real time clock interrupt.
 * We limit the work we do at real clock interrupt time to:
 *	reloading clock
 *	decrementing time to callouts
 *	recording cpu time usage
 *	modifying priority of current process
 *	arranging for a soft clock interrupt
 *	kernel pc profiling
 *
 * At software (softclock) interrupt time we:
 *	implement callouts
 *	maintain date
 *	lightning bolt wakeup (every second)
 *	alarm clock signals
 *	jab the scheduler
 *
 * On the vax softclock interrupts are implemented by
 * software interrupts.  Note that we may have multiple softclock
 * interrupts compressed into one (due to excessive interrupt load),
 * but that hardclock interrupts should never be lost.
 */
#ifdef KPROF
int	kcounts[20000];
#endif

/*
 * Protoslow is like lbolt, but for slow protocol timeouts, counting
 * up to (hz/PR_SLOWHZ), then causing a pfslowtimo().
 * Protofast is like lbolt, but for fast protocol timeouts, counting
 * up to (hz/PR_FASTHZ), then causing a pffasttimo().
 */
int	protoslow;
int	protofast;

/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * reprime clock
	 */
	clkreld();

	/*
	 * update callout times
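	 *
	 * Callout times are kept as deltas from the previous entry;
	 * entries whose time has already reached zero are due and are
	 * left for softclock() to run, so we skip over them and age
	 * the first entry that is still pending.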
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;

	/*
	 * Maintain iostat and per-process cpu statistics
	 */
	if (!noproc) {
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}
	/*
	 * Update iostat information.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if(u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
#ifdef KPROF
	int k = ((int)pc & 0x7fffffff) / 8;
	if (k < 20000)
		kcounts[k]++;
#endif
		cpstate = CP_SYS;
		if (noproc)
			cpstate = CP_IDLE;
		else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
	/*
	 * Adjust priority of current process.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
		if(++pp->p_cpu == 0)
			pp->p_cpu--;
		if(pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * Time moves on.
	 */
	++lbolt;

	/*
	 * Time moves on for protocols.
	 */
	++protoslow; ++protofast;

#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif
	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * multiply the previous cpu usage estimate by
 *		nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm which
 * decayed the priorities by a constant, and decayed away all knowledge
 * of previous activity in about 20 seconds.  Under heavy load,
 * the previous algorithm degenerated to round-robin with poor response
 * time when there was a high load average.
 */
#undef ave
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];
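/*
 * Illustrative arithmetic (not part of the original source): each
 * second p_cpu is scaled by b/(b+1) with b = nrscale*avenrun[0], so
 * after n seconds a tick's contribution falls to (b/(b+1))**n of its
 * weight.  Solving (b/(b+1))**n = 0.1 gives n = ln(10)/ln((b+1)/b),
 * roughly 5-6, 24, 47 and 93 seconds for load averages of 1, 5, 10
 * and 20, consistent with the table above.
 */
#ifdef notdef
/*
 * Minimal sketch (never compiled) of one second's worth of decay as
 * applied to p_cpu in softclock() below, before nice is added in.
 */
decaystep(cpu, loadav)
	int cpu;
	double loadav;
{

	return (ave(cpu, loadav*nrscale));
}
#endif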

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;		/* exp(-1/20) */
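/*
 * (Illustrative note: since ccpu == exp(-1/20), the p_pctcpu estimate
 * maintained in softclock() below loses about 5% of its value for
 * each second in which a process accumulates no ticks -- half of it
 * in about 14 seconds, 90% of it in about 46.)
 */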

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panic's!)
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
				splx(s);
				break;
			}
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0) {
		vmpago();
	}

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Run network slow and fast timeouts.
	 */
	if (protofast >= hz / PR_FASTHZ) {
		protofast = 0;
		pffasttimo();
	}
	if (protoslow >= hz / PR_SLOWHZ) {
		protoslow = 0;
		pfslowtimo();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... if hardclock()
		 * calls softclock() directly, it prevents
		 * this code from running when the priority
		 * was raised when the clock interrupt occurred.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		time += lbolt / hz;
		lbolt %= hz;
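		/*
		 * (For instance, if softclock() had been held off long
		 * enough for lbolt to reach 2*hz+3, time advances by two
		 * seconds here and lbolt is left at 3.)
		 */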

		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);

		/*
		 * Recompute process priorities, and handle sleep()
		 * system calls as well as internal sleeps with
		 * timeouts (the tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character.)  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If process has clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it a SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If process is blocked, increment computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update digital filter estimation of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
				    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
			     pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * Now have computed new process priority
			 * in p->p_usrpri.  Carefully change p->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin!=0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * then check to see if it has now been
		 * running more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each structure
 * entry is the number of clock ticks more
 * than the previous entry.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstr arg");
/* END DEBUGGING CODE */
	t = tim;
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}
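
/*
 * Illustrative example (not part of the original source, never
 * compiled): three timeout() calls for 2*hz, 5*hz and 8*hz ticks
 * leave calltodo holding c_time deltas of 2*hz, 3*hz and 3*hz, so
 * hardclock() only ever ages the first pending entry and softclock()
 * runs each entry as its delta reaches zero.
 */
#ifdef notdef
exampletimeouts()
{
	int tfunc();			/* hypothetical callout function */

	timeout(tfunc, (caddr_t)0, 2*hz);	/* head: c_time == 2*hz */
	timeout(tfunc, (caddr_t)0, 5*hz);	/* next: c_time == 3*hz */
	timeout(tfunc, (caddr_t)0, 8*hz);	/* last: c_time == 3*hz */
}
#endif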
498