xref: /csrg-svn/sys/kern/kern_clock.c (revision 12823)
1 /*	kern_clock.c	4.55	83/05/30	*/
2 
3 #include "../machine/reg.h"
4 #include "../machine/psl.h"
5 
6 #include "../h/param.h"
7 #include "../h/systm.h"
8 #include "../h/dk.h"
9 #include "../h/callout.h"
10 #include "../h/dir.h"
11 #include "../h/user.h"
12 #include "../h/kernel.h"
13 #include "../h/proc.h"
14 #include "../h/vm.h"
15 #include "../h/text.h"
16 
17 #ifdef vax
18 #include "../vax/mtpr.h"
19 #endif
20 
21 #ifdef GPROF
22 #include "../h/gprof.h"
23 #endif
24 
25 /*
26  * Clock handling routines.
27  *
28  * This code is written to operate with two timers which run
29  * independently of each other. The main clock, running at hz
30  * times per second, is used to do scheduling and timeout calculations.
31  * The second timer does resource utilization estimation statistically
32  * based on the state of the machine phz times a second. Both functions
33  * can be performed by a single clock (i.e. hz == phz); however, the
34  * statistics will be much more prone to errors. Ideally a machine
35  * would have separate clocks measuring time spent in user state, system
36  * state, interrupt state, and idle state. These clocks would allow a non-
37  * approximate measure of resource utilization.
38  */
39 
40 /*
41  * TODO:
42  *	time of day, system/user timing, timeouts, profiling on separate timers
43  *	allocate more timeout table slots when table overflows.
44  */
45 
46 /*
47  * The hz hardware interval timer.
48  * We update the events relating to real time.
49  * If this timer is also being used to gather statistics,
50  * we run through the statistics gathering routine as well.
51  */
52 /*ARGSUSED*/
53 #ifdef vax
54 hardclock(pc, ps)
55 	caddr_t pc;
56 	int ps;
57 {
58 #endif
59 #ifdef sun
60 hardclock(regs)
61 	struct regs regs;
62 {
63 #define	ps	regs.r_sr
64 #define	pc	(caddr_t)regs.r_pc
65 #endif
66 	register struct callout *p1;
67 	register struct proc *p;
68 	register int s, cpstate;
69 	int needsoft = 0;
70 
71 	/*
72 	 * Update real-time timeout queue.
73 	 * At front of queue are some number of events which are ``due''.
74 	 * The time to these is <= 0 and if negative represents the
75 	 * number of ticks which have passed since it was supposed to happen.
76 	 * The rest of the q elements (times > 0) are events yet to happen,
77 	 * where the time for each is given as a delta from the previous.
78 	 * Decrementing just the first of these serves to decrement the time
79 	 * to all events.
80 	 */
81 	p1 = calltodo.c_next;
82 	while (p1) {
83 		if (--p1->c_time > 0)
84 			break;
85 		needsoft = 1;
86 		if (p1->c_time == 0)
87 			break;
88 		p1 = p1->c_next;
89 	}
90 
91 	/*
92 	 * Charge the time out based on the mode the cpu is in.
93 	 * Here again we fudge for the lack of proper interval timers
94 	 * assuming that the current state has been around at least
95 	 * one tick.
96 	 */
97 	if (USERMODE(ps)) {
98 #ifdef sun
99 		u.u_ar0 = &regs.r_r0;	/* aston needs ar0 */
100 #endif
101 		if (u.u_prof.pr_scale)
102 			needsoft = 1;
103 		/*
104 		 * CPU was in user state.  Increment
105 		 * user time counter, and process process-virtual time
106 		 * interval timer.
107 		 */
108 		bumptime(&u.u_ru.ru_utime, tick);
109 		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
110 		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
111 			psignal(u.u_procp, SIGVTALRM);
112 		if (u.u_procp->p_nice > NZERO)
113 			cpstate = CP_NICE;
114 		else
115 			cpstate = CP_USER;
116 	} else {
117 		/*
118 		 * CPU was in system state.  If profiling kernel
119 		 * increment a counter.  If no process is running
120 		 * then this is a system tick if we were running
121 		 * at a non-zero IPL (in a driver).  If a process is running,
122 		 * then we charge it with system time even if we were
123 		 * at a non-zero IPL, since the system often runs
124 		 * this way during processing of system calls.
125 		 * This is approximate, but the lack of true interval
126 		 * timers makes doing anything else difficult.
127 		 */
128 		cpstate = CP_SYS;
129 		if (noproc) {
130 			if (BASEPRI(ps))
131 				cpstate = CP_IDLE;
132 		} else {
133 			bumptime(&u.u_ru.ru_stime, tick);
134 		}
135 	}
136 
137 	/*
138 	 * If the cpu is currently scheduled to a process, then
139 	 * charge it with resource utilization for a tick, updating
140 	 * statistics which run in (user+system) virtual time,
141 	 * such as the cpu time limit and profiling timers.
142 	 * This assumes that the current process has been running
143 	 * the entire last tick.
144 	 */
145 	if (noproc == 0 && cpstate != CP_IDLE) {
146 		if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) >
147 		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
148 			psignal(u.u_procp, SIGXCPU);
149 			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
150 			    u.u_rlimit[RLIMIT_CPU].rlim_max)
151 				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
152 		}
153 		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
154 		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
155 			psignal(u.u_procp, SIGPROF);
156 		s = u.u_procp->p_rssize;
157 		u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0;	/* XXX */
158 		if (u.u_procp->p_textp) {
159 			register int xrss = u.u_procp->p_textp->x_rssize;
160 
161 			s += xrss;
162 			u.u_ru.ru_ixrss += xrss;
163 		}
164 		if (s > u.u_ru.ru_maxrss)
165 			u.u_ru.ru_maxrss = s;
166 	}
167 
168 	/*
169 	 * We adjust the priority of the current process.
170 	 * The priority of a process gets worse as it accumulates
171 	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
172 	 * and the formula for computing priorities (in kern_synch.c)
173 	 * will compute a different value each time the p_cpu increases
174 	 * by 4.  The cpu usage estimator ramps up quite quickly when
175 	 * the process is running (linearly), and decays away exponentially,
176 	 * at a rate which is proportionally slower when the system is
177 	 * busy.  The basic principal is that the system will 90% forget
178 	 * that a process used a lot of CPU time in 5*loadav seconds.
179 	 * This causes the system to favor processes which haven't run
180 	 * much recently, and to round-robin among other processes.
181 	 */
182 	if (!noproc) {
183 		p = u.u_procp;
184 		p->p_cpticks++;
185 		if (++p->p_cpu == 0)
186 			p->p_cpu--;
187 		if ((p->p_cpu&3) == 0) {
188 			(void) setpri(p);
189 			if (p->p_pri >= PUSER)
190 				p->p_pri = p->p_usrpri;
191 		}
192 	}
193 
194 	/*
195 	 * If the alternate clock has not made itself known then
196 	 * we must gather the statistics.
197 	 */
198 	if (phz == 0)
199 		gatherstats(pc, ps);
200 
201 	/*
202 	 * Increment the time-of-day, and schedule
203 	 * processing of the callouts at a very low cpu priority,
204 	 * so we don't keep the relatively high clock interrupt
205 	 * priority any longer than necessary.
206 	 */
207 	bumptime(&time, tick);
208 	if (needsoft)
209 		setsoftclock();
210 }
211 #ifdef sun
212 #undef pc
213 #undef ps
214 #endif
215 
216 /*
217  * Gather statistics on resource utilization.
218  *
219  * We make a gross assumption: that the system has been in the
220  * state it is in (user state, kernel state, interrupt state,
221  * or idle state) for the entire last time interval, and
222  * update statistics accordingly.
223  */
224 /*ARGSUSED*/
225 gatherstats(pc, ps)
226 	caddr_t pc;
227 	int ps;
228 {
229 	int cpstate, s;
230 
231 	/*
232 	 * Determine what state the cpu is in.
233 	 */
234 	if (USERMODE(ps)) {
235 		/*
236 		 * CPU was in user state.
237 		 */
238 		if (u.u_procp->p_nice > NZERO)
239 			cpstate = CP_NICE;
240 		else
241 			cpstate = CP_USER;
242 	} else {
243 		/*
244 		 * CPU was in system state.  If profiling kernel
245 		 * increment a counter.
246 		 */
247 		cpstate = CP_SYS;
248 		if (noproc && BASEPRI(ps))
249 			cpstate = CP_IDLE;
250 #ifdef GPROF
251 		s = pc - s_lowpc;
252 		if (profiling < 2 && s < s_textsize)
253 			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
254 #endif
255 	}
256 	/*
257 	 * We maintain statistics shown by user-level statistics
258 	 * programs:  the amount of time in each cpu state, and
259 	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
260 	 */
261 	cp_time[cpstate]++;
262 	for (s = 0; s < DK_NDRIVE; s++)
263 		if (dk_busy&(1<<s))
264 			dk_time[s]++;
265 }
266 
267 /*
268  * Software priority level clock interrupt.
269  * Run periodic events from timeout queue.
270  */
271 /*ARGSUSED*/
272 #ifdef vax
273 softclock(pc, ps)
274 	caddr_t pc;
275 	int ps;
276 {
277 #endif
278 #ifdef sun
279 softclock()
280 {
281 #define	pc	(caddr_t)u.u_ar0[PC]
282 #define	ps	u.u_ar0[PS]
283 #endif
284 
285 	for (;;) {
286 		register struct callout *p1;
287 		register caddr_t arg;
288 		register int (*func)();
289 		register int a, s;
290 
291 		s = spl7();
292 		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
293 			splx(s);
294 			break;
295 		}
296 		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
297 		calltodo.c_next = p1->c_next;
298 		p1->c_next = callfree;
299 		callfree = p1;
300 		splx(s);
301 		(*func)(arg, a);
302 	}
303 	/*
304 	 * If trapped user-mode, give it a profiling tick.
305 	 */
306 	if (USERMODE(ps) && u.u_prof.pr_scale) {
307 		u.u_procp->p_flag |= SOWEUPC;
308 		aston();
309 	}
310 }
311 
/*
 * bumptime: advance the timeval *tp by usec microseconds.
 *
 * At most one carry into tv_sec is performed, so the increment is
 * expected to be small (under one second) and tp->tv_usec to be
 * normalized on entry.
 */
bumptime(tp, usec)
	register struct timeval *tp;
	int usec;
{

	if ((tp->tv_usec += usec) >= 1000000) {
		/* microseconds overflowed: carry one second */
		tp->tv_sec++;
		tp->tv_usec -= 1000000;
	}
}
326 
327 /*
328  * Arrange that (*fun)(arg) is called in t/hz seconds.
329  */
330 timeout(fun, arg, t)
331 	int (*fun)();
332 	caddr_t arg;
333 	register int t;
334 {
335 	register struct callout *p1, *p2, *pnew;
336 	register int s = spl7();
337 
338 	if (t == 0)
339 		t = 1;
340 	pnew = callfree;
341 	if (pnew == NULL)
342 		panic("timeout table overflow");
343 	callfree = pnew->c_next;
344 	pnew->c_arg = arg;
345 	pnew->c_func = fun;
346 	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
347 		if (p2->c_time > 0)
348 			t -= p2->c_time;
349 	p1->c_next = pnew;
350 	pnew->c_next = p2;
351 	pnew->c_time = t;
352 	if (p2)
353 		p2->c_time -= t;
354 	splx(s);
355 }
356 
357 /*
358  * untimeout is called to remove a function timeout call
359  * from the callout structure.
360  */
361 untimeout(fun, arg)
362 	int (*fun)();
363 	caddr_t arg;
364 {
365 	register struct callout *p1, *p2;
366 	register int s;
367 
368 	s = spl7();
369 	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
370 		if (p2->c_func == fun && p2->c_arg == arg) {
371 			if (p2->c_next && p2->c_time > 0)
372 				p2->c_next->c_time += p2->c_time;
373 			p1->c_next = p2->c_next;
374 			p2->c_next = callfree;
375 			callfree = p2;
376 			break;
377 		}
378 	}
379 	splx(s);
380 }
381 
382 /*
383  * Compute number of hz until specified time.
384  * Used to compute third argument to timeout() from an
385  * absolute time.
386  */
387 hzto(tv)
388 	struct timeval *tv;
389 {
390 	register long ticks;
391 	register long sec;
392 	int s = spl7();
393 
394 	/*
395 	 * If number of milliseconds will fit in 32 bit arithmetic,
396 	 * then compute number of milliseconds to time and scale to
397 	 * ticks.  Otherwise just compute number of hz in time, rounding
398 	 * times greater than representible to maximum value.
399 	 *
400 	 * Delta times less than 25 days can be computed ``exactly''.
401 	 * Maximum value for any timeout in 10ms ticks is 250 days.
402 	 */
403 	sec = tv->tv_sec - time.tv_sec;
404 	if (sec <= 0x7fffffff / 1000 - 1000)
405 		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
406 			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
407 	else if (sec <= 0x7fffffff / hz)
408 		ticks = sec * hz;
409 	else
410 		ticks = 0x7fffffff;
411 	splx(s);
412 	return (ticks);
413 }
414 
415 profil()
416 {
417 	register struct a {
418 		short	*bufbase;
419 		unsigned bufsize;
420 		unsigned pcoffset;
421 		unsigned pcscale;
422 	} *uap = (struct a *)u.u_ap;
423 	register struct uprof *upp = &u.u_prof;
424 
425 	upp->pr_base = uap->bufbase;
426 	upp->pr_size = uap->bufsize;
427 	upp->pr_off = uap->pcoffset;
428 	upp->pr_scale = uap->pcscale;
429 }
430 
431 opause()
432 {
433 
434 	for (;;)
435 		sleep((caddr_t)&u, PSLEP);
436 }
437