xref: /netbsd-src/sys/kern/kern_clock.c (revision 6ea46cb5e46c49111a6ecf3bcbe3c7e2730fe9f6)
1 /*	$NetBSD: kern_clock.c,v 1.19 1994/06/29 06:32:19 cgd Exp $	*/
2 
3 /*-
4  * Copyright (c) 1982, 1986, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
41  */
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/dkstat.h>
46 #include <sys/callout.h>
47 #include <sys/kernel.h>
48 #include <sys/proc.h>
49 #include <sys/resourcevar.h>
50 
51 #include <machine/cpu.h>
52 
53 #ifdef GPROF
54 #include <sys/gmon.h>
55 #endif
56 
57 /*
58  * Clock handling routines.
59  *
60  * This code is written to operate with two timers that run independently of
61  * each other.  The main clock, running hz times per second, is used to keep
62  * track of real time.  The second timer handles kernel and user profiling,
63  * and does resource use estimation.  If the second timer is programmable,
64  * it is randomized to avoid aliasing between the two clocks.  For example,
65  * the randomization prevents an adversary from always giving up the cpu
66  * just before its quantum expires.  Otherwise, it would never accumulate
67  * cpu ticks.  The mean frequency of the second timer is stathz.
68  *
69  * If no second timer exists, stathz will be zero; in this case we drive
70  * profiling and statistics off the main clock.  This WILL NOT be accurate;
71  * do not do it unless absolutely necessary.
72  *
73  * The statistics clock may (or may not) be run at a higher rate while
74  * profiling.  This profile clock runs at profhz.  We require that profhz
75  * be an integral multiple of stathz.
76  *
77  * If the statistics clock is running fast, it must be divided by the ratio
78  * profhz/stathz for statistics.  (For profiling, every tick counts.)
79  */
80 
81 /*
82  * TODO:
83  *	allocate more timeout table slots when table overflows.
84  */
85 
/*
 * Bump a timeval by a small number of usec's.
 *
 * `t' points at the timeval to advance and `usec' is the increment in
 * microseconds; `t' is evaluated exactly once.  Only a single carry into
 * tv_sec is performed, so tv_usec + usec must stay below two seconds'
 * worth of microseconds for tv_usec to end up normalized.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * one statement and can be followed by a semicolon anywhere a statement
 * is allowed, including the arm of an unbraced if/else.
 */
#define BUMPTIME(t, usec) do { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
} while (0)
99 
/* Mean frequency of the statistics clock; zero if there is no second timer. */
int	stathz;
/* Frequency of the profile clock; initclocks() defaults it if left at zero. */
int	profhz;
/* Counter bumped by startprofclock() and dropped by stopprofclock(). */
int	profprocs;
/* Incremented once on every hardclock() call. */
int	ticks;
/*
 * psdiv is the reload value and pscnt the running countdown that
 * statclock() uses to divide a fast profile clock down to statistics.
 */
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

/* Both timevals are advanced by the same amount on every hardclock(). */
volatile struct	timeval time;
volatile struct	timeval mono_time;
109 
110 /*
111  * Initialize clock frequencies and start both clocks running.
112  */
113 void
114 initclocks()
115 {
116 	register int i;
117 
118 	/*
119 	 * Set divisors to 1 (normal case) and let the machine-specific
120 	 * code do its bit.
121 	 */
122 	psdiv = pscnt = 1;
123 	cpu_initclocks();
124 
125 	/*
126 	 * Compute profhz/stathz, and fix profhz if needed.
127 	 */
128 	i = stathz ? stathz : hz;
129 	if (profhz == 0)
130 		profhz = i;
131 	psratio = profhz / i;
132 }
133 
134 /*
135  * The real-time timer, interrupting hz times per second.
136  */
137 void
138 hardclock(frame)
139 	register struct clockframe *frame;
140 {
141 	register struct callout *p1;
142 	register struct proc *p;
143 	register int delta, needsoft;
144 	extern int tickdelta;
145 	extern long timedelta;
146 
147 	/*
148 	 * Update real-time timeout queue.
149 	 * At front of queue are some number of events which are ``due''.
150 	 * The time to these is <= 0 and if negative represents the
151 	 * number of ticks which have passed since it was supposed to happen.
152 	 * The rest of the q elements (times > 0) are events yet to happen,
153 	 * where the time for each is given as a delta from the previous.
154 	 * Decrementing just the first of these serves to decrement the time
155 	 * to all events.
156 	 */
157 	needsoft = 0;
158 	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
159 		if (--p1->c_time > 0)
160 			break;
161 		needsoft = 1;
162 		if (p1->c_time == 0)
163 			break;
164 	}
165 
166 	p = curproc;
167 	if (p) {
168 		register struct pstats *pstats;
169 
170 		/*
171 		 * Run current process's virtual and profile time, as needed.
172 		 */
173 		pstats = p->p_stats;
174 		if (CLKF_USERMODE(frame) &&
175 		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
176 		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
177 			psignal(p, SIGVTALRM);
178 		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
179 		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
180 			psignal(p, SIGPROF);
181 	}
182 
183 	/*
184 	 * If no separate statistics clock is available, run it from here.
185 	 */
186 	if (stathz == 0)
187 		statclock(frame);
188 
189 	/*
190 	 * Increment the time-of-day.  The increment is just ``tick'' unless
191 	 * we are still adjusting the clock; see adjtime().
192 	 */
193 	ticks++;
194 	if (timedelta == 0)
195 		delta = tick;
196 	else {
197 		delta = tick + tickdelta;
198 		timedelta -= tickdelta;
199 	}
200 	BUMPTIME(&time, delta);
201 	BUMPTIME(&mono_time, delta);
202 
203 	/*
204 	 * Process callouts at a very low cpu priority, so we don't keep the
205 	 * relatively high clock interrupt priority any longer than necessary.
206 	 */
207 	if (needsoft) {
208 		if (CLKF_BASEPRI(frame)) {
209 			/*
210 			 * Save the overhead of a software interrupt;
211 			 * it will happen as soon as we return, so do it now.
212 			 */
213 			(void)splsoftclock();
214 			softclock();
215 		} else
216 			setsoftclock();
217 	}
218 }
219 
220 /*
221  * Software (low priority) clock interrupt.
222  * Run periodic events from timeout queue.
223  */
224 /*ARGSUSED*/
225 void
226 softclock()
227 {
228 	register struct callout *c;
229 	register void *arg;
230 	register void (*func) __P((void *));
231 	register int s;
232 
233 	s = splhigh();
234 	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
235 		func = c->c_func;
236 		arg = c->c_arg;
237 		calltodo.c_next = c->c_next;
238 		c->c_next = callfree;
239 		callfree = c;
240 		splx(s);
241 		(*func)(arg);
242 		(void) splhigh();
243 	}
244 	splx(s);
245 }
246 
247 /*
248  * timeout --
249  *	Execute a function after a specified length of time.
250  *
251  * untimeout --
252  *	Cancel previous timeout function call.
253  *
254  *	See AT&T BCI Driver Reference Manual for specification.  This
255  *	implementation differs from that one in that no identification
256  *	value is returned from timeout, rather, the original arguments
257  *	to timeout are used to identify entries for untimeout.
258  */
259 void
260 timeout(ftn, arg, ticks)
261 	void (*ftn) __P((void *));
262 	void *arg;
263 	register int ticks;
264 {
265 	register struct callout *new, *p, *t;
266 	register int s;
267 
268 	if (ticks <= 0)
269 		ticks = 1;
270 
271 	/* Lock out the clock. */
272 	s = splhigh();
273 
274 	/* Fill in the next free callout structure. */
275 	if (callfree == NULL)
276 		panic("timeout table full");
277 	new = callfree;
278 	callfree = new->c_next;
279 	new->c_arg = arg;
280 	new->c_func = ftn;
281 
282 	/*
283 	 * The time for each event is stored as a difference from the time
284 	 * of the previous event on the queue.  Walk the queue, correcting
285 	 * the ticks argument for queue entries passed.  Correct the ticks
286 	 * value for the queue entry immediately after the insertion point
287 	 * as well.  Watch out for negative c_time values; these represent
288 	 * overdue events.
289 	 */
290 	for (p = &calltodo;
291 	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
292 		if (t->c_time > 0)
293 			ticks -= t->c_time;
294 	new->c_time = ticks;
295 	if (t != NULL)
296 		t->c_time -= ticks;
297 
298 	/* Insert the new entry into the queue. */
299 	p->c_next = new;
300 	new->c_next = t;
301 	splx(s);
302 }
303 
304 void
305 untimeout(ftn, arg)
306 	void (*ftn) __P((void *));
307 	void *arg;
308 {
309 	register struct callout *p, *t;
310 	register int s;
311 
312 	s = splhigh();
313 	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
314 		if (t->c_func == ftn && t->c_arg == arg) {
315 			/* Increment next entry's tick count. */
316 			if (t->c_next && t->c_time > 0)
317 				t->c_next->c_time += t->c_time;
318 
319 			/* Move entry from callout queue to callfree queue. */
320 			p->c_next = t->c_next;
321 			t->c_next = callfree;
322 			callfree = t;
323 			break;
324 		}
325 	splx(s);
326 }
327 
328 /*
329  * Compute number of hz until specified time.  Used to
330  * compute third argument to timeout() from an absolute time.
331  */
332 int
333 hzto(tv)
334 	struct timeval *tv;
335 {
336 	register long ticks, sec;
337 	int s;
338 
339 	/*
340 	 * If number of milliseconds will fit in 32 bit arithmetic,
341 	 * then compute number of milliseconds to time and scale to
342 	 * ticks.  Otherwise just compute number of hz in time, rounding
343 	 * times greater than representible to maximum value.
344 	 *
345 	 * Delta times less than 25 days can be computed ``exactly''.
346 	 * Maximum value for any timeout in 10ms ticks is 250 days.
347 	 */
348 	s = splhigh();
349 	sec = tv->tv_sec - time.tv_sec;
350 	if (sec <= 0x7fffffff / 1000 - 1000)
351 		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
352 			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
353 	else if (sec <= 0x7fffffff / hz)
354 		ticks = sec * hz;
355 	else
356 		ticks = 0x7fffffff;
357 	splx(s);
358 	return (ticks);
359 }
360 
361 /*
362  * Start profiling on a process.
363  *
364  * Kernel profiling passes proc0 which never exits and hence
365  * keeps the profile clock running constantly.
366  */
367 void
368 startprofclock(p)
369 	register struct proc *p;
370 {
371 	int s;
372 
373 	if ((p->p_flag & P_PROFIL) == 0) {
374 		p->p_flag |= P_PROFIL;
375 		if (++profprocs == 1 && stathz != 0) {
376 			s = splstatclock();
377 			psdiv = pscnt = psratio;
378 			setstatclockrate(profhz);
379 			splx(s);
380 		}
381 	}
382 }
383 
384 /*
385  * Stop profiling on a process.
386  */
387 void
388 stopprofclock(p)
389 	register struct proc *p;
390 {
391 	int s;
392 
393 	if (p->p_flag & P_PROFIL) {
394 		p->p_flag &= ~P_PROFIL;
395 		if (--profprocs == 0 && stathz != 0) {
396 			s = splstatclock();
397 			psdiv = pscnt = 1;
398 			setstatclockrate(stathz);
399 			splx(s);
400 		}
401 	}
402 }
403 
/*
 * Run-time copy of the compile-time constant DK_NDRIVE, the number of
 * ``drives'' whose busy time statclock() accumulates.
 */
int	dk_ndrive = DK_NDRIVE;
405 
406 /*
407  * Statistics clock.  Grab profile sample, and if divider reaches 0,
408  * do process and kernel statistics.
409  */
410 void
411 statclock(frame)
412 	register struct clockframe *frame;
413 {
414 #ifdef GPROF
415 	register struct gmonparam *g;
416 #endif
417 	register struct proc *p;
418 	register int i;
419 
420 	if (CLKF_USERMODE(frame)) {
421 		p = curproc;
422 		if (p->p_flag & P_PROFIL)
423 			addupc_intr(p, CLKF_PC(frame), 1);
424 		if (--pscnt > 0)
425 			return;
426 		/*
427 		 * Came from user mode; CPU was in user state.
428 		 * If this process is being profiled record the tick.
429 		 */
430 		p->p_uticks++;
431 		if (p->p_nice > NZERO)
432 			cp_time[CP_NICE]++;
433 		else
434 			cp_time[CP_USER]++;
435 	} else {
436 #ifdef GPROF
437 		/*
438 		 * Kernel statistics are just like addupc_intr, only easier.
439 		 */
440 		g = &_gmonparam;
441 		if (g->state == GMON_PROF_ON) {
442 			i = CLKF_PC(frame) - g->lowpc;
443 			if (i < g->textsize) {
444 				i /= HISTFRACTION * sizeof(*g->kcount);
445 				g->kcount[i]++;
446 			}
447 		}
448 #endif
449 		if (--pscnt > 0)
450 			return;
451 		/*
452 		 * Came from kernel mode, so we were:
453 		 * - handling an interrupt,
454 		 * - doing syscall or trap work on behalf of the current
455 		 *   user process, or
456 		 * - spinning in the idle loop.
457 		 * Whichever it is, charge the time as appropriate.
458 		 * Note that we charge interrupts to the current process,
459 		 * regardless of whether they are ``for'' that process,
460 		 * so that we know how much of its real time was spent
461 		 * in ``non-process'' (i.e., interrupt) work.
462 		 */
463 		p = curproc;
464 		if (CLKF_INTR(frame)) {
465 			if (p != NULL)
466 				p->p_iticks++;
467 			cp_time[CP_INTR]++;
468 		} else if (p != NULL) {
469 			p->p_sticks++;
470 			cp_time[CP_SYS]++;
471 		} else
472 			cp_time[CP_IDLE]++;
473 	}
474 	pscnt = psdiv;
475 
476 	/*
477 	 * We maintain statistics shown by user-level statistics
478 	 * programs:  the amount of time in each cpu state, and
479 	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
480 	 *
481 	 * XXX	should either run linked list of drives, or (better)
482 	 *	grab timestamps in the start & done code.
483 	 */
484 	for (i = 0; i < DK_NDRIVE; i++)
485 		if (dk_busy & (1 << i))
486 			dk_time[i]++;
487 
488 	/*
489 	 * We adjust the priority of the current process.  The priority of
490 	 * a process gets worse as it accumulates CPU time.  The cpu usage
491 	 * estimator (p_estcpu) is increased here.  The formula for computing
492 	 * priorities (in kern_synch.c) will compute a different value each
493 	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
494 	 * quite quickly when the process is running (linearly), and decays
495 	 * away exponentially, at a rate which is proportionally slower when
496 	 * the system is busy.  The basic principal is that the system will
497 	 * 90% forget that the process used a lot of CPU time in 5 * loadav
498 	 * seconds.  This causes the system to favor processes which haven't
499 	 * run much recently, and to round-robin among other processes.
500 	 */
501 	if (p != NULL) {
502 		p->p_cpticks++;
503 		if (++p->p_estcpu == 0)
504 			p->p_estcpu--;
505 		if ((p->p_estcpu & 3) == 0) {
506 			resetpriority(p);
507 			if (p->p_priority >= PUSER)
508 				p->p_priority = p->p_usrpri;
509 		}
510 	}
511 }
512 
513 /*
514  * Return information about system clocks.
515  */
516 sysctl_clockrate(where, sizep)
517 	register char *where;
518 	size_t *sizep;
519 {
520 	struct clockinfo clkinfo;
521 
522 	/*
523 	 * Construct clockinfo structure.
524 	 */
525 	clkinfo.hz = hz;
526 	clkinfo.tick = tick;
527 	clkinfo.profhz = profhz;
528 	clkinfo.stathz = stathz ? stathz : hz;
529 	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
530 }
531 
532 #ifdef DDB
533 #include <ddb/db_access.h>
534 #include <ddb/db_sym.h>
535 
536 void db_show_callout(long addr, int haddr, int count, char *modif)
537 {
538 	register struct callout *p1;
539 	register int	cum;
540 	register int	s;
541 	db_expr_t	offset;
542 	char		*name;
543 
544         db_printf("      cum     ticks      arg  func\n");
545 	s = splhigh();
546 	for (cum = 0, p1 = calltodo.c_next; p1; p1 = p1->c_next) {
547 		register int t = p1->c_time;
548 
549 		if (t > 0)
550 			cum += t;
551 
552 		db_find_sym_and_offset(p1->c_func, &name, &offset);
553 		if (name == NULL)
554 			name = "?";
555 
556                 db_printf("%9d %9d %8x  %s (%x)\n",
557 			  cum, t, p1->c_arg, name, p1->c_func);
558 	}
559 	splx(s);
560 }
561 #endif
562