/*	$OpenBSD: kern_clock.c,v 1.46 2004/06/24 19:35:24 tholo Exp $	*/
/*	$NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>
#include <sys/sched.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
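
/*
 * Concrete example (illustrative values only, not taken from this file):
 * a machine might keep time with hz = 100 while a separate programmable
 * timer supplies stathz = 128 as a mean rate (it may be randomized);
 * while profiling, that timer can be run at profhz = 1024, an integral
 * multiple of stathz, and statistics are then taken only on every
 * psratio'th tick.
 */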

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
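
/*
 * Illustrative use of the macro above (hypothetical values): with hz = 100,
 * tick is 10000 usec, so BUMPTIME(&time, tick) advances time.tv_usec by
 * 10000 and carries one second into time.tv_sec once tv_usec reaches
 * 1000000.  The macro assumes the increment is small enough that a single
 * carry suffices.
 */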

int	stathz;
int	schedhz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */
int	tickfix, tickfixinterval;	/* used if tick not really integral */
static int tickfixcnt;			/* accumulated fractional error */

long cp_time[CPUSTATES];

volatile time_t time_second;
volatile time_t time_uptime;

volatile struct	timeval time
	__attribute__((__aligned__(__alignof__(quad_t))));
volatile struct	timeval mono_time;

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
void	*softclock_si;
void	generic_softclock(void *);

void
generic_softclock(void *ignore)
{
	/*
	 * XXX - don't commit; this is just a dummy wrapper until we teach
	 *       everyone to deal with the changed prototype for softclock().
	 */
	softclock();
}
#endif

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	int i;

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
	softclock_si = softintr_establish(IPL_SOFTCLOCK, generic_softclock, NULL);
	if (softclock_si == NULL)
		panic("initclocks: unable to register softclock intr");
#endif

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}
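
/*
 * Worked example (values are illustrative, not mandated by this code):
 * if cpu_initclocks() sets stathz = 128 and profhz = 1024, then
 * psratio = 1024 / 128 = 8, so while profiling the statistics code only
 * acts on every 8th statclock() tick.  If profhz were still 0 here it
 * would default to stathz (or to hz when no separate statistics clock
 * exists) and psratio would be 1.
 */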

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
	struct proc *p;
	int delta;
	extern int tickdelta;
	extern long timedelta;
#ifdef __HAVE_CPUINFO
	struct cpu_info *ci = curcpu();
#endif

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}
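
	/*
	 * Hedged example of what drives the code above (userland, not part
	 * of this file): a process that does
	 *
	 *	struct itimerval it = { { 0, 0 }, { 5, 0 } };
	 *	setitimer(ITIMER_VIRTUAL, &it, NULL);
	 *
	 * gets SIGVTALRM once hardclock() has subtracted ``tick'' from the
	 * timer often enough to cover 5 seconds of user-mode execution;
	 * ITIMER_PROF works the same way but also counts kernel time spent
	 * on the process's behalf.
	 */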

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

#if defined(__HAVE_CPUINFO)
	if (--ci->ci_schedstate.spc_rrticks <= 0)
		roundrobin(ci);

	/*
	 * If we are not the primary CPU, we're not allowed to do
	 * any more work.
	 */
	if (CPU_IS_PRIMARY(ci) == 0)
		return;
#endif

	/*
	 * Increment the time-of-day.  The increment is normally just
	 * ``tick''.  If the machine has a clock frequency such that
	 * ``hz'' does not divide the second evenly into an integral
	 * number of microseconds, a periodic adjustment must be applied.
	 * Finally, if we are still adjusting the time (see adjtime()),
	 * ``tickdelta'' may also be added in.
	 */
	ticks++;
	delta = tick;

	if (tickfix) {
		tickfixcnt += tickfix;
		if (tickfixcnt >= tickfixinterval) {
			delta++;
			tickfixcnt -= tickfixinterval;
		}
	}
	/* Imprecise 4bsd adjtime() handling */
	if (timedelta != 0) {
		delta += tickdelta;
		timedelta -= tickdelta;
	}
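
	/*
	 * Worked example for the tickfix mechanism above (hypothetical
	 * machine, values not from this file): with hz = 1024, tick is
	 * 1000000 / 1024 = 976 usec, which under-counts each second by
	 * 1000000 - 976 * 1024 = 576 usec.  Machine-dependent code could
	 * set tickfix = 9 and tickfixinterval = 16, so an extra microsecond
	 * is added whenever the accumulated error reaches 16:
	 * 1024 * 9 / 16 = 576 extra usec per second, exactly the shortfall.
	 */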

#ifdef notyet
	microset();
#endif

	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);
	time_second = time.tv_sec;
	time_uptime = mono_time.tv_sec;

#ifdef CPU_CLOCKUPDATE
	CPU_CLOCKUPDATE();
#endif

	/*
	 * Update real-time timeout queue.
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (timeout_hardclock_update()) {
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
		softintr_schedule(softclock_si);
#else
		setsoftclock();
#endif
	}
}

/*
 * Compute number of hz until specified time.  Used to
 * compute the second argument to timeout_add() from an absolute time.
 */
int
hzto(struct timeval *tv)
{
	struct timeval now;
	unsigned long ticks;
	long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	getmicrotime(&now);
	sec = tv->tv_sec - now.tv_sec;
	usec = tv->tv_usec - now.tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0 || (sec == 0 && usec <= 0)) {
		ticks = 0;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
		    / tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
		    + ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return ((int)ticks);
}
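
/*
 * Hedged usage sketch for hzto() (``some_timeout'' is hypothetical, not a
 * symbol from this file): to fire a timeout one second from now with
 * hz = 100 (tick = 10000 usec), a caller might do
 *
 *	struct timeval tv;
 *
 *	getmicrotime(&tv);
 *	tv.tv_sec += 1;
 *	timeout_add(&some_timeout, hzto(&tv));
 *
 * which yields (1000000 + 9999) / 10000 + 1 = 101 ticks; the extra tick
 * allows the partially elapsed current tick to expire.
 */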

/*
 * Compute number of hz in the specified amount of time.
 */
int
tvtohz(struct timeval *tv)
{
	unsigned long ticks;
	long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (sec < 0 || (sec == 0 && usec <= 0))
		ticks = 0;
	else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
		    / tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
		    + ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return ((int)ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}
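
/*
 * Illustrative note: startprofclock() and stopprofclock() are paired around
 * profiling activity, e.g. the profil(2) syscall or kernel profiling via
 * proc0 as noted above.  The first profiled process switches the statistics
 * clock from stathz up to profhz; the last one to stop switches it back.
 */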

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	int i;
#endif
#ifdef __HAVE_CPUINFO
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
#else
	static int schedclk;
#endif
	struct proc *p = curproc;

#ifdef __HAVE_CPUINFO
	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}

/* XXX Kludgey */
#define pscnt spc->spc_pscnt
#define cp_time spc->spc_cp_time
#endif

	if (CLKF_USERMODE(frame)) {
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame));
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

#ifdef __HAVE_CPUINFO
#undef pscnt
#undef cp_time
#endif

	if (p != NULL) {
		p->p_cpticks++;
		/*
		 * If no separate schedclock is provided, call it here at
		 * roughly 12-25 Hz; about 16 Hz is best.
		 */
		if (schedhz == 0) {
#ifdef __HAVE_CPUINFO
			if ((++curcpu()->ci_schedstate.spc_schedticks & 3) ==
			    0)
				schedclock(p);
#else
			if ((++schedclk & 3) == 0)
				schedclock(p);
#endif
		}
	}
}
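
/*
 * Illustrative behaviour of the divider above (example values only): with
 * stathz = 128 and profhz = 1024, profiling sets psdiv = psratio = 8, so
 * statclock() fires 1024 times per second but only every 8th call (when
 * pscnt reaches 0) charges cp_time and the per-process tick counters; the
 * other calls just take profiling samples.
 */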

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(char *where, size_t *sizep)
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
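
/*
 * Userland reads this structure through the kern.clockrate sysctl.  A
 * hedged sketch of a consumer (ordinary user-level C, not kernel code):
 *
 *	struct clockinfo ci;
 *	size_t len = sizeof(ci);
 *	int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
 *
 *	if (sysctl(mib, 2, &ci, &len, NULL, 0) == 0)
 *		printf("hz=%d stathz=%d profhz=%d\n",
 *		    ci.hz, ci.stathz, ci.profhz);
 */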

/*
 * Placeholders until everyone uses the timecounters code.
 * Won't improve anything except maybe removing a bunch of bugs in fixed code.
 */

void
getmicrotime(struct timeval *tvp)
{
	int s;

	s = splhigh();
	*tvp = time;
	splx(s);
}
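
/*
 * Note on the wrappers in this section: the get*() variants return the
 * value most recently stored by hardclock(), so they are accurate only to
 * the last tick, while microtime()/microuptime() consult the
 * machine-dependent clock and typically offer sub-tick resolution.  The
 * splhigh() above guards the non-atomic copy of the two-word timeval.
 */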

void
nanotime(struct timespec *tsp)
{
	struct timeval tv;

	microtime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, tsp);
}

void
getnanotime(struct timespec *tsp)
{
	struct timeval tv;

	getmicrotime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, tsp);
}

void
nanouptime(struct timespec *tsp)
{
	struct timeval tv;

	microuptime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, tsp);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timeval tv;

	getmicrouptime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct timeval tv;

	microtime(&tv);
	timersub(&tv, &boottime, tvp);
}

void
getmicrouptime(struct timeval *tvp)
{
	int s;

	s = splhigh();
	*tvp = mono_time;
	splx(s);
}