/*	$OpenBSD: kern_timeout.c,v 1.85 2021/06/19 02:05:33 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
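
/*
 * Worked example (added commentary, not part of the original file): how
 * BUCKET() picks a wheel level for a tick-based timeout, assuming the
 * constants above (WHEELBITS = 8, WHEELSIZE = 256).
 *
 *	rel = to->to_time - ticks = 1000, abs = to->to_time = 70000
 *
 * 1000 > (1 << 8) and 1000 <= (1 << 16), so the timeout lands on level 1;
 * its slot is MASKWHEEL(1, 70000) + WHEELSIZE, i.e.
 * ((70000 >> 8) & 255) + 256 = 17 + 256 = 273.
 *
 * Every hardclock tick MOVEBUCKET(0, ticks) dumps one level-0 bucket, and
 * whenever a level's index rolls over to 0 the corresponding bucket of the
 * next level is cascaded onto timeout_todo and rescheduled by softclock().
 */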

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);      \
		(fst)->prev = (snd)->prev;      \
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;      \
	(elem)->prev->next = (elem)->next;      \
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))
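
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * the CIRCQ macros implement a sentinel-headed, circular doubly-linked
 * list.  An empty list is a head whose next and prev both point at
 * itself, so insertion and removal never need NULL checks:
 *
 *	struct circq head, a, b;
 *
 *	CIRCQ_INIT(&head);		// head.next == head.prev == &head
 *	CIRCQ_INSERT_TAIL(&head, &a);	// head <-> a
 *	CIRCQ_INSERT_TAIL(&head, &b);	// head <-> a <-> b
 *	CIRCQ_REMOVE(&a);		// head <-> b, a's links invalidated
 *
 * CIRCQ_CONCAT() splices one whole list onto another in constant time,
 * which is what lets hardclock() dump an entire bucket onto timeout_todo
 * with a handful of pointer writes.
 */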

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void kclock_nanotime(int, struct timespec *);
void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}
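
/*
 * Illustrative note (added commentary, not part of the original file):
 * the cast above is a zero-offset container_of.  It is only valid while
 * to_list remains the first member of struct timeout, which could be
 * documented with a hypothetical compile-time assertion such as:
 *
 *	CTASSERT(offsetof(struct timeout, to_list) == 0);
 */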

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
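
/*
 * Worked example (added commentary, not part of the original file): with
 * 32-bit ticks, suppose ticks == INT_MAX - 10 and a timeout is added with
 * to_ticks == 30, so to->to_time wraps around to a negative value.
 * Comparing to->to_time against ticks directly would wrongly call it due,
 * but the subtraction
 *
 *	to->to_time - ticks == 30
 *
 * still comes out positive (this is the two's complement wraparound the
 * code relies on), so the timeout is correctly treated as 30 ticks in the
 * future.
 */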

void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}
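
/*
 * Illustrative note (added commentary, not part of the original file):
 * with WHEELBITS = 8 the loop above yields timeout_level_width[] =
 * { 2, 512, 131072, 33554432 } seconds, i.e. each kclock wheel level
 * spans 256 times the level below it, and timeout_bucket() picks the
 * first level wide enough to hold the remaining time.
 */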

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

static inline void
_timeout_set(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
    int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_kclock = kclock;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	_timeout_set(new, fn, arg, KCLOCK_NONE, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
{
	_timeout_set(to, fn, arg, KCLOCK_NONE, flags);
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	_timeout_set(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}

void
timeout_set_kclock(struct timeout *to, void (*fn)(void *), void *arg,
    int kclock, int flags)
{
	_timeout_set(to, fn, arg, kclock, flags | TIMEOUT_KCLOCK);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(!ISSET(new->to_flags, TIMEOUT_KCLOCK));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here; it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout is already scheduled and is now being moved
	 * earlier, reschedule it now.  Otherwise leave it in place and
	 * let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}
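
/*
 * Usage sketch (added commentary, not part of the original file): the
 * names mydev_softc, mydev_tick and MYDEV_POLL_TICKS below are invented
 * for the example.  A driver typically initializes a timeout once and
 * re-arms it from its own handler to poll periodically:
 *
 *	struct mydev_softc {
 *		struct timeout	sc_tmo;
 *	};
 *
 *	void
 *	mydev_tick(void *arg)
 *	{
 *		struct mydev_softc *sc = arg;
 *
 *		// ... poll the hardware ...
 *		timeout_add(&sc->sc_tmo, MYDEV_POLL_TICKS);
 *	}
 *
 *	// at attach time:
 *	//	timeout_set(&sc->sc_tmo, mydev_tick, sc);
 *	//	timeout_add_sec(&sc->sc_tmo, 1);
 */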

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}
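
/*
 * Worked example (added commentary, not part of the original file): with
 * hz = 100 the global "tick" is 10000 microseconds, so
 * timeout_add_msec(to, 25) yields 25 * 1000 / 10000 = 2 ticks, while
 * timeout_add_usec(to, 100) yields 100 / 10000 = 0 and is bumped to
 * 1 tick so that a nonzero request is never treated as already due.
 * The kclock interfaces below keep the full nanosecond expiry time
 * instead of converting it to ticks.
 */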

int
timeout_at_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED | TIMEOUT_KCLOCK));
	KASSERT(to->to_kclock != KCLOCK_NONE);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_in_nsec(struct timeout *to, uint64_t nsecs)
{
	struct timespec abstime, interval, now;

	kclock_nanotime(to->to_kclock, &now);
	NSEC_TO_TIMESPEC(nsecs, &interval);
	timespecadd(&now, &interval, &abstime);

	return timeout_at_ts(to, &abstime);
}
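
/*
 * Usage sketch (added commentary, not part of the original file): a
 * kclock timeout is declared against a specific clock and armed with an
 * interval in nanoseconds.  Reusing the hypothetical mydev_tick/sc names
 * from the sketch above:
 *
 *	struct timeout mydev_tmo;
 *
 *	timeout_set_kclock(&mydev_tmo, mydev_tick, sc, KCLOCK_UPTIME, 0);
 *	timeout_in_nsec(&mydev_tmo, 250 * 1000 * 1000);	// ~250 ms
 *
 * The expiry is stored as an absolute timespec on the chosen kclock, so
 * rearming and the comparisons done in softclock() never round through
 * ticks.
 */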

void
kclock_nanotime(int kclock, struct timespec *now)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		nanouptime(now);
		break;
	default:
		panic("invalid kclock: 0x%x", kclock);
	}
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int procflag;

	procflag = (to->to_flags & TIMEOUT_PROC);
	timeout_sync_order(procflag);

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, procflag);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (procflag)
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	if (procflag)
		wakeup_one(&timeout_proc);
	else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}
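
/*
 * Note (added commentary, not part of the original file):
 * timeout_barrier() works by queueing a dummy timeout at the tail of the
 * queue that services "to" (softint or thread) and then sleeping until
 * that dummy fires; by the time cond_wait() returns, any execution of
 * "to" that was already pending or running on that queue has finished.
 */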

void
timeout_barrier_timeout(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

uint32_t
timeout_bucket(const struct timeout *to)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];
	struct timespec diff, shifted_abstime;
	uint32_t level;

	KASSERT(ISSET(to->to_flags, TIMEOUT_KCLOCK));
	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));

	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits.  The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits.  We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash.  The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}
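
/*
 * Worked example (added commentary, not part of the original file): the
 * divisor 7812500 is 1000000000 / 128, so tv_nsec maps onto the 7 low
 * bits of the hash.  For abstime = 100.500000000 the hash is
 * (100 << 7) | (500000000 / 7812500) = 12800 | 64 = 12864; level 0 then
 * takes bits 0-7 (12864 & 255 = 64), level 1 takes bits 8-15
 * ((12864 >> 8) & 255 = 50), and so on.
 */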

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan;
	int b, done, first, i, last, level, need_softclock, off;

	nanouptime(&now);
	lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	timespecsub(&now, lastscan, &elapsed);
	need_softclock = 1;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
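	/*
	 * Illustrative example (added commentary, not in the original
	 * file): with WHEELSIZE = 256, if first = 250 and the current
	 * scan position gives last = 3, then first > last, so this level
	 * wrapped around; every bucket from 250 through 255 and 0 through
	 * 3 is dumped and "done" stays 0, forcing a scan of the next
	 * level as well.
	 */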
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
			softclock_process_kclock_timeout(to, new);
		else
			softclock_process_tick_timeout(to, new);
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0);
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward we need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);

const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}

	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
	else {
		if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
			wheel = timeout_wheel_kc;
		else
			wheel = timeout_wheel;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (ISSET(to->to_flags, TIMEOUT_KCLOCK)) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s  %8s  %7s  0x%0*lx  %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else {
		db_printf("%20d  %8s  %7s  0x%0*lx  %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s  %8s\n", "lastscan", "clock");
	db_printf("%20d  %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s  %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s  %8s  %7s  %*s  %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif
966