/*	$OpenBSD: kern_timeout.c,v 1.95 2023/07/29 06:52:08 anton Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

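/*
 * BUCKET(rel, abs) picks the wheel slot for a tick-based timeout: the
 * relative distance "rel" (to_time - ticks) selects the wheel level
 * (level n covers distances up to 1 << ((n + 1) * WHEELBITS) ticks)
 * and the absolute expiry time "abs" (to_time) is hashed into one of
 * that level's buckets with MASKWHEEL().  MOVEBUCKET(wheel, time)
 * dumps the bucket that "time" hashes to on the given level onto the
 * todo queue so softclock() can run or cascade its timeouts.
 */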
#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

/*
 * Circular queue definitions.
 */

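/*
 * Each queue head and each timeout's to_list member is a struct circq
 * link in a doubly linked ring.  An empty queue is a ring containing
 * only its own head, so the empty test is a single pointer comparison
 * and insertion and removal never need NULL checks.
 */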
#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);      \
		(fst)->prev = (snd)->prev;      \
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;      \
	(elem)->prev->next = (elem)->next;      \
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
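/*
 * For example, if "ticks" is within 10 of INT_MAX and a timeout is
 * added with to_ticks = 20, to->to_time wraps around to a negative
 * value, so "to->to_time < ticks" would wrongly report it as due.
 * The signed difference "to->to_time - ticks" wraps back to 20 and
 * stays correct, which is why only that difference is ever compared.
 */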

void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

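/*
 * Illustrative usage (hypothetical caller, not part of this file):
 * a timeout is initialized once and may then be scheduled, moved and
 * cancelled any number of times.
 *
 *	timeout_set(&sc->sc_tmo, mydrv_tick, sc);
 *	timeout_add_sec(&sc->sc_tmo, 1);
 *	...
 *	timeout_del_barrier(&sc->sc_tmo);
 */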
void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
    int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_kclock = kclock;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	if (!kcov_cold)
		new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

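/*
 * The wrappers below convert a delay given in conventional time units
 * into ticks for timeout_add().  The 64-bit conversions are clamped to
 * INT_MAX ticks, and a nonzero delay that would truncate to zero ticks
 * is rounded up to one tick so that it still fires.
 */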
int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_abs_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to->to_kclock != KCLOCK_NONE);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	if (!kcov_cold)
		to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

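/*
 * timeout_barrier() makes sure that any instance of "to" that was
 * already queued to run, or is currently running, has finished before
 * the caller proceeds.  It queues a private barrier timeout at the
 * tail of the queue the target would run from (timeout_proc for
 * process-context timeouts, timeout_todo otherwise) and sleeps until
 * that barrier's handler signals the condition variable.
 */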
void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int procflag;

	procflag = (to->to_flags & TIMEOUT_PROC);
	timeout_sync_order(procflag);

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, KCLOCK_NONE,
	    procflag);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (procflag)
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	if (procflag)
		wakeup_one(&timeout_proc);
	else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}

void
timeout_barrier_timeout(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

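/*
 * Select the timeout_wheel_kc[] bucket for a kclock timeout: the
 * distance from the last wheel scan picks the wheel level, and the
 * offset-adjusted absolute expiry time is hashed into one of that
 * level's buckets with timeout_maskwheel().
 */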
uint32_t
timeout_bucket(const struct timeout *to)
{
	struct timespec diff, shifted_abstime;
	struct kclock *kc;
	uint32_t level;

	KASSERT(to->to_kclock == KCLOCK_UPTIME);
	kc = &timeout_kclock[to->to_kclock];

	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits.  The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits.  We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash.  The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
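	/* 7812500 ns = 1 second / 128, i.e. the top 7 bits of tv_nsec. */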
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	int b, done, first, i, last, level, need_softclock = 1, off;

	mtx_enter(&timeout_mutex);

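	/*
	 * Dump the current tick's level-0 bucket onto the todo queue.
	 * Whenever a level's index wraps back to zero we have finished
	 * a lap of that level, so the next level's current bucket is
	 * cascaded as well.
	 */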
	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
	nanouptime(&now);
	timespecsub(&now, lastscan, &elapsed);
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

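/*
 * Run a due timeout.  The caller has already removed it from its
 * queue; it is marked TRIGGERED and timeout_mutex is dropped around
 * the handler call, so the handler runs unlocked and may itself add
 * or delete timeouts.
 */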
void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

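/*
 * A timeout pulled off the todo queue is either rescheduled into its
 * wheel (if it is not yet due), queued for the softclock thread (if it
 * is due and needs process context), or run directly from the soft
 * interrupt.  The two helpers below do this for kclock-based and
 * tick-based timeouts respectively.
 */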
void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
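	/*
	 * Note the first timeout on the new queue: everything at or
	 * after it in the merged todo queue was newly added rather
	 * than cascaded from a wheel, which only matters for the
	 * scheduled/rescheduled statistics kept by the helpers above.
	 */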
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (to->to_kclock != KCLOCK_NONE)
			softclock_process_kclock_timeout(to, new);
		else
			softclock_process_tick_timeout(to, new);
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&timeout_proc, PSWP, "bored");
		sleep_finish(0, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);

const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}

	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
	else {
		if (to->to_kclock != KCLOCK_NONE)
			wheel = timeout_wheel_kc;
		else
			wheel = timeout_wheel;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (to->to_kclock != KCLOCK_NONE) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s  %8s  %7s  0x%0*lx  %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else {
		db_printf("%20d  %8s  %7s  0x%0*lx  %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s  %8s\n", "lastscan", "clock");
	db_printf("%20d  %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s  %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s  %8s  %7s  %*s  %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif