/*	$OpenBSD: kern_timeout.c,v 1.90 2022/12/31 16:06:24 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])
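/*
 * Worked example (illustrative values only): with WHEELBITS = 8, a timeout
 * that is rel = 300 ticks in the future satisfies 300 > (1 << WHEELBITS)
 * but 300 <= (1 << 2*WHEELBITS), so BUCKET() selects level 1 and hashes on
 * bits 8-15 of the absolute expiry time:
 *
 *	BUCKET(300, abs) == timeout_wheel[MASKWHEEL(1, abs) + WHEELSIZE]
 *
 * The further away a timeout is, the coarser the level it is parked on;
 * it is rehashed onto a finer level as it nears expiry (see MOVEBUCKET()
 * and softclock_process_tick_timeout()).
 */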

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);      \
		(fst)->prev = (snd)->prev;      \
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;      \
	(elem)->prev->next = (elem)->next;      \
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))
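/*
 * Note (added for clarity): CIRCQ_FOREACH() advances by following the
 * current element's next pointer, so entries must not be removed from the
 * queue while it is being walked.  The scan loops in softclock() and
 * softclock_thread() instead pop entries one at a time with
 * CIRCQ_FIRST()/CIRCQ_REMOVE() before acting on them.
 */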

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}
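/*
 * Editorial note: the cast above is only valid because to_list is the
 * first member of struct timeout (see <sys/timeout.h>), i.e. the struct
 * timeout and its embedded struct circq share the same address.  A
 * compile-time check along these lines (a sketch, not present in the
 * original) would make that assumption explicit:
 *
 *	CTASSERT(offsetof(struct timeout, to_list) == 0);
 */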

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 *
 * We use the fact that any element added to the queue must be added with
 * a positive time.  That means that any element `to' on the queue cannot
 * be scheduled to time out more than INT_MAX ticks in the future, but
 * to->to_time can be positive or negative, so comparing it with anything
 * is dangerous.  The only way we can use the to->to_time value in any
 * predictable way is when we calculate how far in the future `to' will
 * time out: "to->to_time - ticks".  The result will always be positive
 * for future timeouts and zero or negative for due timeouts.
 */
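/*
 * Example (illustrative): suppose ticks has reached 0x7ffffff0 and a
 * timeout is added with to_ticks = 0x20.  to->to_time wraps to the
 * negative value (int)0x80000010, so a naive "to->to_time > ticks" test
 * would wrongly report the timeout as already due.  The difference
 * "to->to_time - ticks" still evaluates to 0x20, the number of ticks
 * remaining, assuming the usual two's-complement wraparound the kernel
 * relies on here.
 */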

void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}
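/*
 * Note (added for clarity; values follow directly from the loop above):
 * with WHEELBITS = 8 the level widths are 2 << (level * 8), i.e. 2, 512,
 * 131072 and 33554432 seconds, so the kclock wheel levels cover roughly
 * 2 seconds, ~8.5 minutes, ~1.5 days and ~388 days respectively.
 */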

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
    int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_kclock = kclock;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout is already scheduled and is now being moved
	 * earlier, reschedule it now.  Otherwise leave it in place and
	 * let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}
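/*
 * Usage sketch (hypothetical names, for illustration only): a driver
 * typically initializes a timeout once and then (re)arms it with one of
 * the conversion helpers above, e.g.
 *
 *	timeout_set(&sc->sc_tmo, mydrv_tick, sc);
 *	timeout_add_msec(&sc->sc_tmo, 100);
 *
 * Each wrapper rounds a nonzero interval up to at least one tick so that
 * very short timeouts are not silently dropped.
 */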

int
timeout_abs_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to->to_kclock != KCLOCK_NONE);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int procflag;

	procflag = (to->to_flags & TIMEOUT_PROC);
	timeout_sync_order(procflag);

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, KCLOCK_NONE,
	    procflag);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (procflag)
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	if (procflag)
		wakeup_one(&timeout_proc);
	else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}

void
timeout_barrier_timeout(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}
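/*
 * Editorial note: the barrier timeout is appended behind any work that is
 * already pending on the relevant queue, so by the time its handler runs
 * and signals the condition variable, a handler that was queued or running
 * ahead of it has finished.  timeout_del_barrier() relies on this to
 * guarantee that the deleted timeout is no longer executing on return.
 */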

uint32_t
timeout_bucket(const struct timeout *to)
{
	struct timespec diff, shifted_abstime;
	struct kclock *kc;
	uint32_t level;

	KASSERT(to->to_kclock == KCLOCK_UPTIME);
	kc = &timeout_kclock[to->to_kclock];

	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits.  The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits.  We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash.  The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}
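/*
 * Worked example (illustrative values only): for abstime = 5.015625000,
 * hi = 5 << 7 = 640 and lo = 15625000 / 7812500 = 2, so the 32-bit hash
 * is 642.  At level 0 the bucket index is 642 & WHEELMASK = 130; at
 * level 1 it is (642 >> 8) & WHEELMASK = 2.  The divisor 7812500 is
 * 1000000000 / 128, which maps tv_nsec onto the low 7 bits of the hash.
 */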

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan;
	int b, done, first, i, last, level, need_softclock, off;

	nanouptime(&now);
	lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	timespecsub(&now, lastscan, &elapsed);
	need_softclock = 1;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
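	/*
	 * Example (illustrative): with WHEELSIZE = 256, if the last scan
	 * hashed to bucket 250 on some level and the current time hashes
	 * to bucket 3 with less than a full level width elapsed, then
	 * first = 250 > last = 3, so buckets 250..255 and 0..3 are dumped
	 * and, because a lap was completed, the next level is scanned too.
	 */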
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (to->to_kclock != KCLOCK_NONE)
			softclock_process_kclock_timeout(to, new);
		else
			softclock_process_tick_timeout(to, new);
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}
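/*
 * Editorial note: "new" flips to 1 once the scan in softclock() reaches
 * first_new, the head of the batch just moved over from timeout_new.
 * Timeouts seen before that point were already on the wheel and are being
 * rescheduled, so only they count toward tos_rescheduled and tos_late.
 */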

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0);
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward we need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);

const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}

	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
	else {
		if (to->to_kclock != KCLOCK_NONE)
			wheel = timeout_wheel_kc;
		else
			wheel = timeout_wheel;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (to->to_kclock != KCLOCK_NONE) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s  %8s  %7s  0x%0*lx  %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else {
		db_printf("%20d  %8s  %7s  0x%0*lx  %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s  %8s\n", "lastscan", "clock");
	db_printf("%20d  %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s  %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s  %8s  %7s  %*s  %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif
929