/*	$OpenBSD: kern_timeout.c,v 1.82 2020/10/20 22:37:12 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])
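
/*
 * Worked example: with WHEELBITS == 8, BUCKET() picks the wheel level
 * from the relative time and the slot within that level from the
 * absolute expiry time.  For rel = 1000 ticks and abs = 0x12345:
 *
 *	rel > (1 << WHEELBITS) and rel <= (1 << (2*WHEELBITS)), so level 1;
 *	MASKWHEEL(1, 0x12345) = (0x12345 >> 8) & 0xff = 0x23 = 35;
 *	the timeout goes into timeout_wheel[35 + WHEELSIZE], i.e. slot 291.
 */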

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);      \
		(fst)->prev = (snd)->prev;      \
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;      \
	(elem)->prev->next = (elem)->next;      \
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void kclock_nanotime(int, struct timespec *);
void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);
void timeout_proc_barrier(void *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
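
/*
 * For example, if ticks is 100 shy of INT_MAX and a timeout is added
 * with to_ticks = 200, to->to_time wraps around to a negative value.
 * Comparing to->to_time against ticks directly would claim the timeout
 * is already due, but the difference "to->to_time - ticks" still comes
 * out to 200 and only reaches zero once those 200 ticks have elapsed.
 */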

void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}
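
/*
 * With WHEELBITS == 8 the loop above yields level widths of 2, 512,
 * 131072 and 33554432 seconds: each level-0 kclock bucket spans 1/128
 * of a second (see timeout_maskwheel() below), so 256 buckets cover
 * 2 seconds, and every higher level is 256 times wider than the one
 * beneath it.
 */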

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

static inline void
_timeout_set(struct timeout *to, void (*fn)(void *), void *arg, int flags,
    int kclock)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
	to->to_kclock = kclock;
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	_timeout_set(new, fn, arg, 0, KCLOCK_NONE);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
{
	_timeout_set(to, fn, arg, flags, KCLOCK_NONE);
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	_timeout_set(new, fn, arg, TIMEOUT_PROC, KCLOCK_NONE);
}

void
timeout_set_kclock(struct timeout *to, void (*fn)(void *), void *arg,
    int flags, int kclock)
{
	_timeout_set(to, fn, arg, flags | TIMEOUT_KCLOCK, kclock);
}
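
/*
 * Typical usage sketch (the callback and softc names are hypothetical,
 * shown only to illustrate the API defined here):
 *
 *	timeout_set(&sc->sc_tmo, mydrv_timeout, sc);
 *	timeout_add_sec(&sc->sc_tmo, 1);
 *
 * schedules mydrv_timeout(sc) to run from the softclock interrupt
 * roughly one second later; timeout_del() or timeout_del_barrier()
 * cancels it.
 */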

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(!ISSET(new->to_flags, TIMEOUT_KCLOCK));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here; it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}
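
/*
 * The conversions above round down to whole ticks but never to zero
 * once a positive interval was requested.  For example, with hz = 100
 * a tick is 10000 microseconds, so timeout_add_msec(to, 25) schedules
 * the timeout 2 ticks (about 20 ms) out, while timeout_add_msec(to, 1)
 * is rounded up to a single tick rather than dropped.  The 64-bit
 * conversions clamp at INT_MAX ticks.
 */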

int
timeout_at_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED | TIMEOUT_KCLOCK));
	KASSERT(to->to_kclock != KCLOCK_NONE);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_in_nsec(struct timeout *to, uint64_t nsecs)
{
	struct timespec abstime, interval, now;

	kclock_nanotime(to->to_kclock, &now);
	NSEC_TO_TIMESPEC(nsecs, &interval);
	timespecadd(&now, &interval, &abstime);

	return timeout_at_ts(to, &abstime);
}
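
/*
 * Usage sketch for clock-based timeouts (mydrv_timeout and sc are
 * hypothetical names, shown only to illustrate the calls above):
 *
 *	timeout_set_kclock(&sc->sc_tmo, mydrv_timeout, sc, 0, KCLOCK_UPTIME);
 *	timeout_in_nsec(&sc->sc_tmo, 500 * 1000000ULL);
 *
 * arms the timeout 500 ms from now on the uptime clock; the deadline
 * is kept as an absolute timespec rather than a tick count.
 */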

void
kclock_nanotime(int kclock, struct timespec *now)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		nanouptime(now);
		break;
	default:
		panic("invalid kclock: 0x%x", kclock);
	}
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}
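
/*
 * Illustrative teardown pattern (sc and M_DEVBUF stand in for whatever
 * the caller actually uses):
 *
 *	timeout_del_barrier(&sc->sc_tmo);
 *	free(sc, M_DEVBUF, sizeof(*sc));
 *
 * Once timeout_del_barrier() returns, the handler has either been
 * cancelled or has finished running, so the argument may be freed,
 * provided the handler does not reschedule itself.
 */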

void
timeout_barrier(struct timeout *to)
{
	int needsproc = ISSET(to->to_flags, TIMEOUT_PROC);

	timeout_sync_order(needsproc);

	if (!needsproc) {
		KERNEL_LOCK();
		splx(splsoftclock());
		KERNEL_UNLOCK();
	} else {
		struct cond c = COND_INITIALIZER();
		struct timeout barrier;

		timeout_set_proc(&barrier, timeout_proc_barrier, &c);
		barrier.to_process = curproc->p_p;

		mtx_enter(&timeout_mutex);
		SET(barrier.to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
		mtx_leave(&timeout_mutex);

		wakeup_one(&timeout_proc);

		cond_wait(&c, "tmobar");
	}
}

void
timeout_proc_barrier(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

uint32_t
timeout_bucket(const struct timeout *to)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];
	struct timespec diff, shifted_abstime;
	uint32_t level;

	KASSERT(ISSET(to->to_flags, TIMEOUT_KCLOCK));
	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));

	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}
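
/*
 * For example, a timeout due 300 seconds after the last scan misses
 * level 0 (width 2 s) but fits level 1 (width 512 s), so it lands in
 * bucket 256 + timeout_maskwheel(1, &shifted_abstime) of
 * timeout_wheel_kc[].
 */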

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits.  The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits.  We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash.  The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}
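
/*
 * The divisor 7812500 is 1000000000 / 128, so "lo" is tv_nsec scaled
 * to 1/128-second units (0-127).  For example, abstime = 4.5 seconds
 * hashes to hi = 4 << 7 = 512 and lo = 500000000 / 7812500 = 64, i.e.
 * hash 576: bucket 64 on level 0 and bucket 2 on level 1.
 */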

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan;
	int b, done, first, i, last, level, need_softclock, off;

	nanouptime(&now);
	lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	timespecsub(&now, lastscan, &elapsed);
	need_softclock = 1;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
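	/*
	 * For example, if the last scan hashed to bucket 250 on level 0
	 * and the current time hashes to bucket 3, then first > last:
	 * buckets 250-255 and 0-3 are dumped and, because the level
	 * wrapped, the scan continues on level 1.  If instead first is 10
	 * and last is 14, only buckets 10-14 are dumped and the scan
	 * stops there ("done" is set).
	 */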
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
			softclock_process_kclock_timeout(to, new);
		else
			softclock_process_tick_timeout(to, new);
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward we need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);

const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}

	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
	else {
		if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
			wheel = timeout_wheel_kc;
		else
			wheel = timeout_wheel;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (ISSET(to->to_flags, TIMEOUT_KCLOCK)) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s  %8s  %7s  0x%0*lx  %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else {
		db_printf("%20d  %8s  %7s  0x%0*lx  %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s  %8s\n", "lastscan", "clock");
	db_printf("%20d  %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s  %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s  %8s  %7s  %*s  %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif