xref: /openbsd-src/sys/kern/kern_timeout.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /*	$OpenBSD: kern_timeout.c,v 1.96 2023/10/12 15:32:38 cheloha Exp $	*/
2 /*
3  * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
4  * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. The name of the author may not be used to endorse or promote products
14  *    derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
17  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
18  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
19  * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20  * EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kthread.h>
31 #include <sys/proc.h>
32 #include <sys/timeout.h>
33 #include <sys/mutex.h>
34 #include <sys/kernel.h>
35 #include <sys/queue.h>			/* _Q_INVALIDATE */
36 #include <sys/sysctl.h>
37 #include <sys/witness.h>
38 
39 #ifdef DDB
40 #include <machine/db_machdep.h>
41 #include <ddb/db_interface.h>
42 #include <ddb/db_sym.h>
43 #include <ddb/db_output.h>
44 #endif
45 
46 #include "kcov.h"
47 #if NKCOV > 0
48 #include <sys/kcov.h>
49 #endif
50 
/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */
#ifdef MULTIPROCESSOR
struct circq timeout_proc_mp;		/* [T] Process ctx + no kernel lock */
#endif

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

/* Per-kclock state, refreshed on every wheel scan by hardclock. */
struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];
/* Extract the bucket index for wheel level "wheel" from a tick count. */
#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

/*
 * Select the tick-wheel bucket for a timeout: "rel" (ticks until expiry)
 * chooses the wheel level, "abs" (absolute expiry tick) the bucket within
 * that level.
 */
#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

/* Dump a whole due bucket of the tick wheel onto the todo queue. */
#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
106 
/*
 * Circular queue definitions.
 */

/* Make "elem" an empty queue (it points at itself in both directions). */
#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

/* Append "elem" to "list"; bumps the pending-timeout counter. */
#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

/* Move the entire contents of "snd" to the tail of "fst" in O(1). */
#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);      \
		(fst)->prev = (snd)->prev;      \
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

/* Unlink "elem"; its pointers are poisoned to catch stale reuse. */
#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;      \
	(elem)->prev->next = (elem)->next;      \
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

/* Iterate "elem" over "list"; the body must not unlink "elem". */
#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))
150 
#ifdef WITNESS
/*
 * Pseudo lock objects so witness(4) can check the ordering of timeout
 * callbacks against real locks.  Process-context timeouts may sleep, so
 * they get a sleepable (rwlock-class) object; softint timeouts get a
 * spinning (mutex-class) object.
 */
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
#ifdef MULTIPROCESSOR
void softclock_thread_mp(void *);
#endif
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);
184 
/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.  (Relies on to_list being the
 * first member of struct timeout.)
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}
195 
/* Tell witness(4) about the lock order implied by running a timeout. */
static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}
201 
/* Acquire the pseudo lock before invoking a timeout callback. */
static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}
208 
/* Release the pseudo lock after a timeout callback has returned. */
static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}
214 
215 /*
216  * Some of the "math" in here is a bit tricky.
217  *
218  * We have to beware of wrapping ints.
219  * We use the fact that any element added to the queue must be added with a
220  * positive time. That means that any element `to' on the queue cannot be
221  * scheduled to timeout further in time than INT_MAX, but to->to_time can
222  * be positive or negative so comparing it with anything is dangerous.
223  * The only way we can use the to->to_time value in any predictable way
224  * is when we calculate how far in the future `to' will timeout -
225  * "to->to_time - ticks". The result will always be positive for future
226  * timeouts and 0 or negative for due timeouts.
227  */
228 
229 void
230 timeout_startup(void)
231 {
232 	int b, level;
233 
234 	CIRCQ_INIT(&timeout_new);
235 	CIRCQ_INIT(&timeout_todo);
236 	CIRCQ_INIT(&timeout_proc);
237 #ifdef MULTIPROCESSOR
238 	CIRCQ_INIT(&timeout_proc_mp);
239 #endif
240 	for (b = 0; b < nitems(timeout_wheel); b++)
241 		CIRCQ_INIT(&timeout_wheel[b]);
242 	for (b = 0; b < nitems(timeout_wheel_kc); b++)
243 		CIRCQ_INIT(&timeout_wheel_kc[b]);
244 
245 	for (level = 0; level < nitems(timeout_level_width); level++)
246 		timeout_level_width[level] = 2 << (level * WHEELBITS);
247 	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
248 }
249 
/*
 * Register the softclock soft interrupt and arrange for the softclock
 * thread(s) to be created once kthreads can be forked.
 */
void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}
262 
/* Initialize "new" as a plain tick-based timeout with no flags. */
void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, 0);
}
268 
269 void
270 timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
271     int flags)
272 {
273 	KASSERT(!ISSET(flags, ~(TIMEOUT_PROC | TIMEOUT_MPSAFE)));
274 
275 	to->to_func = fn;
276 	to->to_arg = arg;
277 	to->to_kclock = kclock;
278 	to->to_flags = flags | TIMEOUT_INITIALIZED;
279 
280 	/* For now, only process context timeouts may be marked MP-safe. */
281 	if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
282 		KASSERT(ISSET(to->to_flags, TIMEOUT_PROC));
283 }
284 
/* Initialize "new" as a tick-based timeout run from process context. */
void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}
290 
/*
 * Schedule the tick-based timeout "new" to fire to_ticks ticks from now.
 * Returns 1 if the timeout was newly scheduled, 0 if it was already
 * pending and has only been updated.
 */
int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		/*
		 * Compare deadlines relative to the current tick so the
		 * comparison stays correct when "ticks" wraps (see the
		 * "math" comment above timeout_startup()).
		 */
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	/* Remember the scheduling process for kcov remote coverage. */
	if (!kcov_cold)
		new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}
333 
334 int
335 timeout_add_tv(struct timeout *to, const struct timeval *tv)
336 {
337 	uint64_t to_ticks;
338 
339 	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
340 	if (to_ticks > INT_MAX)
341 		to_ticks = INT_MAX;
342 	if (to_ticks == 0 && tv->tv_usec > 0)
343 		to_ticks = 1;
344 
345 	return timeout_add(to, (int)to_ticks);
346 }
347 
348 int
349 timeout_add_sec(struct timeout *to, int secs)
350 {
351 	uint64_t to_ticks;
352 
353 	to_ticks = (uint64_t)hz * secs;
354 	if (to_ticks > INT_MAX)
355 		to_ticks = INT_MAX;
356 	if (to_ticks == 0)
357 		to_ticks = 1;
358 
359 	return timeout_add(to, (int)to_ticks);
360 }
361 
362 int
363 timeout_add_msec(struct timeout *to, int msecs)
364 {
365 	uint64_t to_ticks;
366 
367 	to_ticks = (uint64_t)msecs * 1000 / tick;
368 	if (to_ticks > INT_MAX)
369 		to_ticks = INT_MAX;
370 	if (to_ticks == 0 && msecs > 0)
371 		to_ticks = 1;
372 
373 	return timeout_add(to, (int)to_ticks);
374 }
375 
376 int
377 timeout_add_usec(struct timeout *to, int usecs)
378 {
379 	int to_ticks = usecs / tick;
380 
381 	if (to_ticks == 0 && usecs > 0)
382 		to_ticks = 1;
383 
384 	return timeout_add(to, to_ticks);
385 }
386 
387 int
388 timeout_add_nsec(struct timeout *to, int nsecs)
389 {
390 	int to_ticks = nsecs / (tick * 1000);
391 
392 	if (to_ticks == 0 && nsecs > 0)
393 		to_ticks = 1;
394 
395 	return timeout_add(to, to_ticks);
396 }
397 
/*
 * Schedule the kclock timeout "to" to fire at the absolute time
 * *abstime.  Returns 1 if the timeout was newly scheduled, 0 if it was
 * already pending and has only been updated.
 */
int
timeout_abs_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to->to_kclock != KCLOCK_NONE);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * As in timeout_add(): only requeue an already-pending timeout
	 * if the new deadline is earlier than the old one; otherwise it
	 * is rescheduled when its current bucket is processed.
	 */
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	/* Remember the scheduling process for kcov remote coverage. */
	if (!kcov_cold)
		to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}
434 
435 int
436 timeout_del(struct timeout *to)
437 {
438 	int ret = 0;
439 
440 	mtx_enter(&timeout_mutex);
441 	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
442 		CIRCQ_REMOVE(&to->to_list);
443 		CLR(to->to_flags, TIMEOUT_ONQUEUE);
444 		tostat.tos_cancelled++;
445 		ret = 1;
446 	}
447 	CLR(to->to_flags, TIMEOUT_TRIGGERED);
448 	tostat.tos_deleted++;
449 	mtx_leave(&timeout_mutex);
450 
451 	return ret;
452 }
453 
454 int
455 timeout_del_barrier(struct timeout *to)
456 {
457 	int removed;
458 
459 	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));
460 
461 	removed = timeout_del(to);
462 	if (!removed)
463 		timeout_barrier(to);
464 
465 	return removed;
466 }
467 
/*
 * Block until any pending invocation of "to" has finished.  A private
 * barrier timeout is appended to the same queue that would run "to"
 * (softint todo queue, proc thread queue, or MP-safe proc queue), so by
 * the time the barrier's callback signals the condition variable, every
 * earlier entry on that queue — including "to" — has been run.
 */
void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int flags;

	flags = to->to_flags & (TIMEOUT_PROC | TIMEOUT_MPSAFE);
	timeout_sync_order(ISSET(flags, TIMEOUT_PROC));

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, KCLOCK_NONE,
	    flags);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	/* Insert the barrier directly; it is due immediately. */
	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (ISSET(flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &barrier.to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	} else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	/* Kick whichever context drains the queue the barrier is on. */
	if (ISSET(flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(flags, TIMEOUT_MPSAFE))
			wakeup_one(&timeout_proc_mp);
		else
#endif
			wakeup_one(&timeout_proc);
	} else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}
511 
/* Barrier callback: wake the caller sleeping in timeout_barrier(). */
void
timeout_barrier_timeout(void *arg)
{
	cond_signal((struct cond *)arg);
}
519 
/*
 * Compute the timeout_wheel_kc[] bucket for the kclock timeout "to".
 * The wheel level is chosen from the time remaining until expiry; the
 * bucket within the level hashes the absolute expiry time (shifted by
 * the kclock's offset from the primary kclock).
 */
uint32_t
timeout_bucket(const struct timeout *to)
{
	struct timespec diff, shifted_abstime;
	struct kclock *kc;
	uint32_t level;

	KASSERT(to->to_kclock == KCLOCK_UPTIME);
	kc = &timeout_kclock[to->to_kclock];

	/* The timeout must not already be due. */
	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}
539 
540 /*
541  * Hash the absolute time into a bucket on a given level of the wheel.
542  *
543  * The complete hash is 32 bits.  The upper 25 bits are seconds, the
544  * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
545  * than one billion so we need to divide it to isolate the desired
546  * bits.  We can't just shift it.
547  *
548  * The level is used to isolate an 8-bit portion of the hash.  The
549  * resulting number indicates which bucket the absolute time belongs
550  * in on the given level of the wheel.
551  */
552 uint32_t
553 timeout_maskwheel(uint32_t level, const struct timespec *abstime)
554 {
555 	uint32_t hi, lo;
556 
557  	hi = abstime->tv_sec << 7;
558 	lo = abstime->tv_nsec / 7812500;
559 
560 	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
561 }
562 
/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	int b, done, first, i, last, level, need_softclock = 1, off;

	mtx_enter(&timeout_mutex);

	/*
	 * Advance the tick wheel: dump the current level-0 bucket and,
	 * each time a level's index wraps to zero, cascade the bucket
	 * from the next level onto the todo queue.
	 */
	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
	nanouptime(&now);
	timespecsub(&now, lastscan, &elapsed);
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			/* Full lap: dump every bucket in this level. */
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	/* Only raise the soft interrupt if there is work for it. */
	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}
638 
/*
 * Run one due timeout.  Called with timeout_mutex held; the mutex is
 * released around the callback invocation and re-entered before
 * returning, so "to" may be freed or rescheduled by its own callback.
 */
void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	/* Snapshot everything we need before dropping the mutex. */
	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}
670 
/*
 * Process one kclock timeout taken off the todo queue: if it is not
 * yet due, hash it into the kclock wheel; otherwise run it now, or
 * hand it to the appropriate thread queue if it needs process context.
 * "new" is nonzero if the timeout has not been on a wheel before.
 */
void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		/* Not due yet: (re)schedule it on the wheel. */
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}
698 
/*
 * Process one tick-based timeout taken off the todo queue: if it is
 * not yet due, hash it into the tick wheel; otherwise run it now, or
 * hand it to the appropriate thread queue if it needs process context.
 * "new" is nonzero if the timeout has not been on a wheel before.
 */
void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	/* Positive for future timeouts, zero or negative for due ones. */
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}
725 
/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;
#ifdef MULTIPROCESSOR
	int need_proc_mp;
#endif

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	/*
	 * Remember the head of timeout_new before merging it into the
	 * todo queue: everything from that element onward has never
	 * been on a wheel, which the process functions use to keep the
	 * scheduled/rescheduled statistics apart.
	 */
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (to->to_kclock != KCLOCK_NONE)
			softclock_process_kclock_timeout(to, new);
		else
			softclock_process_tick_timeout(to, new);
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
#ifdef MULTIPROCESSOR
	need_proc_mp = !CIRCQ_EMPTY(&timeout_proc_mp);
#endif
	mtx_leave(&timeout_mutex);

	/* Wake the softclock thread(s) for process-context timeouts. */
	if (needsproc)
		wakeup(&timeout_proc);
#ifdef MULTIPROCESSOR
	if (need_proc_mp)
		wakeup(&timeout_proc_mp);
#endif
}
772 
/* Fork the softclock thread(s); deferred until kthreads can be created. */
void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
#ifdef MULTIPROCESSOR
	if (kthread_create(softclock_thread_mp, NULL, NULL, "softclockmp"))
		panic("kthread_create softclock_thread_mp");
#endif
}
783 
/*
 * Kernel thread that runs process-context timeouts (TIMEOUT_PROC)
 * under the kernel lock.  Sleeps on &timeout_proc until softclock()
 * or timeout_barrier() wakes it.
 */
void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	mtx_enter(&timeout_mutex);
	/* NOTE: the loop below never exits; the splx() is unreachable. */
	for (;;) {
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		msleep_nsec(&timeout_proc, &timeout_mutex, PSWP, "tmoslp",
		    INFSLP);
	}
	splx(s);
}
817 
818 #ifdef MULTIPROCESSOR
/*
 * Kernel thread that runs MP-safe process-context timeouts
 * (TIMEOUT_PROC | TIMEOUT_MPSAFE) without the kernel lock.
 */
void
softclock_thread_mp(void *arg)
{
	struct timeout *to;

	KERNEL_ASSERT_LOCKED();
	/* MP-safe timeouts run without the kernel lock. */
	KERNEL_UNLOCK();

	mtx_enter(&timeout_mutex);
	for (;;) {
		while (!CIRCQ_EMPTY(&timeout_proc_mp)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc_mp));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		msleep_nsec(&timeout_proc_mp, &timeout_mutex, PSWP, "tmoslp",
		    INFSLP);
	}
}
840 #endif /* MULTIPROCESSOR */
841 
842 #ifndef SMALL_KERNEL
/*
 * Jump "ticks" forward by "adj" ticks (used when the clock is stepped
 * forward).  Every pending tick-based timeout is dumped onto the todo
 * queue for rescheduling; timeouts that would now be in the past are
 * pulled forward to fire at the new tick count.
 */
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			/* Advance before unlinking "to" from the bucket. */
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
872 #endif
873 
874 int
875 timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
876 {
877 	struct timeoutstat status;
878 
879 	mtx_enter(&timeout_mutex);
880 	memcpy(&status, &tostat, sizeof(status));
881 	mtx_leave(&timeout_mutex);
882 
883 	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
884 }
885 
886 #ifdef DDB
887 const char *db_kclock(int);
888 void db_show_callout_bucket(struct circq *);
889 void db_show_timeout(struct timeout *, struct circq *);
890 const char *db_timespec(const struct timespec *);
891 
892 const char *
893 db_kclock(int kclock)
894 {
895 	switch (kclock) {
896 	case KCLOCK_UPTIME:
897 		return "uptime";
898 	default:
899 		return "invalid";
900 	}
901 }
902 
903 const char *
904 db_timespec(const struct timespec *ts)
905 {
906 	static char buf[32];
907 	struct timespec tmp, zero;
908 
909 	if (ts->tv_sec >= 0) {
910 		snprintf(buf, sizeof(buf), "%lld.%09ld",
911 		    ts->tv_sec, ts->tv_nsec);
912 		return buf;
913 	}
914 
915 	timespecclear(&zero);
916 	timespecsub(&zero, ts, &tmp);
917 	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
918 	return buf;
919 }
920 
/* Print every timeout on the given queue/bucket from ddb. */
void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}
929 
/*
 * Print one timeout for ddb's "show callout": time remaining, clock,
 * which queue or wheel bucket it sits on, its argument, and the symbol
 * name of its callback.
 */
void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	/* Identify the queue; otherwise compute bucket/level on a wheel. */
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
#ifdef MULTIPROCESSOR
	else if (bucket == &timeout_proc_mp)
		where = "thread-mp";
#endif
	else {
		if (to->to_kclock != KCLOCK_NONE)
			wheel = timeout_wheel_kc;
		else
			wheel = timeout_wheel;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (to->to_kclock != KCLOCK_NONE) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s  %8s  %9s  0x%0*lx  %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else {
		db_printf("%20d  %8s  %9s  0x%0*lx  %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	}
}
975 
/*
 * ddb "show callout" command: dump the clock state and then every
 * pending timeout on every queue and wheel bucket.
 */
void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s  %8s\n", "lastscan", "clock");
	db_printf("%20d  %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s  %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s  %8s  %9s  %*s  %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
#ifdef MULTIPROCESSOR
	db_show_callout_bucket(&timeout_proc_mp);
#endif
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
1004 #endif
1005