/*	$OpenBSD: kern_timeout.c,v 1.93 2023/07/06 23:24:37 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

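/*
 * For example, with WHEELBITS == 8 the BUCKET() macro above selects a
 * wheel level from the relative delay and hashes the absolute expiry
 * time into that level:
 *
 *	rel <= 2^8	level 0, bucket MASKWHEEL(0, abs)
 *	rel <= 2^16	level 1, bucket MASKWHEEL(1, abs) + WHEELSIZE
 *	rel <= 2^24	level 2, bucket MASKWHEEL(2, abs) + 2*WHEELSIZE
 *	otherwise	level 3, bucket MASKWHEEL(3, abs) + 3*WHEELSIZE
 *
 * so a timeout due in, say, 300 ticks lands on level 1 and is indexed
 * by bits 8-15 of its absolute expiry time.
 */
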
/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);      \
		(fst)->prev = (snd)->prev;      \
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;      \
	(elem)->prev->next = (elem)->next;      \
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

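/*
 * The CIRCQ macros implement a minimal circular doubly-linked list in
 * which the list head and the elements share the same structure.  A
 * typical read-only traversal (a sketch only) looks like:
 *
 *	struct circq *p;
 *
 *	CIRCQ_FOREACH(p, &timeout_todo)
 *		examine(timeout_from_circq(p));
 *
 * where examine() stands in for whatever per-timeout work is needed.
 * CIRCQ_FOREACH is not safe against removal of the current element;
 * the consumers below that dequeue entries instead loop on
 * CIRCQ_FIRST()/CIRCQ_REMOVE().  Note also that CIRCQ_INSERT_TAIL()
 * and CIRCQ_REMOVE() update tostat.tos_pending, so they must be
 * called with timeout_mutex held.
 */
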
#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
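
/*
 * Concretely: if ticks == INT_MAX and a timeout is added with
 * to_ticks == 10, to->to_time wraps to a large negative value, so
 * comparing it directly against ticks would make the timeout look
 * long overdue.  The difference to->to_time - ticks is still 10,
 * however, so "to->to_time - ticks <= 0" remains a valid test for
 * "due".  (This example assumes the usual wrapping behaviour of int
 * arithmetic.)
 */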

void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
    int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_kclock = kclock;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}

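/*
 * A typical consumer (a sketch only; "sc", "sc_tick_tmo" and foo_tick()
 * are hypothetical names, not part of this file) initializes a timeout
 * once and then arms it as needed:
 *
 *	timeout_set(&sc->sc_tick_tmo, foo_tick, sc);
 *	timeout_add_sec(&sc->sc_tick_tmo, 1);
 *
 * foo_tick() may call timeout_add_sec() again on its own timeout to
 * rearm itself, and timeout_set_proc() may be used instead of
 * timeout_set() if the handler must run in process context.
 */
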
int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_abs_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to->to_kclock != KCLOCK_NONE);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

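/*
 * Wait until any concurrently executing handler for "to" has finished.
 * A dummy barrier timeout is queued behind the handler: on the
 * timeout_proc queue for TIMEOUT_PROC timeouts, which the softclock
 * thread drains in order, or on the timeout_todo queue for ordinary
 * timeouts, which the softclock soft interrupt drains in order.  The
 * caller then sleeps on a condition variable that the barrier signals
 * when it runs, which can only happen once everything queued ahead of
 * it has completed.
 */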
void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int procflag;

	procflag = (to->to_flags & TIMEOUT_PROC);
	timeout_sync_order(procflag);

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, KCLOCK_NONE,
	    procflag);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (procflag)
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	if (procflag)
		wakeup_one(&timeout_proc);
	else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}

void
timeout_barrier_timeout(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

uint32_t
timeout_bucket(const struct timeout *to)
{
	struct timespec diff, shifted_abstime;
	struct kclock *kc;
	uint32_t level;

	KASSERT(to->to_kclock == KCLOCK_UPTIME);
	kc = &timeout_kclock[to->to_kclock];

	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits.  The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits.  We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash.  The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
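/*
 * The nanosecond divisor used below follows from that layout:
 * 1,000,000,000 / 2^7 = 7,812,500, so tv_nsec / 7812500 is always in
 * the range 0-127 and fits in the low 7 bits of the hash.
 */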
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	int b, done, first, i, last, level, need_softclock = 1, off;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
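	/*
	 * For example: if on some level lastscan hashed to bucket 250
	 * and "now" hashes to bucket 3, then first > last, so we dump
	 * buckets 250-255 and 0-3 and, having wrapped around, continue
	 * with the next level.
	 */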
	nanouptime(&now);
	timespecsub(&now, lastscan, &elapsed);
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

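/*
 * Run a single timeout.  timeout_mutex is released for the duration of
 * the handler call, so the handler may itself use the timeout API
 * (e.g. to rearm the timeout); the mutex is reacquired before
 * returning to the caller.
 */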
void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (to->to_kclock != KCLOCK_NONE)
			softclock_process_kclock_timeout(to, new);
		else
			softclock_process_tick_timeout(to, new);
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, PSWP, 0, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);

const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}

	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
	else {
		if (to->to_kclock != KCLOCK_NONE)
			wheel = timeout_wheel_kc;
		else
			wheel = timeout_wheel;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (to->to_kclock != KCLOCK_NONE) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s  %8s  %7s  0x%0*lx  %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else {
		db_printf("%20d  %8s  %7s  0x%0*lx  %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s  %8s\n", "lastscan", "clock");
	db_printf("%20d  %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s  %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s  %8s  %7s  %*s  %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif