/*	$OpenBSD: kern_timeout.c,v 1.55 2019/04/23 13:35:12 visa Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each. See 'Scheme 7' in
 * "Hashed and Hierarchical Timing Wheels: Efficient Data Structures for
 * Implementing a Timer Facility" by George Varghese and Tony Lauck.
 */
#define BUCKETS 1024
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8

struct circq timeout_wheel[BUCKETS];	/* Queues of timeouts */
struct circq timeout_todo;		/* Worklist */
struct circq timeout_proc;		/* Due timeouts needing proc. context */

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])
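
/*
 * For example, with WHEELBITS == 8: a timeout due in rel = 100 ticks
 * (rel <= 1 << 8) lands in level 0, at bucket MASKWHEEL(0, abs), i.e.
 * the low 8 bits of its absolute expiry time.  One due in rel = 300
 * ticks (256 < rel <= 1 << 16) lands in level 1, at bucket
 * MASKWHEEL(1, abs) + WHEELSIZE, i.e. bits 8-15 of the expiry time,
 * offset past the 256 level-0 buckets.  Note that the bucket is chosen
 * by the absolute expiry time; the relative time only selects the level.
 */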

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_APPEND(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static __inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

/*
 * All wheels are locked with the same mutex.
 *
 * We need locking because the timeouts are manipulated from hardclock(),
 * which runs without the big kernel lock.
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

/*
 * Circular queue definitions.
 */
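/*
 * Each queue is a doubly linked ring in which the list head acts as a
 * sentinel: an empty queue points at itself.  CIRCQ_APPEND splices the
 * entire contents of one ring onto the tail of another in constant
 * time and leaves the source ring empty.
 */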

#define CIRCQ_INIT(elem) do {                   \
        (elem)->next = (elem);                  \
        (elem)->prev = (elem);                  \
} while (0)

#define CIRCQ_INSERT(elem, list) do {           \
        (elem)->prev = (list)->prev;            \
        (elem)->next = (list);                  \
        (list)->prev->next = (elem);            \
        (list)->prev = (elem);                  \
} while (0)

#define CIRCQ_APPEND(fst, snd) do {             \
        if (!CIRCQ_EMPTY(snd)) {                \
                (fst)->prev->next = (snd)->next;\
                (snd)->next->prev = (fst)->prev;\
                (snd)->prev->next = (fst);      \
                (fst)->prev = (snd)->prev;      \
                CIRCQ_INIT(snd);                \
        }                                       \
} while (0)

#define CIRCQ_REMOVE(elem) do {                 \
        (elem)->next->prev = (elem)->prev;      \
        (elem)->prev->next = (elem)->next;      \
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

void softclock_thread(void *);
void softclock_create_thread(void *);

#ifdef WITNESS
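/*
 * For WITNESS, the timeout handler is modeled as a virtual lock: a
 * sleepable one for handlers that run in process context and a spin
 * one for handlers that run from the soft interrupt.  This lets
 * WITNESS order the handler against real locks taken around
 * timeout_barrier() and inside the handler itself.
 */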
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

static void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 *
 * We rely on the fact that any element added to the queue must be added
 * with a positive time.  That means an element `to' on the queue cannot
 * be scheduled to time out further in the future than INT_MAX ticks, but
 * to->to_time can be positive or negative, so comparing it against
 * anything directly is dangerous.  The only predictable way to use
 * to->to_time is to compute how far in the future `to' will expire:
 * "to->to_time - ticks".  The result is always positive for future
 * timeouts and 0 or negative for due timeouts.
 */
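/*
 * A concrete example, assuming the usual two's-complement wraparound:
 * suppose ticks == INT_MAX and timeout_add() is called with
 * to_ticks == 10.  Then to->to_time wraps to a large negative value.
 * The naive test "to->to_time < ticks" would wrongly report the
 * timeout as due, while the subtraction "to->to_time - ticks" still
 * yields 10, the correct distance, because the wraparound cancels out.
 */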

void
timeout_startup(void)
{
	int b;

	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
}

void
timeout_proc_init(void)
{
	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	new->to_func = fn;
	new->to_arg = arg;
	new->to_flags = TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set(new, fn, arg);
	new->to_flags |= TIMEOUT_NEEDPROCCTX;
}

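/*
 * Typical driver usage, as an illustrative sketch only ("sc",
 * "sc_tmo" and "mydev_tick" are hypothetical names, not part of
 * this file):
 *
 *	timeout_set(&sc->sc_tmo, mydev_tick, sc);
 *	timeout_add_sec(&sc->sc_tmo, 1);	fires mydev_tick(sc) in ~1s
 *	...
 *	timeout_del(&sc->sc_tmo);		cancel if still pending
 *
 * Handlers set up with timeout_set() run from the softclock
 * interrupt; those set up with timeout_set_proc() are deferred to
 * the softclock thread and may sleep.
 */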
int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

#ifdef DIAGNOSTIC
	if (!(new->to_flags & TIMEOUT_INITIALIZED))
		panic("timeout_add: not initialized");
	if (to_ticks < 0)
		panic("timeout_add: to_ticks (%d) < 0", to_ticks);
#endif

	mtx_enter(&timeout_mutex);
	/* Initialize the time here; it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	new->to_flags &= ~TIMEOUT_TRIGGERED;

	/*
	 * If this timeout is already scheduled and is now moved
	 * earlier, put it on the worklist so softclock() reschedules
	 * it promptly.  If it is moved later, leave it where it is;
	 * it will be re-bucketed when its current bucket is processed.
	 */
	if (new->to_flags & TIMEOUT_ONQUEUE) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT(&new->to_list, &timeout_todo);
		}
		ret = 0;
	} else {
		new->to_flags |= TIMEOUT_ONQUEUE;
		CIRCQ_INSERT(&new->to_list, &timeout_todo);
	}
	mtx_leave(&timeout_mutex);

	return (ret);
}

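/*
 * The conversion helpers below turn wall-clock units into ticks using
 * hz (ticks per second) and tick (microseconds per tick).  Results
 * computed in 64 bits are clamped to INT_MAX, and the sub-second
 * variants round a nonzero interval up to a minimum of one tick so
 * that a short but nonzero timeout does not degenerate to zero.
 */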
int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_ts(struct timeout *to, const struct timespec *ts)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * ts->tv_sec + ts->tv_nsec / (tick * 1000);
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && ts->tv_nsec > 0)
		to_ticks = 1;

	return (timeout_add(to, (int)to_ticks));
}

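/*
 * A bintime carries its fraction as a 64-bit binary fixed-point value.
 * The expression below takes the top 32 bits of the fraction, scales
 * them to microseconds (multiply by 1000000, then shift right by 32,
 * i.e. divide by 2^32), and divides by tick to get ticks.
 */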
int
timeout_add_bt(struct timeout *to, const struct bintime *bt)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * bt->sec + (long)(((uint64_t)1000000 *
	    (uint32_t)(bt->frac >> 32)) >> 32) / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && bt->frac > 0)
		to_ticks = 1;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return (timeout_add(to, to_ticks));
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return (timeout_add(to, to_ticks));
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (to->to_flags & TIMEOUT_ONQUEUE) {
		CIRCQ_REMOVE(&to->to_list);
		to->to_flags &= ~TIMEOUT_ONQUEUE;
		ret = 1;
	}
	to->to_flags &= ~TIMEOUT_TRIGGERED;
	mtx_leave(&timeout_mutex);

	return (ret);
}

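/*
 * timeout_del() alone does not wait for a handler that has already
 * started running.  timeout_del_barrier() closes that race: if the
 * timeout could not be removed from the queue (it may be running or
 * already finished), it waits via timeout_barrier() until any current
 * invocation of the handler has completed.
 */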
int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return (removed);
}

void	timeout_proc_barrier(void *);

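/*
 * The barrier relies on how each kind of handler runs.  Soft-interrupt
 * handlers run from softclock() with the kernel lock held, so once the
 * caller has taken the kernel lock, no such handler can still be
 * mid-flight.  Process-context handlers run serially from the
 * softclock thread, so queueing a barrier timeout behind them on
 * timeout_proc and waiting for it to fire guarantees all earlier
 * handlers have completed.
 */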
void
timeout_barrier(struct timeout *to)
{
	int needsproc = ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX);

	timeout_sync_order(needsproc);

	if (!needsproc) {
		KERNEL_LOCK();
		splx(splsoftclock());
		KERNEL_UNLOCK();
	} else {
		struct cond c = COND_INITIALIZER();
		struct timeout barrier;

		timeout_set_proc(&barrier, timeout_proc_barrier, &c);

		mtx_enter(&timeout_mutex);
		barrier.to_flags |= TIMEOUT_ONQUEUE;
		CIRCQ_INSERT(&barrier.to_list, &timeout_proc);
		mtx_leave(&timeout_mutex);

		wakeup_one(&timeout_proc);

		cond_wait(&c, "tmobar");
	}
}

void
timeout_proc_barrier(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

/*
 * This is called from hardclock() once every tick.
 * We return nonzero if we need to schedule a softclock interrupt.
 */
int
timeout_hardclock_update(void)
{
	int ret;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}
	ret = !CIRCQ_EMPTY(&timeout_todo);
	mtx_leave(&timeout_mutex);

	return (ret);
}

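/*
 * To illustrate the cascade: every tick one level-0 bucket is moved to
 * the worklist.  Every 256 ticks the level-0 index wraps to 0 and one
 * level-1 bucket is cascaded down; every 65536 ticks a level-2 bucket
 * follows, and every 2^24 ticks a level-3 bucket.  Timeouts moved this
 * way are not necessarily due yet: softclock() re-inserts any timeout
 * whose remaining time is still positive into a lower-level bucket.
 */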
void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	to->to_flags &= ~TIMEOUT_ONQUEUE;
	to->to_flags |= TIMEOUT_TRIGGERED;

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX);

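	/*
	 * Drop the mutex while the handler runs: the handler may
	 * itself call timeout_add() or timeout_del(), and proc-context
	 * handlers may sleep.  TIMEOUT_ONQUEUE was already cleared, so
	 * the timeout can be rescheduled from within the handler.
	 */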
	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
	fn(arg);
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock(void *arg)
{
	int delta;
	struct circq *bucket;
	struct timeout *to;
	int needsproc = 0;

	mtx_enter(&timeout_mutex);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);

		/*
		 * If due, run it or defer execution to the thread;
		 * otherwise insert it into the right bucket.
		 */
		delta = to->to_time - ticks;
		if (delta > 0) {
			bucket = &BUCKET(delta, to->to_time);
			CIRCQ_INSERT(&to->to_list, bucket);
		} else if (to->to_flags & TIMEOUT_NEEDPROCCTX) {
			CIRCQ_INSERT(&to->to_list, &timeout_proc);
			needsproc = 1;
		} else {
#ifdef DEBUG
			if (delta < 0)
				printf("timeout delayed %d\n", delta);
#endif
			timeout_run(to);
		}
	}
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment: stay pegged to the primary CPU. */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
		}
		mtx_leave(&timeout_mutex);
	}
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward we need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT(&to->to_list, &timeout_todo);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

#ifdef DDB
void db_show_callout_bucket(struct circq *);

void
db_show_callout_bucket(struct circq *bucket)
{
	struct timeout *to;
	struct circq *p;
	db_expr_t offset;
	char *name;

	for (p = CIRCQ_FIRST(bucket); p != bucket; p = CIRCQ_FIRST(p)) {
		to = timeout_from_circq(p);
		db_find_sym_and_offset((db_addr_t)to->to_func, &name, &offset);
		name = name ? name : "?";
		db_printf("%9d %2td/%-4td %p  %s\n", to->to_time - ticks,
		    (bucket - timeout_wheel) / WHEELSIZE,
		    bucket - timeout_wheel, to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	int b;

	db_printf("ticks now: %d\n", ticks);
	db_printf("    ticks  wheel       arg  func\n");

	db_show_callout_bucket(&timeout_todo);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
}
#endif