/* $NetBSD: kern_timeout.c,v 1.79 2023/10/08 13:23:05 ad Exp $ */

/*-
 * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.79 2023/10/08 13:23:05 ad Exp $");

/*
 * Timeouts are kept in a hierarchical timing wheel.  The c_time is the
 * value of c_cpu->cc_ticks when the timeout should be called.  There are
 * four levels with 256 buckets each.  See 'Scheme 7' in "Hashed and
 * Hierarchical Timing Wheels: Efficient Data Structures for Implementing
 * a Timer Facility" by George Varghese and Tony Lauck.
 *
 * Some of the "math" in here is a bit tricky.  We have to beware of
 * wrapping ints.
 *
 * We use the fact that any element added to the queue must be added with
 * a positive time.  That means that any element `to' on the queue cannot
 * be scheduled to timeout further in time than INT_MAX, but c->c_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the c->c_time value in any predictable way is
 * when we calculate how far in the future `to' will timeout - "c->c_time
 * - c->c_cpu->cc_ticks".  The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
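
/*
 * Worked example (illustrative values only): with 32-bit ints, suppose
 * cc_ticks has counted up to 0x7ffffffe and a callout is scheduled 10
 * ticks out, so c_time becomes 0x80000008 - a negative int.  Comparing
 * c_time against cc_ticks directly would claim the callout is long
 * overdue, but the difference "c_time - cc_ticks", evaluated modulo
 * 2^32 (see the unsigned casts in callout_softclock()), is still +10,
 * which is why the code only ever looks at that difference.
 */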

#define _CALLOUT_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/evcnt.h>
#include <sys/intr.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/sdt.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_access.h>
#include <ddb/db_cpu.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#define BUCKETS		1024
#define WHEELSIZE	256
#define WHEELMASK	255
#define WHEELBITS	8

#define MASKWHEEL(wheel, time)	(((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(cc, rel, abs) \
    (((rel) <= (1 << (2*WHEELBITS))) \
	? ((rel) <= (1 << WHEELBITS)) \
	    ? &(cc)->cc_wheel[MASKWHEEL(0, (abs))] \
	    : &(cc)->cc_wheel[MASKWHEEL(1, (abs)) + WHEELSIZE] \
	: ((rel) <= (1 << (3*WHEELBITS))) \
	    ? &(cc)->cc_wheel[MASKWHEEL(2, (abs)) + 2*WHEELSIZE] \
	    : &(cc)->cc_wheel[MASKWHEEL(3, (abs)) + 3*WHEELSIZE])
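
/*
 * For illustration: with WHEELBITS == 8, a timeout due in "rel" <= 256
 * ticks lands in wheel 0, indexed by the low 8 bits of its absolute
 * expiry time; rel <= 65536 lands in wheel 1, indexed by bits 8..15;
 * and so on up to wheel 3.  Distant timeouts drift toward wheel 0 as
 * MOVEBUCKET() (below) periodically dumps the higher-order buckets onto
 * the todo list and callout_softclock() re-sorts them.
 */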

#define MOVEBUCKET(cc, wheel, time) \
    CIRCQ_APPEND(&(cc)->cc_todo, \
	&(cc)->cc_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(list) \
do { \
	(list)->cq_next_l = (list); \
	(list)->cq_prev_l = (list); \
} while (/*CONSTCOND*/0)

#define CIRCQ_INSERT(elem, list) \
do { \
	(elem)->cq_prev_e = (list)->cq_prev_e; \
	(elem)->cq_next_l = (list); \
	(list)->cq_prev_l->cq_next_l = (elem); \
	(list)->cq_prev_l = (elem); \
} while (/*CONSTCOND*/0)

#define CIRCQ_APPEND(fst, snd) \
do { \
	if (!CIRCQ_EMPTY(snd)) { \
		(fst)->cq_prev_l->cq_next_l = (snd)->cq_next_l; \
		(snd)->cq_next_l->cq_prev_l = (fst)->cq_prev_l; \
		(snd)->cq_prev_l->cq_next_l = (fst); \
		(fst)->cq_prev_l = (snd)->cq_prev_l; \
		CIRCQ_INIT(snd); \
	} \
} while (/*CONSTCOND*/0)

#define CIRCQ_REMOVE(elem) \
do { \
	(elem)->cq_next_l->cq_prev_e = (elem)->cq_prev_e; \
	(elem)->cq_prev_l->cq_next_e = (elem)->cq_next_e; \
} while (/*CONSTCOND*/0)

#define CIRCQ_FIRST(list)	((list)->cq_next_e)
#define CIRCQ_NEXT(elem)	((elem)->cq_next_e)
#define CIRCQ_LAST(elem, list)	((elem)->cq_next_l == (list))
#define CIRCQ_EMPTY(list)	((list)->cq_next_l == (list))
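
/*
 * Note: roughly speaking, the cq_next_l/cq_prev_l ("list") and
 * cq_next_e/cq_prev_e ("element") accessors are alternate typed views of
 * the same link pointers in struct callout_circq (see sys/callout.h), so
 * bucket heads and queued callout_impl_t entries can be threaded through
 * one circular queue.
 */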

struct callout_cpu {
	kmutex_t *cc_lock;
	sleepq_t cc_sleepq;
	u_int cc_nwait;
	u_int cc_ticks;
	lwp_t *cc_lwp;
	callout_impl_t *cc_active;
	struct evcnt cc_ev_late;
	struct evcnt cc_ev_block;
	struct callout_circq cc_todo;		/* Worklist */
	struct callout_circq cc_wheel[BUCKETS];	/* Queues of timeouts */
	char cc_name1[12];
	char cc_name2[12];
	struct cpu_info *cc_cpu;
};

#ifdef DDB
static struct callout_cpu ccb;
#endif

#ifndef CRASH	/* _KERNEL */
static void	callout_softclock(void *);
static void	callout_wait(callout_impl_t *, void *, kmutex_t *);

static struct callout_cpu callout_cpu0 __cacheline_aligned;
static void *callout_sih __read_mostly;

SDT_PROBE_DEFINE2(sdt, kernel, callout, init,
    "struct callout *"/*ch*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE1(sdt, kernel, callout, destroy,
    "struct callout *"/*ch*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, setfunc,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, schedule,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "int"/*ticks*/);
SDT_PROBE_DEFINE6(sdt, kernel, callout, migrate,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "struct cpu_info *"/*ocpu*/,
    "struct cpu_info *"/*ncpu*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, entry,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, return,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, stop,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "bool"/*expired*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, halt,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, halt__done,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "bool"/*expired*/);

syncobj_t callout_syncobj = {
	.sobj_name	= "callout",
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_boostpri	= PRI_KERNEL,
	.sobj_unsleep	= sleepq_unsleep,
	.sobj_changepri	= sleepq_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= syncobj_noowner,
};

static inline kmutex_t *
callout_lock(callout_impl_t *c)
{
	struct callout_cpu *cc;
	kmutex_t *lock;

	for (;;) {
		cc = c->c_cpu;
		lock = cc->cc_lock;
		mutex_spin_enter(lock);
		if (__predict_true(cc == c->c_cpu))
			return lock;
		mutex_spin_exit(lock);
	}
}

/*
 * Check if the callout is currently running on an LWP that isn't curlwp.
 */
static inline bool
callout_running_somewhere_else(callout_impl_t *c, struct callout_cpu *cc)
{
	KASSERT(c->c_cpu == cc);

	return cc->cc_active == c && cc->cc_lwp != curlwp;
}

/*
 * callout_startup:
 *
 *	Initialize the callout facility, called at system startup time.
 *	Do just enough to allow callouts to be safely registered.
 */
void
callout_startup(void)
{
	struct callout_cpu *cc;
	int b;

	KASSERT(curcpu()->ci_data.cpu_callout == NULL);

	cc = &callout_cpu0;
	cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	CIRCQ_INIT(&cc->cc_todo);
	for (b = 0; b < BUCKETS; b++)
		CIRCQ_INIT(&cc->cc_wheel[b]);
	curcpu()->ci_data.cpu_callout = cc;
}

/*
 * callout_init_cpu:
 *
 *	Per-CPU initialization.
 */
CTASSERT(sizeof(callout_impl_t) <= sizeof(callout_t));

void
callout_init_cpu(struct cpu_info *ci)
{
	struct callout_cpu *cc;
	int b;

	if ((cc = ci->ci_data.cpu_callout) == NULL) {
		cc = kmem_zalloc(sizeof(*cc), KM_SLEEP);
		cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		CIRCQ_INIT(&cc->cc_todo);
		for (b = 0; b < BUCKETS; b++)
			CIRCQ_INIT(&cc->cc_wheel[b]);
	} else {
		/* Boot CPU, one time only. */
		callout_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
		    callout_softclock, NULL);
		if (callout_sih == NULL)
			panic("callout_init_cpu (2)");
	}

	sleepq_init(&cc->cc_sleepq);

	snprintf(cc->cc_name1, sizeof(cc->cc_name1), "late/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_late, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name1);

	snprintf(cc->cc_name2, sizeof(cc->cc_name2), "wait/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_block, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name2);

	cc->cc_cpu = ci;
	ci->ci_data.cpu_callout = cc;
}

/*
 * callout_init:
 *
 *	Initialize a callout structure.  This must be quick, so we fill
 *	only the minimum number of fields.
 */
void
callout_init(callout_t *cs, u_int flags)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;

	KASSERT((flags & ~CALLOUT_FLAGMASK) == 0);

	SDT_PROBE2(sdt, kernel, callout, init, cs, flags);

	cc = curcpu()->ci_data.cpu_callout;
	c->c_func = NULL;
	c->c_magic = CALLOUT_MAGIC;
	if (__predict_true((flags & CALLOUT_MPSAFE) != 0 && cc != NULL)) {
		c->c_flags = flags;
		c->c_cpu = cc;
		return;
	}
	c->c_flags = flags | CALLOUT_BOUND;
	c->c_cpu = &callout_cpu0;
}
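
/*
 * A rough usage sketch (illustrative only; "sc", "sc_tick_ch" and
 * "foo_tick" are hypothetical driver names):
 *
 *	callout_init(&sc->sc_tick_ch, CALLOUT_MPSAFE);
 *	callout_setfunc(&sc->sc_tick_ch, foo_tick, sc);
 *	callout_schedule(&sc->sc_tick_ch, hz);	fire in about one second
 *	...
 *	callout_halt(&sc->sc_tick_ch, NULL);	cancel, wait if running
 *	callout_destroy(&sc->sc_tick_ch);
 */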

/*
 * callout_destroy:
 *
 *	Destroy a callout structure.  The callout must be stopped.
 */
void
callout_destroy(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;

	SDT_PROBE1(sdt, kernel, callout, destroy, cs);

	KASSERTMSG(c->c_magic == CALLOUT_MAGIC,
	    "callout %p: c_magic (%#x) != CALLOUT_MAGIC (%#x)",
	    c, c->c_magic, CALLOUT_MAGIC);
	/*
	 * It's not necessary to lock in order to see the correct value
	 * of c->c_flags.  If the callout could potentially have been
	 * running, the current thread should have stopped it.
	 */
	KASSERTMSG((c->c_flags & CALLOUT_PENDING) == 0,
	    "pending callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	KASSERTMSG(!callout_running_somewhere_else(c, c->c_cpu),
	    "running callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	c->c_magic = 0;
}

/*
 * callout_schedule_locked:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.  Must be called with
 *	callout_lock.
 */
static void
callout_schedule_locked(callout_impl_t *c, kmutex_t *lock, int to_ticks)
{
	struct callout_cpu *cc, *occ;
	int old_time;

	SDT_PROBE5(sdt, kernel, callout, schedule,
	    c, c->c_func, c->c_arg, c->c_flags, to_ticks);

	KASSERT(to_ticks >= 0);
	KASSERT(c->c_func != NULL);

	/* Initialize the time here, it won't change. */
	occ = c->c_cpu;
	c->c_flags &= ~(CALLOUT_FIRED | CALLOUT_INVOKING);

	/*
	 * If this timeout is already scheduled and is now being moved
	 * earlier, requeue it now.  Otherwise leave it in place and
	 * let it be rescheduled later.
	 */
	if ((c->c_flags & CALLOUT_PENDING) != 0) {
		/* Leave on existing CPU. */
		old_time = c->c_time;
		c->c_time = to_ticks + occ->cc_ticks;
		if (c->c_time - old_time < 0) {
			CIRCQ_REMOVE(&c->c_list);
			CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
		}
		mutex_spin_exit(lock);
		return;
	}

	cc = curcpu()->ci_data.cpu_callout;
	if ((c->c_flags & CALLOUT_BOUND) != 0 || cc == occ ||
	    !mutex_tryenter(cc->cc_lock)) {
		/* Leave on existing CPU. */
		c->c_time = to_ticks + occ->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
	} else {
		/* Move to this CPU. */
		c->c_cpu = cc;
		c->c_time = to_ticks + cc->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &cc->cc_todo);
		mutex_spin_exit(cc->cc_lock);
		SDT_PROBE6(sdt, kernel, callout, migrate,
		    c, c->c_func, c->c_arg, c->c_flags,
		    occ->cc_cpu, cc->cc_cpu);
	}
	mutex_spin_exit(lock);
}

/*
 * callout_reset:
 *
 *	Reset a callout structure with a new function and argument, and
 *	schedule it to run.
 */
void
callout_reset(callout_t *cs, int to_ticks, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, setfunc, cs, func, arg, c->c_flags);
	c->c_func = func;
	c->c_arg = arg;
	callout_schedule_locked(c, lock, to_ticks);
}

/*
 * callout_schedule:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.
 */
void
callout_schedule(callout_t *cs, int to_ticks)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	callout_schedule_locked(c, lock, to_ticks);
}

/*
 * callout_stop:
 *
 *	Try to cancel a pending callout.  It may be too late: the callout
 *	could be running on another CPU.  If called from interrupt context,
 *	the callout could already be in progress at a lower priority.
 */
bool
callout_stop(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool expired;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);

	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	expired = ((c->c_flags & CALLOUT_FIRED) != 0);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);

	SDT_PROBE5(sdt, kernel, callout, stop,
	    c, c->c_func, c->c_arg, c->c_flags, expired);

	mutex_spin_exit(lock);

	return expired;
}

/*
 * callout_halt:
 *
 *	Cancel a pending callout.  If in-flight, block until it completes.
 *	May not be called from a hard interrupt handler.  If the callout
 *	can take locks, the caller of callout_halt() must not hold any of
 *	those locks, otherwise the two could deadlock.  If 'interlock' is
 *	non-NULL and we must wait for the callout to complete, it will be
 *	released and re-acquired before returning.
 */
bool
callout_halt(callout_t *cs, void *interlock)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(!cpu_intr_p());
	KASSERT(interlock == NULL || mutex_owned(interlock));

	/* Fast path. */
	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, halt,
	    c, c->c_func, c->c_arg, c->c_flags);
	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
	if (__predict_false(callout_running_somewhere_else(c, c->c_cpu))) {
		callout_wait(c, interlock, lock);
		return true;
	}
	SDT_PROBE5(sdt, kernel, callout, halt__done,
	    c, c->c_func, c->c_arg, c->c_flags, /*expired*/false);
	mutex_spin_exit(lock);
	return false;
}

/*
 * callout_wait:
 *
 *	Slow path for callout_halt().  Deliberately marked __noinline to
 *	prevent unneeded overhead in the caller.
 */
static void __noinline
callout_wait(callout_impl_t *c, void *interlock, kmutex_t *lock)
{
	struct callout_cpu *cc;
	struct lwp *l;
	kmutex_t *relock;
	int nlocks;

	l = curlwp;
	relock = NULL;
	for (;;) {
		/*
		 * At this point we know the callout is not pending, but it
		 * could be running on a CPU somewhere.  That can be curcpu
		 * in a few cases:
		 *
		 * - curlwp is a higher priority soft interrupt
		 * - the callout blocked on a lock and is currently asleep
		 * - the callout itself has called callout_halt() (nice!)
		 */
		cc = c->c_cpu;
		if (__predict_true(!callout_running_somewhere_else(c, cc)))
			break;

		/* It's running - need to wait for it to complete. */
		if (interlock != NULL) {
			/*
			 * Avoid potential scheduler lock order problems by
			 * dropping the interlock without the callout lock
			 * held; then retry.
			 */
			mutex_spin_exit(lock);
			mutex_exit(interlock);
			relock = interlock;
			interlock = NULL;
		} else {
			/* XXX Better to do priority inheritance. */
			KASSERT(l->l_wchan == NULL);
			cc->cc_nwait++;
			cc->cc_ev_block.ev_count++;
			nlocks = sleepq_enter(&cc->cc_sleepq, l, cc->cc_lock);
			sleepq_enqueue(&cc->cc_sleepq, cc, "callout",
			    &callout_syncobj, false);
			sleepq_block(0, false, &callout_syncobj, nlocks);
		}

		/*
		 * Re-lock the callout and check the state of play again.
		 * It's a common design pattern for callouts to re-schedule
		 * themselves, so put a stop to that again if needed.
		 */
		lock = callout_lock(c);
		if ((c->c_flags & CALLOUT_PENDING) != 0)
			CIRCQ_REMOVE(&c->c_list);
		c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
	}

	SDT_PROBE5(sdt, kernel, callout, halt__done,
	    c, c->c_func, c->c_arg, c->c_flags, /*expired*/true);

	mutex_spin_exit(lock);
	if (__predict_false(relock != NULL))
		mutex_enter(relock);
}

#ifdef notyet
/*
 * callout_bind:
 *
 *	Bind a callout so that it will only execute on one CPU.
 *	The callout must be stopped, and must be MPSAFE.
 *
 *	XXX Disabled for now until it is decided how to handle
 *	offlined CPUs.  We may want weak+strong binding.
 */
void
callout_bind(callout_t *cs, struct cpu_info *ci)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;
	kmutex_t *lock;

	KASSERT((c->c_flags & CALLOUT_PENDING) == 0);
	KASSERT(c->c_cpu->cc_active != c);
	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT((c->c_flags & CALLOUT_MPSAFE) != 0);

	lock = callout_lock(c);
	cc = ci->ci_data.cpu_callout;
	c->c_flags |= CALLOUT_BOUND;
	if (c->c_cpu != cc) {
		/*
		 * Assigning c_cpu effectively unlocks the callout
		 * structure, as we don't hold the new CPU's lock.
		 * Issue memory barrier to prevent accesses being
		 * reordered.
		 */
		membar_exit();
		c->c_cpu = cc;
	}
	mutex_spin_exit(lock);
}
#endif

void
callout_setfunc(callout_t *cs, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, setfunc, cs, func, arg, c->c_flags);
	c->c_func = func;
	c->c_arg = arg;
	mutex_spin_exit(lock);
}

bool
callout_expired(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_FIRED) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_active(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & (CALLOUT_PENDING|CALLOUT_FIRED)) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_pending(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_PENDING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_invoking(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_INVOKING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

void
callout_ack(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	c->c_flags &= ~CALLOUT_INVOKING;
	mutex_spin_exit(lock);
}

/*
 * callout_hardclock:
 *
 *	Called from hardclock() once every tick.  We schedule a soft
 *	interrupt if there is work to be done.
 */
void
callout_hardclock(void)
{
	struct callout_cpu *cc;
	int needsoftclock, ticks;

	cc = curcpu()->ci_data.cpu_callout;
	mutex_spin_enter(cc->cc_lock);

	ticks = ++cc->cc_ticks;

	MOVEBUCKET(cc, 0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(cc, 1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(cc, 2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(cc, 3, ticks);
		}
	}

	needsoftclock = !CIRCQ_EMPTY(&cc->cc_todo);
	mutex_spin_exit(cc->cc_lock);

	if (needsoftclock)
		softint_schedule(callout_sih);
}
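
/*
 * In outline: every tick, MOVEBUCKET(cc, 0, ticks) dumps wheel 0's
 * bucket for the new cc_ticks value onto the todo list.  When the low
 * 8 bits of cc_ticks reach zero (every 256 ticks) the matching wheel 1
 * bucket is dumped as well, and callout_softclock() re-inserts any
 * entries that are not yet due into a closer bucket; the same cascade
 * happens for wheel 2 every 2^16 ticks and wheel 3 every 2^24 ticks.
 */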

/*
 * callout_softclock:
 *
 *	Soft interrupt handler, scheduled above if there is work to
 *	be done.  Callouts are made in soft interrupt context.
 */
static void
callout_softclock(void *v)
{
	callout_impl_t *c;
	struct callout_cpu *cc;
	void (*func)(void *);
	void *arg;
	int mpsafe, count, ticks, delta;
	u_int flags __unused;
	lwp_t *l;

	l = curlwp;
	KASSERT(l->l_cpu == curcpu());
	cc = l->l_cpu->ci_data.cpu_callout;

	mutex_spin_enter(cc->cc_lock);
	cc->cc_lwp = l;
	while (!CIRCQ_EMPTY(&cc->cc_todo)) {
		c = CIRCQ_FIRST(&cc->cc_todo);
		KASSERT(c->c_magic == CALLOUT_MAGIC);
		KASSERT(c->c_func != NULL);
		KASSERT(c->c_cpu == cc);
		KASSERT((c->c_flags & CALLOUT_PENDING) != 0);
		KASSERT((c->c_flags & CALLOUT_FIRED) == 0);
		CIRCQ_REMOVE(&c->c_list);

		/* If due, run it; otherwise insert it into the right bucket. */
		ticks = cc->cc_ticks;
		delta = (int)((unsigned)c->c_time - (unsigned)ticks);
		if (delta > 0) {
			CIRCQ_INSERT(&c->c_list, BUCKET(cc, delta, c->c_time));
			continue;
		}
		if (delta < 0)
			cc->cc_ev_late.ev_count++;

		c->c_flags = (c->c_flags & ~CALLOUT_PENDING) |
		    (CALLOUT_FIRED | CALLOUT_INVOKING);
		mpsafe = (c->c_flags & CALLOUT_MPSAFE);
		func = c->c_func;
		arg = c->c_arg;
		cc->cc_active = c;
		flags = c->c_flags;

		mutex_spin_exit(cc->cc_lock);
		KASSERT(func != NULL);
		SDT_PROBE4(sdt, kernel, callout, entry, c, func, arg, flags);
		if (__predict_false(!mpsafe)) {
			KERNEL_LOCK(1, NULL);
			(*func)(arg);
			KERNEL_UNLOCK_ONE(NULL);
		} else
			(*func)(arg);
		SDT_PROBE4(sdt, kernel, callout, return, c, func, arg, flags);
		KASSERTMSG(l->l_blcnt == 0,
		    "callout %p func %p leaked %d biglocks",
		    c, func, l->l_blcnt);
		mutex_spin_enter(cc->cc_lock);

		/*
		 * We can't touch 'c' here because it might be
		 * freed already.  If any LWPs are waiting for the
		 * callout to complete, awaken them.
		 */
		cc->cc_active = NULL;
		if ((count = cc->cc_nwait) != 0) {
			cc->cc_nwait = 0;
			/* sleepq_wake() drops the lock. */
			sleepq_wake(&cc->cc_sleepq, cc, count, cc->cc_lock);
			mutex_spin_enter(cc->cc_lock);
		}
	}
	cc->cc_lwp = NULL;
	mutex_spin_exit(cc->cc_lock);
}
#endif /* !CRASH */

#ifdef DDB
static void
db_show_callout_bucket(struct callout_cpu *cc, struct callout_circq *kbucket,
    struct callout_circq *bucket)
{
	callout_impl_t *c, ci;
	db_expr_t offset;
	const char *name;
	static char question[] = "?";
	int b;

	if (CIRCQ_LAST(bucket, kbucket))
		return;

	for (c = CIRCQ_FIRST(bucket); /*nothing*/; c = CIRCQ_NEXT(&c->c_list)) {
		db_read_bytes((db_addr_t)c, sizeof(ci), (char *)&ci);
		c = &ci;
		db_find_sym_and_offset((db_addr_t)(intptr_t)c->c_func, &name,
		    &offset);
		name = name ? name : question;
		b = (bucket - cc->cc_wheel);
		if (b < 0)
			b = -WHEELSIZE;
		db_printf("%9d %2d/%-4d %16lx %s\n",
		    c->c_time - cc->cc_ticks, b / WHEELSIZE, b,
		    (u_long)c->c_arg, name);
		if (CIRCQ_LAST(&c->c_list, kbucket))
			break;
	}
}

void
db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
{
	struct callout_cpu *cc;
	struct cpu_info *ci;
	int b;

#ifndef CRASH
	db_printf("hardclock_ticks now: %d\n", getticks());
#endif
	db_printf(" ticks wheel arg func\n");

	/*
	 * Don't lock the callwheel; all the other CPUs are paused
	 * anyhow, and we might be called in a circumstance where
	 * some other CPU was paused while holding the lock.
	 */
	for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
		db_read_bytes((db_addr_t)ci +
		    offsetof(struct cpu_info, ci_data.cpu_callout),
		    sizeof(cc), (char *)&cc);
		db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
		db_show_callout_bucket(&ccb, &cc->cc_todo, &ccb.cc_todo);
	}
	for (b = 0; b < BUCKETS; b++) {
		for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
			db_read_bytes((db_addr_t)ci +
			    offsetof(struct cpu_info, ci_data.cpu_callout),
			    sizeof(cc), (char *)&cc);
			db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
			db_show_callout_bucket(&ccb, &cc->cc_wheel[b],
			    &ccb.cc_wheel[b]);
		}
	}
}
#endif /* DDB */