xref: /dflybsd-src/sys/kern/kern_timeout.c (revision acdf1ee6e01f6d399479bd25d28f8f57ff8a3ad8)
1 /*
2  * Copyright (c) 2004,2014,2019-2020 The DragonFly Project.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Matthew Dillon <dillon@backplane.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 /*
36  * Copyright (c) 1982, 1986, 1991, 1993
37  *	The Regents of the University of California.  All rights reserved.
38  * (c) UNIX System Laboratories, Inc.
39  * All or some portions of this file are derived from material licensed
40  * to the University of California by American Telephone and Telegraph
41  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42  * the permission of UNIX System Laboratories, Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  * 3. Neither the name of the University nor the names of its contributors
53  *    may be used to endorse or promote products derived from this software
54  *    without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  */
68 /*
69  * The original callout mechanism was based on the work of Adam M. Costello
70  * and George Varghese, published in a technical report entitled "Redesigning
71  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
72  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
73  * used in this implementation was published by G. Varghese and T. Lauck in
74  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
75  * the Efficient Implementation of a Timer Facility" in the Proceedings of
76  * the 11th ACM Annual Symposium on Operating Systems Principles,
77  * Austin, Texas Nov 1987.
78  */
79 
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/spinlock.h>
83 #include <sys/callout.h>
84 #include <sys/kernel.h>
85 #include <sys/malloc.h>
86 #include <sys/interrupt.h>
87 #include <sys/thread.h>
88 #include <sys/sysctl.h>
89 #include <sys/exislock.h>
90 #include <vm/vm_extern.h>
91 #include <machine/atomic.h>
92 
93 #include <sys/spinlock2.h>
94 #include <sys/thread2.h>
95 #include <sys/mplock2.h>
96 #include <sys/exislock2.h>
97 
98 TAILQ_HEAD(colist, _callout);
99 struct softclock_pcpu;
100 
101 /*
102  * DID_INIT	- Sanity check
103  * PREVENTED	- A callback was prevented
104  * RESET	- Callout_reset requested
105  * STOP		- Callout_stop requested
106  * INPROG	- Softclock_handler thread processing in-progress on callout,
107  *		  queue linkage is indeterminate.  Third parties must queue
108  *		  a STOP or CANCEL and await completion.
109  * SET		- Callout is linked to queue (if INPROG not set)
110  * AUTOLOCK	- Lockmgr cancelable interlock (copied from frontend)
111  * MPSAFE	- Callout is MPSAFE (copied from frontend)
112  * CANCEL	- callout_cancel requested
113  * ACTIVE	- active/inactive (frontend only, see documentation).
114  *		  This is *NOT* the same as whether a callout is queued or
115  *		  not.
116  */
117 #define CALLOUT_DID_INIT	0x00000001	/* frontend */
118 #define CALLOUT_PREVENTED	0x00000002	/* backend */
119 #define CALLOUT_FREELIST	0x00000004	/* backend */
120 #define CALLOUT_UNUSED0008	0x00000008
121 #define CALLOUT_UNUSED0010	0x00000010
122 #define CALLOUT_RESET		0x00000020	/* backend */
123 #define CALLOUT_STOP		0x00000040	/* backend */
124 #define CALLOUT_INPROG		0x00000080	/* backend */
125 #define CALLOUT_SET		0x00000100	/* backend */
126 #define CALLOUT_AUTOLOCK	0x00000200	/* both */
127 #define CALLOUT_MPSAFE		0x00000400	/* both */
128 #define CALLOUT_CANCEL		0x00000800	/* backend */
129 #define CALLOUT_ACTIVE		0x00001000	/* frontend */
130 
131 struct wheel {
132 	struct spinlock spin;
133 	struct colist	list;
134 };
135 
136 struct softclock_pcpu {
137 	struct wheel	*callwheel;
138 	struct _callout *running;
139 	struct _callout * volatile next;
140 	struct colist	freelist;
141 	int		softticks;	/* softticks index */
142 	int		curticks;	/* per-cpu ticks counter */
143 	int		isrunning;
144 	struct thread	thread;
145 };
146 
147 typedef struct softclock_pcpu *softclock_pcpu_t;
148 
149 static int callout_debug = 0;
150 SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
151 	   &callout_debug, 0, "");
152 
153 static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");
154 
155 static int cwheelsize;
156 static int cwheelmask;
157 static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];
158 
159 static void softclock_handler(void *arg);
160 static void slotimer_callback(void *arg);
161 
162 /*
163  * Handle pending requests.  No action can be taken if the callout is still
164  * flagged INPROG.  Called from softclock for post-processing and from
165  * various API functions.
166  *
167  * This routine does not block in any way.
168  * Caller must hold c->spin.
169  *
170  * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
171  *	 must be used at all times.
172  *
173  * NOTE: The related (sc) might refer to another cpu.
174  *
175  * NOTE: The frontend (cc) and backend (c) might become disconnected during
176  *	 the operation, but the EXIS lock prevents (c) from being destroyed.
177  */
178 static __inline
179 void
180 _callout_update_spinlocked(struct _callout *c)
181 {
182 	struct wheel *wheel;
183 
184 	if (c->flags & CALLOUT_INPROG) {
185 		/*
186 		 * If the callout is in-progress the SET queuing state is
187 		 * indeterminate and no action can be taken at this time.
188 		 */
189 		/* nop */
190 	} else if (c->flags & CALLOUT_SET) {
191 		/*
192 		 * If the callout is SET it is queued on a callwheel, process
193 		 * various requests relative to it being in this queued state.
194 		 *
195 		 * c->q* fields are stable while we hold c->spin and
196 		 * wheel->spin.
197 		 */
198 		softclock_pcpu_t sc;
199 
200 		sc = c->qsc;
201 		wheel = &sc->callwheel[c->qtick & cwheelmask];
202 		spin_lock(&wheel->spin);
203 
204 		if (c->flags & CALLOUT_INPROG) {
205 			/*
206 			 * Raced against INPROG getting set by the softclock
207 			 * handler while we were acquiring wheel->spin.  We
208 			 * can do nothing at this time.
209 			 */
210 			/* nop */
211 		} else if (c->flags & CALLOUT_CANCEL) {
212 			/*
213 			 * CANCEL requests override everything else.
214 			 */
215 			if (sc->next == c)
216 				sc->next = TAILQ_NEXT(c, entry);
217 			TAILQ_REMOVE(&wheel->list, c, entry);
218 			atomic_clear_int(&c->flags, CALLOUT_SET |
219 						    CALLOUT_STOP |
220 						    CALLOUT_CANCEL |
221 						    CALLOUT_RESET);
222 			atomic_set_int(&c->flags, CALLOUT_PREVENTED);
223 			if (c->waiters)
224 				wakeup(c);
225 		} else if (c->flags & CALLOUT_RESET) {
226 			/*
227 			 * RESET requests reload the callout, potentially
228 			 * to a different cpu.  Once removed from the wheel,
229 			 * the retention of c->spin prevents further races.
230 			 *
231 			 * Leave SET intact.
232 			 */
233 			if (sc->next == c)
234 				sc->next = TAILQ_NEXT(c, entry);
235 			TAILQ_REMOVE(&wheel->list, c, entry);
236 			spin_unlock(&wheel->spin);
237 
238 			atomic_clear_int(&c->flags, CALLOUT_RESET);
239 			sc = c->rsc;
240 			c->qsc = sc;
241 			c->qarg = c->rarg;
242 			c->qfunc = c->rfunc;
243 			c->qtick = c->rtick;
244 
245 			/*
246 			 * Do not queue to a current or past wheel slot or
247 			 * the callout will be lost for ages.  Handle
248 			 * potential races against soft ticks.
249 			 */
250 			wheel = &sc->callwheel[c->qtick & cwheelmask];
251 			spin_lock(&wheel->spin);
252 			while (c->qtick - sc->softticks <= 0) {
253 				c->qtick = sc->softticks + 1;
254 				spin_unlock(&wheel->spin);
255 				wheel = &sc->callwheel[c->qtick & cwheelmask];
256 				spin_lock(&wheel->spin);
257 			}
258 			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
259 		} else if (c->flags & CALLOUT_STOP) {
260 			/*
261 			 * STOP request simply unloads the callout.
262 			 */
263 			if (sc->next == c)
264 				sc->next = TAILQ_NEXT(c, entry);
265 			TAILQ_REMOVE(&wheel->list, c, entry);
266 			atomic_clear_int(&c->flags, CALLOUT_STOP |
267 						    CALLOUT_SET);
268 
269 			atomic_set_int(&c->flags, CALLOUT_PREVENTED);
270 			if (c->waiters)
271 				wakeup(c);
272 		} else {
273 			/*
274 			 * Do nothing if no request is pending.
275 			 */
276 			/* nop */
277 		}
278 		spin_unlock(&wheel->spin);
279 	} else {
280 		/*
281 		 * If the callout is not SET it is not queued to any callwheel,
282 		 * process various requests relative to it not being queued.
283 		 *
284 		 * c->q* fields are stable while we hold c->spin.
285 		 */
286 		if (c->flags & CALLOUT_CANCEL) {
287 			/*
288 			 * CANCEL requests override everything else.
289 			 *
290 			 * There is no state being canceled in this case,
291 			 * so do not set the PREVENTED flag.
292 			 */
293 			atomic_clear_int(&c->flags, CALLOUT_STOP |
294 						    CALLOUT_CANCEL |
295 						    CALLOUT_RESET);
296 			if (c->waiters)
297 				wakeup(c);
298 		} else if (c->flags & CALLOUT_RESET) {
299 			/*
300 			 * RESET requests get queued.  Do not queue to the
301 			 * currently-processing tick.
302 			 */
303 			softclock_pcpu_t sc;
304 
305 			sc = c->rsc;
306 			c->qsc = sc;
307 			c->qarg = c->rarg;
308 			c->qfunc = c->rfunc;
309 			c->qtick = c->rtick;
310 
311 			/*
312 			 * Do not queue to current or past wheel or the
313 			 * callout will be lost for ages.
314 			 */
315 			wheel = &sc->callwheel[c->qtick & cwheelmask];
316 			spin_lock(&wheel->spin);
317 			while (c->qtick - sc->softticks <= 0) {
318 				c->qtick = sc->softticks + 1;
319 				spin_unlock(&wheel->spin);
320 				wheel = &sc->callwheel[c->qtick & cwheelmask];
321 				spin_lock(&wheel->spin);
322 			}
323 			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
324 			atomic_clear_int(&c->flags, CALLOUT_RESET);
325 			atomic_set_int(&c->flags, CALLOUT_SET);
326 			spin_unlock(&wheel->spin);
327 		} else if (c->flags & CALLOUT_STOP) {
328 			/*
329 			 * STOP requests.
330 			 *
331 			 * There is no state being stopped in this case,
332 			 * so do not set the PREVENTED flag.
333 			 */
334 			atomic_clear_int(&c->flags, CALLOUT_STOP);
335 			if (c->waiters)
336 				wakeup(c);
337 		} else {
338 			/*
339 			 * No request pending (someone else processed the
340 			 * request before we could)
341 			 */
342 			/* nop */
343 		}
344 	}
345 }
346 
347 static __inline
348 void
349 _callout_free(struct _callout *c)
350 {
351 	softclock_pcpu_t sc;
352 
353 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
354 
355 	crit_enter();
356 	exis_terminate(&c->exis);
357 	atomic_set_int(&c->flags, CALLOUT_FREELIST);
358 	atomic_clear_int(&c->flags, CALLOUT_DID_INIT);
359 	TAILQ_INSERT_TAIL(&sc->freelist, c, entry);
360 	crit_exit();
361 }
362 
363 /*
364  * System init
365  */
366 static void
367 swi_softclock_setup(void *arg)
368 {
369 	int cpu;
370 	int i;
371 	int target;
372 
373 	/*
374 	 * Figure out how large a callwheel we need.  It must be a power of 2.
375 	 *
376 	 * ncallout is primarily based on available memory, don't explode
377 	 * the allocations if the system has a lot of cpus.
378 	 */
379 	target = ncallout / ncpus + 16;
380 
381 	cwheelsize = 1;
382 	while (cwheelsize < target)
383 		cwheelsize <<= 1;
384 	cwheelmask = cwheelsize - 1;
385 
386 	/*
387 	 * Initialize per-cpu data structures.
388 	 */
389 	for (cpu = 0; cpu < ncpus; ++cpu) {
390 		softclock_pcpu_t sc;
391 		int wheel_sz;
392 
393 		sc = (void *)kmem_alloc3(&kernel_map, sizeof(*sc),
394 					 VM_SUBSYS_GD, KM_CPU(cpu));
395 		memset(sc, 0, sizeof(*sc));
396 		TAILQ_INIT(&sc->freelist);
397 		softclock_pcpu_ary[cpu] = sc;
398 
399 		wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
400 		sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz,
401 						    VM_SUBSYS_GD, KM_CPU(cpu));
402 		memset(sc->callwheel, 0, wheel_sz);
403 		for (i = 0; i < cwheelsize; ++i) {
404 			spin_init(&sc->callwheel[i].spin, "wheel");
405 			TAILQ_INIT(&sc->callwheel[i].list);
406 		}
407 
408 		/*
409 		 * Mark the softclock handler as being an interrupt thread
410 		 * even though it really isn't, but do not allow it to
411 		 * preempt other threads (do not assign td_preemptable).
412 		 *
413 		 * Kernel code now assumes that callouts do not preempt
414 		 * the cpu they were scheduled on.
415 		 */
416 		lwkt_create(softclock_handler, sc, NULL, &sc->thread,
417 			    TDF_NOSTART | TDF_INTTHREAD,
418 			    cpu, "softclock %d", cpu);
419 	}
420 }
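
/*
 * The sizing loop above rounds the per-cpu callwheel up to a power of two
 * so that a tick can be hashed to a wheel slot with a simple mask, as in
 * sc->callwheel[tick & cwheelmask].  A minimal sketch of the same
 * arithmetic (the helper name and the target of 100 are hypothetical):
 * a target of 100 rounds up to a size of 128 and a mask of 127, so
 * tick 1000 hashes to slot (1000 & 127) == 104.
 */
static int
example_round_pow2(int target)
{
	int size = 1;

	while (size < target)
		size <<= 1;
	return size;		/* the mask would be size - 1 */
}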
421 
422 /*
423  * Must occur after ncpus has been initialized.
424  */
425 SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
426 	swi_softclock_setup, NULL);
427 
428 /*
429  * This routine is called from the hardclock() (basically a FASTint/IPI) on
430  * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
431  * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
432  * the callwheel is currently indexed.
433  *
434  * sc->softticks is adjusted by either this routine or our helper thread
435  * depending on whether the helper thread is running or not.
436  *
437  * sc->curticks and sc->softticks are adjusted using atomic ops in order
438  * to ensure that remote cpu callout installation does not race the thread.
439  */
440 void
441 hardclock_softtick(globaldata_t gd)
442 {
443 	softclock_pcpu_t sc;
444 	struct wheel *wheel;
445 
446 	sc = softclock_pcpu_ary[gd->gd_cpuid];
447 	atomic_add_int(&sc->curticks, 1);
448 	if (sc->isrunning)
449 		return;
450 	if (sc->softticks == sc->curticks) {
451 		/*
452 		 * In sync, only wake the thread up if there is something
453 		 * to do.
454 		 */
455 		wheel = &sc->callwheel[sc->softticks & cwheelmask];
456 		spin_lock(&wheel->spin);
457 		if (TAILQ_FIRST(&wheel->list)) {
458 			sc->isrunning = 1;
459 			spin_unlock(&wheel->spin);
460 			lwkt_schedule(&sc->thread);
461 		} else {
462 			atomic_add_int(&sc->softticks, 1);
463 			spin_unlock(&wheel->spin);
464 		}
465 	} else {
466 		/*
467 		 * Out of sync, wake the thread up unconditionally so it can
468 		 * catch up.
469 		 */
470 		sc->isrunning = 1;
471 		lwkt_schedule(&sc->thread);
472 	}
473 }
474 
475 /*
476  * This procedure is the main loop of our per-cpu helper thread.  The
477  * sc->isrunning flag prevents us from racing hardclock_softtick().
478  *
479  * The thread starts with the MP lock released and not in a critical
480  * section.  The loop itself is MP safe while individual callbacks
481  * may or may not be, so we obtain or release the MP lock as appropriate.
482  */
483 static void
484 softclock_handler(void *arg)
485 {
486 	softclock_pcpu_t sc;
487 	struct _callout *c;
488 	struct wheel *wheel;
489 	struct callout slotimer1;
490 	struct _callout slotimer2;
491 	int mpsafe = 1;
492 
493 	/*
494 	 * Setup pcpu slow clocks which we want to run from the callout
495 	 * thread.  This thread starts very early and cannot kmalloc(),
496 	 * so use internal functions to supply the _callout.
497 	 */
498 	_callout_setup_quick(&slotimer1, &slotimer2, hz * 10,
499 			     slotimer_callback, &slotimer1);
500 
501 	/*
502 	 * Run the callout thread at the same priority as other kernel
503 	 * threads so it can be round-robined.
504 	 */
505 	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/
506 
507 	sc = arg;
508 loop:
509 	while (sc->softticks != (int)(sc->curticks + 1)) {
510 		wheel = &sc->callwheel[sc->softticks & cwheelmask];
511 
512 		spin_lock(&wheel->spin);
513 		sc->next = TAILQ_FIRST(&wheel->list);
514 		while ((c = sc->next) != NULL) {
515 			int error;
516 
517 			/*
518 			 * Match callouts for this tick.
519 			 */
520 			sc->next = TAILQ_NEXT(c, entry);
521 			if (c->qtick != sc->softticks)
522 				continue;
523 
524 			/*
525 			 * Double check the validity of the callout, detect
526 			 * if the originator's structure has been ripped out.
527 			 */
528 			if (c->verifier->toc != c) {
529 				spin_unlock(&wheel->spin);
530 				panic("_callout %p verifier %p failed",
531 				      c, c->verifier);
532 			}
533 
534 			/*
535 			 * The wheel spinlock is sufficient to set INPROG and
536 			 * remove (c) from the list.  Once INPROG is set,
537 			 * other threads can only make limited changes to (c).
538 			 */
539 			TAILQ_REMOVE(&wheel->list, c, entry);
540 			atomic_set_int(&c->flags, CALLOUT_INPROG);
541 			sc->running = c;
542 			spin_unlock(&wheel->spin);
543 
544 			/*
545 			 * Legacy mplock support
546 			 */
547 			if (c->flags & CALLOUT_MPSAFE) {
548 				if (mpsafe == 0) {
549 					mpsafe = 1;
550 					rel_mplock();
551 				}
552 			} else {
553 				if (mpsafe) {
554 					mpsafe = 0;
555 					get_mplock();
556 				}
557 			}
558 
559 			/*
560 			 * Execute the 'q' function (protected by INPROG)
561 			 */
562 			if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
563 				/*
564 				 * Raced a stop or cancel request, do
565 				 * not execute.  The processing code
566 				 * thinks it's a normal completion so
567 				 * flag the fact that cancel/stop actually
568 				 * prevented a callout here.
569 				 */
570 				if (c->flags &
571 				    (CALLOUT_CANCEL | CALLOUT_STOP)) {
572 					atomic_set_int(&c->verifier->flags,
573 						       CALLOUT_PREVENTED);
574 				}
575 			} else if (c->flags & CALLOUT_RESET) {
576 				/*
577 				 * A RESET raced, make it seem like it
578 				 * didn't.  Do nothing here and let the
579 				 * update procedure requeue us.
580 				 */
581 			} else if (c->flags & CALLOUT_AUTOLOCK) {
582 				/*
583 				 * Interlocked cancelable call.  If the
584 				 * lock gets canceled we have to flag the
585 				 * fact that the cancel/stop actually
586 				 * prevented the callout here.
587 				 */
588 				error = lockmgr(c->lk, LK_EXCLUSIVE |
589 						       LK_CANCELABLE);
590 				if (error == 0) {
591 					c->qfunc(c->qarg);
592 					lockmgr(c->lk, LK_RELEASE);
593 				} else if (c->flags &
594 					   (CALLOUT_CANCEL | CALLOUT_STOP)) {
595 					atomic_set_int(&c->verifier->flags,
596 						       CALLOUT_PREVENTED);
597 				}
598 			} else {
599 				/*
600 				 * Normal call
601 				 */
602 				c->qfunc(c->qarg);
603 			}
604 
605 			/*
606 			 * Interlocked clearing of INPROG, then handle any
607 			 * queued request (such as a callout_reset() request).
608 			 */
609 			spin_lock(&c->spin);
610 			atomic_clear_int(&c->flags,
611 					 CALLOUT_INPROG | CALLOUT_SET);
612 			sc->running = NULL;
613 			_callout_update_spinlocked(c);
614 			spin_unlock(&c->spin);
615 
616 			spin_lock(&wheel->spin);
617 		}
618 		spin_unlock(&wheel->spin);
619 		atomic_add_int(&sc->softticks, 1);
620 
621 		/*
622 		 * Clean up any _callout structures which are now allowed
623 		 * to be freed.
624 		 */
625 		crit_enter();
626 		while ((c = TAILQ_FIRST(&sc->freelist)) != NULL) {
627 			if (!exis_freeable(&c->exis))
628 				break;
629 			TAILQ_REMOVE(&sc->freelist, c, entry);
630 			c->flags = 0;
631 			kfree(c, M_CALLOUT);
632 			if (callout_debug)
633 				kprintf("KFREEB %p\n", c);
634 		}
635 		crit_exit();
636 	}
637 
638 	/*
639 	 * Don't leave us holding the MP lock when we deschedule ourselves.
640 	 */
641 	if (mpsafe == 0) {
642 		mpsafe = 1;
643 		rel_mplock();
644 	}
645 
646 	/*
647 	 * Recheck in critical section to interlock against hardclock
648 	 */
649 	crit_enter();
650 	if (sc->softticks == (int)(sc->curticks + 1)) {
651 		sc->isrunning = 0;
652 		lwkt_deschedule_self(&sc->thread);	/* == curthread */
653 		lwkt_switch();
654 	}
655 	crit_exit();
656 	goto loop;
657 	/* NOT REACHED */
658 }
659 
660 /*
661  * A very slow system cleanup timer (10 second interval),
662  * per-cpu.
663  */
664 void
665 slotimer_callback(void *arg)
666 {
667 	struct callout *c = arg;
668 
669 	slab_cleanup();
670 	callout_reset(c, hz * 10, slotimer_callback, c);
671 }
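
/*
 * slotimer_callback() above is an instance of the usual self-rearming
 * idiom: the callback re-queues itself with callout_reset().  A minimal
 * sketch of the same pattern for a hypothetical driver (the mydev_poll*
 * names and the one-second period are assumptions, shown for illustration):
 */
struct mydev_poll_softc {
	struct callout	poll_timer;
	int		poll_count;
};

static void
mydev_poll(void *arg)
{
	struct mydev_poll_softc *sc = arg;

	++sc->poll_count;		/* periodic work goes here */
	callout_reset(&sc->poll_timer, hz, mydev_poll, sc);
}

static void
mydev_poll_start(struct mydev_poll_softc *sc)
{
	callout_init_mp(&sc->poll_timer);
	callout_reset(&sc->poll_timer, hz, mydev_poll, sc);
}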
672 
673 /*
674  * API FUNCTIONS
675  */
676 
677 static __inline
678 struct _callout *
679 _callout_gettoc(struct callout *cc)
680 {
681 	globaldata_t gd = mycpu;
682 	struct _callout *c;
683 	softclock_pcpu_t sc;
684 
685 	KKASSERT(cc->flags & CALLOUT_DID_INIT);
686 	exis_hold_gd(gd);
687 	for (;;) {
688 		c = cc->toc;
689 		cpu_ccfence();
690 		if (c) {
691 			KKASSERT(c->verifier == cc);
692 			spin_lock(&c->spin);
693 			break;
694 		}
695 		sc = softclock_pcpu_ary[gd->gd_cpuid];
696 		c = kmalloc(sizeof(*c), M_CALLOUT, M_INTWAIT | M_ZERO);
697 		if (callout_debug)
698 			kprintf("ALLOC %p\n", c);
699 		c->flags = cc->flags;
700 		c->lk = cc->lk;
701 		c->verifier = cc;
702 		exis_init(&c->exis);
703 		spin_init(&c->spin, "calou");
704 		spin_lock(&c->spin);
705 		if (atomic_cmpset_ptr(&cc->toc, NULL, c))
706 			break;
707 		spin_unlock(&c->spin);
708 		c->verifier = NULL;
709 		kfree(c, M_CALLOUT);
710 		if (callout_debug)
711 			kprintf("KFREEA %p\n", c);
712 	}
713 	exis_drop_gd(gd);
714 
715 	/*
716 	 * Return internal __callout with spin-lock held
717 	 * Return internal _callout with spin-lock held
718 	return c;
719 }
720 
721 /*
722  * Macro'd in sys/callout.h for debugging
723  *
724  * WARNING! tsleep() assumes this will not block
725  */
726 void
727 _callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
728 {
729 	bzero(cc, sizeof(*cc));
730 	cc->flags = CALLOUT_DID_INIT;
731 }
732 
733 void
734 _callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
735 {
736 	bzero(cc, sizeof(*cc));
737 	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
738 }
739 
740 void
741 _callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
742 {
743 	bzero(cc, sizeof(*cc));
744 	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK;
745 	cc->lk = lk;
746 }
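
/*
 * A sketch of how the AUTOLOCK variant set up by callout_init_lk() might
 * be used: the callout is bound to a lockmgr lock, and the softclock
 * thread only runs the callback if it can acquire that lock with
 * LK_CANCELABLE (see softclock_handler() above).  The mydev_lk* names
 * and the hz / 10 period are hypothetical.
 */
struct mydev_lk_softc {
	struct lock	lk;
	struct callout	timer;
	int		timed_out;
};

static void
mydev_lk_timeout(void *arg)
{
	struct mydev_lk_softc *sc = arg;

	/* runs with sc->lk held exclusively by the softclock thread */
	sc->timed_out = 1;
}

static void
mydev_lk_arm(struct mydev_lk_softc *sc)
{
	lockinit(&sc->lk, "mydevlk", 0, 0);
	callout_init_lk(&sc->timer, &sc->lk);
	callout_reset(&sc->timer, hz / 10, mydev_lk_timeout, sc);
}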
747 
748 /*
749  * Start or restart a timeout.  New timeouts can be installed while the
750  * current one is running.
751  *
752  * Installs the callout structure on the callwheel of the current cpu.
753  * Callers may legally pass any value, even if 0 or negative, but since
754  * the sc->curticks index may have already been processed a minimum
755  * timeout of 1 tick will be enforced.
756  *
757  * This function will not deadlock against a running call.
758  *
759  * WARNING! tsleep() assumes this will not block
760  */
761 void
762 callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
763 {
764 	softclock_pcpu_t sc;
765 	struct _callout *c;
766 
767 	/*
768 	 * We need to acquire/associate a _callout.
769 	 * gettoc spin-locks (c).
770 	 */
771 	KKASSERT(cc->flags & CALLOUT_DID_INIT);
772 	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
773 	c = _callout_gettoc(cc);
774 
775 	/*
776 	 * Request a RESET.  This automatically overrides a STOP in
777 	 * _callout_update_spinlocked().
778 	 */
779 	atomic_set_int(&c->flags, CALLOUT_RESET);
780 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
781 	c->rsc = sc;
782 	c->rtick = sc->curticks + to_ticks;
783 	c->rfunc = ftn;
784 	c->rarg = arg;
785 	_callout_update_spinlocked(c);
786 	spin_unlock(&c->spin);
787 }
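
/*
 * A sketch of a one-shot use of callout_reset(): arm a watchdog when a
 * command is submitted and disarm it with callout_stop() when the command
 * completes.  Re-arming an already-armed callout simply reschedules it,
 * and a to_ticks of 0 or less is bumped to 1 tick as described above.
 * The mydev_wd* names and the 5 second period are hypothetical; the
 * callout is assumed to have been initialized (e.g. callout_init_mp())
 * at attach time.
 */
struct mydev_wd_softc {
	struct callout	wdog;
	int		wedged;
};

static void
mydev_wd_expire(void *arg)
{
	struct mydev_wd_softc *sc = arg;

	sc->wedged = 1;			/* command took too long */
}

static void
mydev_wd_submit(struct mydev_wd_softc *sc)
{
	callout_reset(&sc->wdog, 5 * hz, mydev_wd_expire, sc);
	/* ... start the hardware command ... */
}

static void
mydev_wd_complete(struct mydev_wd_softc *sc)
{
	callout_stop(&sc->wdog);	/* synchronous, see callout_stop() */
}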
788 
789 /*
790  * Same as callout_reset() but the timeout will run on a particular cpu.
791  */
792 void
793 callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
794 		    void *arg, int cpuid)
795 {
796 	softclock_pcpu_t sc;
797 	struct _callout *c;
798 
799 	/*
800 	 * We need to acquire/associate a _callout.
801 	 * gettoc spin-locks (c).
802 	 */
803 	KKASSERT(cc->flags & CALLOUT_DID_INIT);
804 	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
805 	c = _callout_gettoc(cc);
806 
807 	/*
808 	 * Set RESET.  Do not clear STOP here (let the process code do it).
809 	 */
810 	atomic_set_int(&c->flags, CALLOUT_RESET);
811 
812 	sc = softclock_pcpu_ary[cpuid];
813 	c->rsc = sc;
814 	c->rtick = sc->curticks + to_ticks;
815 	c->rfunc = ftn;
816 	c->rarg = arg;
817 	_callout_update_spinlocked(c);
818 	spin_unlock(&c->spin);
819 }
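
/*
 * callout_reset_bycpu() queues the timeout on the callwheel of a specific
 * cpu instead of the current cpu, so the callback will execute there.  A
 * minimal sketch (hypothetical names) pinning a periodic statistics rollup
 * to cpu 0:
 */
static void
example_rollup(void *arg __unused)
{
	/* aggregate per-cpu statistics here */
}

static void
example_arm_rollup_on_cpu0(struct callout *cp, void *arg)
{
	callout_reset_bycpu(cp, hz * 60, example_rollup, arg, 0);
}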
820 
821 /*
822  * Issue synchronous or asynchronous cancel or stop
823  */
824 static __inline
825 int
826 _callout_cancel_or_stop(struct callout *cc, uint32_t flags, int sync)
827 {
828 	globaldata_t gd = mycpu;
829 	struct _callout *c;
830 	int res;
831 
832 	/*
833 	 * Callout is inactive after cancel or stop.  Degenerate case if
834 	 * no _callout is currently associated.
835 	 */
836 	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
837 	if (cc->toc == NULL)
838 		return 0;
839 
840 	/*
841 	 * Ensure that the related (c) is not destroyed.  Set the CANCEL
842 	 * or STOP request flag, clear the PREVENTED status flag, and update.
843 	 */
844 	exis_hold_gd(gd);
845 	c = _callout_gettoc(cc);
846 	atomic_clear_int(&c->flags, CALLOUT_PREVENTED);
847 	atomic_set_int(&c->flags, flags);
848 	_callout_update_spinlocked(c);
849 	spin_unlock(&c->spin);
850 
851 	/*
852 	 * If the operation is still in-progress then re-acquire the spin-lock
853 	 * and block if necessary.  Also initiate the lock cancel.
854 	 */
855 	if (sync == 0 || (c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
856 		exis_drop_gd(gd);
857 		return 0;
858 	}
859 	if (c->flags & CALLOUT_AUTOLOCK)
860 		lockmgr(c->lk, LK_CANCEL_BEG);
861 	spin_lock(&c->spin);
862 	if ((c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
863 		spin_unlock(&c->spin);
864 		if (c->flags & CALLOUT_AUTOLOCK)
865 			lockmgr(c->lk, LK_CANCEL_END);
866 		exis_drop_gd(gd);
867 		return ((c->flags & CALLOUT_PREVENTED) != 0);
868 	}
869 
870 	/*
871 	 * With c->spin held we can synchronously wait for completion of our
872 	 * request.
873 	 */
874 	++c->waiters;
875 	for (;;) {
876 		cpu_ccfence();
877 		if ((c->flags & flags) == 0)
878 			break;
879 		ssleep(c, &c->spin, 0, "costp", 0);
880 	}
881 	--c->waiters;
882 	spin_unlock(&c->spin);
883 	if (c->flags & CALLOUT_AUTOLOCK)
884 		lockmgr(c->lk, LK_CANCEL_END);
885 	res = ((c->flags & CALLOUT_PREVENTED) != 0);
886 	exis_drop_gd(gd);
887 
888 	return res;
889 }
890 
891 /*
892  * Internalized special low-overhead version without normal safety
893  * checks or allocations.  Used by tsleep().
894  *
895  * Must be called from a critical section.  Specify both the external
896  * and internal callout structures; the timeout is set on the current cpu.
897  */
898 void
899 _callout_setup_quick(struct callout *cc, struct _callout *c, int ticks,
900 		     void (*ftn)(void *), void *arg)
901 {
902 	softclock_pcpu_t sc;
903 	struct wheel *wheel;
904 
905 	/*
906 	 * Request a RESET.  This automatically overrides a STOP in
907 	 * _callout_update_spinlocked().
908 	 */
909 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
910 
911 	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
912 	cc->toc = c;
913 	cc->lk = NULL;
914 	c->flags = cc->flags | CALLOUT_SET;
915 	c->lk = NULL;
916 	c->verifier = cc;
917 	c->qsc = sc;
918 	c->qtick = sc->curticks + ticks;
919 	c->qfunc = ftn;
920 	c->qarg = arg;
921 	spin_init(&c->spin, "calou");
922 
923 	/*
924 	 * Since we are on the same cpu with a critical section, we can
925 	 * do this with only the wheel spinlock.
926 	 */
927 	if (c->qtick - sc->softticks <= 0)
928 		c->qtick = sc->softticks + 1;
929 	wheel = &sc->callwheel[c->qtick & cwheelmask];
930 
931 	spin_lock(&wheel->spin);
932 	TAILQ_INSERT_TAIL(&wheel->list, c, entry);
933 	spin_unlock(&wheel->spin);
934 }
935 
936 /*
937  * Internalized special low-overhead version without normal safety
938  * checks or allocations.  Used by tsleep().
939  *
940  * Must be called on the same cpu that queued the timeout.
941  * Must be called with a critical section already held.
942  */
943 void
944 _callout_cancel_quick(struct _callout *c)
945 {
946 	softclock_pcpu_t sc;
947 	struct wheel *wheel;
948 
949 	/*
950 	 * Wakeup callouts for tsleep() should never block, so this flag
951 	 * had better never be found set.
952 	 */
953 	KKASSERT((c->flags & CALLOUT_INPROG) == 0);
954 
955 	/*
956 	 * Remove from queue if necessary.  Since we are in a critical
957 	 * section on the same cpu, the queueing status should not change.
958 	 */
959 	if (c->flags & CALLOUT_SET) {
960 		sc = c->qsc;
961 		KKASSERT(sc == softclock_pcpu_ary[mycpu->gd_cpuid]);
962 		wheel = &sc->callwheel[c->qtick & cwheelmask];
963 
964 		/*
965 		 * NOTE: We must still spin-lock the wheel because other
966 		 *	 cpus can manipulate the list.
967 		 */
968 		spin_lock(&wheel->spin);
969 		TAILQ_REMOVE(&wheel->list, c, entry);
970 		c->flags &= ~(CALLOUT_SET | CALLOUT_STOP |
971 			      CALLOUT_CANCEL | CALLOUT_RESET);
972 		spin_unlock(&wheel->spin);
973 	}
974 	c->verifier = NULL;
975 }
976 
977 /*
978  * This is a synchronous STOP which cancels the callout.  If AUTOLOCK
979  * then a CANCEL will be issued to the lock holder.  Unlike STOP, the
980  * cancel function prevents any new callout_reset()s from being issued
981  * in addition to canceling the lock.  The lock will also be deactivated.
982  *
983  * Returns 0 if the callout was not active (or was active and completed,
984  *	     but didn't try to start a new timeout).
985  * Returns 1 if the cancel is responsible for stopping the callout.
986  */
987 int
988 callout_cancel(struct callout *cc)
989 {
990 	return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
991 }
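
/*
 * A sketch of one way the return value might be used (a hypothetical
 * reference-counting pattern, not prescribed by this file): when
 * callout_cancel() returns 1 the callback was prevented and will never
 * run, so the caller must drop whatever reference the callback would
 * otherwise have dropped.
 */
struct example_refobj {
	struct callout	timer;
	u_int		refs;
};

static void
example_refobj_cancel_timer(struct example_refobj *obj)
{
	if (callout_cancel(&obj->timer))
		atomic_subtract_int(&obj->refs, 1);
}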
992 
993 /*
994  * Currently the same as callout_cancel.  Ultimately we may wish the
995  * drain function to allow a pending callout to proceed, but for now
996  * we will attempt to cancel it.
997  *
998  * Returns 0 if the callout was not active (or was active and completed,
999  *	     but didn't try to start a new timeout).
1000  * Returns 1 if the drain is responsible for stopping the callout.
1001  */
1002 int
1003 callout_drain(struct callout *cc)
1004 {
1005 	return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
1006 }
1007 
1008 /*
1009  * Stops a callout if it is pending or queued, does not block.
1010  * This function does not interlock against a callout that is in-progress.
1011  *
1012  * Returns whether the STOP operation was responsible for removing a
1013  * queued or pending callout.
1014  */
1015 int
1016 callout_stop_async(struct callout *cc)
1017 {
1018 	return _callout_cancel_or_stop(cc, CALLOUT_STOP, 0);
1019 }
1020 
1021 /*
1022  * Callout deactivate merely clears the CALLOUT_ACTIVE bit and stops a
1023  * callout if it is pending or queued.  However, this cannot stop a callout
1024  * whose callback is in-progress.
1025  *
1027  * This function does not interlock against a callout that is in-progress.
1028  */
1029 void
1030 callout_deactivate(struct callout *cc)
1031 {
1032 	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
1033 	callout_stop_async(cc);
1034 }
1035 
1036 /*
1037  * Lock-aided callouts are STOPped synchronously using STOP semantics
1038  * (meaning that another thread can start the callout again before we
1039  * return).
1040  *
1041  * Non-lock-aided callouts:
1042  *
1043  * Stops a callout if it is pending or queued, does not block.
1044  * This function does not interlock against a callout that is in-progress.
1045  */
1046 int
1047 callout_stop(struct callout *cc)
1048 {
1049 	return _callout_cancel_or_stop(cc, CALLOUT_STOP, 1);
1050 }
1051 
1052 /*
1053  * Destroy the callout.  Synchronously cancel any operation in progress,
1054  * clear the INIT flag, and disconnect the internal _callout.  The internal
1055  * callout will be safely freed via EXIS.
1056  *
1057  * Upon return, the callout structure may only be reused if re-initialized.
1058  */
1059 void
1060 callout_terminate(struct callout *cc)
1061 {
1062 	struct _callout *c;
1063 
1064 	exis_hold();
1065 
1066 	_callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
1067 	KKASSERT(cc->flags & CALLOUT_DID_INIT);
1068 	atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
1069 	c = atomic_swap_ptr((void *)&cc->toc, NULL);
1070 	if (c) {
1071 		KKASSERT(c->verifier == cc);
1072 		c->verifier = NULL;
1073 		_callout_free(c);
1074 	}
1075 
1076 	exis_drop();
1077 }
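
/*
 * A minimal teardown sketch (hypothetical names): the callout must be
 * terminated before the structure embedding it is freed, since
 * callout_terminate() is what disconnects and EXIS-frees the internal
 * _callout.
 */
struct example_tobj {
	struct callout	timer;
	/* ... other fields ... */
};

static void
example_tobj_destroy(struct example_tobj *obj)
{
	callout_terminate(&obj->timer);	/* sync cancel + disconnect */
	kfree(obj, M_TEMP);
}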
1078 
1079 /*
1080  * Returns whether a callout is queued and the time has not yet
1081  * arrived (the callout is not yet in-progress).
1082  */
1083 int
1084 callout_pending(struct callout *cc)
1085 {
1086 	struct _callout *c;
1087 
1088 	/*
1089 	 * Don't instantiate toc to test pending
1090 	 */
1091 	if (cc->toc == NULL)
1092 		return 0;
1093 	c = _callout_gettoc(cc);
1094 	if ((c->flags & (CALLOUT_SET | CALLOUT_INPROG)) == CALLOUT_SET) {
1095 		spin_unlock(&c->spin);
1096 		return 1;
1097 	}
1098 	spin_unlock(&c->spin);
1099 
1100 	return 0;
1101 }
1102 
1103 /*
1104  * Returns whether a callout is active or not.  A callout is active when
1105  * a timeout is set and remains active upon normal termination, even if
1106  * it does not issue a new timeout.  A callout is inactive if a timeout has
1107  * never been set or if the callout has been stopped or canceled.  The next
1108  * timeout that is set will re-set the active state.
1109  */
1110 int
1111 callout_active(struct callout *cc)
1112 {
1113 	return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
1114 }
1115
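
/*
 * A sketch of one traditional way callout_pending(), callout_active() and
 * callout_deactivate() are combined inside a callback to detect races with
 * callout_reset()/callout_stop() issued from other code paths.  The
 * example_softc fields are hypothetical, and real code would interlock
 * these tests against the paths calling callout_reset()/callout_stop()
 * (e.g. with a lock shared via callout_init_lk()); that interlock is
 * omitted here for brevity.
 */
struct example_softc {
	struct callout	timer;
	int		ticks_seen;
};

static void
example_timer_callback(void *arg)
{
	struct example_softc *sc = arg;

	if (callout_pending(&sc->timer))
		return;		/* re-armed while being dispatched */
	if (callout_active(&sc->timer) == 0)
		return;		/* stopped or canceled after dispatch */
	callout_deactivate(&sc->timer);
	++sc->ticks_seen;	/* the timeout really fired */
}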