xref: /onnv-gate/usr/src/uts/common/os/callout.c (revision 3783:c865a2700370)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53642Sqiao  * Common Development and Distribution License (the "License").
63642Sqiao  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
223642Sqiao  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <sys/callo.h>
290Sstevel@tonic-gate #include <sys/param.h>
300Sstevel@tonic-gate #include <sys/types.h>
310Sstevel@tonic-gate #include <sys/systm.h>
320Sstevel@tonic-gate #include <sys/cpuvar.h>
330Sstevel@tonic-gate #include <sys/thread.h>
340Sstevel@tonic-gate #include <sys/kmem.h>
350Sstevel@tonic-gate #include <sys/cmn_err.h>
360Sstevel@tonic-gate #include <sys/callb.h>
370Sstevel@tonic-gate #include <sys/debug.h>
380Sstevel@tonic-gate #include <sys/vtrace.h>
390Sstevel@tonic-gate #include <sys/sysmacros.h>
400Sstevel@tonic-gate #include <sys/sdt.h>
410Sstevel@tonic-gate 
420Sstevel@tonic-gate /*
430Sstevel@tonic-gate  * Callout tables.  See timeout(9F) for details.
440Sstevel@tonic-gate  */
/*
 * File-scope state for the callout subsystem:
 *
 * cpr_stop_callout - set while a CPR checkpoint is in progress; checked by
 *	callout_schedule() to suspend callout processing (set/cleared by
 *	callout_cpr_callb()).
 * callout_fanout - number of per-CPU table columns actually populated;
 *	set to MIN(CALLOUT_FANOUT, max_ncpus) in callout_init().
 * ncallout - running count of callout_t structures ever allocated by
 *	callout_alloc().  NOTE(review): incremented without ct_lock held,
 *	so this is only a best-effort statistic.
 * callout_table - all callout tables, indexed by CALLOUT_TABLE(type, fanout).
 */
450Sstevel@tonic-gate static int cpr_stop_callout;
460Sstevel@tonic-gate static int callout_fanout;
470Sstevel@tonic-gate static int ncallout;
480Sstevel@tonic-gate static callout_table_t *callout_table[CALLOUT_TABLES];
490Sstevel@tonic-gate 
/*
 * Link cp onto the head of the doubly-linked hash chain whose head
 * pointer is the lvalue cthead.  cnext/cprev name cp's link fields.
 * Deliberately a plain brace block (not do/while): CALLOUT_HASH_UPDATE
 * expands two of these back to back with no separating semicolons.
 */
#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **slotp = &cthead;			\
	callout_t *first = *slotp;			\
	cp->cprev = NULL;				\
	cp->cnext = first;				\
	if (first != NULL)				\
		first->cprev = cp;			\
	*slotp = cp;					\
}
600Sstevel@tonic-gate 
/*
 * Unlink cp from the doubly-linked hash chain headed by cthead.
 * cp's own link fields are left untouched (stale).  Brace block, not
 * do/while: CALLOUT_HASH_UPDATE expands these with no trailing
 * semicolons between invocations.
 */
#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *np = cp->cnext;			\
	callout_t *pp = cp->cprev;			\
	if (pp == NULL)					\
		cthead = np;				\
	else						\
		pp->cnext = np;				\
	if (np != NULL)					\
		np->cprev = pp;				\
}
720Sstevel@tonic-gate 
/*
 * CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime):
 * Insert cp into, or delete cp from, BOTH per-table hash chains at once:
 * the ID hash (keyed on callout ID) and the lbolt hash (keyed on the tick
 * at which the callout is due).  INSDEL is the literal token INSERT or
 * DELETE, pasted onto CALLOUT_HASH_.  Caller must hold ct->ct_lock, and
 * cp's c_xid/c_runtime must match id/runtime.
 */
730Sstevel@tonic-gate #define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime)		\
740Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ct->ct_lock));				\
750Sstevel@tonic-gate 	ASSERT(cp->c_xid == id && cp->c_runtime == runtime);		\
760Sstevel@tonic-gate 	CALLOUT_HASH_##INSDEL(ct->ct_idhash[CALLOUT_IDHASH(id)],	\
770Sstevel@tonic-gate 	cp, c_idnext, c_idprev)						\
780Sstevel@tonic-gate 	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
790Sstevel@tonic-gate 	cp, c_lbnext, c_lbprev)
800Sstevel@tonic-gate 
/*
 * Insert cp into ct's hrestime queue (ct_hresq), which is kept sorted
 * by ascending c_hresms.  cp is linked in immediately before the first
 * entry whose c_hresms is >= hresms, so the queue head is always the
 * earliest deadline and equal deadlines keep the newest entry first.
 * cnext/cprev name cp's link fields.
 */
#define	CALLOUT_HRES_INSERT(ct, cp, cnext, cprev, hresms)		\
{									\
	callout_t *succ = ct->ct_hresq;					\
	callout_t *pred = NULL;						\
									\
	/* Walk past every entry strictly earlier than hresms. */	\
	while (succ != NULL && hresms > succ->c_hresms) {		\
		pred = succ;						\
		succ = succ->cnext;					\
	}								\
	cp->cnext = succ;						\
	cp->cprev = pred;						\
	if (succ != NULL)						\
		succ->cprev = cp;					\
	if (pred != NULL)						\
		pred->cnext = cp;					\
	else								\
		ct->ct_hresq = cp;					\
}
1043642Sqiao 
/*
 * Unlink cp from ct's hrestime queue (ct_hresq).  The hresms argument
 * is accepted for symmetry with CALLOUT_HRES_INSERT but never
 * evaluated.  cp's own link fields are left untouched (stale).
 */
#define	CALLOUT_HRES_DELETE(ct, cp, cnext, cprev, hresms)	\
{								\
	callout_t *np = cp->cnext;				\
	callout_t *pp = (cp == ct->ct_hresq) ? NULL : cp->cprev; \
	if (pp == NULL)						\
		ct->ct_hresq = np;				\
	else							\
		pp->cnext = np;					\
	if (np != NULL)						\
		np->cprev = pp;					\
}
1173642Sqiao 
/*
 * CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms):
 * Insert cp into, or delete cp from, the table's time-sorted hrestime
 * queue (ct_hresq).  INSDEL is the literal token INSERT or DELETE,
 * pasted onto CALLOUT_HRES_.  The hresms argument is only used on the
 * INSERT path; DELETE ignores it.  Caller must hold ct->ct_lock, and
 * cp->c_xid must match id.
 */
1183642Sqiao #define	CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms)		\
1193642Sqiao 	ASSERT(MUTEX_HELD(&ct->ct_lock));			\
1203642Sqiao 	ASSERT(cp->c_xid == id);				\
1213642Sqiao 	CALLOUT_HRES_##INSDEL(ct, cp, c_hrnext,			\
1223642Sqiao 	c_hrprev, hresms)
1233642Sqiao 
1240Sstevel@tonic-gate /*
1250Sstevel@tonic-gate  * Allocate a callout structure.  We try quite hard because we
1260Sstevel@tonic-gate  * can't sleep, and if we can't do the allocation, we're toast.
1270Sstevel@tonic-gate  * Failing all, we try a KM_PANIC allocation.
1280Sstevel@tonic-gate  */
1290Sstevel@tonic-gate static callout_t *
1300Sstevel@tonic-gate callout_alloc(callout_table_t *ct)
1310Sstevel@tonic-gate {
1320Sstevel@tonic-gate 	size_t size = 0;
1330Sstevel@tonic-gate 	callout_t *cp = NULL;
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
1360Sstevel@tonic-gate 	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
1370Sstevel@tonic-gate 	    KM_NOSLEEP | KM_PANIC);
1380Sstevel@tonic-gate 	bzero(cp, sizeof (callout_t));
1390Sstevel@tonic-gate 	ncallout++;
1400Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
1410Sstevel@tonic-gate 	return (cp);
1420Sstevel@tonic-gate }
1430Sstevel@tonic-gate 
1440Sstevel@tonic-gate /*
1450Sstevel@tonic-gate  * Arrange that func(arg) be called after delta clock ticks.
1460Sstevel@tonic-gate  */
1470Sstevel@tonic-gate static timeout_id_t
1480Sstevel@tonic-gate timeout_common(void (*func)(void *), void *arg, clock_t delta,
1490Sstevel@tonic-gate     callout_table_t *ct)
1500Sstevel@tonic-gate {
1513642Sqiao 	callout_t	*cp;
1523642Sqiao 	callout_id_t	id;
1533642Sqiao 	clock_t		runtime;
1543642Sqiao 	timestruc_t	now;
1553642Sqiao 	int64_t		hresms;
1563642Sqiao 
1573642Sqiao 	gethrestime(&now);
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
1600Sstevel@tonic-gate 
1610Sstevel@tonic-gate 	if ((cp = ct->ct_freelist) == NULL)
1620Sstevel@tonic-gate 		cp = callout_alloc(ct);
1630Sstevel@tonic-gate 	else
1640Sstevel@tonic-gate 		ct->ct_freelist = cp->c_idnext;
1650Sstevel@tonic-gate 
1660Sstevel@tonic-gate 	cp->c_func = func;
1670Sstevel@tonic-gate 	cp->c_arg = arg;
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate 	/*
1700Sstevel@tonic-gate 	 * Make sure the callout runs at least 1 tick in the future.
1710Sstevel@tonic-gate 	 */
1720Sstevel@tonic-gate 	if (delta <= 0)
1730Sstevel@tonic-gate 		delta = 1;
1740Sstevel@tonic-gate 	cp->c_runtime = runtime = lbolt + delta;
1750Sstevel@tonic-gate 
176*3783Sqiao 	/*
177*3783Sqiao 	 * Calculate the future time in millisecond.
178*3783Sqiao 	 * We must cast tv_sec and delta to 64-bit integers
179*3783Sqiao 	 * to avoid integer overflow on 32-platforms.
180*3783Sqiao 	 */
181*3783Sqiao 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC +
182*3783Sqiao 	    TICK_TO_MSEC((int64_t)delta);
183*3783Sqiao 
1843642Sqiao 	cp->c_hresms = hresms;
1853642Sqiao 
1860Sstevel@tonic-gate 	/*
1870Sstevel@tonic-gate 	 * Assign an ID to this callout
1880Sstevel@tonic-gate 	 */
1890Sstevel@tonic-gate 	if (delta > CALLOUT_LONGTERM_TICKS)
1900Sstevel@tonic-gate 		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
1910Sstevel@tonic-gate 		    CALLOUT_COUNTER_HIGH;
1920Sstevel@tonic-gate 	else
1930Sstevel@tonic-gate 		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
1940Sstevel@tonic-gate 		    CALLOUT_COUNTER_HIGH;
1950Sstevel@tonic-gate 
1960Sstevel@tonic-gate 	cp->c_xid = id;
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate 	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
1993642Sqiao 	CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms);
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
2040Sstevel@tonic-gate 		"timeout:%K(%p) in %ld ticks, cp %p",
2050Sstevel@tonic-gate 		func, arg, delta, cp);
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate 	return ((timeout_id_t)id);
2080Sstevel@tonic-gate }
2090Sstevel@tonic-gate 
2100Sstevel@tonic-gate timeout_id_t
2110Sstevel@tonic-gate timeout(void (*func)(void *), void *arg, clock_t delta)
2120Sstevel@tonic-gate {
2130Sstevel@tonic-gate 	return (timeout_common(func, arg, delta,
2140Sstevel@tonic-gate 	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate }
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate timeout_id_t
2190Sstevel@tonic-gate realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
2200Sstevel@tonic-gate {
2210Sstevel@tonic-gate 	return (timeout_common(func, arg, delta,
2220Sstevel@tonic-gate 	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
2230Sstevel@tonic-gate }
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate clock_t
2260Sstevel@tonic-gate untimeout(timeout_id_t id_arg)
2270Sstevel@tonic-gate {
2280Sstevel@tonic-gate 	callout_id_t id = (callout_id_t)id_arg;
2290Sstevel@tonic-gate 	callout_table_t *ct;
2300Sstevel@tonic-gate 	callout_t *cp;
2310Sstevel@tonic-gate 	callout_id_t xid;
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	ct = callout_table[id & CALLOUT_TABLE_MASK];
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate 		if ((xid = cp->c_xid) == id) {
2400Sstevel@tonic-gate 			clock_t runtime = cp->c_runtime;
2410Sstevel@tonic-gate 			clock_t time_left = runtime - lbolt;
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
2443642Sqiao 			CALLOUT_HRES_UPDATE(DELETE, ct, cp, id, 0);
2450Sstevel@tonic-gate 			cp->c_idnext = ct->ct_freelist;
2460Sstevel@tonic-gate 			ct->ct_freelist = cp;
2470Sstevel@tonic-gate 			mutex_exit(&ct->ct_lock);
2480Sstevel@tonic-gate 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
2490Sstevel@tonic-gate 			    "untimeout:ID %lx ticks_left %ld", id, time_left);
2500Sstevel@tonic-gate 			return (time_left < 0 ? 0 : time_left);
2510Sstevel@tonic-gate 		}
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 		if (xid != (id | CALLOUT_EXECUTING))
2540Sstevel@tonic-gate 			continue;
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate 		/*
2570Sstevel@tonic-gate 		 * The callout we want to delete is currently executing.
2580Sstevel@tonic-gate 		 * The DDI states that we must wait until the callout
2590Sstevel@tonic-gate 		 * completes before returning, so we block on c_done until
2600Sstevel@tonic-gate 		 * the callout ID changes (to zero if it's on the freelist,
2610Sstevel@tonic-gate 		 * or to a new callout ID if it's in use).  This implicitly
2620Sstevel@tonic-gate 		 * assumes that callout structures are persistent (they are).
2630Sstevel@tonic-gate 		 */
2640Sstevel@tonic-gate 		if (cp->c_executor == curthread) {
2650Sstevel@tonic-gate 			/*
2660Sstevel@tonic-gate 			 * The timeout handler called untimeout() on itself.
2670Sstevel@tonic-gate 			 * Stupid, but legal.  We can't wait for the timeout
2680Sstevel@tonic-gate 			 * to complete without deadlocking, so we just return.
2690Sstevel@tonic-gate 			 */
2700Sstevel@tonic-gate 			mutex_exit(&ct->ct_lock);
2710Sstevel@tonic-gate 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
2720Sstevel@tonic-gate 			    "untimeout_self:ID %x", id);
2730Sstevel@tonic-gate 			return (-1);
2740Sstevel@tonic-gate 		}
2750Sstevel@tonic-gate 		while (cp->c_xid == xid)
2760Sstevel@tonic-gate 			cv_wait(&cp->c_done, &ct->ct_lock);
2770Sstevel@tonic-gate 		mutex_exit(&ct->ct_lock);
2780Sstevel@tonic-gate 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
2790Sstevel@tonic-gate 		    "untimeout_executing:ID %lx", id);
2800Sstevel@tonic-gate 		return (-1);
2810Sstevel@tonic-gate 	}
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
2840Sstevel@tonic-gate 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
2850Sstevel@tonic-gate 	    "untimeout_bogus_id:ID %lx", id);
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate 	/*
2880Sstevel@tonic-gate 	 * We didn't find the specified callout ID.  This means either
2890Sstevel@tonic-gate 	 * (1) the callout already fired, or (2) the caller passed us
2900Sstevel@tonic-gate 	 * a bogus value.  Perform a sanity check to detect case (2).
2910Sstevel@tonic-gate 	 */
2920Sstevel@tonic-gate 	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
2930Sstevel@tonic-gate 	    CALLOUT_COUNTER_HIGH)
2940Sstevel@tonic-gate 		panic("untimeout: impossible timeout id %lx", id);
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate 	return (-1);
2970Sstevel@tonic-gate }
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate /*
3000Sstevel@tonic-gate  * Do the actual work of executing callouts.  This routine is called either
3010Sstevel@tonic-gate  * by a taskq_thread (normal case), or by softcall (realtime case).
3020Sstevel@tonic-gate  */
3030Sstevel@tonic-gate static void
3040Sstevel@tonic-gate callout_execute(callout_table_t *ct)
3050Sstevel@tonic-gate {
3063642Sqiao 	callout_t	*cp;
3073642Sqiao 	callout_id_t	xid;
3083642Sqiao 	clock_t		runtime;
3093642Sqiao 	timestruc_t	now;
3103642Sqiao 	int64_t		hresms;
3110Sstevel@tonic-gate 
3120Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate 	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
3150Sstevel@tonic-gate 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
3160Sstevel@tonic-gate 		    cp != NULL; cp = cp->c_lbnext) {
3170Sstevel@tonic-gate 			xid = cp->c_xid;
3180Sstevel@tonic-gate 			if (cp->c_runtime != runtime ||
3190Sstevel@tonic-gate 			    (xid & CALLOUT_EXECUTING))
3200Sstevel@tonic-gate 				continue;
3210Sstevel@tonic-gate 			cp->c_executor = curthread;
3220Sstevel@tonic-gate 			cp->c_xid = xid |= CALLOUT_EXECUTING;
3230Sstevel@tonic-gate 			mutex_exit(&ct->ct_lock);
3240Sstevel@tonic-gate 			DTRACE_PROBE1(callout__start, callout_t *, cp);
3250Sstevel@tonic-gate 			(*cp->c_func)(cp->c_arg);
3260Sstevel@tonic-gate 			DTRACE_PROBE1(callout__end, callout_t *, cp);
3270Sstevel@tonic-gate 			mutex_enter(&ct->ct_lock);
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 			/*
3303642Sqiao 			 * Delete callout from both the hash tables and the
3313642Sqiao 			 * hres queue, return it to freelist, and tell anyone
3323642Sqiao 			 * who cares that we're done.
3330Sstevel@tonic-gate 			 * Even though we dropped and reacquired ct->ct_lock,
3340Sstevel@tonic-gate 			 * it's OK to pick up where we left off because only
3350Sstevel@tonic-gate 			 * newly-created timeouts can precede cp on ct_lbhash,
3360Sstevel@tonic-gate 			 * and those timeouts cannot be due on this tick.
3370Sstevel@tonic-gate 			 */
3380Sstevel@tonic-gate 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
3393642Sqiao 			CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
3400Sstevel@tonic-gate 			cp->c_idnext = ct->ct_freelist;
3410Sstevel@tonic-gate 			ct->ct_freelist = cp;
3420Sstevel@tonic-gate 			cp->c_xid = 0;	/* Indicate completion for c_done */
3430Sstevel@tonic-gate 			cv_broadcast(&cp->c_done);
3440Sstevel@tonic-gate 		}
3450Sstevel@tonic-gate 		/*
3460Sstevel@tonic-gate 		 * We have completed all callouts that were scheduled to
3470Sstevel@tonic-gate 		 * run at "runtime".  If the global run time still matches
3480Sstevel@tonic-gate 		 * our local copy, then we advance the global run time;
3490Sstevel@tonic-gate 		 * otherwise, another callout thread must have already done so.
3500Sstevel@tonic-gate 		 */
3510Sstevel@tonic-gate 		if (ct->ct_runtime == runtime)
3520Sstevel@tonic-gate 			ct->ct_runtime = runtime + 1;
3530Sstevel@tonic-gate 	}
3543642Sqiao 
3553642Sqiao 	gethrestime(&now);
3563642Sqiao 
357*3783Sqiao 	/*
358*3783Sqiao 	 * Calculate the future time in millisecond.
359*3783Sqiao 	 * We must cast tv_sec to 64-bit integer
360*3783Sqiao 	 * to avoid integer overflow on 32-platforms.
361*3783Sqiao 	 */
362*3783Sqiao 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
3633642Sqiao 
3643642Sqiao 	cp = ct->ct_hresq;
3653642Sqiao 	while (cp != NULL && hresms >= cp->c_hresms) {
3663642Sqiao 		xid = cp->c_xid;
3673642Sqiao 		if (xid & CALLOUT_EXECUTING) {
3683642Sqiao 			cp = cp->c_hrnext;
3693642Sqiao 			continue;
3703642Sqiao 		}
3713642Sqiao 		cp->c_executor = curthread;
3723642Sqiao 		cp->c_xid = xid |= CALLOUT_EXECUTING;
3733642Sqiao 		runtime = cp->c_runtime;
3743642Sqiao 		mutex_exit(&ct->ct_lock);
3753642Sqiao 		DTRACE_PROBE1(callout__start, callout_t *, cp);
3763642Sqiao 		(*cp->c_func)(cp->c_arg);
3773642Sqiao 		DTRACE_PROBE1(callout__end, callout_t *, cp);
3783642Sqiao 		mutex_enter(&ct->ct_lock);
3793642Sqiao 
3803642Sqiao 		/*
3813642Sqiao 		 * See comments above.
3823642Sqiao 		 */
3833642Sqiao 		CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
3843642Sqiao 		CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
3853642Sqiao 		cp->c_idnext = ct->ct_freelist;
3863642Sqiao 		ct->ct_freelist = cp;
3873642Sqiao 		cp->c_xid = 0;	/* Indicate completion for c_done */
3883642Sqiao 		cv_broadcast(&cp->c_done);
3893642Sqiao 
3903642Sqiao 		/*
3913642Sqiao 		 * Start over from the head of the list, see if
3923642Sqiao 		 * any timeout bearing an earlier hres time.
3933642Sqiao 		 */
3943642Sqiao 		cp = ct->ct_hresq;
3953642Sqiao 	}
3960Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
3970Sstevel@tonic-gate }
3980Sstevel@tonic-gate 
3990Sstevel@tonic-gate /*
4000Sstevel@tonic-gate  * Schedule any callouts that are due on or before this tick.
4010Sstevel@tonic-gate  */
4020Sstevel@tonic-gate static void
4030Sstevel@tonic-gate callout_schedule_1(callout_table_t *ct)
4040Sstevel@tonic-gate {
4053642Sqiao 	callout_t	*cp;
4063642Sqiao 	clock_t		curtime, runtime;
4073642Sqiao 	timestruc_t	now;
4083642Sqiao 	int64_t		hresms;
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
4110Sstevel@tonic-gate 	ct->ct_curtime = curtime = lbolt;
4120Sstevel@tonic-gate 	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
4130Sstevel@tonic-gate 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
4140Sstevel@tonic-gate 		    cp != NULL; cp = cp->c_lbnext) {
4150Sstevel@tonic-gate 			if (cp->c_runtime != runtime ||
4160Sstevel@tonic-gate 			    (cp->c_xid & CALLOUT_EXECUTING))
4170Sstevel@tonic-gate 				continue;
4180Sstevel@tonic-gate 			mutex_exit(&ct->ct_lock);
4190Sstevel@tonic-gate 			if (ct->ct_taskq == NULL)
4200Sstevel@tonic-gate 				softcall((void (*)(void *))callout_execute, ct);
4210Sstevel@tonic-gate 			else
4220Sstevel@tonic-gate 				(void) taskq_dispatch(ct->ct_taskq,
4230Sstevel@tonic-gate 				    (task_func_t *)callout_execute, ct,
4240Sstevel@tonic-gate 				    KM_NOSLEEP);
4250Sstevel@tonic-gate 			return;
4260Sstevel@tonic-gate 		}
4270Sstevel@tonic-gate 		ct->ct_runtime++;
4280Sstevel@tonic-gate 	}
4293642Sqiao 
4303642Sqiao 	gethrestime(&now);
4313642Sqiao 
432*3783Sqiao 	/*
433*3783Sqiao 	 * Calculate the future time in millisecond.
434*3783Sqiao 	 * We must cast tv_sec to 64-bit integer
435*3783Sqiao 	 * to avoid integer overflow on 32-platforms.
436*3783Sqiao 	 */
437*3783Sqiao 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
4383642Sqiao 
4393642Sqiao 	cp = ct->ct_hresq;
4403642Sqiao 	while (cp != NULL && hresms >= cp->c_hresms) {
4413642Sqiao 		if (cp->c_xid & CALLOUT_EXECUTING) {
4423642Sqiao 			cp = cp->c_hrnext;
4433642Sqiao 			continue;
4443642Sqiao 		}
4453642Sqiao 		mutex_exit(&ct->ct_lock);
4463642Sqiao 		if (ct->ct_taskq == NULL)
4473642Sqiao 			softcall((void (*)(void *))callout_execute, ct);
4483642Sqiao 		else
4493642Sqiao 			(void) taskq_dispatch(ct->ct_taskq,
4503642Sqiao 			    (task_func_t *)callout_execute, ct, KM_NOSLEEP);
4513642Sqiao 		return;
4523642Sqiao 	}
4530Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
4540Sstevel@tonic-gate }
4550Sstevel@tonic-gate 
4560Sstevel@tonic-gate /*
4570Sstevel@tonic-gate  * Schedule callouts for all callout tables.  Called by clock() on each tick.
4580Sstevel@tonic-gate  */
4590Sstevel@tonic-gate void
4600Sstevel@tonic-gate callout_schedule(void)
4610Sstevel@tonic-gate {
4620Sstevel@tonic-gate 	int f, t;
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate 	if (cpr_stop_callout)
4650Sstevel@tonic-gate 		return;
4660Sstevel@tonic-gate 
4670Sstevel@tonic-gate 	for (t = 0; t < CALLOUT_NTYPES; t++)
4680Sstevel@tonic-gate 		for (f = 0; f < callout_fanout; f++)
4690Sstevel@tonic-gate 			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
4700Sstevel@tonic-gate }
4710Sstevel@tonic-gate 
4720Sstevel@tonic-gate /*
4730Sstevel@tonic-gate  * Callback handler used by CPR to stop and resume callouts.
4740Sstevel@tonic-gate  */
4750Sstevel@tonic-gate /*ARGSUSED*/
4760Sstevel@tonic-gate static boolean_t
4770Sstevel@tonic-gate callout_cpr_callb(void *arg, int code)
4780Sstevel@tonic-gate {
4790Sstevel@tonic-gate 	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
4800Sstevel@tonic-gate 	return (B_TRUE);
4810Sstevel@tonic-gate }
4820Sstevel@tonic-gate 
4830Sstevel@tonic-gate /*
4840Sstevel@tonic-gate  * Initialize all callout tables.  Called at boot time just before clkstart().
4850Sstevel@tonic-gate  */
4860Sstevel@tonic-gate void
4870Sstevel@tonic-gate callout_init(void)
4880Sstevel@tonic-gate {
4890Sstevel@tonic-gate 	int f, t;
4900Sstevel@tonic-gate 	int table_id;
4910Sstevel@tonic-gate 	callout_table_t *ct;
4920Sstevel@tonic-gate 
4930Sstevel@tonic-gate 	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);
4940Sstevel@tonic-gate 
4950Sstevel@tonic-gate 	for (t = 0; t < CALLOUT_NTYPES; t++) {
4960Sstevel@tonic-gate 		for (f = 0; f < CALLOUT_FANOUT; f++) {
4970Sstevel@tonic-gate 			table_id = CALLOUT_TABLE(t, f);
4980Sstevel@tonic-gate 			if (f >= callout_fanout) {
4990Sstevel@tonic-gate 				callout_table[table_id] =
5000Sstevel@tonic-gate 				    callout_table[table_id - callout_fanout];
5010Sstevel@tonic-gate 				continue;
5020Sstevel@tonic-gate 			}
5030Sstevel@tonic-gate 			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
5040Sstevel@tonic-gate 			callout_table[table_id] = ct;
5050Sstevel@tonic-gate 			ct->ct_short_id = (callout_id_t)table_id |
5060Sstevel@tonic-gate 			    CALLOUT_COUNTER_HIGH;
5070Sstevel@tonic-gate 			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
5080Sstevel@tonic-gate 			ct->ct_curtime = ct->ct_runtime = lbolt;
5090Sstevel@tonic-gate 			if (t == CALLOUT_NORMAL) {
5100Sstevel@tonic-gate 				/*
5110Sstevel@tonic-gate 				 * Each callout thread consumes exactly one
5120Sstevel@tonic-gate 				 * task structure while active.  Therefore,
5130Sstevel@tonic-gate 				 * prepopulating with 2 * CALLOUT_THREADS tasks
5140Sstevel@tonic-gate 				 * ensures that there's at least one task per
5150Sstevel@tonic-gate 				 * thread that's either scheduled or on the
5160Sstevel@tonic-gate 				 * freelist.  In turn, this guarantees that
5170Sstevel@tonic-gate 				 * taskq_dispatch() will always either succeed
5180Sstevel@tonic-gate 				 * (because there's a free task structure) or
5190Sstevel@tonic-gate 				 * be unnecessary (because "callout_excute(ct)"
5200Sstevel@tonic-gate 				 * has already scheduled).
5210Sstevel@tonic-gate 				 */
5220Sstevel@tonic-gate 				ct->ct_taskq =
5230Sstevel@tonic-gate 				    taskq_create_instance("callout_taskq", f,
5240Sstevel@tonic-gate 				    CALLOUT_THREADS, maxclsyspri,
5250Sstevel@tonic-gate 				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
5260Sstevel@tonic-gate 				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
5270Sstevel@tonic-gate 			}
5280Sstevel@tonic-gate 		}
5290Sstevel@tonic-gate 	}
5300Sstevel@tonic-gate 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
5310Sstevel@tonic-gate }
532