xref: /onnv-gate/usr/src/uts/common/os/callout.c (revision 11655:c7556f85d84e)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
56422Sqiao  * Common Development and Distribution License (the "License").
66422Sqiao  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*11655SMadhavan.Venkataraman@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #include <sys/callo.h>
270Sstevel@tonic-gate #include <sys/param.h>
280Sstevel@tonic-gate #include <sys/types.h>
290Sstevel@tonic-gate #include <sys/cpuvar.h>
300Sstevel@tonic-gate #include <sys/thread.h>
310Sstevel@tonic-gate #include <sys/kmem.h>
328048SMadhavan.Venkataraman@Sun.COM #include <sys/kmem_impl.h>
330Sstevel@tonic-gate #include <sys/cmn_err.h>
340Sstevel@tonic-gate #include <sys/callb.h>
350Sstevel@tonic-gate #include <sys/debug.h>
360Sstevel@tonic-gate #include <sys/vtrace.h>
370Sstevel@tonic-gate #include <sys/sysmacros.h>
380Sstevel@tonic-gate #include <sys/sdt.h>
390Sstevel@tonic-gate 
400Sstevel@tonic-gate /*
410Sstevel@tonic-gate  * Callout tables.  See timeout(9F) for details.
420Sstevel@tonic-gate  */
439334SMadhavan.Venkataraman@Sun.COM static int callout_threads;			/* callout normal threads */
448048SMadhavan.Venkataraman@Sun.COM static hrtime_t callout_debug_hrtime;		/* debugger entry time */
459334SMadhavan.Venkataraman@Sun.COM static int callout_min_reap;			/* callout minimum reap count */
469334SMadhavan.Venkataraman@Sun.COM static int callout_tolerance;			/* callout hires tolerance */
478048SMadhavan.Venkataraman@Sun.COM static callout_table_t *callout_boot_ct;	/* Boot CPU's callout tables */
488566SMadhavan.Venkataraman@Sun.COM static clock_t callout_max_ticks;		/* max interval */
498048SMadhavan.Venkataraman@Sun.COM static hrtime_t callout_longterm;		/* longterm nanoseconds */
508048SMadhavan.Venkataraman@Sun.COM static ulong_t callout_counter_low;		/* callout ID increment */
518048SMadhavan.Venkataraman@Sun.COM static ulong_t callout_table_bits;		/* number of table bits in ID */
528048SMadhavan.Venkataraman@Sun.COM static ulong_t callout_table_mask;		/* mask for the table bits */
538048SMadhavan.Venkataraman@Sun.COM static callout_cache_t *callout_caches;		/* linked list of caches */
548048SMadhavan.Venkataraman@Sun.COM #pragma align 64(callout_table)
558048SMadhavan.Venkataraman@Sun.COM static callout_table_t *callout_table;		/* global callout table array */
560Sstevel@tonic-gate 
579039SMadhavan.Venkataraman@Sun.COM /*
5810696SDavid.Hollister@Sun.COM  * We run 'realtime' callouts at PIL 1 (CY_LOW_LEVEL). For 'normal'
5910696SDavid.Hollister@Sun.COM  * callouts, from PIL 10 (CY_LOCK_LEVEL) we dispatch the callout,
6010696SDavid.Hollister@Sun.COM  * via taskq, to a thread that executes at PIL 0 - so we end up running
6110696SDavid.Hollister@Sun.COM  * 'normal' callouts at PIL 0.
629039SMadhavan.Venkataraman@Sun.COM  */
639334SMadhavan.Venkataraman@Sun.COM static volatile int callout_realtime_level = CY_LOW_LEVEL;
649334SMadhavan.Venkataraman@Sun.COM static volatile int callout_normal_level = CY_LOCK_LEVEL;
659039SMadhavan.Venkataraman@Sun.COM 
/*
 * Names of the callout kstat counters.
 * NOTE(review): the order presumably has to match the kstat index
 * definitions in <sys/callo.h> -- confirm before reordering.
 */
static char *callout_kstat_names[] = {
	"callout_timeouts",
	"callout_timeouts_pending",
	"callout_untimeouts_unexpired",
	"callout_untimeouts_executing",
	"callout_untimeouts_expired",
	"callout_expirations",
	"callout_allocations",
	"callout_cleanups",
};
768048SMadhavan.Venkataraman@Sun.COM 
779334SMadhavan.Venkataraman@Sun.COM static hrtime_t	callout_heap_process(callout_table_t *, hrtime_t, int);
789334SMadhavan.Venkataraman@Sun.COM 
/*
 * Insert cp at the head of the doubly-linked list anchored at hash
 * (a callout_hash_t lvalue). cnext and cprev are the names of the link
 * members of *cp to use.
 *
 * Every use of cp is parenthesized so that any pointer-valued expression
 * (e.g. &array[i]) can be passed. cp is evaluated several times, so it
 * must not have side effects, and it must not mention a variable named
 * hashp.
 */
#define	CALLOUT_HASH_INSERT(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	(cp)->cprev = NULL;				\
	(cp)->cnext = hashp->ch_head;			\
	if (hashp->ch_head == NULL)			\
		hashp->ch_tail = (cp);			\
	else						\
		(cp)->cnext->cprev = (cp);		\
	hashp->ch_head = (cp);				\
}
918048SMadhavan.Venkataraman@Sun.COM 
/*
 * Append cp to the tail of the doubly-linked list anchored at hash
 * (a callout_hash_t lvalue). cnext and cprev are the names of the link
 * members of *cp to use.
 *
 * Every use of cp is parenthesized so that any pointer-valued expression
 * (e.g. &array[i]) can be passed. cp is evaluated several times, so it
 * must not have side effects, and it must not mention a variable named
 * hashp.
 */
#define	CALLOUT_HASH_APPEND(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	(cp)->cnext = NULL;				\
	(cp)->cprev = hashp->ch_tail;			\
	if (hashp->ch_tail == NULL)			\
		hashp->ch_head = (cp);			\
	else						\
		(cp)->cprev->cnext = (cp);		\
	hashp->ch_tail = (cp);				\
}
1040Sstevel@tonic-gate 
/*
 * Unlink cp from the doubly-linked list anchored at hash (a callout_hash_t
 * lvalue). cnext and cprev are the names of the link members of *cp to
 * use. The link members of *cp itself are left untouched.
 *
 * Every use of cp is parenthesized so that any pointer-valued expression
 * (e.g. &array[i]) can be passed. cp is evaluated several times, so it
 * must not have side effects, and it must not mention a variable named
 * hashp.
 */
#define	CALLOUT_HASH_DELETE(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	if ((cp)->cnext == NULL)			\
		hashp->ch_tail = (cp)->cprev;		\
	else						\
		(cp)->cnext->cprev = (cp)->cprev;	\
	if ((cp)->cprev == NULL)			\
		hashp->ch_head = (cp)->cnext;		\
	else						\
		(cp)->cprev->cnext = (cp)->cnext;	\
}
1180Sstevel@tonic-gate 
1198048SMadhavan.Venkataraman@Sun.COM /*
1208048SMadhavan.Venkataraman@Sun.COM  * These definitions help us queue callouts and callout lists. Here is
1218048SMadhavan.Venkataraman@Sun.COM  * the queueing rationale:
1228048SMadhavan.Venkataraman@Sun.COM  *
1238048SMadhavan.Venkataraman@Sun.COM  *	- callouts are queued in a FIFO manner in the ID hash table.
1248048SMadhavan.Venkataraman@Sun.COM  *	  TCP timers are typically cancelled in the same order that they
1258048SMadhavan.Venkataraman@Sun.COM  *	  were issued. The FIFO queueing shortens the search for a callout
1268048SMadhavan.Venkataraman@Sun.COM  *	  during untimeout().
1278048SMadhavan.Venkataraman@Sun.COM  *
1288048SMadhavan.Venkataraman@Sun.COM  *	- callouts are queued in a FIFO manner in their callout lists.
1298048SMadhavan.Venkataraman@Sun.COM  *	  This ensures that the callouts are executed in the same order that
1308048SMadhavan.Venkataraman@Sun.COM  *	  they were queued. This is fair. Plus, it helps to make each
1318048SMadhavan.Venkataraman@Sun.COM  *	  callout expiration timely. It also favors cancellations.
1328048SMadhavan.Venkataraman@Sun.COM  *
1339334SMadhavan.Venkataraman@Sun.COM  *	- callout lists are queued in the following manner in the callout
1349334SMadhavan.Venkataraman@Sun.COM  *	  hash table buckets:
1359334SMadhavan.Venkataraman@Sun.COM  *
1369334SMadhavan.Venkataraman@Sun.COM  *		- appended, if the callout list is a 1-nanosecond resolution
1379334SMadhavan.Venkataraman@Sun.COM  *		  callout list. When a callout is created, we first look for
1389334SMadhavan.Venkataraman@Sun.COM  *		  a callout list that has the same expiration so we can avoid
1399334SMadhavan.Venkataraman@Sun.COM  *		  allocating a callout list and inserting the expiration into
1409334SMadhavan.Venkataraman@Sun.COM  *		  the heap. However, we do not want to look at 1-nanosecond
1419334SMadhavan.Venkataraman@Sun.COM  *		  resolution callout lists as we will seldom find a match in
1429334SMadhavan.Venkataraman@Sun.COM  *		  them. Keeping these callout lists in the rear of the hash
1439334SMadhavan.Venkataraman@Sun.COM  *		  buckets allows us to skip these during the lookup.
1449334SMadhavan.Venkataraman@Sun.COM  *
1459334SMadhavan.Venkataraman@Sun.COM  *		- inserted at the beginning, if the callout list is not a
1469334SMadhavan.Venkataraman@Sun.COM  *		  1-nanosecond resolution callout list. This also has the
1479334SMadhavan.Venkataraman@Sun.COM  *		  side-effect of keeping the long term timers away from the
1489334SMadhavan.Venkataraman@Sun.COM  *		  front of the buckets.
1498048SMadhavan.Venkataraman@Sun.COM  *
1508048SMadhavan.Venkataraman@Sun.COM  *	- callout lists are queued in a FIFO manner in the expired callouts
1518048SMadhavan.Venkataraman@Sun.COM  *	  list. This ensures that callout lists are executed in the order
1528048SMadhavan.Venkataraman@Sun.COM  *	  of expiration.
1538048SMadhavan.Venkataraman@Sun.COM  */
/*
 * Append callout cp to both lists it lives on: the ID hash bucket of
 * table ct selected by CALLOUT_IDHASH(cp->c_xid), and the callout list
 * cp->c_list.
 *
 * The two appends are wrapped in a single compound statement so that they
 * always execute together, even as the body of an unbraced if/else/loop.
 * Arguments are parenthesized; they are evaluated more than once, so they
 * must not have side effects.
 */
#define	CALLOUT_APPEND(ct, cp)						\
{									\
	CALLOUT_HASH_APPEND((ct)->ct_idhash[CALLOUT_IDHASH((cp)->c_xid)], \
		(cp), c_idnext, c_idprev);				\
	CALLOUT_HASH_APPEND((cp)->c_list->cl_callouts, (cp),		\
		c_clnext, c_clprev);					\
}
1588048SMadhavan.Venkataraman@Sun.COM 
/*
 * Remove callout cp from both lists it lives on: the ID hash bucket of
 * table ct selected by CALLOUT_IDHASH(cp->c_xid), and the callout list
 * cp->c_list.
 *
 * The two deletes are wrapped in a single compound statement so that they
 * always execute together, even as the body of an unbraced if/else/loop.
 * Arguments are parenthesized; they are evaluated more than once, so they
 * must not have side effects.
 */
#define	CALLOUT_DELETE(ct, cp)						\
{									\
	CALLOUT_HASH_DELETE((ct)->ct_idhash[CALLOUT_IDHASH((cp)->c_xid)], \
		(cp), c_idnext, c_idprev);				\
	CALLOUT_HASH_DELETE((cp)->c_list->cl_callouts, (cp),		\
		c_clnext, c_clprev);					\
}
1638048SMadhavan.Venkataraman@Sun.COM 
/*
 * Callout-list flavours of the generic hash operations. Each one simply
 * forwards to the corresponding CALLOUT_HASH_*() macro, naming cl_next
 * and cl_prev as the link members of the callout list cl.
 */

/* Put cl at the head of the list anchored at hash. */
#define	CALLOUT_LIST_INSERT(hash, cl)				\
	CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev)

/* Put cl at the tail of the list anchored at hash. */
#define	CALLOUT_LIST_APPEND(hash, cl)				\
	CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev)

/* Unlink cl from the list anchored at hash. */
#define	CALLOUT_LIST_DELETE(hash, cl)				\
	CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev)
1720Sstevel@tonic-gate 
/*
 * For normal callouts, there is a deadlock scenario if two callouts that
 * have an inter-dependency end up on the same callout list. Breaking the
 * deadlock takes two taskq threads running in parallel. The macro below
 * computes how many taskq threads to use (0, 1 or 2) from a handful of
 * conditions chosen to be optimal for the common case. This is an ugly
 * hack, but a necessary one.
 *
 * CALLOUT_THRESHOLD is the "near future" horizon that is added to
 * gethrtime() when looking at the top of the heap (presumably in
 * nanoseconds, i.e. 100ms -- confirm against gethrtime()).
 *
 * CALLOUT_EXEC_COMPUTE(ct, exec) stores the thread count into the lvalue
 * exec for callout table ct. Both arguments are parenthesized so that
 * arbitrary expressions may be passed; they are evaluated more than once
 * and must not have side effects or mention a variable named cl.
 */
#define	CALLOUT_THRESHOLD	100000000
#define	CALLOUT_EXEC_COMPUTE(ct, exec)					\
{									\
	callout_list_t *cl;						\
									\
	cl = (ct)->ct_expired.ch_head;					\
	if (cl == NULL) {						\
		/*							\
		 * If the expired list is NULL, there is nothing to	\
		 * process.						\
		 */							\
		(exec) = 0;						\
	} else if ((cl->cl_next == NULL) &&				\
	    (cl->cl_callouts.ch_head == cl->cl_callouts.ch_tail)) {	\
		/*							\
		 * If there is only one callout list and it contains	\
		 * only one callout, there is no need for two threads.	\
		 */							\
		(exec) = 1;						\
	} else if (((ct)->ct_heap_num == 0) ||				\
	    ((ct)->ct_heap[0].ch_expiration >				\
	    gethrtime() + CALLOUT_THRESHOLD)) {				\
		/*							\
		 * If the heap has become empty, we need two threads as	\
		 * there is no one to kick off the second thread in the	\
		 * future. If the heap is not empty and the top of the	\
		 * heap does not expire in the near future, we need two	\
		 * threads.						\
		 */							\
		(exec) = 2;						\
	} else {							\
		/*							\
		 * We have multiple callouts to process. But the cyclic	\
		 * will fire in the near future. So, we only need one	\
		 * thread for now.					\
		 */							\
		(exec) = 1;						\
	}								\
}
2199039SMadhavan.Venkataraman@Sun.COM 
/*
 * Macro to swap two heap items. h1 and h2 are pointers to callout_heap_t.
 * The arguments are parenthesized so that pointer expressions such as
 * (heap + i) dereference correctly; each is evaluated twice, so they must
 * not have side effects or mention a variable named tmp.
 */
#define	CALLOUT_SWAP(h1, h2)		\
{					\
	callout_heap_t tmp;		\
					\
	tmp = *(h1);			\
	*(h1) = *(h2);			\
	*(h2) = tmp;			\
}
2319334SMadhavan.Venkataraman@Sun.COM 
/*
 * Macro to free a callout list: push cl onto the front of the free list
 * ct->ct_lfree of callout table ct (linked through cl_next) and set
 * CALLOUT_LIST_FLAG_FREE in cl->cl_flags.
 *
 * The arguments are parenthesized so that arbitrary pointer expressions
 * may be passed; they are evaluated more than once and must not have side
 * effects.
 */
#define	CALLOUT_LIST_FREE(ct, cl)			\
{							\
	(cl)->cl_next = (ct)->ct_lfree;			\
	(ct)->ct_lfree = (cl);				\
	(cl)->cl_flags |= CALLOUT_LIST_FLAG_FREE;	\
}
2419334SMadhavan.Venkataraman@Sun.COM 
2429334SMadhavan.Venkataraman@Sun.COM /*
2430Sstevel@tonic-gate  * Allocate a callout structure.  We try quite hard because we
2440Sstevel@tonic-gate  * can't sleep, and if we can't do the allocation, we're toast.
2458048SMadhavan.Venkataraman@Sun.COM  * Failing all, we try a KM_PANIC allocation. Note that we never
2468048SMadhavan.Venkataraman@Sun.COM  * deallocate a callout. See untimeout() for the reasoning.
2470Sstevel@tonic-gate  */
2480Sstevel@tonic-gate static callout_t *
2490Sstevel@tonic-gate callout_alloc(callout_table_t *ct)
2500Sstevel@tonic-gate {
2518048SMadhavan.Venkataraman@Sun.COM 	size_t size;
2528048SMadhavan.Venkataraman@Sun.COM 	callout_t *cp;
2538048SMadhavan.Venkataraman@Sun.COM 
2548048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
2558048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
2560Sstevel@tonic-gate 
2578048SMadhavan.Venkataraman@Sun.COM 	cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP);
2588048SMadhavan.Venkataraman@Sun.COM 	if (cp == NULL) {
2598048SMadhavan.Venkataraman@Sun.COM 		size = sizeof (callout_t);
2608048SMadhavan.Venkataraman@Sun.COM 		cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
2618048SMadhavan.Venkataraman@Sun.COM 	}
2628048SMadhavan.Venkataraman@Sun.COM 	cp->c_xid = 0;
2639039SMadhavan.Venkataraman@Sun.COM 	cp->c_executor = NULL;
2649039SMadhavan.Venkataraman@Sun.COM 	cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL);
2659039SMadhavan.Venkataraman@Sun.COM 	cp->c_waiting = 0;
2668048SMadhavan.Venkataraman@Sun.COM 
2678048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
2688048SMadhavan.Venkataraman@Sun.COM 	ct->ct_allocations++;
2690Sstevel@tonic-gate 	return (cp);
2700Sstevel@tonic-gate }
2710Sstevel@tonic-gate 
2720Sstevel@tonic-gate /*
2738048SMadhavan.Venkataraman@Sun.COM  * Allocate a callout list structure.  We try quite hard because we
2748048SMadhavan.Venkataraman@Sun.COM  * can't sleep, and if we can't do the allocation, we're toast.
2758048SMadhavan.Venkataraman@Sun.COM  * Failing all, we try a KM_PANIC allocation. Note that we never
2768048SMadhavan.Venkataraman@Sun.COM  * deallocate a callout list.
2778048SMadhavan.Venkataraman@Sun.COM  */
2788048SMadhavan.Venkataraman@Sun.COM static void
2798048SMadhavan.Venkataraman@Sun.COM callout_list_alloc(callout_table_t *ct)
2808048SMadhavan.Venkataraman@Sun.COM {
2818048SMadhavan.Venkataraman@Sun.COM 	size_t size;
2828048SMadhavan.Venkataraman@Sun.COM 	callout_list_t *cl;
2838048SMadhavan.Venkataraman@Sun.COM 
2848048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
2858048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
2868048SMadhavan.Venkataraman@Sun.COM 
2878048SMadhavan.Venkataraman@Sun.COM 	cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP);
2888048SMadhavan.Venkataraman@Sun.COM 	if (cl == NULL) {
2898048SMadhavan.Venkataraman@Sun.COM 		size = sizeof (callout_list_t);
2908048SMadhavan.Venkataraman@Sun.COM 		cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
2918048SMadhavan.Venkataraman@Sun.COM 	}
2928048SMadhavan.Venkataraman@Sun.COM 	bzero(cl, sizeof (callout_list_t));
2938048SMadhavan.Venkataraman@Sun.COM 
2948048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
2959334SMadhavan.Venkataraman@Sun.COM 	CALLOUT_LIST_FREE(ct, cl);
2968048SMadhavan.Venkataraman@Sun.COM }
2978048SMadhavan.Venkataraman@Sun.COM 
2988048SMadhavan.Venkataraman@Sun.COM /*
2999334SMadhavan.Venkataraman@Sun.COM  * Find a callout list that corresponds to an expiration and matching flags.
3008048SMadhavan.Venkataraman@Sun.COM  */
3018048SMadhavan.Venkataraman@Sun.COM static callout_list_t *
3029039SMadhavan.Venkataraman@Sun.COM callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash)
3038048SMadhavan.Venkataraman@Sun.COM {
3048048SMadhavan.Venkataraman@Sun.COM 	callout_list_t *cl;
3059334SMadhavan.Venkataraman@Sun.COM 	int clflags;
3068048SMadhavan.Venkataraman@Sun.COM 
3078048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
3088048SMadhavan.Venkataraman@Sun.COM 
3099334SMadhavan.Venkataraman@Sun.COM 	if (flags & CALLOUT_LIST_FLAG_NANO) {
3109334SMadhavan.Venkataraman@Sun.COM 		/*
3119334SMadhavan.Venkataraman@Sun.COM 		 * This is a 1-nanosecond resolution callout. We will rarely
3129334SMadhavan.Venkataraman@Sun.COM 		 * find a match for this. So, bail out.
3139334SMadhavan.Venkataraman@Sun.COM 		 */
3149334SMadhavan.Venkataraman@Sun.COM 		return (NULL);
3159334SMadhavan.Venkataraman@Sun.COM 	}
3169334SMadhavan.Venkataraman@Sun.COM 
3179334SMadhavan.Venkataraman@Sun.COM 	clflags = (CALLOUT_LIST_FLAG_ABSOLUTE | CALLOUT_LIST_FLAG_HRESTIME);
3188048SMadhavan.Venkataraman@Sun.COM 	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
3199334SMadhavan.Venkataraman@Sun.COM 		/*
3209334SMadhavan.Venkataraman@Sun.COM 		 * If we have reached a 1-nanosecond resolution callout list,
3219334SMadhavan.Venkataraman@Sun.COM 		 * we don't have much hope of finding a match in this hash
3229334SMadhavan.Venkataraman@Sun.COM 		 * bucket. So, just bail out.
3239334SMadhavan.Venkataraman@Sun.COM 		 */
3249334SMadhavan.Venkataraman@Sun.COM 		if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO)
3259334SMadhavan.Venkataraman@Sun.COM 			return (NULL);
3269334SMadhavan.Venkataraman@Sun.COM 
3279039SMadhavan.Venkataraman@Sun.COM 		if ((cl->cl_expiration == expiration) &&
3289334SMadhavan.Venkataraman@Sun.COM 		    ((cl->cl_flags & clflags) == (flags & clflags)))
3298048SMadhavan.Venkataraman@Sun.COM 			return (cl);
3308048SMadhavan.Venkataraman@Sun.COM 	}
3318048SMadhavan.Venkataraman@Sun.COM 
3328048SMadhavan.Venkataraman@Sun.COM 	return (NULL);
3338048SMadhavan.Venkataraman@Sun.COM }
3348048SMadhavan.Venkataraman@Sun.COM 
3358048SMadhavan.Venkataraman@Sun.COM /*
3368048SMadhavan.Venkataraman@Sun.COM  * Initialize a callout table's heap, if necessary. Preallocate some free
3378048SMadhavan.Venkataraman@Sun.COM  * entries so we don't have to check for NULL elsewhere.
3388048SMadhavan.Venkataraman@Sun.COM  */
3398048SMadhavan.Venkataraman@Sun.COM static void
3408048SMadhavan.Venkataraman@Sun.COM callout_heap_init(callout_table_t *ct)
3418048SMadhavan.Venkataraman@Sun.COM {
3428048SMadhavan.Venkataraman@Sun.COM 	size_t size;
3438048SMadhavan.Venkataraman@Sun.COM 
3448048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
3458048SMadhavan.Venkataraman@Sun.COM 	ASSERT(ct->ct_heap == NULL);
3468048SMadhavan.Venkataraman@Sun.COM 
3478048SMadhavan.Venkataraman@Sun.COM 	ct->ct_heap_num = 0;
3488048SMadhavan.Venkataraman@Sun.COM 	ct->ct_heap_max = CALLOUT_CHUNK;
3499334SMadhavan.Venkataraman@Sun.COM 	size = sizeof (callout_heap_t) * CALLOUT_CHUNK;
3508048SMadhavan.Venkataraman@Sun.COM 	ct->ct_heap = kmem_alloc(size, KM_SLEEP);
3518048SMadhavan.Venkataraman@Sun.COM }
3528048SMadhavan.Venkataraman@Sun.COM 
3538048SMadhavan.Venkataraman@Sun.COM /*
3548048SMadhavan.Venkataraman@Sun.COM  * Reallocate the heap. We try quite hard because we can't sleep, and if
3558048SMadhavan.Venkataraman@Sun.COM  * we can't do the allocation, we're toast. Failing all, we try a KM_PANIC
3568048SMadhavan.Venkataraman@Sun.COM  * allocation. Note that the heap only expands, it never contracts.
3578048SMadhavan.Venkataraman@Sun.COM  */
3588048SMadhavan.Venkataraman@Sun.COM static void
3598048SMadhavan.Venkataraman@Sun.COM callout_heap_expand(callout_table_t *ct)
3608048SMadhavan.Venkataraman@Sun.COM {
3618048SMadhavan.Venkataraman@Sun.COM 	size_t max, size, osize;
3629334SMadhavan.Venkataraman@Sun.COM 	callout_heap_t *heap;
3638048SMadhavan.Venkataraman@Sun.COM 
3648048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
3658048SMadhavan.Venkataraman@Sun.COM 	ASSERT(ct->ct_heap_num <= ct->ct_heap_max);
3668048SMadhavan.Venkataraman@Sun.COM 
3678048SMadhavan.Venkataraman@Sun.COM 	while (ct->ct_heap_num == ct->ct_heap_max) {
3688048SMadhavan.Venkataraman@Sun.COM 		max = ct->ct_heap_max;
3698048SMadhavan.Venkataraman@Sun.COM 		mutex_exit(&ct->ct_mutex);
3708048SMadhavan.Venkataraman@Sun.COM 
3719334SMadhavan.Venkataraman@Sun.COM 		osize = sizeof (callout_heap_t) * max;
3729334SMadhavan.Venkataraman@Sun.COM 		size = sizeof (callout_heap_t) * (max + CALLOUT_CHUNK);
3738048SMadhavan.Venkataraman@Sun.COM 		heap = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
3748048SMadhavan.Venkataraman@Sun.COM 
3758048SMadhavan.Venkataraman@Sun.COM 		mutex_enter(&ct->ct_mutex);
3768048SMadhavan.Venkataraman@Sun.COM 		if (max < ct->ct_heap_max) {
3778048SMadhavan.Venkataraman@Sun.COM 			/*
3788048SMadhavan.Venkataraman@Sun.COM 			 * Someone beat us to the allocation. Free what we
3798048SMadhavan.Venkataraman@Sun.COM 			 * just allocated and proceed.
3808048SMadhavan.Venkataraman@Sun.COM 			 */
3818048SMadhavan.Venkataraman@Sun.COM 			kmem_free(heap, size);
3828048SMadhavan.Venkataraman@Sun.COM 			continue;
3838048SMadhavan.Venkataraman@Sun.COM 		}
3848048SMadhavan.Venkataraman@Sun.COM 
3858048SMadhavan.Venkataraman@Sun.COM 		bcopy(ct->ct_heap, heap, osize);
3868048SMadhavan.Venkataraman@Sun.COM 		kmem_free(ct->ct_heap, osize);
3878048SMadhavan.Venkataraman@Sun.COM 		ct->ct_heap = heap;
3889334SMadhavan.Venkataraman@Sun.COM 		ct->ct_heap_max = size / sizeof (callout_heap_t);
3898048SMadhavan.Venkataraman@Sun.COM 	}
3908048SMadhavan.Venkataraman@Sun.COM }
3918048SMadhavan.Venkataraman@Sun.COM 
3928048SMadhavan.Venkataraman@Sun.COM /*
3938048SMadhavan.Venkataraman@Sun.COM  * Move an expiration from the bottom of the heap to its correct place
3948048SMadhavan.Venkataraman@Sun.COM  * in the heap. If we reached the root doing this, return 1. Else,
3958048SMadhavan.Venkataraman@Sun.COM  * return 0.
3960Sstevel@tonic-gate  */
3978048SMadhavan.Venkataraman@Sun.COM static int
3988048SMadhavan.Venkataraman@Sun.COM callout_upheap(callout_table_t *ct)
3998048SMadhavan.Venkataraman@Sun.COM {
4008048SMadhavan.Venkataraman@Sun.COM 	int current, parent;
4019334SMadhavan.Venkataraman@Sun.COM 	callout_heap_t *heap, *hcurrent, *hparent;
4028048SMadhavan.Venkataraman@Sun.COM 
4038048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4048048SMadhavan.Venkataraman@Sun.COM 	ASSERT(ct->ct_heap_num >= 1);
4058048SMadhavan.Venkataraman@Sun.COM 
4068048SMadhavan.Venkataraman@Sun.COM 	if (ct->ct_heap_num == 1) {
4078048SMadhavan.Venkataraman@Sun.COM 		return (1);
4088048SMadhavan.Venkataraman@Sun.COM 	}
4098048SMadhavan.Venkataraman@Sun.COM 
4108048SMadhavan.Venkataraman@Sun.COM 	heap = ct->ct_heap;
4118048SMadhavan.Venkataraman@Sun.COM 	current = ct->ct_heap_num - 1;
4128048SMadhavan.Venkataraman@Sun.COM 
4138048SMadhavan.Venkataraman@Sun.COM 	for (;;) {
4148048SMadhavan.Venkataraman@Sun.COM 		parent = CALLOUT_HEAP_PARENT(current);
4159334SMadhavan.Venkataraman@Sun.COM 		hparent = &heap[parent];
4169334SMadhavan.Venkataraman@Sun.COM 		hcurrent = &heap[current];
4178048SMadhavan.Venkataraman@Sun.COM 
4188048SMadhavan.Venkataraman@Sun.COM 		/*
4198048SMadhavan.Venkataraman@Sun.COM 		 * We have an expiration later than our parent; we're done.
4208048SMadhavan.Venkataraman@Sun.COM 		 */
4219334SMadhavan.Venkataraman@Sun.COM 		if (hcurrent->ch_expiration >= hparent->ch_expiration) {
4228048SMadhavan.Venkataraman@Sun.COM 			return (0);
4238048SMadhavan.Venkataraman@Sun.COM 		}
4248048SMadhavan.Venkataraman@Sun.COM 
4258048SMadhavan.Venkataraman@Sun.COM 		/*
4268048SMadhavan.Venkataraman@Sun.COM 		 * We need to swap with our parent, and continue up the heap.
4278048SMadhavan.Venkataraman@Sun.COM 		 */
4289334SMadhavan.Venkataraman@Sun.COM 		CALLOUT_SWAP(hparent, hcurrent);
4298048SMadhavan.Venkataraman@Sun.COM 
4308048SMadhavan.Venkataraman@Sun.COM 		/*
4318048SMadhavan.Venkataraman@Sun.COM 		 * If we just reached the root, we're done.
4328048SMadhavan.Venkataraman@Sun.COM 		 */
4338048SMadhavan.Venkataraman@Sun.COM 		if (parent == 0) {
4348048SMadhavan.Venkataraman@Sun.COM 			return (1);
4358048SMadhavan.Venkataraman@Sun.COM 		}
4368048SMadhavan.Venkataraman@Sun.COM 
4378048SMadhavan.Venkataraman@Sun.COM 		current = parent;
4388048SMadhavan.Venkataraman@Sun.COM 	}
4398048SMadhavan.Venkataraman@Sun.COM 	/*NOTREACHED*/
4408048SMadhavan.Venkataraman@Sun.COM }
4418048SMadhavan.Venkataraman@Sun.COM 
4428048SMadhavan.Venkataraman@Sun.COM /*
4439334SMadhavan.Venkataraman@Sun.COM  * Insert a new heap item into a callout table's heap.
4448048SMadhavan.Venkataraman@Sun.COM  */
4458048SMadhavan.Venkataraman@Sun.COM static void
4469334SMadhavan.Venkataraman@Sun.COM callout_heap_insert(callout_table_t *ct, callout_list_t *cl)
4478048SMadhavan.Venkataraman@Sun.COM {
4488048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4498048SMadhavan.Venkataraman@Sun.COM 	ASSERT(ct->ct_heap_num < ct->ct_heap_max);
4508048SMadhavan.Venkataraman@Sun.COM 
4518048SMadhavan.Venkataraman@Sun.COM 	/*
4529334SMadhavan.Venkataraman@Sun.COM 	 * First, copy the expiration and callout list pointer to the bottom
4539334SMadhavan.Venkataraman@Sun.COM 	 * of the heap.
4548048SMadhavan.Venkataraman@Sun.COM 	 */
4559334SMadhavan.Venkataraman@Sun.COM 	ct->ct_heap[ct->ct_heap_num].ch_expiration = cl->cl_expiration;
4569334SMadhavan.Venkataraman@Sun.COM 	ct->ct_heap[ct->ct_heap_num].ch_list = cl;
4578048SMadhavan.Venkataraman@Sun.COM 	ct->ct_heap_num++;
4588048SMadhavan.Venkataraman@Sun.COM 
4598048SMadhavan.Venkataraman@Sun.COM 	/*
4608048SMadhavan.Venkataraman@Sun.COM 	 * Now, perform an upheap operation. If we reached the root, then
4618048SMadhavan.Venkataraman@Sun.COM 	 * the cyclic needs to be reprogrammed as we have an earlier
4628048SMadhavan.Venkataraman@Sun.COM 	 * expiration.
4638048SMadhavan.Venkataraman@Sun.COM 	 *
4648048SMadhavan.Venkataraman@Sun.COM 	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
4658048SMadhavan.Venkataraman@Sun.COM 	 * We don't want any callout activity. When the CPR resume phase is
4668048SMadhavan.Venkataraman@Sun.COM 	 * entered, the cyclic will be programmed for the earliest expiration
4678048SMadhavan.Venkataraman@Sun.COM 	 * in the heap.
4688048SMadhavan.Venkataraman@Sun.COM 	 */
4698566SMadhavan.Venkataraman@Sun.COM 	if (callout_upheap(ct) && (ct->ct_suspend == 0))
4709334SMadhavan.Venkataraman@Sun.COM 		(void) cyclic_reprogram(ct->ct_cyclic, cl->cl_expiration);
4718048SMadhavan.Venkataraman@Sun.COM }
4728048SMadhavan.Venkataraman@Sun.COM 
4738048SMadhavan.Venkataraman@Sun.COM /*
4748048SMadhavan.Venkataraman@Sun.COM  * Move an expiration from the top of the heap to its correct place
4758048SMadhavan.Venkataraman@Sun.COM  * in the heap.
4768048SMadhavan.Venkataraman@Sun.COM  */
4778048SMadhavan.Venkataraman@Sun.COM static void
4788048SMadhavan.Venkataraman@Sun.COM callout_downheap(callout_table_t *ct)
4790Sstevel@tonic-gate {
4809334SMadhavan.Venkataraman@Sun.COM 	int current, left, right, nelems;
4819334SMadhavan.Venkataraman@Sun.COM 	callout_heap_t *heap, *hleft, *hright, *hcurrent;
4828048SMadhavan.Venkataraman@Sun.COM 
4838048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
4848048SMadhavan.Venkataraman@Sun.COM 	ASSERT(ct->ct_heap_num >= 1);
4858048SMadhavan.Venkataraman@Sun.COM 
4868048SMadhavan.Venkataraman@Sun.COM 	heap = ct->ct_heap;
4878048SMadhavan.Venkataraman@Sun.COM 	current = 0;
4888048SMadhavan.Venkataraman@Sun.COM 	nelems = ct->ct_heap_num;
4898048SMadhavan.Venkataraman@Sun.COM 
4908048SMadhavan.Venkataraman@Sun.COM 	for (;;) {
4918048SMadhavan.Venkataraman@Sun.COM 		/*
4928048SMadhavan.Venkataraman@Sun.COM 		 * If we don't have a left child (i.e., we're a leaf), we're
4938048SMadhavan.Venkataraman@Sun.COM 		 * done.
4948048SMadhavan.Venkataraman@Sun.COM 		 */
4958048SMadhavan.Venkataraman@Sun.COM 		if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems)
4968048SMadhavan.Venkataraman@Sun.COM 			return;
4978048SMadhavan.Venkataraman@Sun.COM 
4989334SMadhavan.Venkataraman@Sun.COM 		hleft = &heap[left];
4999334SMadhavan.Venkataraman@Sun.COM 		hcurrent = &heap[current];
5008048SMadhavan.Venkataraman@Sun.COM 
5018048SMadhavan.Venkataraman@Sun.COM 		right = CALLOUT_HEAP_RIGHT(current);
5028048SMadhavan.Venkataraman@Sun.COM 
5038048SMadhavan.Venkataraman@Sun.COM 		/*
5048048SMadhavan.Venkataraman@Sun.COM 		 * Even if we don't have a right child, we still need to compare
5058048SMadhavan.Venkataraman@Sun.COM 		 * our expiration against that of our left child.
5068048SMadhavan.Venkataraman@Sun.COM 		 */
5078048SMadhavan.Venkataraman@Sun.COM 		if (right >= nelems)
5088048SMadhavan.Venkataraman@Sun.COM 			goto comp_left;
5098048SMadhavan.Venkataraman@Sun.COM 
5109334SMadhavan.Venkataraman@Sun.COM 		hright = &heap[right];
5118048SMadhavan.Venkataraman@Sun.COM 
5128048SMadhavan.Venkataraman@Sun.COM 		/*
5138048SMadhavan.Venkataraman@Sun.COM 		 * We have both a left and a right child.  We need to compare
5148048SMadhavan.Venkataraman@Sun.COM 		 * the expiration of the children to determine which
5158048SMadhavan.Venkataraman@Sun.COM 		 * expires earlier.
5168048SMadhavan.Venkataraman@Sun.COM 		 */
5179334SMadhavan.Venkataraman@Sun.COM 		if (hright->ch_expiration < hleft->ch_expiration) {
5188048SMadhavan.Venkataraman@Sun.COM 			/*
5198048SMadhavan.Venkataraman@Sun.COM 			 * Our right child is the earlier of our children.
5208048SMadhavan.Venkataraman@Sun.COM 			 * We'll now compare our expiration to its expiration.
5218048SMadhavan.Venkataraman@Sun.COM 			 * If ours is the earlier one, we're done.
5228048SMadhavan.Venkataraman@Sun.COM 			 */
5239334SMadhavan.Venkataraman@Sun.COM 			if (hcurrent->ch_expiration <= hright->ch_expiration)
5248048SMadhavan.Venkataraman@Sun.COM 				return;
5258048SMadhavan.Venkataraman@Sun.COM 
5268048SMadhavan.Venkataraman@Sun.COM 			/*
5278048SMadhavan.Venkataraman@Sun.COM 			 * Our right child expires earlier than we do; swap
5288048SMadhavan.Venkataraman@Sun.COM 			 * with our right child, and descend right.
5298048SMadhavan.Venkataraman@Sun.COM 			 */
5309334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_SWAP(hright, hcurrent);
5318048SMadhavan.Venkataraman@Sun.COM 			current = right;
5328048SMadhavan.Venkataraman@Sun.COM 			continue;
5338048SMadhavan.Venkataraman@Sun.COM 		}
5348048SMadhavan.Venkataraman@Sun.COM 
5358048SMadhavan.Venkataraman@Sun.COM comp_left:
5368048SMadhavan.Venkataraman@Sun.COM 		/*
5378048SMadhavan.Venkataraman@Sun.COM 		 * Our left child is the earlier of our children (or we have
5388048SMadhavan.Venkataraman@Sun.COM 		 * no right child).  We'll now compare our expiration
5398048SMadhavan.Venkataraman@Sun.COM 		 * to its expiration. If ours is the earlier one, we're done.
5408048SMadhavan.Venkataraman@Sun.COM 		 */
5419334SMadhavan.Venkataraman@Sun.COM 		if (hcurrent->ch_expiration <= hleft->ch_expiration)
5428048SMadhavan.Venkataraman@Sun.COM 			return;
5438048SMadhavan.Venkataraman@Sun.COM 
5448048SMadhavan.Venkataraman@Sun.COM 		/*
5458048SMadhavan.Venkataraman@Sun.COM 		 * Our left child expires earlier than we do; swap with our
5468048SMadhavan.Venkataraman@Sun.COM 		 * left child, and descend left.
5478048SMadhavan.Venkataraman@Sun.COM 		 */
5489334SMadhavan.Venkataraman@Sun.COM 		CALLOUT_SWAP(hleft, hcurrent);
5498048SMadhavan.Venkataraman@Sun.COM 		current = left;
5508048SMadhavan.Venkataraman@Sun.COM 	}
5518048SMadhavan.Venkataraman@Sun.COM }
5528048SMadhavan.Venkataraman@Sun.COM 
5538048SMadhavan.Venkataraman@Sun.COM /*
5548048SMadhavan.Venkataraman@Sun.COM  * Delete and handle all past expirations in a callout table's heap.
5558048SMadhavan.Venkataraman@Sun.COM  */
5568048SMadhavan.Venkataraman@Sun.COM static void
5578048SMadhavan.Venkataraman@Sun.COM callout_heap_delete(callout_table_t *ct)
5588048SMadhavan.Venkataraman@Sun.COM {
5599334SMadhavan.Venkataraman@Sun.COM 	hrtime_t now, expiration, next;
5608048SMadhavan.Venkataraman@Sun.COM 	callout_list_t *cl;
5619334SMadhavan.Venkataraman@Sun.COM 	callout_heap_t *heap;
5628048SMadhavan.Venkataraman@Sun.COM 	int hash;
5638048SMadhavan.Venkataraman@Sun.COM 
5648048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
5658048SMadhavan.Venkataraman@Sun.COM 
5669334SMadhavan.Venkataraman@Sun.COM 	if (CALLOUT_CLEANUP(ct)) {
5679334SMadhavan.Venkataraman@Sun.COM 		/*
5689334SMadhavan.Venkataraman@Sun.COM 		 * There are too many heap elements pointing to empty callout
5699334SMadhavan.Venkataraman@Sun.COM 		 * lists. Clean them out.
5709334SMadhavan.Venkataraman@Sun.COM 		 */
5719334SMadhavan.Venkataraman@Sun.COM 		(void) callout_heap_process(ct, 0, 0);
5729334SMadhavan.Venkataraman@Sun.COM 	}
5739334SMadhavan.Venkataraman@Sun.COM 
5748048SMadhavan.Venkataraman@Sun.COM 	now = gethrtime();
5759334SMadhavan.Venkataraman@Sun.COM 	heap = ct->ct_heap;
5768048SMadhavan.Venkataraman@Sun.COM 
5778048SMadhavan.Venkataraman@Sun.COM 	while (ct->ct_heap_num > 0) {
5789334SMadhavan.Venkataraman@Sun.COM 		expiration = heap->ch_expiration;
5798048SMadhavan.Venkataraman@Sun.COM 		hash = CALLOUT_CLHASH(expiration);
5809334SMadhavan.Venkataraman@Sun.COM 		cl = heap->ch_list;
5819334SMadhavan.Venkataraman@Sun.COM 		ASSERT(expiration == cl->cl_expiration);
5829334SMadhavan.Venkataraman@Sun.COM 
5839334SMadhavan.Venkataraman@Sun.COM 		if (cl->cl_callouts.ch_head == NULL) {
5848048SMadhavan.Venkataraman@Sun.COM 			/*
5859334SMadhavan.Venkataraman@Sun.COM 			 * If the callout list is empty, reap it.
5869334SMadhavan.Venkataraman@Sun.COM 			 * Decrement the reap count.
5879334SMadhavan.Venkataraman@Sun.COM 			 */
5889334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
5899334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_FREE(ct, cl);
5909334SMadhavan.Venkataraman@Sun.COM 			ct->ct_nreap--;
5919334SMadhavan.Venkataraman@Sun.COM 		} else {
5929334SMadhavan.Venkataraman@Sun.COM 			/*
5939334SMadhavan.Venkataraman@Sun.COM 			 * If the root of the heap expires in the future,
5949334SMadhavan.Venkataraman@Sun.COM 			 * bail out.
5958048SMadhavan.Venkataraman@Sun.COM 			 */
5968048SMadhavan.Venkataraman@Sun.COM 			if (expiration > now)
5978048SMadhavan.Venkataraman@Sun.COM 				break;
5988048SMadhavan.Venkataraman@Sun.COM 
5998048SMadhavan.Venkataraman@Sun.COM 			/*
6008048SMadhavan.Venkataraman@Sun.COM 			 * Move the callout list for this expiration to the
6018048SMadhavan.Venkataraman@Sun.COM 			 * list of expired callout lists. It will be processed
6028048SMadhavan.Venkataraman@Sun.COM 			 * by the callout executor.
6038048SMadhavan.Venkataraman@Sun.COM 			 */
6048048SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
6058048SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
6068048SMadhavan.Venkataraman@Sun.COM 		}
6078048SMadhavan.Venkataraman@Sun.COM 
6088048SMadhavan.Venkataraman@Sun.COM 		/*
6098048SMadhavan.Venkataraman@Sun.COM 		 * Now delete the root. This is done by swapping the root with
6108048SMadhavan.Venkataraman@Sun.COM 		 * the last item in the heap and downheaping the item.
6118048SMadhavan.Venkataraman@Sun.COM 		 */
6128048SMadhavan.Venkataraman@Sun.COM 		ct->ct_heap_num--;
6138048SMadhavan.Venkataraman@Sun.COM 		if (ct->ct_heap_num > 0) {
6149334SMadhavan.Venkataraman@Sun.COM 			heap[0] = heap[ct->ct_heap_num];
6158048SMadhavan.Venkataraman@Sun.COM 			callout_downheap(ct);
6168048SMadhavan.Venkataraman@Sun.COM 		}
6178048SMadhavan.Venkataraman@Sun.COM 	}
6188048SMadhavan.Venkataraman@Sun.COM 
6198048SMadhavan.Venkataraman@Sun.COM 	/*
6209334SMadhavan.Venkataraman@Sun.COM 	 * If this callout table is empty or callouts have been suspended,
6219334SMadhavan.Venkataraman@Sun.COM 	 * just return. The cyclic has already been programmed to
6228048SMadhavan.Venkataraman@Sun.COM 	 * infinity by the cyclic subsystem.
6238048SMadhavan.Venkataraman@Sun.COM 	 */
6248566SMadhavan.Venkataraman@Sun.COM 	if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0))
6258048SMadhavan.Venkataraman@Sun.COM 		return;
6268048SMadhavan.Venkataraman@Sun.COM 
6279334SMadhavan.Venkataraman@Sun.COM 	/*
6289334SMadhavan.Venkataraman@Sun.COM 	 * If the top expirations are within callout_tolerance of each other,
6299334SMadhavan.Venkataraman@Sun.COM 	 * delay the cyclic expire so that they can be processed together.
6309334SMadhavan.Venkataraman@Sun.COM 	 * This is to prevent high resolution timers from swamping the system
6319334SMadhavan.Venkataraman@Sun.COM 	 * with cyclic activity.
6329334SMadhavan.Venkataraman@Sun.COM 	 */
6339334SMadhavan.Venkataraman@Sun.COM 	if (ct->ct_heap_num > 2) {
6349334SMadhavan.Venkataraman@Sun.COM 		next = expiration + callout_tolerance;
6359334SMadhavan.Venkataraman@Sun.COM 		if ((heap[1].ch_expiration < next) ||
6369334SMadhavan.Venkataraman@Sun.COM 		    (heap[2].ch_expiration < next))
6379334SMadhavan.Venkataraman@Sun.COM 			expiration = next;
6389334SMadhavan.Venkataraman@Sun.COM 	}
6399334SMadhavan.Venkataraman@Sun.COM 
6408048SMadhavan.Venkataraman@Sun.COM 	(void) cyclic_reprogram(ct->ct_cyclic, expiration);
6418048SMadhavan.Venkataraman@Sun.COM }
6428048SMadhavan.Venkataraman@Sun.COM 
6438566SMadhavan.Venkataraman@Sun.COM /*
6449334SMadhavan.Venkataraman@Sun.COM  * There are some situations when the entire heap is walked and processed.
6459334SMadhavan.Venkataraman@Sun.COM  * This function is called to do the processing. These are the situations:
6469334SMadhavan.Venkataraman@Sun.COM  *
6479334SMadhavan.Venkataraman@Sun.COM  * 1. When the reap count reaches its threshold, the heap has to be cleared
6489334SMadhavan.Venkataraman@Sun.COM  *    of all empty callout lists.
6499334SMadhavan.Venkataraman@Sun.COM  *
6509334SMadhavan.Venkataraman@Sun.COM  * 2. When the system enters and exits KMDB/OBP, all entries in the heap
6519334SMadhavan.Venkataraman@Sun.COM  *    need to be adjusted by the interval spent in KMDB/OBP.
6529334SMadhavan.Venkataraman@Sun.COM  *
6539334SMadhavan.Venkataraman@Sun.COM  * 3. When system time is changed, the heap has to be scanned for
6549334SMadhavan.Venkataraman@Sun.COM  *    absolute hrestime timers. These need to be removed from the heap
6559334SMadhavan.Venkataraman@Sun.COM  *    and expired immediately.
6569334SMadhavan.Venkataraman@Sun.COM  *
6579334SMadhavan.Venkataraman@Sun.COM  * In cases 2 and 3, it is a good idea to do 1 as well since we are
6589334SMadhavan.Venkataraman@Sun.COM  * scanning the heap anyway.
6599334SMadhavan.Venkataraman@Sun.COM  *
6609334SMadhavan.Venkataraman@Sun.COM  * If the root gets changed and/or callout lists are expired, return the
6619334SMadhavan.Venkataraman@Sun.COM  * new expiration to the caller so he can reprogram the cyclic accordingly.
6629334SMadhavan.Venkataraman@Sun.COM  */
6639334SMadhavan.Venkataraman@Sun.COM static hrtime_t
6649334SMadhavan.Venkataraman@Sun.COM callout_heap_process(callout_table_t *ct, hrtime_t delta, int timechange)
6659334SMadhavan.Venkataraman@Sun.COM {
6669334SMadhavan.Venkataraman@Sun.COM 	callout_heap_t *heap;
6679334SMadhavan.Venkataraman@Sun.COM 	callout_list_t *cl, *rootcl;
6689334SMadhavan.Venkataraman@Sun.COM 	hrtime_t expiration, now;
6699334SMadhavan.Venkataraman@Sun.COM 	int i, hash, clflags, expired;
6709334SMadhavan.Venkataraman@Sun.COM 	ulong_t num;
6719334SMadhavan.Venkataraman@Sun.COM 
6729334SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
6739334SMadhavan.Venkataraman@Sun.COM 
6749334SMadhavan.Venkataraman@Sun.COM 	if (ct->ct_heap_num == 0)
6759334SMadhavan.Venkataraman@Sun.COM 		return (0);
6769334SMadhavan.Venkataraman@Sun.COM 
6779334SMadhavan.Venkataraman@Sun.COM 	if (ct->ct_nreap > 0)
6789334SMadhavan.Venkataraman@Sun.COM 		ct->ct_cleanups++;
6799334SMadhavan.Venkataraman@Sun.COM 
6809334SMadhavan.Venkataraman@Sun.COM 	heap = ct->ct_heap;
6819334SMadhavan.Venkataraman@Sun.COM 	rootcl = heap->ch_list;
6829334SMadhavan.Venkataraman@Sun.COM 
6839334SMadhavan.Venkataraman@Sun.COM 	/*
6849334SMadhavan.Venkataraman@Sun.COM 	 * We walk the heap from the top to the bottom. If we encounter
6859334SMadhavan.Venkataraman@Sun.COM 	 * a heap item that points to an empty callout list, we clean
6869334SMadhavan.Venkataraman@Sun.COM 	 * it out. If we encounter a hrestime entry that must be removed,
6879334SMadhavan.Venkataraman@Sun.COM 	 * again we clean it out. Otherwise, we apply any adjustments needed
6889334SMadhavan.Venkataraman@Sun.COM 	 * to an element.
6899334SMadhavan.Venkataraman@Sun.COM 	 *
6909334SMadhavan.Venkataraman@Sun.COM 	 * During the walk, we also compact the heap from the bottom and
6919334SMadhavan.Venkataraman@Sun.COM 	 * reconstruct the heap using upheap operations. This is very
6929334SMadhavan.Venkataraman@Sun.COM 	 * efficient if the number of elements to be cleaned is greater than
6939334SMadhavan.Venkataraman@Sun.COM 	 * or equal to half the heap. This is the common case.
6949334SMadhavan.Venkataraman@Sun.COM 	 *
6959334SMadhavan.Venkataraman@Sun.COM 	 * Even in the non-common case, the upheap operations should be short
6969334SMadhavan.Venkataraman@Sun.COM 	 * as the entries below generally tend to be bigger than the entries
6979334SMadhavan.Venkataraman@Sun.COM 	 * above.
6989334SMadhavan.Venkataraman@Sun.COM 	 */
6999334SMadhavan.Venkataraman@Sun.COM 	num = ct->ct_heap_num;
7009334SMadhavan.Venkataraman@Sun.COM 	ct->ct_heap_num = 0;
7019334SMadhavan.Venkataraman@Sun.COM 	clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE);
7029334SMadhavan.Venkataraman@Sun.COM 	now = gethrtime();
7039334SMadhavan.Venkataraman@Sun.COM 	expired = 0;
7049334SMadhavan.Venkataraman@Sun.COM 	for (i = 0; i < num; i++) {
7059334SMadhavan.Venkataraman@Sun.COM 		cl = heap[i].ch_list;
7069334SMadhavan.Venkataraman@Sun.COM 		/*
7079334SMadhavan.Venkataraman@Sun.COM 		 * If the callout list is empty, delete the heap element and
7089334SMadhavan.Venkataraman@Sun.COM 		 * free the callout list.
7099334SMadhavan.Venkataraman@Sun.COM 		 */
7109334SMadhavan.Venkataraman@Sun.COM 		if (cl->cl_callouts.ch_head == NULL) {
7119334SMadhavan.Venkataraman@Sun.COM 			hash = CALLOUT_CLHASH(cl->cl_expiration);
7129334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
7139334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_FREE(ct, cl);
7149334SMadhavan.Venkataraman@Sun.COM 			continue;
7159334SMadhavan.Venkataraman@Sun.COM 		}
7169334SMadhavan.Venkataraman@Sun.COM 
7179334SMadhavan.Venkataraman@Sun.COM 		/*
7189334SMadhavan.Venkataraman@Sun.COM 		 * Delete the heap element and expire the callout list, if
7199334SMadhavan.Venkataraman@Sun.COM 		 * one of the following is true:
7209334SMadhavan.Venkataraman@Sun.COM 		 *	- the callout list has expired
7219334SMadhavan.Venkataraman@Sun.COM 		 *	- the callout list is an absolute hrestime one and
7229334SMadhavan.Venkataraman@Sun.COM 		 *	  there has been a system time change
7239334SMadhavan.Venkataraman@Sun.COM 		 */
7249334SMadhavan.Venkataraman@Sun.COM 		if ((cl->cl_expiration <= now) ||
7259334SMadhavan.Venkataraman@Sun.COM 		    (timechange && ((cl->cl_flags & clflags) == clflags))) {
7269334SMadhavan.Venkataraman@Sun.COM 			hash = CALLOUT_CLHASH(cl->cl_expiration);
7279334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
7289334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
7299334SMadhavan.Venkataraman@Sun.COM 			expired = 1;
7309334SMadhavan.Venkataraman@Sun.COM 			continue;
7319334SMadhavan.Venkataraman@Sun.COM 		}
7329334SMadhavan.Venkataraman@Sun.COM 
7339334SMadhavan.Venkataraman@Sun.COM 		/*
7349334SMadhavan.Venkataraman@Sun.COM 		 * Apply adjustments, if any. Adjustments are applied after
7359334SMadhavan.Venkataraman@Sun.COM 		 * the system returns from KMDB or OBP. They are only applied
7369334SMadhavan.Venkataraman@Sun.COM 		 * to relative callout lists.
7379334SMadhavan.Venkataraman@Sun.COM 		 */
7389334SMadhavan.Venkataraman@Sun.COM 		if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) {
7399334SMadhavan.Venkataraman@Sun.COM 			hash = CALLOUT_CLHASH(cl->cl_expiration);
7409334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
7419334SMadhavan.Venkataraman@Sun.COM 			expiration = cl->cl_expiration + delta;
7429334SMadhavan.Venkataraman@Sun.COM 			if (expiration <= 0)
7439334SMadhavan.Venkataraman@Sun.COM 				expiration = CY_INFINITY;
7449334SMadhavan.Venkataraman@Sun.COM 			heap[i].ch_expiration = expiration;
7459334SMadhavan.Venkataraman@Sun.COM 			cl->cl_expiration = expiration;
7469334SMadhavan.Venkataraman@Sun.COM 			hash = CALLOUT_CLHASH(cl->cl_expiration);
7479334SMadhavan.Venkataraman@Sun.COM 			if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) {
7489334SMadhavan.Venkataraman@Sun.COM 				CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl);
7499334SMadhavan.Venkataraman@Sun.COM 			} else {
7509334SMadhavan.Venkataraman@Sun.COM 				CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
7519334SMadhavan.Venkataraman@Sun.COM 			}
7529334SMadhavan.Venkataraman@Sun.COM 		}
7539334SMadhavan.Venkataraman@Sun.COM 
7549334SMadhavan.Venkataraman@Sun.COM 		heap[ct->ct_heap_num] = heap[i];
7559334SMadhavan.Venkataraman@Sun.COM 		ct->ct_heap_num++;
7569334SMadhavan.Venkataraman@Sun.COM 		(void) callout_upheap(ct);
7579334SMadhavan.Venkataraman@Sun.COM 	}
7589334SMadhavan.Venkataraman@Sun.COM 
7599334SMadhavan.Venkataraman@Sun.COM 	ct->ct_nreap = 0;
7609334SMadhavan.Venkataraman@Sun.COM 
7619334SMadhavan.Venkataraman@Sun.COM 	if (expired)
7629334SMadhavan.Venkataraman@Sun.COM 		expiration = gethrtime();
7639334SMadhavan.Venkataraman@Sun.COM 	else if (ct->ct_heap_num == 0)
7649334SMadhavan.Venkataraman@Sun.COM 		expiration = CY_INFINITY;
7659334SMadhavan.Venkataraman@Sun.COM 	else if (rootcl != heap->ch_list)
7669334SMadhavan.Venkataraman@Sun.COM 		expiration = heap->ch_expiration;
7679334SMadhavan.Venkataraman@Sun.COM 	else
7689334SMadhavan.Venkataraman@Sun.COM 		expiration = 0;
7699334SMadhavan.Venkataraman@Sun.COM 
7709334SMadhavan.Venkataraman@Sun.COM 	return (expiration);
7719334SMadhavan.Venkataraman@Sun.COM }
7729334SMadhavan.Venkataraman@Sun.COM 
7739334SMadhavan.Venkataraman@Sun.COM /*
7748566SMadhavan.Venkataraman@Sun.COM  * Common function used to create normal and realtime callouts.
7758566SMadhavan.Venkataraman@Sun.COM  *
7768566SMadhavan.Venkataraman@Sun.COM  * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So,
7778566SMadhavan.Venkataraman@Sun.COM  * there is one restriction on a realtime callout handler - it should not
7788566SMadhavan.Venkataraman@Sun.COM  * directly or indirectly acquire cpu_lock. CPU offline waits for pending
7798566SMadhavan.Venkataraman@Sun.COM  * cyclic handlers to complete while holding cpu_lock. So, if a realtime
7808566SMadhavan.Venkataraman@Sun.COM  * callout handler were to try to get cpu_lock, there would be a deadlock
7818566SMadhavan.Venkataraman@Sun.COM  * during CPU offline.
7828566SMadhavan.Venkataraman@Sun.COM  */
7838048SMadhavan.Venkataraman@Sun.COM callout_id_t
7848048SMadhavan.Venkataraman@Sun.COM timeout_generic(int type, void (*func)(void *), void *arg,
7858048SMadhavan.Venkataraman@Sun.COM 	hrtime_t expiration, hrtime_t resolution, int flags)
7868048SMadhavan.Venkataraman@Sun.COM {
7878048SMadhavan.Venkataraman@Sun.COM 	callout_table_t *ct;
7884123Sdm120769 	callout_t *cp;
7894123Sdm120769 	callout_id_t id;
7908048SMadhavan.Venkataraman@Sun.COM 	callout_list_t *cl;
7919334SMadhavan.Venkataraman@Sun.COM 	hrtime_t now, interval, rexpiration;
7929334SMadhavan.Venkataraman@Sun.COM 	int hash, clflags;
7938048SMadhavan.Venkataraman@Sun.COM 
7948048SMadhavan.Venkataraman@Sun.COM 	ASSERT(resolution > 0);
7958048SMadhavan.Venkataraman@Sun.COM 	ASSERT(func != NULL);
7968048SMadhavan.Venkataraman@Sun.COM 
7978048SMadhavan.Venkataraman@Sun.COM 	/*
7989334SMadhavan.Venkataraman@Sun.COM 	 * We get the current hrtime right upfront so that latencies in
7999334SMadhavan.Venkataraman@Sun.COM 	 * this function do not affect the accuracy of the callout.
8008048SMadhavan.Venkataraman@Sun.COM 	 */
8019334SMadhavan.Venkataraman@Sun.COM 	now = gethrtime();
8026422Sqiao 
8038048SMadhavan.Venkataraman@Sun.COM 	/*
8048048SMadhavan.Venkataraman@Sun.COM 	 * We disable kernel preemption so that we remain on the same CPU
8058048SMadhavan.Venkataraman@Sun.COM 	 * throughout. If we needed to reprogram the callout table's cyclic,
8068048SMadhavan.Venkataraman@Sun.COM 	 * we can avoid X-calls if we are on the same CPU.
8078048SMadhavan.Venkataraman@Sun.COM 	 *
8088048SMadhavan.Venkataraman@Sun.COM 	 * Note that callout_alloc() releases and reacquires the callout
8098048SMadhavan.Venkataraman@Sun.COM 	 * table mutex. While reacquiring the mutex, it is possible for us
8108048SMadhavan.Venkataraman@Sun.COM 	 * to go to sleep and later migrate to another CPU. This should be
8118048SMadhavan.Venkataraman@Sun.COM 	 * pretty rare, though.
8128048SMadhavan.Venkataraman@Sun.COM 	 */
8138048SMadhavan.Venkataraman@Sun.COM 	kpreempt_disable();
8140Sstevel@tonic-gate 
8158048SMadhavan.Venkataraman@Sun.COM 	ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)];
8168048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
8170Sstevel@tonic-gate 
8188048SMadhavan.Venkataraman@Sun.COM 	if (ct->ct_cyclic == CYCLIC_NONE) {
8198048SMadhavan.Venkataraman@Sun.COM 		mutex_exit(&ct->ct_mutex);
8208048SMadhavan.Venkataraman@Sun.COM 		/*
8218048SMadhavan.Venkataraman@Sun.COM 		 * The callout table has not yet been initialized fully.
8228048SMadhavan.Venkataraman@Sun.COM 		 * So, put this one on the boot callout table which is
8238048SMadhavan.Venkataraman@Sun.COM 		 * always initialized.
8248048SMadhavan.Venkataraman@Sun.COM 		 */
8258048SMadhavan.Venkataraman@Sun.COM 		ct = &callout_boot_ct[type];
8268048SMadhavan.Venkataraman@Sun.COM 		mutex_enter(&ct->ct_mutex);
8278048SMadhavan.Venkataraman@Sun.COM 	}
8288048SMadhavan.Venkataraman@Sun.COM 
8299334SMadhavan.Venkataraman@Sun.COM 	if (CALLOUT_CLEANUP(ct)) {
8309334SMadhavan.Venkataraman@Sun.COM 		/*
8319334SMadhavan.Venkataraman@Sun.COM 		 * There are too many heap elements pointing to empty callout
8329334SMadhavan.Venkataraman@Sun.COM 		 * lists. Clean them out.
8339334SMadhavan.Venkataraman@Sun.COM 		 */
8349334SMadhavan.Venkataraman@Sun.COM 		rexpiration = callout_heap_process(ct, 0, 0);
8359334SMadhavan.Venkataraman@Sun.COM 		if ((rexpiration != 0) && (ct->ct_suspend == 0))
8369334SMadhavan.Venkataraman@Sun.COM 			(void) cyclic_reprogram(ct->ct_cyclic, rexpiration);
8379334SMadhavan.Venkataraman@Sun.COM 	}
8389334SMadhavan.Venkataraman@Sun.COM 
8398048SMadhavan.Venkataraman@Sun.COM 	if ((cp = ct->ct_free) == NULL)
8400Sstevel@tonic-gate 		cp = callout_alloc(ct);
8410Sstevel@tonic-gate 	else
8428048SMadhavan.Venkataraman@Sun.COM 		ct->ct_free = cp->c_idnext;
8430Sstevel@tonic-gate 
8440Sstevel@tonic-gate 	cp->c_func = func;
8450Sstevel@tonic-gate 	cp->c_arg = arg;
8460Sstevel@tonic-gate 
8470Sstevel@tonic-gate 	/*
8488048SMadhavan.Venkataraman@Sun.COM 	 * Compute the expiration hrtime.
8498048SMadhavan.Venkataraman@Sun.COM 	 */
8508048SMadhavan.Venkataraman@Sun.COM 	if (flags & CALLOUT_FLAG_ABSOLUTE) {
8518048SMadhavan.Venkataraman@Sun.COM 		interval = expiration - now;
8528048SMadhavan.Venkataraman@Sun.COM 	} else {
8538048SMadhavan.Venkataraman@Sun.COM 		interval = expiration;
8548048SMadhavan.Venkataraman@Sun.COM 		expiration += now;
8558048SMadhavan.Venkataraman@Sun.COM 	}
8569334SMadhavan.Venkataraman@Sun.COM 
8579334SMadhavan.Venkataraman@Sun.COM 	if (resolution > 1) {
8589334SMadhavan.Venkataraman@Sun.COM 		/*
8599334SMadhavan.Venkataraman@Sun.COM 		 * Align expiration to the specified resolution.
8609334SMadhavan.Venkataraman@Sun.COM 		 */
8619334SMadhavan.Venkataraman@Sun.COM 		if (flags & CALLOUT_FLAG_ROUNDUP)
8629334SMadhavan.Venkataraman@Sun.COM 			expiration += resolution - 1;
8639334SMadhavan.Venkataraman@Sun.COM 		expiration = (expiration / resolution) * resolution;
8649334SMadhavan.Venkataraman@Sun.COM 	}
8659334SMadhavan.Venkataraman@Sun.COM 
8668566SMadhavan.Venkataraman@Sun.COM 	if (expiration <= 0) {
8678566SMadhavan.Venkataraman@Sun.COM 		/*
8688566SMadhavan.Venkataraman@Sun.COM 		 * expiration hrtime overflow has occurred. Just set the
8698566SMadhavan.Venkataraman@Sun.COM 		 * expiration to infinity.
8708566SMadhavan.Venkataraman@Sun.COM 		 */
8718566SMadhavan.Venkataraman@Sun.COM 		expiration = CY_INFINITY;
8728566SMadhavan.Venkataraman@Sun.COM 	}
8738048SMadhavan.Venkataraman@Sun.COM 
8748048SMadhavan.Venkataraman@Sun.COM 	/*
8758048SMadhavan.Venkataraman@Sun.COM 	 * Assign an ID to this callout
8768048SMadhavan.Venkataraman@Sun.COM 	 */
8778048SMadhavan.Venkataraman@Sun.COM 	if (flags & CALLOUT_FLAG_32BIT) {
8788048SMadhavan.Venkataraman@Sun.COM 		if (interval > callout_longterm) {
8798048SMadhavan.Venkataraman@Sun.COM 			id = (ct->ct_long_id - callout_counter_low);
8808048SMadhavan.Venkataraman@Sun.COM 			id |= CALLOUT_COUNTER_HIGH;
8818048SMadhavan.Venkataraman@Sun.COM 			ct->ct_long_id = id;
8828048SMadhavan.Venkataraman@Sun.COM 		} else {
8838048SMadhavan.Venkataraman@Sun.COM 			id = (ct->ct_short_id - callout_counter_low);
8848048SMadhavan.Venkataraman@Sun.COM 			id |= CALLOUT_COUNTER_HIGH;
8858048SMadhavan.Venkataraman@Sun.COM 			ct->ct_short_id = id;
8868048SMadhavan.Venkataraman@Sun.COM 		}
8878048SMadhavan.Venkataraman@Sun.COM 	} else {
8888048SMadhavan.Venkataraman@Sun.COM 		id = (ct->ct_gen_id - callout_counter_low);
8898048SMadhavan.Venkataraman@Sun.COM 		if ((id & CALLOUT_COUNTER_HIGH) == 0) {
8908048SMadhavan.Venkataraman@Sun.COM 			id |= CALLOUT_COUNTER_HIGH;
8918048SMadhavan.Venkataraman@Sun.COM 			id += CALLOUT_GENERATION_LOW;
8928048SMadhavan.Venkataraman@Sun.COM 		}
8938048SMadhavan.Venkataraman@Sun.COM 		ct->ct_gen_id = id;
8948048SMadhavan.Venkataraman@Sun.COM 	}
8958048SMadhavan.Venkataraman@Sun.COM 
8968048SMadhavan.Venkataraman@Sun.COM 	cp->c_xid = id;
8978048SMadhavan.Venkataraman@Sun.COM 
8989334SMadhavan.Venkataraman@Sun.COM 	clflags = 0;
8999334SMadhavan.Venkataraman@Sun.COM 	if (flags & CALLOUT_FLAG_ABSOLUTE)
9009334SMadhavan.Venkataraman@Sun.COM 		clflags |= CALLOUT_LIST_FLAG_ABSOLUTE;
9019334SMadhavan.Venkataraman@Sun.COM 	if (flags & CALLOUT_FLAG_HRESTIME)
9029334SMadhavan.Venkataraman@Sun.COM 		clflags |= CALLOUT_LIST_FLAG_HRESTIME;
9039334SMadhavan.Venkataraman@Sun.COM 	if (resolution == 1)
9049334SMadhavan.Venkataraman@Sun.COM 		clflags |= CALLOUT_LIST_FLAG_NANO;
9058048SMadhavan.Venkataraman@Sun.COM 	hash = CALLOUT_CLHASH(expiration);
9068048SMadhavan.Venkataraman@Sun.COM 
9078048SMadhavan.Venkataraman@Sun.COM again:
9088048SMadhavan.Venkataraman@Sun.COM 	/*
9098048SMadhavan.Venkataraman@Sun.COM 	 * Try to see if a callout list already exists for this expiration.
9108048SMadhavan.Venkataraman@Sun.COM 	 */
9119334SMadhavan.Venkataraman@Sun.COM 	cl = callout_list_get(ct, expiration, clflags, hash);
9128048SMadhavan.Venkataraman@Sun.COM 	if (cl == NULL) {
9138048SMadhavan.Venkataraman@Sun.COM 		/*
9148048SMadhavan.Venkataraman@Sun.COM 		 * Check if we have enough space in the heap to insert one
9158048SMadhavan.Venkataraman@Sun.COM 		 * expiration. If not, expand the heap.
9168048SMadhavan.Venkataraman@Sun.COM 		 */
9178048SMadhavan.Venkataraman@Sun.COM 		if (ct->ct_heap_num == ct->ct_heap_max) {
9188048SMadhavan.Venkataraman@Sun.COM 			callout_heap_expand(ct);
9198048SMadhavan.Venkataraman@Sun.COM 			/*
9208048SMadhavan.Venkataraman@Sun.COM 			 * In the above call, we drop the lock, allocate and
9218048SMadhavan.Venkataraman@Sun.COM 			 * reacquire the lock. So, we could have been away
9228048SMadhavan.Venkataraman@Sun.COM 			 * for a while. In the meantime, someone could have
9238048SMadhavan.Venkataraman@Sun.COM 			 * inserted a callout list with the same expiration.
9248048SMadhavan.Venkataraman@Sun.COM 			 * So, the best course is to repeat the steps. This
9258048SMadhavan.Venkataraman@Sun.COM 			 * should be an infrequent event.
9268048SMadhavan.Venkataraman@Sun.COM 			 */
9278048SMadhavan.Venkataraman@Sun.COM 			goto again;
9288048SMadhavan.Venkataraman@Sun.COM 		}
9298048SMadhavan.Venkataraman@Sun.COM 
9308048SMadhavan.Venkataraman@Sun.COM 		/*
9318048SMadhavan.Venkataraman@Sun.COM 		 * Check the free list. If we don't find one, we have to
9328048SMadhavan.Venkataraman@Sun.COM 		 * take the slow path and allocate from kmem.
9338048SMadhavan.Venkataraman@Sun.COM 		 */
9348048SMadhavan.Venkataraman@Sun.COM 		if ((cl = ct->ct_lfree) == NULL) {
9358048SMadhavan.Venkataraman@Sun.COM 			callout_list_alloc(ct);
9368048SMadhavan.Venkataraman@Sun.COM 			/*
9378048SMadhavan.Venkataraman@Sun.COM 			 * In the above call, we drop the lock, allocate and
9388048SMadhavan.Venkataraman@Sun.COM 			 * reacquire the lock. So, we could have been away
9398048SMadhavan.Venkataraman@Sun.COM 			 * for a while. In the meantime, someone could have
9408048SMadhavan.Venkataraman@Sun.COM 			 * inserted a callout list with the same expiration.
9418048SMadhavan.Venkataraman@Sun.COM 			 * Plus, the heap could have become full. So, the best
9428048SMadhavan.Venkataraman@Sun.COM 			 * course is to repeat the steps. This should be an
9438048SMadhavan.Venkataraman@Sun.COM 			 * infrequent event.
9448048SMadhavan.Venkataraman@Sun.COM 			 */
9458048SMadhavan.Venkataraman@Sun.COM 			goto again;
9468048SMadhavan.Venkataraman@Sun.COM 		}
9478048SMadhavan.Venkataraman@Sun.COM 		ct->ct_lfree = cl->cl_next;
9488048SMadhavan.Venkataraman@Sun.COM 		cl->cl_expiration = expiration;
9499334SMadhavan.Venkataraman@Sun.COM 		cl->cl_flags = clflags;
9508048SMadhavan.Venkataraman@Sun.COM 
9519334SMadhavan.Venkataraman@Sun.COM 		if (clflags & CALLOUT_LIST_FLAG_NANO) {
9529334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl);
9539334SMadhavan.Venkataraman@Sun.COM 		} else {
9549334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
9559334SMadhavan.Venkataraman@Sun.COM 		}
9568048SMadhavan.Venkataraman@Sun.COM 
9578048SMadhavan.Venkataraman@Sun.COM 		/*
9588048SMadhavan.Venkataraman@Sun.COM 		 * This is a new expiration. So, insert it into the heap.
9598048SMadhavan.Venkataraman@Sun.COM 		 * This will also reprogram the cyclic, if the expiration
9608048SMadhavan.Venkataraman@Sun.COM 		 * propagated to the root of the heap.
9618048SMadhavan.Venkataraman@Sun.COM 		 */
9629334SMadhavan.Venkataraman@Sun.COM 		callout_heap_insert(ct, cl);
9639334SMadhavan.Venkataraman@Sun.COM 	} else {
9649334SMadhavan.Venkataraman@Sun.COM 		/*
9659334SMadhavan.Venkataraman@Sun.COM 		 * If the callout list was empty, untimeout_generic() would
9669334SMadhavan.Venkataraman@Sun.COM 		 * have incremented a reap count. Decrement the reap count
9679334SMadhavan.Venkataraman@Sun.COM 		 * as we are going to insert a callout into this list.
9689334SMadhavan.Venkataraman@Sun.COM 		 */
9699334SMadhavan.Venkataraman@Sun.COM 		if (cl->cl_callouts.ch_head == NULL)
9709334SMadhavan.Venkataraman@Sun.COM 			ct->ct_nreap--;
9718048SMadhavan.Venkataraman@Sun.COM 	}
9728048SMadhavan.Venkataraman@Sun.COM 	cp->c_list = cl;
9738048SMadhavan.Venkataraman@Sun.COM 	CALLOUT_APPEND(ct, cp);
9748048SMadhavan.Venkataraman@Sun.COM 
9758048SMadhavan.Venkataraman@Sun.COM 	ct->ct_timeouts++;
9768048SMadhavan.Venkataraman@Sun.COM 	ct->ct_timeouts_pending++;
9778048SMadhavan.Venkataraman@Sun.COM 
9788048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
9798048SMadhavan.Venkataraman@Sun.COM 
9808048SMadhavan.Venkataraman@Sun.COM 	kpreempt_enable();
9818048SMadhavan.Venkataraman@Sun.COM 
9828048SMadhavan.Venkataraman@Sun.COM 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
9838048SMadhavan.Venkataraman@Sun.COM 	    "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
9848048SMadhavan.Venkataraman@Sun.COM 	    cp);
9858048SMadhavan.Venkataraman@Sun.COM 
9868048SMadhavan.Venkataraman@Sun.COM 	return (id);
9878048SMadhavan.Venkataraman@Sun.COM }
9888048SMadhavan.Venkataraman@Sun.COM 
9898048SMadhavan.Venkataraman@Sun.COM timeout_id_t
9908048SMadhavan.Venkataraman@Sun.COM timeout(void (*func)(void *), void *arg, clock_t delta)
9918048SMadhavan.Venkataraman@Sun.COM {
9928048SMadhavan.Venkataraman@Sun.COM 	ulong_t id;
9938048SMadhavan.Venkataraman@Sun.COM 
9948048SMadhavan.Venkataraman@Sun.COM 	/*
9950Sstevel@tonic-gate 	 * Make sure the callout runs at least 1 tick in the future.
9960Sstevel@tonic-gate 	 */
9970Sstevel@tonic-gate 	if (delta <= 0)
9980Sstevel@tonic-gate 		delta = 1;
9998566SMadhavan.Venkataraman@Sun.COM 	else if (delta > callout_max_ticks)
10008566SMadhavan.Venkataraman@Sun.COM 		delta = callout_max_ticks;
10010Sstevel@tonic-gate 
10028048SMadhavan.Venkataraman@Sun.COM 	id =  (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
10038048SMadhavan.Venkataraman@Sun.COM 	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);
10040Sstevel@tonic-gate 
10050Sstevel@tonic-gate 	return ((timeout_id_t)id);
10060Sstevel@tonic-gate }
10070Sstevel@tonic-gate 
10088048SMadhavan.Venkataraman@Sun.COM /*
10098048SMadhavan.Venkataraman@Sun.COM  * Convenience function that creates a normal callout with default parameters
10108048SMadhavan.Venkataraman@Sun.COM  * and returns a full ID.
10118048SMadhavan.Venkataraman@Sun.COM  */
10128048SMadhavan.Venkataraman@Sun.COM callout_id_t
10138048SMadhavan.Venkataraman@Sun.COM timeout_default(void (*func)(void *), void *arg, clock_t delta)
10140Sstevel@tonic-gate {
10158048SMadhavan.Venkataraman@Sun.COM 	callout_id_t id;
10160Sstevel@tonic-gate 
10178048SMadhavan.Venkataraman@Sun.COM 	/*
10188048SMadhavan.Venkataraman@Sun.COM 	 * Make sure the callout runs at least 1 tick in the future.
10198048SMadhavan.Venkataraman@Sun.COM 	 */
10208048SMadhavan.Venkataraman@Sun.COM 	if (delta <= 0)
10218048SMadhavan.Venkataraman@Sun.COM 		delta = 1;
10228566SMadhavan.Venkataraman@Sun.COM 	else if (delta > callout_max_ticks)
10238566SMadhavan.Venkataraman@Sun.COM 		delta = callout_max_ticks;
10248048SMadhavan.Venkataraman@Sun.COM 
10258048SMadhavan.Venkataraman@Sun.COM 	id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
10268048SMadhavan.Venkataraman@Sun.COM 	    nsec_per_tick, 0);
10278048SMadhavan.Venkataraman@Sun.COM 
10288048SMadhavan.Venkataraman@Sun.COM 	return (id);
10290Sstevel@tonic-gate }
10300Sstevel@tonic-gate 
10310Sstevel@tonic-gate timeout_id_t
10320Sstevel@tonic-gate realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
10330Sstevel@tonic-gate {
10348048SMadhavan.Venkataraman@Sun.COM 	ulong_t id;
10358048SMadhavan.Venkataraman@Sun.COM 
10368048SMadhavan.Venkataraman@Sun.COM 	/*
10378048SMadhavan.Venkataraman@Sun.COM 	 * Make sure the callout runs at least 1 tick in the future.
10388048SMadhavan.Venkataraman@Sun.COM 	 */
10398048SMadhavan.Venkataraman@Sun.COM 	if (delta <= 0)
10408048SMadhavan.Venkataraman@Sun.COM 		delta = 1;
10418566SMadhavan.Venkataraman@Sun.COM 	else if (delta > callout_max_ticks)
10428566SMadhavan.Venkataraman@Sun.COM 		delta = callout_max_ticks;
10438048SMadhavan.Venkataraman@Sun.COM 
10448048SMadhavan.Venkataraman@Sun.COM 	id =  (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
10458048SMadhavan.Venkataraman@Sun.COM 	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);
10468048SMadhavan.Venkataraman@Sun.COM 
10478048SMadhavan.Venkataraman@Sun.COM 	return ((timeout_id_t)id);
10480Sstevel@tonic-gate }
10490Sstevel@tonic-gate 
10508048SMadhavan.Venkataraman@Sun.COM /*
10518048SMadhavan.Venkataraman@Sun.COM  * Convenience function that creates a realtime callout with default parameters
10528048SMadhavan.Venkataraman@Sun.COM  * and returns a full ID.
10538048SMadhavan.Venkataraman@Sun.COM  */
10548048SMadhavan.Venkataraman@Sun.COM callout_id_t
10558048SMadhavan.Venkataraman@Sun.COM realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
10560Sstevel@tonic-gate {
10578048SMadhavan.Venkataraman@Sun.COM 	callout_id_t id;
10588048SMadhavan.Venkataraman@Sun.COM 
10598048SMadhavan.Venkataraman@Sun.COM 	/*
10608048SMadhavan.Venkataraman@Sun.COM 	 * Make sure the callout runs at least 1 tick in the future.
10618048SMadhavan.Venkataraman@Sun.COM 	 */
10628048SMadhavan.Venkataraman@Sun.COM 	if (delta <= 0)
10638048SMadhavan.Venkataraman@Sun.COM 		delta = 1;
10648566SMadhavan.Venkataraman@Sun.COM 	else if (delta > callout_max_ticks)
10658566SMadhavan.Venkataraman@Sun.COM 		delta = callout_max_ticks;
10668048SMadhavan.Venkataraman@Sun.COM 
10678048SMadhavan.Venkataraman@Sun.COM 	id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
10688048SMadhavan.Venkataraman@Sun.COM 	    nsec_per_tick, 0);
10698048SMadhavan.Venkataraman@Sun.COM 
10708048SMadhavan.Venkataraman@Sun.COM 	return (id);
10718048SMadhavan.Venkataraman@Sun.COM }
10728048SMadhavan.Venkataraman@Sun.COM 
10738048SMadhavan.Venkataraman@Sun.COM hrtime_t
10748048SMadhavan.Venkataraman@Sun.COM untimeout_generic(callout_id_t id, int nowait)
10758048SMadhavan.Venkataraman@Sun.COM {
10760Sstevel@tonic-gate 	callout_table_t *ct;
10770Sstevel@tonic-gate 	callout_t *cp;
10780Sstevel@tonic-gate 	callout_id_t xid;
10799334SMadhavan.Venkataraman@Sun.COM 	callout_list_t *cl;
10808048SMadhavan.Venkataraman@Sun.COM 	int hash;
10818048SMadhavan.Venkataraman@Sun.COM 	callout_id_t bogus;
10820Sstevel@tonic-gate 
10838048SMadhavan.Venkataraman@Sun.COM 	ct = &callout_table[CALLOUT_ID_TO_TABLE(id)];
10848048SMadhavan.Venkataraman@Sun.COM 	hash = CALLOUT_IDHASH(id);
10858048SMadhavan.Venkataraman@Sun.COM 
10868048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
10870Sstevel@tonic-gate 
10888048SMadhavan.Venkataraman@Sun.COM 	/*
10898048SMadhavan.Venkataraman@Sun.COM 	 * Search the ID hash table for the callout.
10908048SMadhavan.Venkataraman@Sun.COM 	 */
10918048SMadhavan.Venkataraman@Sun.COM 	for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) {
10920Sstevel@tonic-gate 
10938048SMadhavan.Venkataraman@Sun.COM 		xid = cp->c_xid;
10948048SMadhavan.Venkataraman@Sun.COM 
10958048SMadhavan.Venkataraman@Sun.COM 		/*
10968048SMadhavan.Venkataraman@Sun.COM 		 * Match the ID and generation number.
10978048SMadhavan.Venkataraman@Sun.COM 		 */
10988048SMadhavan.Venkataraman@Sun.COM 		if ((xid & CALLOUT_ID_MASK) != id)
10998048SMadhavan.Venkataraman@Sun.COM 			continue;
11000Sstevel@tonic-gate 
11018048SMadhavan.Venkataraman@Sun.COM 		if ((xid & CALLOUT_EXECUTING) == 0) {
11028048SMadhavan.Venkataraman@Sun.COM 			hrtime_t expiration;
11030Sstevel@tonic-gate 
11048048SMadhavan.Venkataraman@Sun.COM 			/*
11058048SMadhavan.Venkataraman@Sun.COM 			 * Delete the callout. If the callout list becomes
11068048SMadhavan.Venkataraman@Sun.COM 			 * NULL, we don't remove it from the table. This is
11078048SMadhavan.Venkataraman@Sun.COM 			 * so it can be reused. If the empty callout list
11088048SMadhavan.Venkataraman@Sun.COM 			 * corresponds to the top of the the callout heap, we
11098048SMadhavan.Venkataraman@Sun.COM 			 * don't reprogram the table cyclic here. This is in
11108048SMadhavan.Venkataraman@Sun.COM 			 * order to avoid lots of X-calls to the CPU associated
11118048SMadhavan.Venkataraman@Sun.COM 			 * with the callout table.
11128048SMadhavan.Venkataraman@Sun.COM 			 */
11139334SMadhavan.Venkataraman@Sun.COM 			cl = cp->c_list;
11149334SMadhavan.Venkataraman@Sun.COM 			expiration = cl->cl_expiration;
11158048SMadhavan.Venkataraman@Sun.COM 			CALLOUT_DELETE(ct, cp);
11168048SMadhavan.Venkataraman@Sun.COM 			cp->c_idnext = ct->ct_free;
11178048SMadhavan.Venkataraman@Sun.COM 			ct->ct_free = cp;
11189334SMadhavan.Venkataraman@Sun.COM 			cp->c_xid |= CALLOUT_FREE;
11198048SMadhavan.Venkataraman@Sun.COM 			ct->ct_untimeouts_unexpired++;
11208048SMadhavan.Venkataraman@Sun.COM 			ct->ct_timeouts_pending--;
11219334SMadhavan.Venkataraman@Sun.COM 
11229334SMadhavan.Venkataraman@Sun.COM 			/*
11239334SMadhavan.Venkataraman@Sun.COM 			 * If the callout list has become empty, it needs
11249334SMadhavan.Venkataraman@Sun.COM 			 * to be cleaned along with its heap entry. Increment
11259334SMadhavan.Venkataraman@Sun.COM 			 * a reap count.
11269334SMadhavan.Venkataraman@Sun.COM 			 */
11279334SMadhavan.Venkataraman@Sun.COM 			if (cl->cl_callouts.ch_head == NULL)
11289334SMadhavan.Venkataraman@Sun.COM 				ct->ct_nreap++;
11298048SMadhavan.Venkataraman@Sun.COM 			mutex_exit(&ct->ct_mutex);
11306422Sqiao 
11318048SMadhavan.Venkataraman@Sun.COM 			expiration -= gethrtime();
11320Sstevel@tonic-gate 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
11338048SMadhavan.Venkataraman@Sun.COM 			    "untimeout:ID %lx hrtime left %llx", id,
11348048SMadhavan.Venkataraman@Sun.COM 			    expiration);
11358048SMadhavan.Venkataraman@Sun.COM 			return (expiration < 0 ? 0 : expiration);
11360Sstevel@tonic-gate 		}
11370Sstevel@tonic-gate 
11388048SMadhavan.Venkataraman@Sun.COM 		ct->ct_untimeouts_executing++;
11390Sstevel@tonic-gate 		/*
11400Sstevel@tonic-gate 		 * The callout we want to delete is currently executing.
11410Sstevel@tonic-gate 		 * The DDI states that we must wait until the callout
11429039SMadhavan.Venkataraman@Sun.COM 		 * completes before returning, so we block on c_done until the
11438048SMadhavan.Venkataraman@Sun.COM 		 * callout ID changes (to the old ID if it's on the freelist,
11440Sstevel@tonic-gate 		 * or to a new callout ID if it's in use).  This implicitly
11450Sstevel@tonic-gate 		 * assumes that callout structures are persistent (they are).
11460Sstevel@tonic-gate 		 */
11479039SMadhavan.Venkataraman@Sun.COM 		if (cp->c_executor == curthread) {
11480Sstevel@tonic-gate 			/*
11490Sstevel@tonic-gate 			 * The timeout handler called untimeout() on itself.
11500Sstevel@tonic-gate 			 * Stupid, but legal.  We can't wait for the timeout
11510Sstevel@tonic-gate 			 * to complete without deadlocking, so we just return.
11520Sstevel@tonic-gate 			 */
11538048SMadhavan.Venkataraman@Sun.COM 			mutex_exit(&ct->ct_mutex);
11540Sstevel@tonic-gate 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
11550Sstevel@tonic-gate 			    "untimeout_self:ID %x", id);
11560Sstevel@tonic-gate 			return (-1);
11570Sstevel@tonic-gate 		}
11588048SMadhavan.Venkataraman@Sun.COM 		if (nowait == 0) {
11598048SMadhavan.Venkataraman@Sun.COM 			/*
11608048SMadhavan.Venkataraman@Sun.COM 			 * We need to wait. Indicate that we are waiting by
11619039SMadhavan.Venkataraman@Sun.COM 			 * incrementing c_waiting. This prevents the executor
11629039SMadhavan.Venkataraman@Sun.COM 			 * from doing a wakeup on c_done if there are no
11638048SMadhavan.Venkataraman@Sun.COM 			 * waiters.
11648048SMadhavan.Venkataraman@Sun.COM 			 */
11658048SMadhavan.Venkataraman@Sun.COM 			while (cp->c_xid == xid) {
11669039SMadhavan.Venkataraman@Sun.COM 				cp->c_waiting = 1;
11679039SMadhavan.Venkataraman@Sun.COM 				cv_wait(&cp->c_done, &ct->ct_mutex);
11688048SMadhavan.Venkataraman@Sun.COM 			}
11698048SMadhavan.Venkataraman@Sun.COM 		}
11708048SMadhavan.Venkataraman@Sun.COM 		mutex_exit(&ct->ct_mutex);
11710Sstevel@tonic-gate 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
11720Sstevel@tonic-gate 		    "untimeout_executing:ID %lx", id);
11730Sstevel@tonic-gate 		return (-1);
11740Sstevel@tonic-gate 	}
11758048SMadhavan.Venkataraman@Sun.COM 	ct->ct_untimeouts_expired++;
11760Sstevel@tonic-gate 
11778048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
11780Sstevel@tonic-gate 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
11790Sstevel@tonic-gate 	    "untimeout_bogus_id:ID %lx", id);
11800Sstevel@tonic-gate 
11810Sstevel@tonic-gate 	/*
11820Sstevel@tonic-gate 	 * We didn't find the specified callout ID.  This means either
11830Sstevel@tonic-gate 	 * (1) the callout already fired, or (2) the caller passed us
11840Sstevel@tonic-gate 	 * a bogus value.  Perform a sanity check to detect case (2).
11850Sstevel@tonic-gate 	 */
11869334SMadhavan.Venkataraman@Sun.COM 	bogus = (CALLOUT_ID_FLAGS | CALLOUT_COUNTER_HIGH);
11878048SMadhavan.Venkataraman@Sun.COM 	if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0))
11888048SMadhavan.Venkataraman@Sun.COM 		panic("untimeout: impossible timeout id %llx",
11898048SMadhavan.Venkataraman@Sun.COM 		    (unsigned long long)id);
11900Sstevel@tonic-gate 
11910Sstevel@tonic-gate 	return (-1);
11920Sstevel@tonic-gate }
11930Sstevel@tonic-gate 
11948048SMadhavan.Venkataraman@Sun.COM clock_t
11958048SMadhavan.Venkataraman@Sun.COM untimeout(timeout_id_t id_arg)
11968048SMadhavan.Venkataraman@Sun.COM {
11978048SMadhavan.Venkataraman@Sun.COM 	hrtime_t hleft;
11988048SMadhavan.Venkataraman@Sun.COM 	clock_t tleft;
11998048SMadhavan.Venkataraman@Sun.COM 	callout_id_t id;
12008048SMadhavan.Venkataraman@Sun.COM 
12018048SMadhavan.Venkataraman@Sun.COM 	id = (ulong_t)id_arg;
12028048SMadhavan.Venkataraman@Sun.COM 	hleft = untimeout_generic(id, 0);
12038048SMadhavan.Venkataraman@Sun.COM 	if (hleft < 0)
12048048SMadhavan.Venkataraman@Sun.COM 		tleft = -1;
12058048SMadhavan.Venkataraman@Sun.COM 	else if (hleft == 0)
12068048SMadhavan.Venkataraman@Sun.COM 		tleft = 0;
12078048SMadhavan.Venkataraman@Sun.COM 	else
12088048SMadhavan.Venkataraman@Sun.COM 		tleft = NSEC_TO_TICK(hleft);
12098048SMadhavan.Venkataraman@Sun.COM 
12108048SMadhavan.Venkataraman@Sun.COM 	return (tleft);
12118048SMadhavan.Venkataraman@Sun.COM }
12128048SMadhavan.Venkataraman@Sun.COM 
12130Sstevel@tonic-gate /*
12148048SMadhavan.Venkataraman@Sun.COM  * Convenience function to untimeout a timeout with a full ID with default
12158048SMadhavan.Venkataraman@Sun.COM  * parameters.
12168048SMadhavan.Venkataraman@Sun.COM  */
12178048SMadhavan.Venkataraman@Sun.COM clock_t
12188048SMadhavan.Venkataraman@Sun.COM untimeout_default(callout_id_t id, int nowait)
12198048SMadhavan.Venkataraman@Sun.COM {
12208048SMadhavan.Venkataraman@Sun.COM 	hrtime_t hleft;
12218048SMadhavan.Venkataraman@Sun.COM 	clock_t tleft;
12228048SMadhavan.Venkataraman@Sun.COM 
12238048SMadhavan.Venkataraman@Sun.COM 	hleft = untimeout_generic(id, nowait);
12248048SMadhavan.Venkataraman@Sun.COM 	if (hleft < 0)
12258048SMadhavan.Venkataraman@Sun.COM 		tleft = -1;
12268048SMadhavan.Venkataraman@Sun.COM 	else if (hleft == 0)
12278048SMadhavan.Venkataraman@Sun.COM 		tleft = 0;
12288048SMadhavan.Venkataraman@Sun.COM 	else
12298048SMadhavan.Venkataraman@Sun.COM 		tleft = NSEC_TO_TICK(hleft);
12308048SMadhavan.Venkataraman@Sun.COM 
12318048SMadhavan.Venkataraman@Sun.COM 	return (tleft);
12328048SMadhavan.Venkataraman@Sun.COM }
12338048SMadhavan.Venkataraman@Sun.COM 
12348048SMadhavan.Venkataraman@Sun.COM /*
12358048SMadhavan.Venkataraman@Sun.COM  * Expire all the callouts queued in the specified callout list.
12360Sstevel@tonic-gate  */
12370Sstevel@tonic-gate static void
12388048SMadhavan.Venkataraman@Sun.COM callout_list_expire(callout_table_t *ct, callout_list_t *cl)
12390Sstevel@tonic-gate {
12409039SMadhavan.Venkataraman@Sun.COM 	callout_t *cp, *cnext;
12418048SMadhavan.Venkataraman@Sun.COM 
12428048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
12438048SMadhavan.Venkataraman@Sun.COM 	ASSERT(cl != NULL);
12448048SMadhavan.Venkataraman@Sun.COM 
12459039SMadhavan.Venkataraman@Sun.COM 	for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) {
12469039SMadhavan.Venkataraman@Sun.COM 		/*
12479039SMadhavan.Venkataraman@Sun.COM 		 * Multiple executor threads could be running at the same
12489039SMadhavan.Venkataraman@Sun.COM 		 * time. If this callout is already being executed,
12499039SMadhavan.Venkataraman@Sun.COM 		 * go on to the next one.
12509039SMadhavan.Venkataraman@Sun.COM 		 */
12519039SMadhavan.Venkataraman@Sun.COM 		if (cp->c_xid & CALLOUT_EXECUTING) {
12529039SMadhavan.Venkataraman@Sun.COM 			cnext = cp->c_clnext;
12539039SMadhavan.Venkataraman@Sun.COM 			continue;
12549039SMadhavan.Venkataraman@Sun.COM 		}
12558048SMadhavan.Venkataraman@Sun.COM 
12568048SMadhavan.Venkataraman@Sun.COM 		/*
12578048SMadhavan.Venkataraman@Sun.COM 		 * Indicate to untimeout() that a callout is
12588048SMadhavan.Venkataraman@Sun.COM 		 * being expired by the executor.
12598048SMadhavan.Venkataraman@Sun.COM 		 */
12608048SMadhavan.Venkataraman@Sun.COM 		cp->c_xid |= CALLOUT_EXECUTING;
12619039SMadhavan.Venkataraman@Sun.COM 		cp->c_executor = curthread;
12628048SMadhavan.Venkataraman@Sun.COM 		mutex_exit(&ct->ct_mutex);
12638048SMadhavan.Venkataraman@Sun.COM 
12648048SMadhavan.Venkataraman@Sun.COM 		DTRACE_PROBE1(callout__start, callout_t *, cp);
12658048SMadhavan.Venkataraman@Sun.COM 		(*cp->c_func)(cp->c_arg);
12668048SMadhavan.Venkataraman@Sun.COM 		DTRACE_PROBE1(callout__end, callout_t *, cp);
12670Sstevel@tonic-gate 
12688048SMadhavan.Venkataraman@Sun.COM 		mutex_enter(&ct->ct_mutex);
12698048SMadhavan.Venkataraman@Sun.COM 
12708048SMadhavan.Venkataraman@Sun.COM 		ct->ct_expirations++;
12718048SMadhavan.Venkataraman@Sun.COM 		ct->ct_timeouts_pending--;
12728048SMadhavan.Venkataraman@Sun.COM 		/*
12739039SMadhavan.Venkataraman@Sun.COM 		 * Indicate completion for c_done.
12748048SMadhavan.Venkataraman@Sun.COM 		 */
12758048SMadhavan.Venkataraman@Sun.COM 		cp->c_xid &= ~CALLOUT_EXECUTING;
12769039SMadhavan.Venkataraman@Sun.COM 		cp->c_executor = NULL;
12779039SMadhavan.Venkataraman@Sun.COM 		cnext = cp->c_clnext;
12788048SMadhavan.Venkataraman@Sun.COM 
12798048SMadhavan.Venkataraman@Sun.COM 		/*
12808048SMadhavan.Venkataraman@Sun.COM 		 * Delete callout from ID hash table and the callout
12818048SMadhavan.Venkataraman@Sun.COM 		 * list, return to freelist, and tell any untimeout() that
12828048SMadhavan.Venkataraman@Sun.COM 		 * cares that we're done.
12838048SMadhavan.Venkataraman@Sun.COM 		 */
12848048SMadhavan.Venkataraman@Sun.COM 		CALLOUT_DELETE(ct, cp);
12858048SMadhavan.Venkataraman@Sun.COM 		cp->c_idnext = ct->ct_free;
12868048SMadhavan.Venkataraman@Sun.COM 		ct->ct_free = cp;
12879334SMadhavan.Venkataraman@Sun.COM 		cp->c_xid |= CALLOUT_FREE;
12880Sstevel@tonic-gate 
12899039SMadhavan.Venkataraman@Sun.COM 		if (cp->c_waiting) {
12909039SMadhavan.Venkataraman@Sun.COM 			cp->c_waiting = 0;
12919039SMadhavan.Venkataraman@Sun.COM 			cv_broadcast(&cp->c_done);
12928048SMadhavan.Venkataraman@Sun.COM 		}
12938048SMadhavan.Venkataraman@Sun.COM 	}
12948048SMadhavan.Venkataraman@Sun.COM }
12958048SMadhavan.Venkataraman@Sun.COM 
12968048SMadhavan.Venkataraman@Sun.COM /*
12978048SMadhavan.Venkataraman@Sun.COM  * Execute all expired callout lists for a callout table.
12988048SMadhavan.Venkataraman@Sun.COM  */
12998048SMadhavan.Venkataraman@Sun.COM static void
13008048SMadhavan.Venkataraman@Sun.COM callout_expire(callout_table_t *ct)
13018048SMadhavan.Venkataraman@Sun.COM {
13028048SMadhavan.Venkataraman@Sun.COM 	callout_list_t *cl, *clnext;
13038048SMadhavan.Venkataraman@Sun.COM 
13048048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
13050Sstevel@tonic-gate 
13068048SMadhavan.Venkataraman@Sun.COM 	for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) {
13078048SMadhavan.Venkataraman@Sun.COM 		/*
13088048SMadhavan.Venkataraman@Sun.COM 		 * Expire all the callouts in this callout list.
13098048SMadhavan.Venkataraman@Sun.COM 		 */
13108048SMadhavan.Venkataraman@Sun.COM 		callout_list_expire(ct, cl);
13118048SMadhavan.Venkataraman@Sun.COM 
13128048SMadhavan.Venkataraman@Sun.COM 		clnext = cl->cl_next;
13139039SMadhavan.Venkataraman@Sun.COM 		if (cl->cl_callouts.ch_head == NULL) {
13149039SMadhavan.Venkataraman@Sun.COM 			/*
13159039SMadhavan.Venkataraman@Sun.COM 			 * Free the callout list.
13169039SMadhavan.Venkataraman@Sun.COM 			 */
13179039SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_DELETE(ct->ct_expired, cl);
13189334SMadhavan.Venkataraman@Sun.COM 			CALLOUT_LIST_FREE(ct, cl);
13199039SMadhavan.Venkataraman@Sun.COM 		}
13200Sstevel@tonic-gate 	}
13210Sstevel@tonic-gate }
13220Sstevel@tonic-gate 
13230Sstevel@tonic-gate /*
13248048SMadhavan.Venkataraman@Sun.COM  * The cyclic handlers below process callouts in two steps:
13258048SMadhavan.Venkataraman@Sun.COM  *
13268048SMadhavan.Venkataraman@Sun.COM  *	1. Find all expired callout lists and queue them in a separate
13278048SMadhavan.Venkataraman@Sun.COM  *	   list of expired callouts.
13288048SMadhavan.Venkataraman@Sun.COM  *	2. Execute the expired callout lists.
13298048SMadhavan.Venkataraman@Sun.COM  *
13308048SMadhavan.Venkataraman@Sun.COM  * This is done for two reasons:
13318048SMadhavan.Venkataraman@Sun.COM  *
13328048SMadhavan.Venkataraman@Sun.COM  *	1. We want to quickly find the next earliest expiration to program
13338048SMadhavan.Venkataraman@Sun.COM  *	   the cyclic to and reprogram it. We can do this right at the end
13348048SMadhavan.Venkataraman@Sun.COM  *	   of step 1.
13358048SMadhavan.Venkataraman@Sun.COM  *	2. The realtime cyclic handler expires callouts in place. However,
13368048SMadhavan.Venkataraman@Sun.COM  *	   for normal callouts, callouts are expired by a taskq thread.
13378048SMadhavan.Venkataraman@Sun.COM  *	   So, it is simpler and more robust to have the taskq thread just
13388048SMadhavan.Venkataraman@Sun.COM  *	   do step 2.
13390Sstevel@tonic-gate  */
13406422Sqiao 
13418048SMadhavan.Venkataraman@Sun.COM /*
13428048SMadhavan.Venkataraman@Sun.COM  * Realtime callout cyclic handler.
13438048SMadhavan.Venkataraman@Sun.COM  */
13448048SMadhavan.Venkataraman@Sun.COM void
13458048SMadhavan.Venkataraman@Sun.COM callout_realtime(callout_table_t *ct)
13468048SMadhavan.Venkataraman@Sun.COM {
13478048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
13488048SMadhavan.Venkataraman@Sun.COM 	callout_heap_delete(ct);
13498048SMadhavan.Venkataraman@Sun.COM 	callout_expire(ct);
13508048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
13518048SMadhavan.Venkataraman@Sun.COM }
13528048SMadhavan.Venkataraman@Sun.COM 
13538048SMadhavan.Venkataraman@Sun.COM void
13548048SMadhavan.Venkataraman@Sun.COM callout_execute(callout_table_t *ct)
13558048SMadhavan.Venkataraman@Sun.COM {
13568048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
13578048SMadhavan.Venkataraman@Sun.COM 	callout_expire(ct);
13588048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
13598048SMadhavan.Venkataraman@Sun.COM }
13608048SMadhavan.Venkataraman@Sun.COM 
13618048SMadhavan.Venkataraman@Sun.COM /*
13628048SMadhavan.Venkataraman@Sun.COM  * Normal callout cyclic handler.
13638048SMadhavan.Venkataraman@Sun.COM  */
13648048SMadhavan.Venkataraman@Sun.COM void
13658048SMadhavan.Venkataraman@Sun.COM callout_normal(callout_table_t *ct)
13668048SMadhavan.Venkataraman@Sun.COM {
13679039SMadhavan.Venkataraman@Sun.COM 	int i, exec;
13688048SMadhavan.Venkataraman@Sun.COM 
13698048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
13708048SMadhavan.Venkataraman@Sun.COM 	callout_heap_delete(ct);
13719039SMadhavan.Venkataraman@Sun.COM 	CALLOUT_EXEC_COMPUTE(ct, exec);
13728048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
13738048SMadhavan.Venkataraman@Sun.COM 
13749039SMadhavan.Venkataraman@Sun.COM 	for (i = 0; i < exec; i++) {
13758048SMadhavan.Venkataraman@Sun.COM 		ASSERT(ct->ct_taskq != NULL);
13768048SMadhavan.Venkataraman@Sun.COM 		(void) taskq_dispatch(ct->ct_taskq,
13778048SMadhavan.Venkataraman@Sun.COM 		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
13780Sstevel@tonic-gate 	}
13790Sstevel@tonic-gate }
13800Sstevel@tonic-gate 
13810Sstevel@tonic-gate /*
13828048SMadhavan.Venkataraman@Sun.COM  * Suspend callout processing.
13830Sstevel@tonic-gate  */
13848048SMadhavan.Venkataraman@Sun.COM static void
13858048SMadhavan.Venkataraman@Sun.COM callout_suspend(void)
13860Sstevel@tonic-gate {
13878048SMadhavan.Venkataraman@Sun.COM 	int t, f;
13888048SMadhavan.Venkataraman@Sun.COM 	callout_table_t *ct;
13898048SMadhavan.Venkataraman@Sun.COM 
13908048SMadhavan.Venkataraman@Sun.COM 	/*
13918048SMadhavan.Venkataraman@Sun.COM 	 * Traverse every callout table in the system and suspend callout
13928048SMadhavan.Venkataraman@Sun.COM 	 * processing.
13938048SMadhavan.Venkataraman@Sun.COM 	 *
13948048SMadhavan.Venkataraman@Sun.COM 	 * We need to suspend all the tables (including the inactive ones)
13958048SMadhavan.Venkataraman@Sun.COM 	 * so that if a table is made active while the suspend is still on,
13968048SMadhavan.Venkataraman@Sun.COM 	 * the table remains suspended.
13978048SMadhavan.Venkataraman@Sun.COM 	 */
13988048SMadhavan.Venkataraman@Sun.COM 	for (f = 0; f < max_ncpus; f++) {
13998048SMadhavan.Venkataraman@Sun.COM 		for (t = 0; t < CALLOUT_NTYPES; t++) {
14008048SMadhavan.Venkataraman@Sun.COM 			ct = &callout_table[CALLOUT_TABLE(t, f)];
14018048SMadhavan.Venkataraman@Sun.COM 
14028048SMadhavan.Venkataraman@Sun.COM 			mutex_enter(&ct->ct_mutex);
14038566SMadhavan.Venkataraman@Sun.COM 			ct->ct_suspend++;
14048048SMadhavan.Venkataraman@Sun.COM 			if (ct->ct_cyclic == CYCLIC_NONE) {
14058048SMadhavan.Venkataraman@Sun.COM 				mutex_exit(&ct->ct_mutex);
14068048SMadhavan.Venkataraman@Sun.COM 				continue;
14078048SMadhavan.Venkataraman@Sun.COM 			}
14088566SMadhavan.Venkataraman@Sun.COM 			if (ct->ct_suspend == 1)
14098566SMadhavan.Venkataraman@Sun.COM 				(void) cyclic_reprogram(ct->ct_cyclic,
14108566SMadhavan.Venkataraman@Sun.COM 				    CY_INFINITY);
14118048SMadhavan.Venkataraman@Sun.COM 			mutex_exit(&ct->ct_mutex);
14128048SMadhavan.Venkataraman@Sun.COM 		}
14138048SMadhavan.Venkataraman@Sun.COM 	}
14148048SMadhavan.Venkataraman@Sun.COM }
14158048SMadhavan.Venkataraman@Sun.COM 
14168048SMadhavan.Venkataraman@Sun.COM /*
14178048SMadhavan.Venkataraman@Sun.COM  * Resume callout processing.
14188048SMadhavan.Venkataraman@Sun.COM  */
14198048SMadhavan.Venkataraman@Sun.COM static void
14209334SMadhavan.Venkataraman@Sun.COM callout_resume(hrtime_t delta, int timechange)
14218048SMadhavan.Venkataraman@Sun.COM {
14228048SMadhavan.Venkataraman@Sun.COM 	hrtime_t exp;
14238048SMadhavan.Venkataraman@Sun.COM 	int t, f;
14248048SMadhavan.Venkataraman@Sun.COM 	callout_table_t *ct;
14250Sstevel@tonic-gate 
14268048SMadhavan.Venkataraman@Sun.COM 	/*
14278048SMadhavan.Venkataraman@Sun.COM 	 * Traverse every callout table in the system and resume callout
14288048SMadhavan.Venkataraman@Sun.COM 	 * processing. For active tables, perform any hrtime adjustments
14298048SMadhavan.Venkataraman@Sun.COM 	 * necessary.
14308048SMadhavan.Venkataraman@Sun.COM 	 */
14318048SMadhavan.Venkataraman@Sun.COM 	for (f = 0; f < max_ncpus; f++) {
14328048SMadhavan.Venkataraman@Sun.COM 		for (t = 0; t < CALLOUT_NTYPES; t++) {
14338048SMadhavan.Venkataraman@Sun.COM 			ct = &callout_table[CALLOUT_TABLE(t, f)];
14348048SMadhavan.Venkataraman@Sun.COM 
14358048SMadhavan.Venkataraman@Sun.COM 			mutex_enter(&ct->ct_mutex);
14368048SMadhavan.Venkataraman@Sun.COM 			if (ct->ct_cyclic == CYCLIC_NONE) {
14378566SMadhavan.Venkataraman@Sun.COM 				ct->ct_suspend--;
14388048SMadhavan.Venkataraman@Sun.COM 				mutex_exit(&ct->ct_mutex);
14398048SMadhavan.Venkataraman@Sun.COM 				continue;
14408048SMadhavan.Venkataraman@Sun.COM 			}
14418048SMadhavan.Venkataraman@Sun.COM 
14429334SMadhavan.Venkataraman@Sun.COM 			/*
14439334SMadhavan.Venkataraman@Sun.COM 			 * If a delta is specified, adjust the expirations in
14449334SMadhavan.Venkataraman@Sun.COM 			 * the heap by delta. Also, if the caller indicates
14459334SMadhavan.Venkataraman@Sun.COM 			 * a timechange, process that. This step also cleans
14469334SMadhavan.Venkataraman@Sun.COM 			 * out any empty callout lists that might happen to
14479334SMadhavan.Venkataraman@Sun.COM 			 * be there.
14489334SMadhavan.Venkataraman@Sun.COM 			 */
14499334SMadhavan.Venkataraman@Sun.COM 			(void) callout_heap_process(ct, delta, timechange);
14508048SMadhavan.Venkataraman@Sun.COM 
14518566SMadhavan.Venkataraman@Sun.COM 			ct->ct_suspend--;
14528566SMadhavan.Venkataraman@Sun.COM 			if (ct->ct_suspend == 0) {
14538566SMadhavan.Venkataraman@Sun.COM 				/*
14548566SMadhavan.Venkataraman@Sun.COM 				 * If the expired list is non-empty, then have
14558566SMadhavan.Venkataraman@Sun.COM 				 * the cyclic expire immediately. Else, program
14568566SMadhavan.Venkataraman@Sun.COM 				 * the cyclic based on the heap.
14578566SMadhavan.Venkataraman@Sun.COM 				 */
14588566SMadhavan.Venkataraman@Sun.COM 				if (ct->ct_expired.ch_head != NULL)
14598566SMadhavan.Venkataraman@Sun.COM 					exp = gethrtime();
14608566SMadhavan.Venkataraman@Sun.COM 				else if (ct->ct_heap_num > 0)
14619334SMadhavan.Venkataraman@Sun.COM 					exp = ct->ct_heap[0].ch_expiration;
14628566SMadhavan.Venkataraman@Sun.COM 				else
14638566SMadhavan.Venkataraman@Sun.COM 					exp = 0;
14648566SMadhavan.Venkataraman@Sun.COM 				if (exp != 0)
14658566SMadhavan.Venkataraman@Sun.COM 					(void) cyclic_reprogram(ct->ct_cyclic,
14668566SMadhavan.Venkataraman@Sun.COM 					    exp);
14678566SMadhavan.Venkataraman@Sun.COM 			}
14689334SMadhavan.Venkataraman@Sun.COM 
14698048SMadhavan.Venkataraman@Sun.COM 			mutex_exit(&ct->ct_mutex);
14708048SMadhavan.Venkataraman@Sun.COM 		}
14718048SMadhavan.Venkataraman@Sun.COM 	}
14720Sstevel@tonic-gate }
14730Sstevel@tonic-gate 
14740Sstevel@tonic-gate /*
14750Sstevel@tonic-gate  * Callback handler used by CPR to stop and resume callouts.
14769334SMadhavan.Venkataraman@Sun.COM  * The cyclic subsystem saves and restores hrtime during CPR.
14779334SMadhavan.Venkataraman@Sun.COM  * That is why callout_resume() is called with a 0 delta.
14789334SMadhavan.Venkataraman@Sun.COM  * Although hrtime is the same, hrestime (system time) has
14799334SMadhavan.Venkataraman@Sun.COM  * progressed during CPR. So, we have to indicate a time change
14809334SMadhavan.Venkataraman@Sun.COM  * to expire the absolute hrestime timers.
14810Sstevel@tonic-gate  */
14820Sstevel@tonic-gate /*ARGSUSED*/
14830Sstevel@tonic-gate static boolean_t
14840Sstevel@tonic-gate callout_cpr_callb(void *arg, int code)
14850Sstevel@tonic-gate {
14868048SMadhavan.Venkataraman@Sun.COM 	if (code == CB_CODE_CPR_CHKPT)
14878048SMadhavan.Venkataraman@Sun.COM 		callout_suspend();
14888048SMadhavan.Venkataraman@Sun.COM 	else
14899334SMadhavan.Venkataraman@Sun.COM 		callout_resume(0, 1);
14908048SMadhavan.Venkataraman@Sun.COM 
14918048SMadhavan.Venkataraman@Sun.COM 	return (B_TRUE);
14928048SMadhavan.Venkataraman@Sun.COM }
14938048SMadhavan.Venkataraman@Sun.COM 
14948048SMadhavan.Venkataraman@Sun.COM /*
14958048SMadhavan.Venkataraman@Sun.COM  * Callback handler invoked when the debugger is entered or exited.
14968048SMadhavan.Venkataraman@Sun.COM  */
14978048SMadhavan.Venkataraman@Sun.COM /*ARGSUSED*/
14988048SMadhavan.Venkataraman@Sun.COM static boolean_t
14998048SMadhavan.Venkataraman@Sun.COM callout_debug_callb(void *arg, int code)
15008048SMadhavan.Venkataraman@Sun.COM {
15018048SMadhavan.Venkataraman@Sun.COM 	hrtime_t delta;
15028048SMadhavan.Venkataraman@Sun.COM 
15038048SMadhavan.Venkataraman@Sun.COM 	/*
15048048SMadhavan.Venkataraman@Sun.COM 	 * When the system enters the debugger. make a note of the hrtime.
15058048SMadhavan.Venkataraman@Sun.COM 	 * When it is resumed, compute how long the system was in the
15068048SMadhavan.Venkataraman@Sun.COM 	 * debugger. This interval should not be counted for callouts.
15078048SMadhavan.Venkataraman@Sun.COM 	 */
15088048SMadhavan.Venkataraman@Sun.COM 	if (code == 0) {
15098048SMadhavan.Venkataraman@Sun.COM 		callout_suspend();
15108048SMadhavan.Venkataraman@Sun.COM 		callout_debug_hrtime = gethrtime();
15118048SMadhavan.Venkataraman@Sun.COM 	} else {
15128048SMadhavan.Venkataraman@Sun.COM 		delta = gethrtime() - callout_debug_hrtime;
15139334SMadhavan.Venkataraman@Sun.COM 		callout_resume(delta, 0);
15148048SMadhavan.Venkataraman@Sun.COM 	}
15158048SMadhavan.Venkataraman@Sun.COM 
15160Sstevel@tonic-gate 	return (B_TRUE);
15170Sstevel@tonic-gate }
15180Sstevel@tonic-gate 
15190Sstevel@tonic-gate /*
15209039SMadhavan.Venkataraman@Sun.COM  * Move the absolute hrestime callouts to the expired list. Then program the
15219039SMadhavan.Venkataraman@Sun.COM  * table's cyclic to expire immediately so that the callouts can be executed
15228048SMadhavan.Venkataraman@Sun.COM  * immediately.
15238048SMadhavan.Venkataraman@Sun.COM  */
15248048SMadhavan.Venkataraman@Sun.COM static void
15258048SMadhavan.Venkataraman@Sun.COM callout_hrestime_one(callout_table_t *ct)
15268048SMadhavan.Venkataraman@Sun.COM {
15279334SMadhavan.Venkataraman@Sun.COM 	hrtime_t expiration;
15288048SMadhavan.Venkataraman@Sun.COM 
15298048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
15308048SMadhavan.Venkataraman@Sun.COM 	if (ct->ct_heap_num == 0) {
15318048SMadhavan.Venkataraman@Sun.COM 		mutex_exit(&ct->ct_mutex);
15328048SMadhavan.Venkataraman@Sun.COM 		return;
15338048SMadhavan.Venkataraman@Sun.COM 	}
15348048SMadhavan.Venkataraman@Sun.COM 
15359334SMadhavan.Venkataraman@Sun.COM 	/*
15369334SMadhavan.Venkataraman@Sun.COM 	 * Walk the heap and process all the absolute hrestime entries.
15379334SMadhavan.Venkataraman@Sun.COM 	 */
15389334SMadhavan.Venkataraman@Sun.COM 	expiration = callout_heap_process(ct, 0, 1);
15398048SMadhavan.Venkataraman@Sun.COM 
15409334SMadhavan.Venkataraman@Sun.COM 	if ((expiration != 0) && (ct->ct_suspend == 0))
15419334SMadhavan.Venkataraman@Sun.COM 		(void) cyclic_reprogram(ct->ct_cyclic, expiration);
15429039SMadhavan.Venkataraman@Sun.COM 
15438048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
15448048SMadhavan.Venkataraman@Sun.COM }
15458048SMadhavan.Venkataraman@Sun.COM 
15468048SMadhavan.Venkataraman@Sun.COM /*
15478048SMadhavan.Venkataraman@Sun.COM  * This function is called whenever system time (hrestime) is changed
15488048SMadhavan.Venkataraman@Sun.COM  * explicitly. All the HRESTIME callouts must be expired at once.
15498048SMadhavan.Venkataraman@Sun.COM  */
15508048SMadhavan.Venkataraman@Sun.COM /*ARGSUSED*/
15518048SMadhavan.Venkataraman@Sun.COM void
15528048SMadhavan.Venkataraman@Sun.COM callout_hrestime(void)
15538048SMadhavan.Venkataraman@Sun.COM {
15548048SMadhavan.Venkataraman@Sun.COM 	int t, f;
15558048SMadhavan.Venkataraman@Sun.COM 	callout_table_t *ct;
15568048SMadhavan.Venkataraman@Sun.COM 
15578048SMadhavan.Venkataraman@Sun.COM 	/*
15588048SMadhavan.Venkataraman@Sun.COM 	 * Traverse every callout table in the system and process the hrestime
15598048SMadhavan.Venkataraman@Sun.COM 	 * callouts therein.
15608048SMadhavan.Venkataraman@Sun.COM 	 *
15618048SMadhavan.Venkataraman@Sun.COM 	 * We look at all the tables because we don't know which ones were
15628048SMadhavan.Venkataraman@Sun.COM 	 * onlined and offlined in the past. The offlined tables may still
15638048SMadhavan.Venkataraman@Sun.COM 	 * have active cyclics processing timers somewhere.
15648048SMadhavan.Venkataraman@Sun.COM 	 */
15658048SMadhavan.Venkataraman@Sun.COM 	for (f = 0; f < max_ncpus; f++) {
15668048SMadhavan.Venkataraman@Sun.COM 		for (t = 0; t < CALLOUT_NTYPES; t++) {
15678048SMadhavan.Venkataraman@Sun.COM 			ct = &callout_table[CALLOUT_TABLE(t, f)];
15688048SMadhavan.Venkataraman@Sun.COM 			callout_hrestime_one(ct);
15698048SMadhavan.Venkataraman@Sun.COM 		}
15708048SMadhavan.Venkataraman@Sun.COM 	}
15718048SMadhavan.Venkataraman@Sun.COM }
15728048SMadhavan.Venkataraman@Sun.COM 
15738048SMadhavan.Venkataraman@Sun.COM /*
15748048SMadhavan.Venkataraman@Sun.COM  * Create the hash tables for this callout table.
15758048SMadhavan.Venkataraman@Sun.COM  */
15768048SMadhavan.Venkataraman@Sun.COM static void
15778048SMadhavan.Venkataraman@Sun.COM callout_hash_init(callout_table_t *ct)
15788048SMadhavan.Venkataraman@Sun.COM {
15798048SMadhavan.Venkataraman@Sun.COM 	size_t size;
15808048SMadhavan.Venkataraman@Sun.COM 
15818048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
15828048SMadhavan.Venkataraman@Sun.COM 	ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL));
15838048SMadhavan.Venkataraman@Sun.COM 
15848048SMadhavan.Venkataraman@Sun.COM 	size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;
15858048SMadhavan.Venkataraman@Sun.COM 	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
15868048SMadhavan.Venkataraman@Sun.COM 	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
15878048SMadhavan.Venkataraman@Sun.COM }
15888048SMadhavan.Venkataraman@Sun.COM 
15898048SMadhavan.Venkataraman@Sun.COM /*
15908048SMadhavan.Venkataraman@Sun.COM  * Create per-callout table kstats.
15918048SMadhavan.Venkataraman@Sun.COM  */
15928048SMadhavan.Venkataraman@Sun.COM static void
15938048SMadhavan.Venkataraman@Sun.COM callout_kstat_init(callout_table_t *ct)
15948048SMadhavan.Venkataraman@Sun.COM {
15958048SMadhavan.Venkataraman@Sun.COM 	callout_stat_type_t stat;
15968048SMadhavan.Venkataraman@Sun.COM 	kstat_t *ct_kstats;
15978048SMadhavan.Venkataraman@Sun.COM 	int ndx;
15988048SMadhavan.Venkataraman@Sun.COM 
15998048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
16008048SMadhavan.Venkataraman@Sun.COM 	ASSERT(ct->ct_kstats == NULL);
16018048SMadhavan.Venkataraman@Sun.COM 
16028048SMadhavan.Venkataraman@Sun.COM 	ndx = ct - callout_table;
16038048SMadhavan.Venkataraman@Sun.COM 	ct_kstats = kstat_create("unix", ndx, "callout",
16048048SMadhavan.Venkataraman@Sun.COM 	    "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL);
16058048SMadhavan.Venkataraman@Sun.COM 
16068048SMadhavan.Venkataraman@Sun.COM 	if (ct_kstats == NULL) {
16078048SMadhavan.Venkataraman@Sun.COM 		cmn_err(CE_WARN, "kstat_create for callout table %p failed",
16088048SMadhavan.Venkataraman@Sun.COM 		    (void *)ct);
16098048SMadhavan.Venkataraman@Sun.COM 	} else {
16108048SMadhavan.Venkataraman@Sun.COM 		ct_kstats->ks_data = ct->ct_kstat_data;
16118048SMadhavan.Venkataraman@Sun.COM 		for (stat = 0; stat < CALLOUT_NUM_STATS; stat++)
16128048SMadhavan.Venkataraman@Sun.COM 			kstat_named_init(&ct->ct_kstat_data[stat],
16138048SMadhavan.Venkataraman@Sun.COM 			    callout_kstat_names[stat], KSTAT_DATA_INT64);
16148048SMadhavan.Venkataraman@Sun.COM 		ct->ct_kstats = ct_kstats;
16158048SMadhavan.Venkataraman@Sun.COM 		kstat_install(ct_kstats);
16168048SMadhavan.Venkataraman@Sun.COM 	}
16178048SMadhavan.Venkataraman@Sun.COM }
16188048SMadhavan.Venkataraman@Sun.COM 
16198048SMadhavan.Venkataraman@Sun.COM static void
16208048SMadhavan.Venkataraman@Sun.COM callout_cyclic_init(callout_table_t *ct)
16218048SMadhavan.Venkataraman@Sun.COM {
16228048SMadhavan.Venkataraman@Sun.COM 	cyc_handler_t hdlr;
16238048SMadhavan.Venkataraman@Sun.COM 	cyc_time_t when;
16248048SMadhavan.Venkataraman@Sun.COM 	processorid_t seqid;
16258048SMadhavan.Venkataraman@Sun.COM 	int t;
1626*11655SMadhavan.Venkataraman@Sun.COM 	cyclic_id_t cyclic;
16278048SMadhavan.Venkataraman@Sun.COM 
16288048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&ct->ct_mutex));
16298048SMadhavan.Venkataraman@Sun.COM 
16308048SMadhavan.Venkataraman@Sun.COM 	t = CALLOUT_TABLE_TYPE(ct);
16318048SMadhavan.Venkataraman@Sun.COM 	seqid = CALLOUT_TABLE_SEQID(ct);
16328048SMadhavan.Venkataraman@Sun.COM 
16338048SMadhavan.Venkataraman@Sun.COM 	/*
16348048SMadhavan.Venkataraman@Sun.COM 	 * Create the taskq thread if the table type is normal.
16358048SMadhavan.Venkataraman@Sun.COM 	 * Realtime tables are handled at PIL1 by a softint
16368048SMadhavan.Venkataraman@Sun.COM 	 * handler.
16378048SMadhavan.Venkataraman@Sun.COM 	 */
16388048SMadhavan.Venkataraman@Sun.COM 	if (t == CALLOUT_NORMAL) {
16398048SMadhavan.Venkataraman@Sun.COM 		ASSERT(ct->ct_taskq == NULL);
16408048SMadhavan.Venkataraman@Sun.COM 		/*
16418048SMadhavan.Venkataraman@Sun.COM 		 * Each callout thread consumes exactly one
16428048SMadhavan.Venkataraman@Sun.COM 		 * task structure while active.  Therefore,
16439334SMadhavan.Venkataraman@Sun.COM 		 * prepopulating with 2 * callout_threads tasks
16448048SMadhavan.Venkataraman@Sun.COM 		 * ensures that there's at least one task per
16458048SMadhavan.Venkataraman@Sun.COM 		 * thread that's either scheduled or on the
16468048SMadhavan.Venkataraman@Sun.COM 		 * freelist.  In turn, this guarantees that
16478048SMadhavan.Venkataraman@Sun.COM 		 * taskq_dispatch() will always either succeed
16488048SMadhavan.Venkataraman@Sun.COM 		 * (because there's a free task structure) or
16498048SMadhavan.Venkataraman@Sun.COM 		 * be unnecessary (because "callout_excute(ct)"
16508048SMadhavan.Venkataraman@Sun.COM 		 * has already scheduled).
16518048SMadhavan.Venkataraman@Sun.COM 		 */
16528048SMadhavan.Venkataraman@Sun.COM 		ct->ct_taskq =
16538048SMadhavan.Venkataraman@Sun.COM 		    taskq_create_instance("callout_taskq", seqid,
16549334SMadhavan.Venkataraman@Sun.COM 		    callout_threads, maxclsyspri,
16559334SMadhavan.Venkataraman@Sun.COM 		    2 * callout_threads, 2 * callout_threads,
16568048SMadhavan.Venkataraman@Sun.COM 		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
16578048SMadhavan.Venkataraman@Sun.COM 	}
16588048SMadhavan.Venkataraman@Sun.COM 
16598048SMadhavan.Venkataraman@Sun.COM 	/*
16608048SMadhavan.Venkataraman@Sun.COM 	 * callouts can only be created in a table whose
16618048SMadhavan.Venkataraman@Sun.COM 	 * cyclic has been initialized.
16628048SMadhavan.Venkataraman@Sun.COM 	 */
16638048SMadhavan.Venkataraman@Sun.COM 	ASSERT(ct->ct_heap_num == 0);
16648048SMadhavan.Venkataraman@Sun.COM 
16658048SMadhavan.Venkataraman@Sun.COM 	/*
1666*11655SMadhavan.Venkataraman@Sun.COM 	 * Drop the mutex before creating the callout cyclics. cyclic_add()
1667*11655SMadhavan.Venkataraman@Sun.COM 	 * could potentially expand the cyclic heap. We don't want to be
1668*11655SMadhavan.Venkataraman@Sun.COM 	 * holding the callout table mutex in that case. Note that this
1669*11655SMadhavan.Venkataraman@Sun.COM 	 * function is called during CPU online. cpu_lock is held at this
1670*11655SMadhavan.Venkataraman@Sun.COM 	 * point. So, only one thread can be executing the cyclic add logic
1671*11655SMadhavan.Venkataraman@Sun.COM 	 * below at any time.
1672*11655SMadhavan.Venkataraman@Sun.COM 	 */
1673*11655SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&ct->ct_mutex);
1674*11655SMadhavan.Venkataraman@Sun.COM 
1675*11655SMadhavan.Venkataraman@Sun.COM 	/*
16768048SMadhavan.Venkataraman@Sun.COM 	 * Create the callout table cyclics.
16779039SMadhavan.Venkataraman@Sun.COM 	 *
16789039SMadhavan.Venkataraman@Sun.COM 	 * The realtime cyclic handler executes at low PIL. The normal cyclic
16799039SMadhavan.Venkataraman@Sun.COM 	 * handler executes at lock PIL. This is because there are cases
16809039SMadhavan.Venkataraman@Sun.COM 	 * where code can block at PIL > 1 waiting for a normal callout handler
16819039SMadhavan.Venkataraman@Sun.COM 	 * to unblock it directly or indirectly. If the normal cyclic were to
16829039SMadhavan.Venkataraman@Sun.COM 	 * be executed at low PIL, it could get blocked out by the waiter
16839039SMadhavan.Venkataraman@Sun.COM 	 * and cause a deadlock.
16848048SMadhavan.Venkataraman@Sun.COM 	 */
16858048SMadhavan.Venkataraman@Sun.COM 	ASSERT(ct->ct_cyclic == CYCLIC_NONE);
16868048SMadhavan.Venkataraman@Sun.COM 
16878048SMadhavan.Venkataraman@Sun.COM 	hdlr.cyh_func = (cyc_func_t)CALLOUT_CYCLIC_HANDLER(t);
16889039SMadhavan.Venkataraman@Sun.COM 	if (ct->ct_type == CALLOUT_REALTIME)
16899039SMadhavan.Venkataraman@Sun.COM 		hdlr.cyh_level = callout_realtime_level;
16909039SMadhavan.Venkataraman@Sun.COM 	else
16919039SMadhavan.Venkataraman@Sun.COM 		hdlr.cyh_level = callout_normal_level;
16928048SMadhavan.Venkataraman@Sun.COM 	hdlr.cyh_arg = ct;
16938048SMadhavan.Venkataraman@Sun.COM 	when.cyt_when = CY_INFINITY;
16948048SMadhavan.Venkataraman@Sun.COM 	when.cyt_interval = CY_INFINITY;
16958048SMadhavan.Venkataraman@Sun.COM 
1696*11655SMadhavan.Venkataraman@Sun.COM 	cyclic = cyclic_add(&hdlr, &when);
1697*11655SMadhavan.Venkataraman@Sun.COM 
1698*11655SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&ct->ct_mutex);
1699*11655SMadhavan.Venkataraman@Sun.COM 	ct->ct_cyclic = cyclic;
17008048SMadhavan.Venkataraman@Sun.COM }
17018048SMadhavan.Venkataraman@Sun.COM 
17028048SMadhavan.Venkataraman@Sun.COM void
17038048SMadhavan.Venkataraman@Sun.COM callout_cpu_online(cpu_t *cp)
17048048SMadhavan.Venkataraman@Sun.COM {
17058048SMadhavan.Venkataraman@Sun.COM 	lgrp_handle_t hand;
17068048SMadhavan.Venkataraman@Sun.COM 	callout_cache_t *cache;
17078048SMadhavan.Venkataraman@Sun.COM 	char s[KMEM_CACHE_NAMELEN];
17088048SMadhavan.Venkataraman@Sun.COM 	callout_table_t *ct;
17098048SMadhavan.Venkataraman@Sun.COM 	processorid_t seqid;
17108048SMadhavan.Venkataraman@Sun.COM 	int t;
17118048SMadhavan.Venkataraman@Sun.COM 
17128048SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&cpu_lock));
17138048SMadhavan.Venkataraman@Sun.COM 
17148048SMadhavan.Venkataraman@Sun.COM 	/*
17158048SMadhavan.Venkataraman@Sun.COM 	 * Locate the cache corresponding to the onlined CPU's lgroup.
17168048SMadhavan.Venkataraman@Sun.COM 	 * Note that access to callout_caches is protected by cpu_lock.
17178048SMadhavan.Venkataraman@Sun.COM 	 */
17188048SMadhavan.Venkataraman@Sun.COM 	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
17198048SMadhavan.Venkataraman@Sun.COM 	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
17208048SMadhavan.Venkataraman@Sun.COM 		if (cache->cc_hand == hand)
17218048SMadhavan.Venkataraman@Sun.COM 			break;
17228048SMadhavan.Venkataraman@Sun.COM 	}
17238048SMadhavan.Venkataraman@Sun.COM 
17248048SMadhavan.Venkataraman@Sun.COM 	/*
17258048SMadhavan.Venkataraman@Sun.COM 	 * If not found, create one. The caches are never destroyed.
17268048SMadhavan.Venkataraman@Sun.COM 	 */
17278048SMadhavan.Venkataraman@Sun.COM 	if (cache == NULL) {
17288048SMadhavan.Venkataraman@Sun.COM 		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
17298048SMadhavan.Venkataraman@Sun.COM 		cache->cc_hand = hand;
17308048SMadhavan.Venkataraman@Sun.COM 		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
17318048SMadhavan.Venkataraman@Sun.COM 		    (long)hand);
17328048SMadhavan.Venkataraman@Sun.COM 		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
17338048SMadhavan.Venkataraman@Sun.COM 		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
17348048SMadhavan.Venkataraman@Sun.COM 		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
17358048SMadhavan.Venkataraman@Sun.COM 		    (long)hand);
17368048SMadhavan.Venkataraman@Sun.COM 		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
17378048SMadhavan.Venkataraman@Sun.COM 		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
17388048SMadhavan.Venkataraman@Sun.COM 		cache->cc_next = callout_caches;
17398048SMadhavan.Venkataraman@Sun.COM 		callout_caches = cache;
17408048SMadhavan.Venkataraman@Sun.COM 	}
17418048SMadhavan.Venkataraman@Sun.COM 
17428048SMadhavan.Venkataraman@Sun.COM 	seqid = cp->cpu_seqid;
17438048SMadhavan.Venkataraman@Sun.COM 
17448048SMadhavan.Venkataraman@Sun.COM 	for (t = 0; t < CALLOUT_NTYPES; t++) {
17458048SMadhavan.Venkataraman@Sun.COM 		ct = &callout_table[CALLOUT_TABLE(t, seqid)];
17468048SMadhavan.Venkataraman@Sun.COM 
17478048SMadhavan.Venkataraman@Sun.COM 		mutex_enter(&ct->ct_mutex);
17488048SMadhavan.Venkataraman@Sun.COM 		/*
17498048SMadhavan.Venkataraman@Sun.COM 		 * Store convinience pointers to the kmem caches
17508048SMadhavan.Venkataraman@Sun.COM 		 * in the callout table. These assignments should always be
17518048SMadhavan.Venkataraman@Sun.COM 		 * done as callout tables can map to different physical
17528048SMadhavan.Venkataraman@Sun.COM 		 * CPUs each time.
17538048SMadhavan.Venkataraman@Sun.COM 		 */
17548048SMadhavan.Venkataraman@Sun.COM 		ct->ct_cache = cache->cc_cache;
17558048SMadhavan.Venkataraman@Sun.COM 		ct->ct_lcache = cache->cc_lcache;
17568048SMadhavan.Venkataraman@Sun.COM 
17578048SMadhavan.Venkataraman@Sun.COM 		/*
17588048SMadhavan.Venkataraman@Sun.COM 		 * We use the heap pointer to check if stuff has been
17598048SMadhavan.Venkataraman@Sun.COM 		 * initialized for this callout table.
17608048SMadhavan.Venkataraman@Sun.COM 		 */
17618048SMadhavan.Venkataraman@Sun.COM 		if (ct->ct_heap == NULL) {
17628048SMadhavan.Venkataraman@Sun.COM 			callout_heap_init(ct);
17638048SMadhavan.Venkataraman@Sun.COM 			callout_hash_init(ct);
17648048SMadhavan.Venkataraman@Sun.COM 			callout_kstat_init(ct);
17658048SMadhavan.Venkataraman@Sun.COM 			callout_cyclic_init(ct);
17668048SMadhavan.Venkataraman@Sun.COM 		}
17678048SMadhavan.Venkataraman@Sun.COM 
17688048SMadhavan.Venkataraman@Sun.COM 		mutex_exit(&ct->ct_mutex);
17698048SMadhavan.Venkataraman@Sun.COM 
17708048SMadhavan.Venkataraman@Sun.COM 		/*
17718566SMadhavan.Venkataraman@Sun.COM 		 * Move the cyclic to this CPU by doing a bind.
17728048SMadhavan.Venkataraman@Sun.COM 		 */
17738048SMadhavan.Venkataraman@Sun.COM 		cyclic_bind(ct->ct_cyclic, cp, NULL);
17748566SMadhavan.Venkataraman@Sun.COM 	}
17758566SMadhavan.Venkataraman@Sun.COM }
17768566SMadhavan.Venkataraman@Sun.COM 
17778566SMadhavan.Venkataraman@Sun.COM void
17788566SMadhavan.Venkataraman@Sun.COM callout_cpu_offline(cpu_t *cp)
17798566SMadhavan.Venkataraman@Sun.COM {
17808566SMadhavan.Venkataraman@Sun.COM 	callout_table_t *ct;
17818566SMadhavan.Venkataraman@Sun.COM 	processorid_t seqid;
17828566SMadhavan.Venkataraman@Sun.COM 	int t;
17838566SMadhavan.Venkataraman@Sun.COM 
17848566SMadhavan.Venkataraman@Sun.COM 	ASSERT(MUTEX_HELD(&cpu_lock));
17858566SMadhavan.Venkataraman@Sun.COM 
17868566SMadhavan.Venkataraman@Sun.COM 	seqid = cp->cpu_seqid;
17878566SMadhavan.Venkataraman@Sun.COM 
17888566SMadhavan.Venkataraman@Sun.COM 	for (t = 0; t < CALLOUT_NTYPES; t++) {
17898566SMadhavan.Venkataraman@Sun.COM 		ct = &callout_table[CALLOUT_TABLE(t, seqid)];
17908566SMadhavan.Venkataraman@Sun.COM 
17918566SMadhavan.Venkataraman@Sun.COM 		/*
17928566SMadhavan.Venkataraman@Sun.COM 		 * Unbind the cyclic. This will allow the cyclic subsystem
17938566SMadhavan.Venkataraman@Sun.COM 		 * to juggle the cyclic during CPU offline.
17948566SMadhavan.Venkataraman@Sun.COM 		 */
17958048SMadhavan.Venkataraman@Sun.COM 		cyclic_bind(ct->ct_cyclic, NULL, NULL);
17968048SMadhavan.Venkataraman@Sun.COM 	}
17978048SMadhavan.Venkataraman@Sun.COM }
17988048SMadhavan.Venkataraman@Sun.COM 
17998048SMadhavan.Venkataraman@Sun.COM /*
18008048SMadhavan.Venkataraman@Sun.COM  * This is called to perform per-CPU initialization for slave CPUs at
18018048SMadhavan.Venkataraman@Sun.COM  * boot time.
18028048SMadhavan.Venkataraman@Sun.COM  */
18038048SMadhavan.Venkataraman@Sun.COM void
18048048SMadhavan.Venkataraman@Sun.COM callout_mp_init(void)
18058048SMadhavan.Venkataraman@Sun.COM {
18068048SMadhavan.Venkataraman@Sun.COM 	cpu_t *cp;
18078048SMadhavan.Venkataraman@Sun.COM 
18088048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&cpu_lock);
18098048SMadhavan.Venkataraman@Sun.COM 
18108048SMadhavan.Venkataraman@Sun.COM 	cp = cpu_active;
18118048SMadhavan.Venkataraman@Sun.COM 	do {
18128048SMadhavan.Venkataraman@Sun.COM 		callout_cpu_online(cp);
18138048SMadhavan.Venkataraman@Sun.COM 	} while ((cp = cp->cpu_next_onln) != cpu_active);
18148048SMadhavan.Venkataraman@Sun.COM 
18158048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&cpu_lock);
18168048SMadhavan.Venkataraman@Sun.COM }
18178048SMadhavan.Venkataraman@Sun.COM 
18188048SMadhavan.Venkataraman@Sun.COM /*
18190Sstevel@tonic-gate  * Initialize all callout tables.  Called at boot time just before clkstart().
18200Sstevel@tonic-gate  */
18210Sstevel@tonic-gate void
18220Sstevel@tonic-gate callout_init(void)
18230Sstevel@tonic-gate {
18240Sstevel@tonic-gate 	int f, t;
18258048SMadhavan.Venkataraman@Sun.COM 	size_t size;
18260Sstevel@tonic-gate 	int table_id;
18270Sstevel@tonic-gate 	callout_table_t *ct;
18288048SMadhavan.Venkataraman@Sun.COM 	long bits, fanout;
18298048SMadhavan.Venkataraman@Sun.COM 	uintptr_t buf;
18300Sstevel@tonic-gate 
18318048SMadhavan.Venkataraman@Sun.COM 	/*
18328048SMadhavan.Venkataraman@Sun.COM 	 * Initialize callout globals.
18338048SMadhavan.Venkataraman@Sun.COM 	 */
18348048SMadhavan.Venkataraman@Sun.COM 	bits = 0;
18358048SMadhavan.Venkataraman@Sun.COM 	for (fanout = 1; (fanout < max_ncpus); fanout <<= 1)
18368048SMadhavan.Venkataraman@Sun.COM 		bits++;
18378048SMadhavan.Venkataraman@Sun.COM 	callout_table_bits = CALLOUT_TYPE_BITS + bits;
18388048SMadhavan.Venkataraman@Sun.COM 	callout_table_mask = (1 << callout_table_bits) - 1;
18398048SMadhavan.Venkataraman@Sun.COM 	callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT;
18408048SMadhavan.Venkataraman@Sun.COM 	callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS);
18418566SMadhavan.Venkataraman@Sun.COM 	callout_max_ticks = CALLOUT_MAX_TICKS;
18429334SMadhavan.Venkataraman@Sun.COM 	if (callout_min_reap == 0)
18439334SMadhavan.Venkataraman@Sun.COM 		callout_min_reap = CALLOUT_MIN_REAP;
18440Sstevel@tonic-gate 
18459334SMadhavan.Venkataraman@Sun.COM 	if (callout_tolerance <= 0)
18469334SMadhavan.Venkataraman@Sun.COM 		callout_tolerance = CALLOUT_TOLERANCE;
18479334SMadhavan.Venkataraman@Sun.COM 	if (callout_threads <= 0)
18489334SMadhavan.Venkataraman@Sun.COM 		callout_threads = CALLOUT_THREADS;
18498048SMadhavan.Venkataraman@Sun.COM 
18508048SMadhavan.Venkataraman@Sun.COM 	/*
18518048SMadhavan.Venkataraman@Sun.COM 	 * Allocate all the callout tables based on max_ncpus. We have chosen
18528048SMadhavan.Venkataraman@Sun.COM 	 * to do boot-time allocation instead of dynamic allocation because:
18538048SMadhavan.Venkataraman@Sun.COM 	 *
18548048SMadhavan.Venkataraman@Sun.COM 	 *	- the size of the callout tables is not too large.
18558048SMadhavan.Venkataraman@Sun.COM 	 *	- there are race conditions involved in making this dynamic.
18568048SMadhavan.Venkataraman@Sun.COM 	 *	- the hash tables that go with the callout tables consume
18578048SMadhavan.Venkataraman@Sun.COM 	 *	  most of the memory and they are only allocated in
18588048SMadhavan.Venkataraman@Sun.COM 	 *	  callout_cpu_online().
18598048SMadhavan.Venkataraman@Sun.COM 	 *
18608048SMadhavan.Venkataraman@Sun.COM 	 * Each CPU has two tables that are consecutive in the array. The first
18618048SMadhavan.Venkataraman@Sun.COM 	 * one is for realtime callouts and the second one is for normal ones.
18628048SMadhavan.Venkataraman@Sun.COM 	 *
18638048SMadhavan.Venkataraman@Sun.COM 	 * We do this alignment dance to make sure that callout table
18648048SMadhavan.Venkataraman@Sun.COM 	 * structures will always be on a cache line boundary.
18658048SMadhavan.Venkataraman@Sun.COM 	 */
18668048SMadhavan.Venkataraman@Sun.COM 	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
18678048SMadhavan.Venkataraman@Sun.COM 	size += CALLOUT_ALIGN;
18688048SMadhavan.Venkataraman@Sun.COM 	buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP);
18698048SMadhavan.Venkataraman@Sun.COM 	callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN);
18708048SMadhavan.Venkataraman@Sun.COM 
18718048SMadhavan.Venkataraman@Sun.COM 	size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS;
18728048SMadhavan.Venkataraman@Sun.COM 	/*
18738048SMadhavan.Venkataraman@Sun.COM 	 * Now, initialize the tables for all the CPUs.
18748048SMadhavan.Venkataraman@Sun.COM 	 */
18758048SMadhavan.Venkataraman@Sun.COM 	for (f = 0; f < max_ncpus; f++) {
18768048SMadhavan.Venkataraman@Sun.COM 		for (t = 0; t < CALLOUT_NTYPES; t++) {
18770Sstevel@tonic-gate 			table_id = CALLOUT_TABLE(t, f);
18788048SMadhavan.Venkataraman@Sun.COM 			ct = &callout_table[table_id];
18798566SMadhavan.Venkataraman@Sun.COM 			ct->ct_type = t;
18808048SMadhavan.Venkataraman@Sun.COM 			mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
18816422Sqiao 			/*
18828048SMadhavan.Venkataraman@Sun.COM 			 * Precompute the base IDs for long and short-term
18838048SMadhavan.Venkataraman@Sun.COM 			 * legacy IDs. This makes ID generation during
18848048SMadhavan.Venkataraman@Sun.COM 			 * timeout() fast.
18856422Sqiao 			 */
18868048SMadhavan.Venkataraman@Sun.COM 			ct->ct_short_id = CALLOUT_SHORT_ID(table_id);
18878048SMadhavan.Venkataraman@Sun.COM 			ct->ct_long_id = CALLOUT_LONG_ID(table_id);
18888048SMadhavan.Venkataraman@Sun.COM 			/*
18898048SMadhavan.Venkataraman@Sun.COM 			 * Precompute the base ID for generation-based IDs.
18908048SMadhavan.Venkataraman@Sun.COM 			 * Note that when the first ID gets allocated, the
18918048SMadhavan.Venkataraman@Sun.COM 			 * ID will wrap. This will cause the generation
18928048SMadhavan.Venkataraman@Sun.COM 			 * number to be incremented to 1.
18938048SMadhavan.Venkataraman@Sun.COM 			 */
18948048SMadhavan.Venkataraman@Sun.COM 			ct->ct_gen_id = CALLOUT_SHORT_ID(table_id);
18958048SMadhavan.Venkataraman@Sun.COM 			/*
18968048SMadhavan.Venkataraman@Sun.COM 			 * Initialize the cyclic as NONE. This will get set
18978048SMadhavan.Venkataraman@Sun.COM 			 * during CPU online. This is so that partially
18988048SMadhavan.Venkataraman@Sun.COM 			 * populated systems will only have the required
18998048SMadhavan.Venkataraman@Sun.COM 			 * number of cyclics, not more.
19008048SMadhavan.Venkataraman@Sun.COM 			 */
19018048SMadhavan.Venkataraman@Sun.COM 			ct->ct_cyclic = CYCLIC_NONE;
19028048SMadhavan.Venkataraman@Sun.COM 			ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP);
19030Sstevel@tonic-gate 		}
19040Sstevel@tonic-gate 	}
19058048SMadhavan.Venkataraman@Sun.COM 
19068048SMadhavan.Venkataraman@Sun.COM 	/*
19078048SMadhavan.Venkataraman@Sun.COM 	 * Add the callback for CPR. This is called during checkpoint
19088048SMadhavan.Venkataraman@Sun.COM 	 * resume to suspend and resume callouts.
19098048SMadhavan.Venkataraman@Sun.COM 	 */
19108048SMadhavan.Venkataraman@Sun.COM 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT,
19118048SMadhavan.Venkataraman@Sun.COM 	    "callout_cpr");
19128048SMadhavan.Venkataraman@Sun.COM 	(void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER,
19138048SMadhavan.Venkataraman@Sun.COM 	    "callout_debug");
19148048SMadhavan.Venkataraman@Sun.COM 
19158048SMadhavan.Venkataraman@Sun.COM 	/*
19168048SMadhavan.Venkataraman@Sun.COM 	 * Call the per-CPU initialization function for the boot CPU. This
19178048SMadhavan.Venkataraman@Sun.COM 	 * is done here because the function is not called automatically for
19188048SMadhavan.Venkataraman@Sun.COM 	 * the boot CPU from the CPU online/offline hooks. Note that the
19198048SMadhavan.Venkataraman@Sun.COM 	 * CPU lock is taken here because of convention.
19208048SMadhavan.Venkataraman@Sun.COM 	 */
19218048SMadhavan.Venkataraman@Sun.COM 	mutex_enter(&cpu_lock);
19228048SMadhavan.Venkataraman@Sun.COM 	callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)];
19238048SMadhavan.Venkataraman@Sun.COM 	callout_cpu_online(CPU);
19248048SMadhavan.Venkataraman@Sun.COM 	mutex_exit(&cpu_lock);
19250Sstevel@tonic-gate }
1926