/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Squeues: General purpose serialization mechanism
 * ------------------------------------------------
 *
 * Background:
 * -----------
 *
 * This is a general purpose high-performance serialization mechanism
 * currently used by TCP/IP. It is implemented by means of a per-CPU
 * queue, a worker thread and a polling thread which are bound to the
 * CPU associated with the squeue. The squeue is strictly FIFO for both
 * the read and write side and only one thread can process it at any
 * given time. The design goal of the squeue was to offer a very high
 * degree of parallelization (on a per H/W execution pipeline basis)
 * with at most one queuing.
 *
 * A module needing protection typically calls the SQUEUE_ENTER_ONE()
 * or SQUEUE_ENTER() macro as soon as a thread enters the module
 * from either direction. For each packet, the processing function
 * and argument are stored in the mblk itself. When the packet is ready
 * to be processed, the squeue retrieves the stored function and calls
 * it with the supplied argument and the pointer to the packet itself.
 * The called function can assume that no other thread is processing
 * the squeue while it is executing.
 *
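 * As a minimal sketch of the dispatch (derived from the fast path in
 * squeue_enter() below; the SQUEUE_ENTER*() macros do the stashing),
 * the function and its argument travel in otherwise-unused mblk fields:
 *
 *	mp->b_queue = (mblk_t *)proc;	the sqproc_t to run
 *	mp->b_prev = (mblk_t *)connp;	its argument (the conn_t)
 *
 * and once the squeue owns the perimeter it clears those fields and
 * calls (*proc)(connp, mp, sqp, ira).
 *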
 * Squeue/connection binding:
 * --------------------------
 *
 * TCP/IP uses an IP classifier in conjunction with squeues: specific
 * connections are assigned to a specific squeue (based on various
 * policies) at connection creation time. Once assigned, the connection
 * to squeue mapping is never changed and all future packets for that
 * connection are processed on that squeue. The connection ("conn") to
 * squeue mapping is stored in the "conn_t" member "conn_sqp".
 *
 * Since the processing of a connection cuts across multiple layers
 * but still allows packets for different connections to be processed
 * on other CPUs/squeues, a squeue is also termed a "Vertical Perimeter"
 * or "Per Connection Vertical Perimeter".
 *
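 * In pseudo-C (pick_squeue() is a hypothetical stand-in for the
 * classifier's placement policy, which lives outside this file):
 *
 *	connp->conn_sqp = pick_squeue(connp);	chosen once, at creation
 *	...
 *	SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc, connp, ira,
 *	    SQ_FILL, tag);			every subsequent packet
 *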
 * Processing Model:
 * -----------------
 *
 * An squeue doesn't necessarily process packets with its own worker
 * thread. Callers can choose to just queue the packet, to process
 * their packet only if nothing is queued, or to drain and process.
 * The first two modes are typically employed when the packet was
 * generated while already doing processing behind the squeue, and the
 * last mode (drain and process) is typically employed when a thread
 * is entering the squeue for the first time. The squeue still imposes
 * a finite time limit for which an external thread can do processing,
 * after which it switches processing to its own worker thread.
 *
 * Once created, squeues are never deleted. Hence squeue pointers are
 * always valid. This means that functions outside the squeue can still
 * refer safely to conn_sqp and there is no need for ref counts.
 *
 * Only a thread executing in the squeue can change the squeue of the
 * connection. It does so by calling a squeue framework function.
 * After changing the squeue, the thread must leave the squeue. It must
 * not continue to execute any code that needs squeue protection.
 *
 * The squeue framework, after entering the squeue, checks if the
 * current squeue matches the conn_sqp. If the check fails, the packet
 * is delivered to the right squeue.
 *
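 * These modes map onto the process_flag argument of squeue_enter()
 * below:
 *
 *	SQ_FILL		only queue the packet (waking the worker if no
 *			one is processing the squeue)
 *	SQ_NODRAIN	process the packet inline if nothing is queued,
 *			otherwise just queue it
 *	SQ_PROCESS	process the packet inline and then drain the
 *			backlog, subject to the time limit
 *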
 * Polling Model:
 * --------------
 *
 * An squeue can control the rate of packet arrival into itself from
 * the NIC or a specific Rx ring within a NIC. As part of capability
 * negotiation between IP and the MAC layer, an squeue is created for
 * each TCP soft ring (or TCP Rx ring - to be implemented in future).
 * As part of this negotiation, the squeue gets a cookie for the
 * underlying soft ring or Rx ring, a function to turn off incoming
 * packets and a function to call to poll for packets. This helps
 * schedule the receive side packet processing so that queue backlog
 * doesn't build up and packet processing doesn't keep getting disturbed
 * by high priority interrupts. As part of this mode, as soon as a
 * backlog starts building, the squeue turns off the interrupts and
 * switches to poll mode. In poll mode, when the poll thread goes down
 * to retrieve packets, it retrieves them in the form of a chain, which
 * improves performance even more. As the squeue/softring system gets
 * more packets, it gets more efficient by switching to polling more
 * often and dealing with larger packet chains.
 *
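 * The cookie and callbacks negotiated with the mac layer are the
 * ill_rx_ring_t fields used by this file; informally:
 *
 *	rr_intr_disable(rr_intr_handle)	blank the ring: stop packet
 *					delivery (may fail if a CPU is
 *					already in the soft ring)
 *	rr_intr_enable(rr_intr_handle)	resume interrupt-driven delivery
 *	rr_rx(rr_rx_handle, max_bytes)	poll: return a chain of mblks,
 *					bounded by max_bytes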
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/condvar_impl.h>
#include <sys/systm.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

#include <inet/ipclassifier.h>
#include <inet/udp_impl.h>

#include <sys/squeue_impl.h>

static void squeue_fire(void *);
static void squeue_drain(squeue_t *, uint_t, hrtime_t);
static void squeue_worker(squeue_t *sqp);
static void squeue_polling_thread(squeue_t *sqp);

kmem_cache_t *squeue_cache;

#define	SQUEUE_MSEC_TO_NSEC 1000000

int squeue_drain_ms = 20;
int squeue_workerwait_ms = 0;

/* The values above converted to ticks or nanoseconds */
static int squeue_drain_ns = 0;
static int squeue_workerwait_tick = 0;

#define	MAX_BYTES_TO_PICKUP	150000

#define	ENQUEUE_CHAIN(sqp, mp, tail, cnt) {			\
	/*							\
	 * Enqueue our mblk chain.				\
	 */							\
	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));			\
								\
	if ((sqp)->sq_last != NULL)				\
		(sqp)->sq_last->b_next = (mp);			\
	else							\
		(sqp)->sq_first = (mp);				\
	(sqp)->sq_last = (tail);				\
	(sqp)->sq_count += (cnt);				\
	ASSERT((sqp)->sq_count > 0);				\
	DTRACE_PROBE4(squeue__enqueuechain, squeue_t *, sqp,	\
		mblk_t *, mp, mblk_t *, tail, int, cnt);	\
								\
}

/*
 * Blank the receive ring (in this case it is the soft ring). When
 * blanked, the soft ring will not send any more packets up.
 * Blanking may not succeed when there is a CPU already in the soft
 * ring sending packets up. In that case, SQS_POLLING will not be
 * set.
 */
#define	SQS_POLLING_ON(sqp, sq_poll_capable, rx_ring) {		\
	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));			\
	if (sq_poll_capable) {					\
		ASSERT(rx_ring != NULL);			\
		ASSERT(sqp->sq_state & SQS_POLL_CAPAB);		\
		if (!(sqp->sq_state & SQS_POLLING)) {		\
			if (rx_ring->rr_intr_disable(rx_ring->rr_intr_handle)) \
				sqp->sq_state |= SQS_POLLING;	\
		}						\
	}							\
}

#define	SQS_POLLING_OFF(sqp, sq_poll_capable, rx_ring) {	\
	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));			\
	if (sq_poll_capable) {					\
		ASSERT(rx_ring != NULL);			\
		ASSERT(sqp->sq_state & SQS_POLL_CAPAB);		\
		if (sqp->sq_state & SQS_POLLING) {		\
			sqp->sq_state &= ~SQS_POLLING;		\
			rx_ring->rr_intr_enable(rx_ring->rr_intr_handle); \
		}						\
	}							\
}

/* Wake up the poll thread only if SQS_POLLING is set */
#define	SQS_POLL_RING(sqp) {					\
	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));			\
	if (sqp->sq_state & SQS_POLLING) {			\
		ASSERT(sqp->sq_state & SQS_POLL_CAPAB);		\
		if (!(sqp->sq_state & SQS_GET_PKTS)) {		\
			sqp->sq_state |= SQS_GET_PKTS;		\
			cv_signal(&sqp->sq_poll_cv);		\
		}						\
	}							\
}

#ifdef DEBUG
#define	SQUEUE_DBG_SET(sqp, mp, proc, connp, tag) {		\
	(sqp)->sq_curmp = (mp);					\
	(sqp)->sq_curproc = (proc);				\
	(sqp)->sq_connp = (connp);				\
	(mp)->b_tag = (sqp)->sq_tag = (tag);			\
}

#define	SQUEUE_DBG_CLEAR(sqp)	{				\
	(sqp)->sq_curmp = NULL;					\
	(sqp)->sq_curproc = NULL;				\
	(sqp)->sq_connp = NULL;					\
}
#else
#define	SQUEUE_DBG_SET(sqp, mp, proc, connp, tag)
#define	SQUEUE_DBG_CLEAR(sqp)
#endif

void
squeue_init(void)
{
	squeue_cache = kmem_cache_create("squeue_cache",
	    sizeof (squeue_t), 64, NULL, NULL, NULL, NULL, NULL, 0);

	squeue_drain_ns = squeue_drain_ms * SQUEUE_MSEC_TO_NSEC;
	squeue_workerwait_tick = MSEC_TO_TICK_ROUNDUP(squeue_workerwait_ms);
}

/* ARGSUSED */
squeue_t *
squeue_create(clock_t wait, pri_t pri)
{
	squeue_t *sqp = kmem_cache_alloc(squeue_cache, KM_SLEEP);

	bzero(sqp, sizeof (squeue_t));
	sqp->sq_bind = PBIND_NONE;
	sqp->sq_priority = pri;
	sqp->sq_wait = MSEC_TO_TICK(wait);
	sqp->sq_worker = thread_create(NULL, 0, squeue_worker,
	    sqp, 0, &p0, TS_RUN, pri);

	sqp->sq_poll_thr = thread_create(NULL, 0, squeue_polling_thread,
	    sqp, 0, &p0, TS_RUN, pri);

	sqp->sq_enter = squeue_enter;
	sqp->sq_drain = squeue_drain;

	return (sqp);
}

/*
 * Bind the squeue worker thread to the specified CPU, given by CPU id.
 * If the CPU id value is -1 (PBIND_NONE), bind the worker thread to the
 * value specified in the sq_bind field. If a thread is already bound to
 * a different CPU, unbind it from the old CPU and bind it to the new one.
 */

void
squeue_bind(squeue_t *sqp, processorid_t bind)
{
	mutex_enter(&sqp->sq_lock);
	ASSERT(sqp->sq_bind != PBIND_NONE || bind != PBIND_NONE);
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (sqp->sq_state & SQS_BOUND) {
		if (sqp->sq_bind == bind) {
			mutex_exit(&sqp->sq_lock);
			return;
		}
		thread_affinity_clear(sqp->sq_worker);
	} else {
		sqp->sq_state |= SQS_BOUND;
	}

	if (bind != PBIND_NONE)
		sqp->sq_bind = bind;

	thread_affinity_set(sqp->sq_worker, sqp->sq_bind);
	mutex_exit(&sqp->sq_lock);
}

void
squeue_unbind(squeue_t *sqp)
{
	mutex_enter(&sqp->sq_lock);
	if (!(sqp->sq_state & SQS_BOUND)) {
		mutex_exit(&sqp->sq_lock);
		return;
	}

	sqp->sq_state &= ~SQS_BOUND;
	thread_affinity_clear(sqp->sq_worker);
	mutex_exit(&sqp->sq_lock);
}

void
squeue_worker_wakeup(squeue_t *sqp)
{
	timeout_id_t tid = (sqp)->sq_tid;

	ASSERT(MUTEX_HELD(&(sqp)->sq_lock));

	if (sqp->sq_wait == 0) {
		ASSERT(tid == 0);
		ASSERT(!(sqp->sq_state & SQS_TMO_PROG));
		sqp->sq_awaken = ddi_get_lbolt();
		cv_signal(&sqp->sq_worker_cv);
		mutex_exit(&sqp->sq_lock);
		return;
	}

	/*
	 * Queue isn't being processed, so take
	 * any post enqueue actions needed before leaving.
	 */
	if (tid != 0) {
		/*
		 * Waiting for an enter() to process mblk(s).
		 */
		clock_t now = ddi_get_lbolt();
		clock_t	waited = now - sqp->sq_awaken;

		if (TICK_TO_MSEC(waited) >= sqp->sq_wait) {
			/*
			 * Time is up and we have a worker thread
			 * waiting for work, so schedule it.
			 */
			sqp->sq_tid = 0;
			sqp->sq_awaken = now;
			cv_signal(&sqp->sq_worker_cv);
			mutex_exit(&sqp->sq_lock);
			(void) untimeout(tid);
			return;
		}
		mutex_exit(&sqp->sq_lock);
		return;
	} else if (sqp->sq_state & SQS_TMO_PROG) {
		mutex_exit(&sqp->sq_lock);
		return;
	} else {
		clock_t	wait = sqp->sq_wait;
		/*
		 * Wait up to sqp->sq_wait ms for an
		 * enter() to process this queue. We
		 * don't want to contend on timeout locks
		 * with sq_lock held for performance reasons,
		 * so drop the sq_lock before calling timeout
		 * but we need to check if timeout is required
		 * after reacquiring the sq_lock. Once
		 * the sq_lock is dropped, someone else could
		 * have processed the packet or the timeout could
		 * have already fired.
		 */
		sqp->sq_state |= SQS_TMO_PROG;
		mutex_exit(&sqp->sq_lock);
		tid = timeout(squeue_fire, sqp, wait);
		mutex_enter(&sqp->sq_lock);
		/* Check again if we still need the timeout */
		if (((sqp->sq_state & (SQS_PROC|SQS_TMO_PROG)) ==
		    SQS_TMO_PROG) && (sqp->sq_tid == 0) &&
		    (sqp->sq_first != NULL)) {
				sqp->sq_state &= ~SQS_TMO_PROG;
				sqp->sq_tid = tid;
				mutex_exit(&sqp->sq_lock);
				return;
		} else {
			if (sqp->sq_state & SQS_TMO_PROG) {
				sqp->sq_state &= ~SQS_TMO_PROG;
				mutex_exit(&sqp->sq_lock);
				(void) untimeout(tid);
			} else {
				/*
				 * The timer fired before we could
				 * reacquire the sq_lock. squeue_fire
				 * removes the SQS_TMO_PROG flag
				 * and we don't need to do anything
				 * else.
				 */
				mutex_exit(&sqp->sq_lock);
			}
		}
	}

	ASSERT(MUTEX_NOT_HELD(&sqp->sq_lock));
}

/*
 * squeue_enter() - enter squeue sqp with mblk mp (which can be
 * a chain), while tail points to the end and cnt is the number of
 * mblks in the chain.
 *
 * For a chain of a single packet (i.e. mp == tail), go through the
 * fast path if no one is processing the squeue and nothing is queued.
 *
 * The proc and arg for each mblk are already stored in the mblk in
 * appropriate places.
 *
 * The process_flag specifies if we are allowed to process the mblk
 * and drain in the entering thread context. If process_flag is
 * SQ_FILL, then we just queue the mblk and return (after signaling
 * the worker thread if no one else is processing the squeue).
 *
 * The ira argument can be used when the count is one.
 * For a chain the caller needs to prepend any needed mblks from
 * ip_recv_attr_to_mblk().
 */
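
/*
 * A hedged usage sketch (the call shown is illustrative, not lifted from
 * a real caller; actual callers go through the SQUEUE_ENTER*() macros,
 * which stash proc and connp exactly as shown):
 *
 *	mp->b_queue = (mblk_t *)tcp_input_data;	  the sqproc_t to run
 *	mp->b_prev = (mblk_t *)connp;		  caller holds a conn ref
 *	squeue_enter(connp->conn_sqp, mp, mp, 1, ira, SQ_PROCESS, tag);
 *
 * squeue_enter() consumes the caller's conn reference: the inline path
 * ends with CONN_DEC_REF() after proc returns, and the queued path
 * passes the reference on to the drain.
 */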
/* ARGSUSED */
void
squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
    ip_recv_attr_t *ira, int process_flag, uint8_t tag)
{
	conn_t		*connp;
	sqproc_t	proc;
	hrtime_t	now;

	ASSERT(sqp != NULL);
	ASSERT(mp != NULL);
	ASSERT(tail != NULL);
	ASSERT(cnt > 0);
	ASSERT(MUTEX_NOT_HELD(&sqp->sq_lock));
	ASSERT(ira == NULL || cnt == 1);

	mutex_enter(&sqp->sq_lock);

	/*
	 * Try to process the packet if the SQ_FILL flag is not set and
	 * we are allowed to process the squeue. The SQ_NODRAIN is
	 * ignored if the packet chain consists of more than 1 packet.
	 */
	if (!(sqp->sq_state & SQS_PROC) && ((process_flag == SQ_PROCESS) ||
	    (process_flag == SQ_NODRAIN && sqp->sq_first == NULL))) {
		/*
		 * See if anything is already queued. If we are the
		 * first packet, do inline processing else queue the
		 * packet and do the drain.
		 */
		if (sqp->sq_first == NULL && cnt == 1) {
			/*
			 * Fast-path, ok to process and nothing queued.
			 */
			sqp->sq_state |= (SQS_PROC|SQS_FAST);
			sqp->sq_run = curthread;
			mutex_exit(&sqp->sq_lock);

			/*
			 * We are the chain of 1 packet so
			 * go through this fast path.
			 */
			ASSERT(mp->b_prev != NULL);
			ASSERT(mp->b_queue != NULL);
			connp = (conn_t *)mp->b_prev;
			mp->b_prev = NULL;
			proc = (sqproc_t)mp->b_queue;
			mp->b_queue = NULL;
			ASSERT(proc != NULL && connp != NULL);
			ASSERT(mp->b_next == NULL);

			/*
			 * Handle squeue switching. More details in the
			 * block comment at the top of the file
			 */
			if (connp->conn_sqp == sqp) {
				SQUEUE_DBG_SET(sqp, mp, proc, connp,
				    tag);
				connp->conn_on_sqp = B_TRUE;
				DTRACE_PROBE3(squeue__proc__start, squeue_t *,
				    sqp, mblk_t *, mp, conn_t *, connp);
				(*proc)(connp, mp, sqp, ira);
				DTRACE_PROBE2(squeue__proc__end, squeue_t *,
				    sqp, conn_t *, connp);
				connp->conn_on_sqp = B_FALSE;
				SQUEUE_DBG_CLEAR(sqp);
				CONN_DEC_REF(connp);
			} else {
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc,
				    connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE);
			}
			ASSERT(MUTEX_NOT_HELD(&sqp->sq_lock));
			mutex_enter(&sqp->sq_lock);
			sqp->sq_state &= ~(SQS_PROC|SQS_FAST);
			sqp->sq_run = NULL;
			if (sqp->sq_first == NULL ||
			    process_flag == SQ_NODRAIN) {
				if (sqp->sq_first != NULL) {
					squeue_worker_wakeup(sqp);
					return;
				}
				/*
				 * We processed our packet inline and nothing
				 * new has arrived. We are done. In case any
				 * control actions are pending, wake up the
				 * worker.
				 */
				if (sqp->sq_state & SQS_WORKER_THR_CONTROL)
					cv_signal(&sqp->sq_worker_cv);
				mutex_exit(&sqp->sq_lock);
				return;
			}
		} else {
			if (ira != NULL) {
				mblk_t	*attrmp;

				ASSERT(cnt == 1);
				attrmp = ip_recv_attr_to_mblk(ira);
				if (attrmp == NULL) {
					mutex_exit(&sqp->sq_lock);
					ip_drop_input("squeue: "
					    "ip_recv_attr_to_mblk",
					    mp, NULL);
					/* Caller already set b_prev/b_next */
					mp->b_prev = mp->b_next = NULL;
					freemsg(mp);
					return;
				}
				ASSERT(attrmp->b_cont == NULL);
				attrmp->b_cont = mp;
				/* Move connp and func to new */
				attrmp->b_queue = mp->b_queue;
				mp->b_queue = NULL;
				attrmp->b_prev = mp->b_prev;
				mp->b_prev = NULL;

				ASSERT(mp == tail);
				tail = mp = attrmp;
			}

			ENQUEUE_CHAIN(sqp, mp, tail, cnt);
#ifdef DEBUG
			mp->b_tag = tag;
#endif
		}
		/*
		 * We are here because either we couldn't do inline
		 * processing (because something was already queued),
		 * or we had a chain of more than one packet,
		 * or something else arrived after we were done with
		 * inline processing.
		 */
		ASSERT(MUTEX_HELD(&sqp->sq_lock));
		ASSERT(sqp->sq_first != NULL);
		now = gethrtime();
		sqp->sq_drain(sqp, SQS_ENTER, now + squeue_drain_ns);

		/*
		 * If we didn't do a complete drain, the worker
		 * thread was already signalled by squeue_drain.
		 * In case any control actions are pending, wake
		 * up the worker.
		 */
		sqp->sq_run = NULL;
		if (sqp->sq_state & SQS_WORKER_THR_CONTROL)
			cv_signal(&sqp->sq_worker_cv);
		mutex_exit(&sqp->sq_lock);
		return;
	} else {
		/*
		 * We let a thread processing a squeue reenter only
		 * once. This helps the case of an incoming connection
		 * where a SYN-ACK-ACK that triggers the conn_ind
		 * doesn't have to queue the packet if the listener and
		 * eager are on the same squeue. It also helps the
		 * loopback connection where the two ends are bound
		 * to the same squeue (which is typical on single
		 * CPU machines).
		 *
		 * We let the thread reenter only once for fear
		 * of blowing the stack with multiple traversals.
		 */
		connp = (conn_t *)mp->b_prev;
		if (!(sqp->sq_state & SQS_REENTER) &&
		    (process_flag != SQ_FILL) && (sqp->sq_first == NULL) &&
		    (sqp->sq_run == curthread) && (cnt == 1) &&
		    (connp->conn_on_sqp == B_FALSE)) {
			sqp->sq_state |= SQS_REENTER;
			mutex_exit(&sqp->sq_lock);

			ASSERT(mp->b_prev != NULL);
			ASSERT(mp->b_queue != NULL);

			mp->b_prev = NULL;
			proc = (sqproc_t)mp->b_queue;
			mp->b_queue = NULL;

			/*
			 * Handle squeue switching. More details in the
			 * block comment at the top of the file
			 */
			if (connp->conn_sqp == sqp) {
				connp->conn_on_sqp = B_TRUE;
				DTRACE_PROBE3(squeue__proc__start, squeue_t *,
				    sqp, mblk_t *, mp, conn_t *, connp);
				(*proc)(connp, mp, sqp, ira);
				DTRACE_PROBE2(squeue__proc__end, squeue_t *,
				    sqp, conn_t *, connp);
				connp->conn_on_sqp = B_FALSE;
				CONN_DEC_REF(connp);
			} else {
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc,
				    connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE);
			}

			mutex_enter(&sqp->sq_lock);
			sqp->sq_state &= ~SQS_REENTER;
			mutex_exit(&sqp->sq_lock);
			return;
		}

		/*
		 * Queue is already being processed or there are already
		 * one or more packets on the queue. Enqueue the
		 * packet and wake up the squeue worker thread if the
		 * squeue is not being processed.
		 */
#ifdef DEBUG
		mp->b_tag = tag;
#endif
		if (ira != NULL) {
			mblk_t	*attrmp;

			ASSERT(cnt == 1);
			attrmp = ip_recv_attr_to_mblk(ira);
			if (attrmp == NULL) {
				mutex_exit(&sqp->sq_lock);
				ip_drop_input("squeue: ip_recv_attr_to_mblk",
				    mp, NULL);
				/* Caller already set b_prev/b_next */
				mp->b_prev = mp->b_next = NULL;
				freemsg(mp);
				return;
			}
			ASSERT(attrmp->b_cont == NULL);
			attrmp->b_cont = mp;
			/* Move connp and func to new */
			attrmp->b_queue = mp->b_queue;
			mp->b_queue = NULL;
			attrmp->b_prev = mp->b_prev;
			mp->b_prev = NULL;

			ASSERT(mp == tail);
			tail = mp = attrmp;
		}
		ENQUEUE_CHAIN(sqp, mp, tail, cnt);
		if (!(sqp->sq_state & SQS_PROC)) {
			squeue_worker_wakeup(sqp);
			return;
		}
		/*
		 * In case any control actions are pending, wake
		 * up the worker.
		 */
		if (sqp->sq_state & SQS_WORKER_THR_CONTROL)
			cv_signal(&sqp->sq_worker_cv);
		mutex_exit(&sqp->sq_lock);
		return;
	}
}

/*
 * PRIVATE FUNCTIONS
 */

static void
squeue_fire(void *arg)
{
	squeue_t	*sqp = arg;
	uint_t		state;

	mutex_enter(&sqp->sq_lock);

	state = sqp->sq_state;
	if (sqp->sq_tid == 0 && !(state & SQS_TMO_PROG)) {
		mutex_exit(&sqp->sq_lock);
		return;
	}

	sqp->sq_tid = 0;
	/*
	 * The timeout fired before we got a chance to set it.
	 * Process it anyway but remove the SQS_TMO_PROG so that
	 * the guy trying to set the timeout knows that it has
	 * already been processed.
	 */
	if (state & SQS_TMO_PROG)
		sqp->sq_state &= ~SQS_TMO_PROG;

	if (!(state & SQS_PROC)) {
		sqp->sq_awaken = ddi_get_lbolt();
		cv_signal(&sqp->sq_worker_cv);
	}
	mutex_exit(&sqp->sq_lock);
}

static void
squeue_drain(squeue_t *sqp, uint_t proc_type, hrtime_t expire)
{
	mblk_t		*mp;
	mblk_t		*head;
	sqproc_t	proc;
	conn_t		*connp;
	timeout_id_t	tid;
	ill_rx_ring_t	*sq_rx_ring = sqp->sq_rx_ring;
	hrtime_t	now;
	boolean_t	did_wakeup = B_FALSE;
	boolean_t	sq_poll_capable;
	ip_recv_attr_t	*ira, iras;

	sq_poll_capable = (sqp->sq_state & SQS_POLL_CAPAB) != 0;
again:
	ASSERT(mutex_owned(&sqp->sq_lock));
	ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
	    SQS_POLL_QUIESCE_DONE)));

	head = sqp->sq_first;
	sqp->sq_first = NULL;
	sqp->sq_last = NULL;
	sqp->sq_count = 0;

	if ((tid = sqp->sq_tid) != 0)
		sqp->sq_tid = 0;

	sqp->sq_state |= SQS_PROC | proc_type;

	/*
	 * We have a backlog built up. Switch to polling mode if the
	 * device underneath allows it. Need to do it so that
	 * more packets don't come in and disturb us (by contending
	 * for sq_lock or a higher priority thread preempting us).
	 *
	 * The worker thread is allowed to do active polling while we
	 * just disable the interrupts for drain by non worker (kernel
	 * or userland) threads so they can peacefully process the
	 * packets during the time allocated to them.
	 */
	SQS_POLLING_ON(sqp, sq_poll_capable, sq_rx_ring);
	mutex_exit(&sqp->sq_lock);

	if (tid != 0)
		(void) untimeout(tid);

	while ((mp = head) != NULL) {
		head = mp->b_next;
		mp->b_next = NULL;

		proc = (sqproc_t)mp->b_queue;
		mp->b_queue = NULL;
		connp = (conn_t *)mp->b_prev;
		mp->b_prev = NULL;

		/* Is there an ip_recv_attr_t to handle? */
		if (ip_recv_attr_is_mblk(mp)) {
			mblk_t	*attrmp = mp;

			ASSERT(attrmp->b_cont != NULL);

			mp = attrmp->b_cont;
			attrmp->b_cont = NULL;
			ASSERT(mp->b_queue == NULL);
			ASSERT(mp->b_prev == NULL);

			if (!ip_recv_attr_from_mblk(attrmp, &iras)) {
				/* The ill or ip_stack_t disappeared on us */
				ip_drop_input("ip_recv_attr_from_mblk",
				    mp, NULL);
				ira_cleanup(&iras, B_TRUE);
				CONN_DEC_REF(connp);
				continue;
			}
			ira = &iras;
		} else {
			ira = NULL;
		}

		/*
		 * Handle squeue switching. More details in the
		 * block comment at the top of the file
		 */
		if (connp->conn_sqp == sqp) {
			SQUEUE_DBG_SET(sqp, mp, proc, connp,
			    mp->b_tag);
			connp->conn_on_sqp = B_TRUE;
			DTRACE_PROBE3(squeue__proc__start, squeue_t *,
			    sqp, mblk_t *, mp, conn_t *, connp);
			(*proc)(connp, mp, sqp, ira);
			DTRACE_PROBE2(squeue__proc__end, squeue_t *,
			    sqp, conn_t *, connp);
			connp->conn_on_sqp = B_FALSE;
			CONN_DEC_REF(connp);
		} else {
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc, connp, ira,
			    SQ_FILL, SQTAG_SQUEUE_CHANGE);
		}
		if (ira != NULL)
			ira_cleanup(ira, B_TRUE);
	}

	SQUEUE_DBG_CLEAR(sqp);

	mutex_enter(&sqp->sq_lock);

	/*
	 * Check if there is still work to do (either more arrived or timer
	 * expired). If we are the worker thread and we are polling capable,
	 * continue doing the work since no one else is around to do the
	 * work anyway (but signal the poll thread to retrieve some packets
	 * in the meantime). If we are not the worker thread, just
	 * signal the worker thread to take up the work if processing time
	 * has expired.
	 */
	if (sqp->sq_first != NULL) {
		/*
		 * Still more to process. If the time quantum has not
		 * expired, we should let the drain go on. The worker
		 * thread is allowed to drain as long as there is anything
		 * left.
		 */
		now = gethrtime();
		if ((now < expire) || (proc_type == SQS_WORKER)) {
			/*
			 * If time has not expired or we are the worker
			 * thread and this squeue is polling capable,
			 * continue to do the drain.
			 *
			 * We turn off interrupts for all userland threads
			 * doing drain but we do active polling only for
			 * the worker thread.
			 *
			 * Calling SQS_POLL_RING() even in the case of
			 * SQS_POLLING_ON() not succeeding is ok as
			 * SQS_POLL_RING() will not wake up the poll thread
			 * if the SQS_POLLING bit is not set.
			 */
			if (proc_type == SQS_WORKER)
				SQS_POLL_RING(sqp);
			goto again;
		} else {
			did_wakeup = B_TRUE;
			sqp->sq_awaken = ddi_get_lbolt();
			cv_signal(&sqp->sq_worker_cv);
		}
	}

	/*
	 * If the poll thread is already running, just return. The
	 * poll thread continues to hold the proc and will finish
	 * processing.
	 */
	if (sqp->sq_state & SQS_GET_PKTS) {
		ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
		    SQS_POLL_QUIESCE_DONE)));
		sqp->sq_state &= ~proc_type;
		return;
	}

	/*
	 * If we are the worker thread and no work is left, send the poll
	 * thread down once more to see if something arrived. Otherwise,
	 * turn the interrupts back on and we are done.
	 */
	if ((proc_type == SQS_WORKER) && (sqp->sq_state & SQS_POLLING)) {
		/*
		 * Do one last check to see if anything arrived
		 * in the NIC. We leave the SQS_PROC set to ensure
		 * that the poll thread keeps the PROC and can decide
		 * if it needs to turn polling off or continue
		 * processing.
		 *
		 * If we drop the SQS_PROC here and the poll thread comes
		 * up empty handed, it can not safely turn polling off
		 * since someone else could have acquired the PROC
		 * and started draining. The previously running poll
		 * thread and the current thread doing drain would end
		 * up in a race for turning polling on/off and more
		 * complex code would be required to deal with it.
		 *
		 * It's a lot simpler for the drain to hand the SQS_PROC
		 * to the poll thread (if running) and let the poll thread
		 * finish without worrying about racing with any other
		 * thread.
		 */
		ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
		    SQS_POLL_QUIESCE_DONE)));
		SQS_POLL_RING(sqp);
		sqp->sq_state &= ~proc_type;
	} else {
		/*
		 * The squeue is either not capable of polling or the
		 * attempt to blank the ring (i.e., SQS_POLLING_ON()) was
		 * unsuccessful or the poll thread already finished
		 * processing and didn't find anything. Since there
		 * is nothing queued and we already turned polling on
		 * (for all threads doing drain), we should turn
		 * polling off and relinquish the PROC.
		 */
		ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
		    SQS_POLL_QUIESCE_DONE)));
		SQS_POLLING_OFF(sqp, sq_poll_capable, sq_rx_ring);
		sqp->sq_state &= ~(SQS_PROC | proc_type);
		if (!did_wakeup && sqp->sq_first != NULL) {
			squeue_worker_wakeup(sqp);
			mutex_enter(&sqp->sq_lock);
		}
		/*
		 * If we are not the worker and there is a pending quiesce
		 * event, wake up the worker
		 */
		if ((proc_type != SQS_WORKER) &&
		    (sqp->sq_state & SQS_WORKER_THR_CONTROL))
			cv_signal(&sqp->sq_worker_cv);
	}
}

/*
 * Quiesce, Restart, or Cleanup of the squeue poll thread.
 *
 * Quiesce and Restart: After an squeue poll thread has been quiesced, it does
 * not attempt to poll the underlying soft ring any more. The quiesce is
 * triggered by the mac layer when it wants to quiesce a soft ring. Typically
 * control operations such as changing the fanout of a NIC or VNIC (dladm
 * setlinkprop) need to quiesce data flow before changing the wiring.
 * The operation is done by the mac layer, but it calls back into IP to
 * quiesce the soft ring. After completing the operation (say increase or
 * decrease of the fanout) the mac layer then calls back into IP to restart
 * the quiesced soft ring.
 *
 * Cleanup: This is triggered when the squeue binding to a soft ring is
 * removed permanently. Typically interface plumb and unplumb would trigger
 * this. It can also be triggered from the mac layer when a soft ring is
 * being deleted say as the result of a fanout reduction. Since squeues are
 * never deleted, the cleanup marks the squeue as fit for recycling and
 * moves it to the zeroth squeue set.
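 *
 * A rough sketch of the quiesce/restart handshake as implemented by
 * squeue_poll_thr_control() below (the requesting side lives elsewhere
 * in the squeue framework, not in this file):
 *
 *	requester sets SQS_POLL_THR_QUIESCE and signals sq_poll_cv
 *	poll thread sets SQS_POLL_THR_QUIESCED, clears the request and
 *	    signals sq_worker_cv; it stops touching the soft ring
 *	... control operation runs ...
 *	requester sets SQS_POLL_THR_RESTART and signals sq_poll_cv
 *	poll thread clears SQS_POLL_THR_QUIESCED|SQS_POLL_THR_RESTART,
 *	    sets SQS_POLL_CAPAB again and signals sq_worker_cv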
 */
static void
squeue_poll_thr_control(squeue_t *sqp)
{
	if (sqp->sq_state & SQS_POLL_THR_RESTART) {
		/* Restart implies a previous quiesce */
		ASSERT(sqp->sq_state & SQS_POLL_THR_QUIESCED);
		sqp->sq_state &= ~(SQS_POLL_THR_QUIESCED |
		    SQS_POLL_THR_RESTART);
		sqp->sq_state |= SQS_POLL_CAPAB;
		cv_signal(&sqp->sq_worker_cv);
		return;
	}

	if (sqp->sq_state & SQS_POLL_THR_QUIESCE) {
		sqp->sq_state |= SQS_POLL_THR_QUIESCED;
		sqp->sq_state &= ~SQS_POLL_THR_QUIESCE;
		cv_signal(&sqp->sq_worker_cv);
		return;
	}
}

/*
 * POLLING Notes
 *
 * With polling mode, we want to do as much processing as we possibly can
 * in worker thread context. The sweet spot is the worker thread keeps
 * doing work all the time in polling mode and writers etc. keep dumping
 * packets to the worker thread. Occasionally, we send the poll thread
 * (running at lower priority) down to the NIC to get a chain of packets
 * to feed to the worker. Sending the poll thread down to the NIC is
 * dependent on 3 criteria:
 *
 * 1) It is always driven from squeue_drain and only if the worker thread
 *	is doing the drain.
 * 2) We clear the backlog once and more packets arrived in between.
 *	Before starting drain again, send the poll thread down if
 *	the drain is being done by the worker thread.
 * 3) Before exiting the squeue_drain, if the poll thread is not already
 *	working and we are the worker thread, try to poll one more time.
 *
 * For latency's sake, we do allow any thread calling squeue_enter
 * to process its packet provided:
 *
 * 1) Nothing is queued
 * 2) If more packets arrived in between, the non-worker threads are
 *	allowed to do the drain till their time quanta expire, provided
 *	SQS_GET_PKTS wasn't set in between.
 *
 * Avoiding deadlocks with interrupts
 * ==================================
 *
 * One of the big problems is that we can't send the poll thread down
 * while holding the sq_lock since the thread can block. So we drop the
 * sq_lock before calling sq_get_pkts(). We keep holding the SQS_PROC as
 * long as the poll thread is running so that no other thread can acquire
 * the perimeter in between. If the squeue_drain gets done (no more work
 * left), it leaves the SQS_PROC set if the poll thread is running.
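 *
 * A condensed view of the handoff (all of it visible in
 * squeue_polling_thread() and squeue_drain() in this file):
 *
 *	worker drains, backlog remains	-> SQS_POLL_RING() wakes the
 *					   poll thread
 *	poll thread returns packets	-> drains them if allowed, else
 *					   sets SQS_PROC_HELD and signals
 *					   the worker
 *	poll thread finds nothing	-> clears SQS_PROC, turns polling
 *					   off, back to interrupt mode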
 */

/*
 * This is the squeue poll thread. In poll mode, it polls the underlying
 * TCP softring and feeds packets into the squeue. The worker thread then
 * drains the squeue. The poll thread also responds to control signals for
 * quiescing, restarting, or cleanup of an squeue. These are driven by
 * control operations like plumb/unplumb or as a result of dynamic Rx ring
 * related operations that are driven from the mac layer.
 */
10068275SEric Cheng static void
squeue_polling_thread(squeue_t * sqp)10078275SEric Cheng squeue_polling_thread(squeue_t *sqp)
10088275SEric Cheng {
10098275SEric Cheng 	kmutex_t *lock = &sqp->sq_lock;
10108275SEric Cheng 	kcondvar_t *async = &sqp->sq_poll_cv;
10118275SEric Cheng 	ip_mac_rx_t sq_get_pkts;
10128275SEric Cheng 	ip_accept_t ip_accept;
10138275SEric Cheng 	ill_rx_ring_t *sq_rx_ring;
10148275SEric Cheng 	ill_t *sq_ill;
10158275SEric Cheng 	mblk_t *head, *tail, *mp;
10168275SEric Cheng 	uint_t cnt;
10178275SEric Cheng 	void *sq_mac_handle;
10188275SEric Cheng 	callb_cpr_t cprinfo;
10198275SEric Cheng 	size_t bytes_to_pickup;
10208275SEric Cheng 	uint32_t ctl_state;
10218275SEric Cheng 
10228275SEric Cheng 	CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "sq_poll");
10238275SEric Cheng 	mutex_enter(lock);
10248275SEric Cheng 
10258275SEric Cheng 	for (;;) {
10268275SEric Cheng 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
10278275SEric Cheng 		cv_wait(async, lock);
10288275SEric Cheng 		CALLB_CPR_SAFE_END(&cprinfo, lock);
10298275SEric Cheng 
10308275SEric Cheng 		ctl_state = sqp->sq_state & (SQS_POLL_THR_CONTROL |
10318275SEric Cheng 		    SQS_POLL_THR_QUIESCED);
10328275SEric Cheng 		if (ctl_state != 0) {
10338275SEric Cheng 			/*
10348275SEric Cheng 			 * If the squeue is quiesced, then wait for a control
10358275SEric Cheng 			 * request. A quiesced squeue must not poll the
10368275SEric Cheng 			 * underlying soft ring.
10378275SEric Cheng 			 */
10388275SEric Cheng 			if (ctl_state == SQS_POLL_THR_QUIESCED)
10398275SEric Cheng 				continue;
10408275SEric Cheng 			/*
10418275SEric Cheng 			 * Act on control requests to quiesce, cleanup or
10428275SEric Cheng 			 * restart an squeue
10438275SEric Cheng 			 */
10448275SEric Cheng 			squeue_poll_thr_control(sqp);
10458275SEric Cheng 			continue;
10468275SEric Cheng 		}
10478275SEric Cheng 
10488275SEric Cheng 		if (!(sqp->sq_state & SQS_POLL_CAPAB))
10498275SEric Cheng 			continue;
10508275SEric Cheng 
10518275SEric Cheng 		ASSERT((sqp->sq_state &
10528275SEric Cheng 		    (SQS_PROC|SQS_POLLING|SQS_GET_PKTS)) ==
10538275SEric Cheng 		    (SQS_PROC|SQS_POLLING|SQS_GET_PKTS));
10548275SEric Cheng 
poll_again:
		sq_rx_ring = sqp->sq_rx_ring;
		sq_get_pkts = sq_rx_ring->rr_rx;
		sq_mac_handle = sq_rx_ring->rr_rx_handle;
		ip_accept = sq_rx_ring->rr_ip_accept;
		sq_ill = sq_rx_ring->rr_ill;
		bytes_to_pickup = MAX_BYTES_TO_PICKUP;
		mutex_exit(lock);
		head = sq_get_pkts(sq_mac_handle, bytes_to_pickup);
		mp = NULL;
		if (head != NULL) {
			/*
			 * We got the packet chain from the mac layer. It
			 * would be nice to be able to process it inline
			 * for better performance but we need to give
			 * IP a chance to look at this chain to ensure
			 * that packets are really meant for this squeue
			 * and do the IP processing.
			 */
			mp = ip_accept(sq_ill, sq_rx_ring, sqp, head,
			    &tail, &cnt);
		}
		mutex_enter(lock);
		if (mp != NULL) {
			/*
			 * The ip_accept function has already added an
			 * ip_recv_attr_t mblk if that is needed.
			 */
			ENQUEUE_CHAIN(sqp, mp, tail, cnt);
		}
		ASSERT((sqp->sq_state &
		    (SQS_PROC|SQS_POLLING|SQS_GET_PKTS)) ==
		    (SQS_PROC|SQS_POLLING|SQS_GET_PKTS));

		if (sqp->sq_first != NULL && !(sqp->sq_state & SQS_WORKER)) {
			/*
			 * We have packets to process and the worker thread
			 * is not running. Check to see if the poll thread
			 * is allowed to process. Let it do the processing
			 * only if it picked up some packets from the NIC,
			 * otherwise wake up the worker thread.
			 */
			if (mp != NULL) {
				hrtime_t  now;

				now = gethrtime();
				sqp->sq_run = curthread;
				sqp->sq_drain(sqp, SQS_POLL_PROC, now +
				    squeue_drain_ns);
				sqp->sq_run = NULL;

				if (sqp->sq_first == NULL)
					goto poll_again;

				/*
				 * Couldn't do the entire drain because the
				 * time limit expired; let the worker thread
				 * take over.
				 */
			}

			sqp->sq_awaken = ddi_get_lbolt();
			/*
			 * Set SQS_PROC_HELD so the worker thread can
			 * distinguish where it was called from. We could
			 * remove the SQS_PROC flag here and turn off
			 * polling so that it wouldn't matter who gets the
			 * processing, but we get better performance this
			 * way and save the cost of turning polling off and
			 * possibly on again as soon as we start draining
			 * again.
			 *
			 * We can't remove the SQS_PROC flag without turning
			 * polling off until we can guarantee that control
			 * will return to squeue_drain immediately.
			 */
			sqp->sq_state |= SQS_PROC_HELD;
			sqp->sq_state &= ~SQS_GET_PKTS;
			cv_signal(&sqp->sq_worker_cv);
		} else if (sqp->sq_first == NULL &&
		    !(sqp->sq_state & SQS_WORKER)) {
			/*
			 * Nothing queued and the worker thread is not
			 * running. Since we hold the proc, no other thread
			 * is processing the squeue. This means that there
			 * is no work to be done and nothing is queued in
			 * the squeue or in the NIC. Turn polling off and
			 * go back to interrupt mode.
			 */
			sqp->sq_state &= ~(SQS_PROC|SQS_GET_PKTS);
			/* LINTED: constant in conditional context */
			SQS_POLLING_OFF(sqp, B_TRUE, sq_rx_ring);

			/*
			 * If there is a pending control operation
			 * wake up the worker, since it is currently
			 * not running.
			 */
			if (sqp->sq_state & SQS_WORKER_THR_CONTROL)
				cv_signal(&sqp->sq_worker_cv);
		} else {
			/*
			 * The worker thread is already running, so we don't
			 * need to do anything. Indicate that the poll
			 * thread is done.
			 */
			sqp->sq_state &= ~SQS_GET_PKTS;
		}
		if (sqp->sq_state & SQS_POLL_THR_CONTROL) {
			/*
			 * Act on control requests to quiesce, cleanup or
			 * restart an squeue
			 */
			squeue_poll_thr_control(sqp);
		}
	}
}
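
/*
 * To summarize the loop above: after each poll the thread either drains the
 * squeue itself (when the worker is idle and packets were picked up), hands
 * processing to the worker via SQS_PROC_HELD (when the drain time budget
 * expires), or, with both the squeue and the ring empty, turns polling off
 * and reverts the ring to interrupt mode.
 */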

/*
 * The squeue worker thread acts on any control requests to quiesce, cleanup
 * or restart an ill_rx_ring_t by calling this function. The worker thread
 * synchronizes with the squeue poll thread to complete the request and
 * finally wakes up the requestor when the request is completed.
 */
static void
squeue_worker_thr_control(squeue_t *sqp)
{
	ill_t	*ill;
	ill_rx_ring_t	*rx_ring;

	ASSERT(MUTEX_HELD(&sqp->sq_lock));

	if (sqp->sq_state & SQS_POLL_RESTART) {
		/* Restart implies a previous quiesce. */
		ASSERT((sqp->sq_state & (SQS_PROC_HELD |
		    SQS_POLL_QUIESCE_DONE | SQS_PROC | SQS_WORKER)) ==
		    (SQS_POLL_QUIESCE_DONE | SQS_PROC | SQS_WORKER));
		/*
		 * Request the squeue poll thread to restart and wait till
		 * it actually restarts.
		 */
		sqp->sq_state &= ~SQS_POLL_QUIESCE_DONE;
		sqp->sq_state |= SQS_POLL_THR_RESTART;
		cv_signal(&sqp->sq_poll_cv);
		while (sqp->sq_state & SQS_POLL_THR_QUIESCED)
			cv_wait(&sqp->sq_worker_cv, &sqp->sq_lock);
		sqp->sq_state &= ~(SQS_POLL_RESTART | SQS_PROC |
		    SQS_WORKER);
		/*
		 * Signal any waiter that is waiting for the restart
		 * to complete.
		 */
		sqp->sq_state |= SQS_POLL_RESTART_DONE;
		cv_signal(&sqp->sq_ctrlop_done_cv);
		return;
	}

	if (sqp->sq_state & SQS_PROC_HELD) {
		/* The squeue poll thread handed control to us */
		ASSERT(sqp->sq_state & SQS_PROC);
	}

	/*
	 * Prevent any other thread from processing the squeue
	 * until we finish the control actions by setting SQS_PROC.
	 * But allow ourselves to reenter by setting SQS_WORKER.
	 */
	sqp->sq_state |= (SQS_PROC | SQS_WORKER);

	/* Signal the squeue poll thread and wait for it to quiesce itself */
	if (!(sqp->sq_state & SQS_POLL_THR_QUIESCED)) {
		sqp->sq_state |= SQS_POLL_THR_QUIESCE;
		cv_signal(&sqp->sq_poll_cv);
		while (!(sqp->sq_state & SQS_POLL_THR_QUIESCED))
			cv_wait(&sqp->sq_worker_cv, &sqp->sq_lock);
	}

	rx_ring = sqp->sq_rx_ring;
	ill = rx_ring->rr_ill;
	/*
	 * The lock hierarchy is as follows.
	 * cpu_lock -> ill_lock -> sqset_lock -> sq_lock
	 */
	mutex_exit(&sqp->sq_lock);
	mutex_enter(&ill->ill_lock);
	mutex_enter(&sqp->sq_lock);

	SQS_POLLING_OFF(sqp, (sqp->sq_state & SQS_POLL_CAPAB) != 0,
	    sqp->sq_rx_ring);
	sqp->sq_state &= ~(SQS_POLL_CAPAB | SQS_GET_PKTS | SQS_PROC_HELD);
	if (sqp->sq_state & SQS_POLL_CLEANUP) {
		/*
		 * Disassociate this squeue from its ill_rx_ring_t.
		 * The rr_sqp and sq_rx_ring fields are protected by the
		 * corresponding squeue, ill_lock and sq_lock. Holding any
		 * of them will ensure that the ring to squeue mapping does
		 * not change.
		 */
		ASSERT(!(sqp->sq_state & SQS_DEFAULT));

		sqp->sq_rx_ring = NULL;
		rx_ring->rr_sqp = NULL;

		sqp->sq_state &= ~(SQS_POLL_CLEANUP | SQS_POLL_THR_QUIESCED |
		    SQS_POLL_QUIESCE_DONE);
		sqp->sq_ill = NULL;

		rx_ring->rr_rx_handle = NULL;
		rx_ring->rr_intr_handle = NULL;
		rx_ring->rr_intr_enable = NULL;
		rx_ring->rr_intr_disable = NULL;
		sqp->sq_state |= SQS_POLL_CLEANUP_DONE;
	} else {
		sqp->sq_state &= ~SQS_POLL_QUIESCE;
		sqp->sq_state |= SQS_POLL_QUIESCE_DONE;
	}
	/*
	 * Signal any waiter that is waiting for the quiesce or cleanup
	 * to complete and also wait for it to actually see and reset the
	 * SQS_POLL_CLEANUP_DONE.
	 */
	cv_signal(&sqp->sq_ctrlop_done_cv);
	mutex_exit(&ill->ill_lock);
	if (sqp->sq_state & SQS_POLL_CLEANUP_DONE) {
		cv_wait(&sqp->sq_worker_cv, &sqp->sq_lock);
		sqp->sq_state &= ~(SQS_PROC | SQS_WORKER);
	}
}

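/*
 * The squeue worker thread. It waits until it can own the squeue: either
 * the poll thread has handed processing over (SQS_PROC_HELD), or the
 * squeue is not being processed and there are messages to drain or a
 * control request is pending. Control requests are dispatched to
 * squeue_worker_thr_control(); anything else is drained via sq_drain()
 * within the squeue_drain_ns time budget.
 */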
static void
squeue_worker(squeue_t *sqp)
{
	kmutex_t *lock = &sqp->sq_lock;
	kcondvar_t *async = &sqp->sq_worker_cv;
	callb_cpr_t cprinfo;
	hrtime_t now;

	CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "sq_worker");
	mutex_enter(lock);

	for (;;) {
		for (;;) {
			/*
			 * If the poll thread has handed control to us
			 * we need to break out of the wait.
			 */
			if (sqp->sq_state & SQS_PROC_HELD)
				break;

			/*
			 * If the squeue is not being processed and we either
			 * have messages to drain or some thread has signaled
			 * some control activity we need to break.
			 */
			if (!(sqp->sq_state & SQS_PROC) &&
			    ((sqp->sq_state & SQS_WORKER_THR_CONTROL) ||
			    (sqp->sq_first != NULL)))
				break;

			/*
			 * If we have started some control action, then check
			 * for the SQS_WORKER flag (since we don't
			 * release the squeue) to make sure we own the squeue
			 * and break out.
			 */
			if ((sqp->sq_state & SQS_WORKER_THR_CONTROL) &&
			    (sqp->sq_state & SQS_WORKER))
				break;

			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(async, lock);
			CALLB_CPR_SAFE_END(&cprinfo, lock);
		}
		if (sqp->sq_state & SQS_WORKER_THR_CONTROL) {
			squeue_worker_thr_control(sqp);
			continue;
		}
		ASSERT(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
		    SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
		    SQS_WORKER_THR_CONTROL | SQS_POLL_THR_CONTROL)));

		if (sqp->sq_state & SQS_PROC_HELD)
			sqp->sq_state &= ~SQS_PROC_HELD;

		now = gethrtime();
		sqp->sq_run = curthread;
		sqp->sq_drain(sqp, SQS_WORKER, now + squeue_drain_ns);
		sqp->sq_run = NULL;
	}
}

uintptr_t *
squeue_getprivate(squeue_t *sqp, sqprivate_t p)
{
	ASSERT(p < SQPRIVATE_MAX);

	return (&sqp->sq_private[p]);
}
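
/*
 * A minimal usage sketch for squeue_getprivate() (hypothetical caller): a
 * client module can stash per-squeue state in its private slot, e.g.
 *
 *	*squeue_getprivate(sqp, SQPRIVATE_TCP) = (uintptr_t)my_state;
 *
 * where my_state is whatever the client wants to cache; any index below
 * SQPRIVATE_MAX is valid.
 */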

/* ARGSUSED */
void
squeue_wakeup_conn(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	squeue_t *sqp = connp->conn_sqp;

	/*
	 * Mark the squeue as paused before waking up the thread stuck
	 * in squeue_synch_enter().
	 */
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state |= SQS_PAUSE;

	/*
	 * Notify the thread that it's OK to proceed; that is done by
	 * clearing the MSGWAITSYNC flag. The synch thread will free the mblk.
	 */
	ASSERT(mp->b_flag & MSGWAITSYNC);
	mp->b_flag &= ~MSGWAITSYNC;
	cv_broadcast(&connp->conn_sq_cv);

	/*
	 * We are doing something on behalf of another thread, so we have to
	 * pause and wait until it finishes.
	 */
	while (sqp->sq_state & SQS_PAUSE) {
		cv_wait(&sqp->sq_synch_cv, &sqp->sq_lock);
	}
	mutex_exit(&sqp->sq_lock);
}
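
/*
 * The synchronous-access handshake, step by step: squeue_synch_enter()
 * below enqueues an mblk tagged MSGWAITSYNC and blocks on conn_sq_cv; when
 * the squeue eventually processes that mblk it invokes squeue_wakeup_conn()
 * above, which pauses the squeue (SQS_PAUSE) and clears MSGWAITSYNC; the
 * synchronous thread then runs with exclusive access until
 * squeue_synch_exit() clears SQS_PAUSE and signals sq_synch_cv.
 */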

int
squeue_synch_enter(conn_t *connp, mblk_t *use_mp)
{
	squeue_t *sqp;

again:
	sqp = connp->conn_sqp;

	mutex_enter(&sqp->sq_lock);
	if (sqp->sq_first == NULL && !(sqp->sq_state & SQS_PROC)) {
		/*
		 * We are OK to proceed if the squeue is empty, and
		 * no one owns the squeue.
		 *
		 * The caller won't own the squeue as this is called from the
		 * application.
		 */
		ASSERT(sqp->sq_run == NULL);

		sqp->sq_state |= SQS_PROC;
		sqp->sq_run = curthread;
		mutex_exit(&sqp->sq_lock);

		/*
		 * Handle squeue switching. The conn's squeue can only change
		 * while there is a thread in the squeue, which is why we do
		 * the check after entering the squeue. If it has changed,
		 * exit this squeue and redo everything with the new squeue.
		 */
		if (sqp != connp->conn_sqp) {
			mutex_enter(&sqp->sq_lock);
			sqp->sq_state &= ~SQS_PROC;
			sqp->sq_run = NULL;
			mutex_exit(&sqp->sq_lock);
			goto again;
		}
#if SQUEUE_DEBUG
		sqp->sq_curmp = NULL;
		sqp->sq_curproc = NULL;
		sqp->sq_connp = connp;
#endif
		connp->conn_on_sqp = B_TRUE;
		return (0);
	} else {
		mblk_t  *mp;

		mp = (use_mp == NULL) ? allocb(0, BPRI_MED) : use_mp;
		if (mp == NULL) {
			mutex_exit(&sqp->sq_lock);
			return (ENOMEM);
		}

		/*
		 * We mark the mblk as awaiting synchronous squeue access
		 * by setting the MSGWAITSYNC flag. Once squeue_wakeup_conn
		 * fires, MSGWAITSYNC is cleared, at which point we know we
		 * have exclusive access.
		 */
		mp->b_flag |= MSGWAITSYNC;

		CONN_INC_REF(connp);
		SET_SQUEUE(mp, squeue_wakeup_conn, connp);
		ENQUEUE_CHAIN(sqp, mp, mp, 1);

		ASSERT(sqp->sq_run != curthread);

		/* Wait until the enqueued mblk gets processed. */
		while (mp->b_flag & MSGWAITSYNC)
			cv_wait(&connp->conn_sq_cv, &sqp->sq_lock);
		mutex_exit(&sqp->sq_lock);

		if (use_mp == NULL)
			freeb(mp);

		return (0);
	}
}

void
squeue_synch_exit(conn_t *connp)
{
	squeue_t *sqp = connp->conn_sqp;

	mutex_enter(&sqp->sq_lock);
	if (sqp->sq_run == curthread) {
		ASSERT(sqp->sq_state & SQS_PROC);

		sqp->sq_state &= ~SQS_PROC;
		sqp->sq_run = NULL;
		connp->conn_on_sqp = B_FALSE;

		if (sqp->sq_first == NULL) {
			mutex_exit(&sqp->sq_lock);
		} else {
			/*
			 * If this was a normal thread, then it would
			 * (most likely) continue processing the pending
			 * requests. Since the just completed operation
			 * was executed synchronously, the thread should
			 * not be delayed. To compensate, wake up the
			 * worker thread right away when there are
			 * outstanding requests.
			 */
			sqp->sq_awaken = ddi_get_lbolt();
			cv_signal(&sqp->sq_worker_cv);
			mutex_exit(&sqp->sq_lock);
		}
	} else {
		/*
		 * The caller doesn't own the squeue; clear the SQS_PAUSE
		 * flag and wake up the squeue owner so that the owner can
		 * continue processing.
		 */
		ASSERT(sqp->sq_state & SQS_PAUSE);
		sqp->sq_state &= ~SQS_PAUSE;

		/* There should be only one thread blocking on sq_synch_cv. */
		cv_signal(&sqp->sq_synch_cv);
		mutex_exit(&sqp->sq_lock);
	}
}
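
/*
 * A minimal caller sketch (hypothetical; error handling elided): a thread
 * outside the squeue that needs exclusive, synchronous access to a conn
 * would bracket its work as follows.
 *
 *	if (squeue_synch_enter(connp, NULL) == 0) {
 *		... operate on connp with exclusive access ...
 *		squeue_synch_exit(connp);
 *	}
 */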