xref: /onnv-gate/usr/src/uts/common/io/stream.c (revision 2958:98aa41c076f5)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*2958Sdr146992  * Common Development and Distribution License (the "License").
6*2958Sdr146992  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
220Sstevel@tonic-gate /*	  All Rights Reserved  	*/
230Sstevel@tonic-gate 
240Sstevel@tonic-gate 
250Sstevel@tonic-gate /*
26*2958Sdr146992  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
270Sstevel@tonic-gate  * Use is subject to license terms.
280Sstevel@tonic-gate  */
290Sstevel@tonic-gate 
300Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #include <sys/types.h>
330Sstevel@tonic-gate #include <sys/param.h>
340Sstevel@tonic-gate #include <sys/thread.h>
350Sstevel@tonic-gate #include <sys/sysmacros.h>
360Sstevel@tonic-gate #include <sys/stropts.h>
370Sstevel@tonic-gate #include <sys/stream.h>
380Sstevel@tonic-gate #include <sys/strsubr.h>
390Sstevel@tonic-gate #include <sys/strsun.h>
400Sstevel@tonic-gate #include <sys/conf.h>
410Sstevel@tonic-gate #include <sys/debug.h>
420Sstevel@tonic-gate #include <sys/cmn_err.h>
430Sstevel@tonic-gate #include <sys/kmem.h>
440Sstevel@tonic-gate #include <sys/atomic.h>
450Sstevel@tonic-gate #include <sys/errno.h>
460Sstevel@tonic-gate #include <sys/vtrace.h>
470Sstevel@tonic-gate #include <sys/ftrace.h>
480Sstevel@tonic-gate #include <sys/ontrap.h>
490Sstevel@tonic-gate #include <sys/multidata.h>
500Sstevel@tonic-gate #include <sys/multidata_impl.h>
510Sstevel@tonic-gate #include <sys/sdt.h>
521110Smeem #include <sys/strft.h>
530Sstevel@tonic-gate 
540Sstevel@tonic-gate #ifdef DEBUG
550Sstevel@tonic-gate #include <sys/kmem_impl.h>
560Sstevel@tonic-gate #endif
570Sstevel@tonic-gate 
580Sstevel@tonic-gate /*
590Sstevel@tonic-gate  * This file contains all the STREAMS utility routines that may
600Sstevel@tonic-gate  * be used by modules and drivers.
610Sstevel@tonic-gate  */
620Sstevel@tonic-gate 
630Sstevel@tonic-gate /*
640Sstevel@tonic-gate  * STREAMS message allocator: principles of operation
650Sstevel@tonic-gate  *
660Sstevel@tonic-gate  * The streams message allocator consists of all the routines that
670Sstevel@tonic-gate  * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
680Sstevel@tonic-gate  * dupb(), freeb() and freemsg().  What follows is a high-level view
690Sstevel@tonic-gate  * of how the allocator works.
700Sstevel@tonic-gate  *
710Sstevel@tonic-gate  * Every streams message consists of one or more mblks, a dblk, and data.
720Sstevel@tonic-gate  * All mblks for all types of messages come from a common mblk_cache.
730Sstevel@tonic-gate  * The dblk and data come in several flavors, depending on how the
740Sstevel@tonic-gate  * message is allocated:
750Sstevel@tonic-gate  *
760Sstevel@tonic-gate  * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
770Sstevel@tonic-gate  *     fixed-size dblk/data caches. For message sizes that are multiples of
780Sstevel@tonic-gate  *     PAGESIZE, dblks are allocated separately from the buffer.
790Sstevel@tonic-gate  *     The associated buffer is allocated by the constructor using kmem_alloc().
800Sstevel@tonic-gate  *     For all other message sizes, dblk and its associated data is allocated
810Sstevel@tonic-gate  *     as a single contiguous chunk of memory.
820Sstevel@tonic-gate  *     Objects in these caches consist of a dblk plus its associated data.
830Sstevel@tonic-gate  *     allocb() determines the nearest-size cache by table lookup:
840Sstevel@tonic-gate  *     the dblk_cache[] array provides the mapping from size to dblk cache.
850Sstevel@tonic-gate  *
860Sstevel@tonic-gate  * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
870Sstevel@tonic-gate  *     kmem_alloc()'ing a buffer for the data and supplying that
880Sstevel@tonic-gate  *     buffer to gesballoc(), described below.
890Sstevel@tonic-gate  *
900Sstevel@tonic-gate  * (3) The four flavors of [d]esballoc[a] are all implemented by a
910Sstevel@tonic-gate  *     common routine, gesballoc() ("generic esballoc").  gesballoc()
920Sstevel@tonic-gate  *     allocates a dblk from the global dblk_esb_cache and sets db_base,
930Sstevel@tonic-gate  *     db_lim and db_frtnp to describe the caller-supplied buffer.
940Sstevel@tonic-gate  *
950Sstevel@tonic-gate  * While there are several routines to allocate messages, there is only
960Sstevel@tonic-gate  * one routine to free messages: freeb().  freeb() simply invokes the
970Sstevel@tonic-gate  * dblk's free method, dbp->db_free(), which is set at allocation time.
980Sstevel@tonic-gate  *
990Sstevel@tonic-gate  * dupb() creates a new reference to a message by allocating a new mblk,
1000Sstevel@tonic-gate  * incrementing the dblk reference count and setting the dblk's free
1010Sstevel@tonic-gate  * method to dblk_decref().  The dblk's original free method is retained
1020Sstevel@tonic-gate  * in db_lastfree.  dblk_decref() decrements the reference count on each
1030Sstevel@tonic-gate  * freeb().  If this is not the last reference it just frees the mblk;
1040Sstevel@tonic-gate  * if this *is* the last reference, it restores db_free to db_lastfree,
1050Sstevel@tonic-gate  * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
1060Sstevel@tonic-gate  *
1070Sstevel@tonic-gate  * The implementation makes aggressive use of kmem object caching for
1080Sstevel@tonic-gate  * maximum performance.  This makes the code simple and compact, but
1090Sstevel@tonic-gate  * also a bit abstruse in some places.  The invariants that constitute a
1100Sstevel@tonic-gate  * message's constructed state, described below, are more subtle than usual.
1110Sstevel@tonic-gate  *
1120Sstevel@tonic-gate  * Every dblk has an "attached mblk" as part of its constructed state.
1130Sstevel@tonic-gate  * The mblk is allocated by the dblk's constructor and remains attached
1140Sstevel@tonic-gate  * until the message is either dup'ed or pulled up.  In the dupb() case
1150Sstevel@tonic-gate  * the mblk association doesn't matter until the last free, at which time
1160Sstevel@tonic-gate  * dblk_decref() attaches the last mblk to the dblk.  pullupmsg() affects
1170Sstevel@tonic-gate  * the mblk association because it swaps the leading mblks of two messages,
1180Sstevel@tonic-gate  * so it is responsible for swapping their db_mblk pointers accordingly.
1190Sstevel@tonic-gate  * From a constructed-state viewpoint it doesn't matter that a dblk's
1200Sstevel@tonic-gate  * attached mblk can change while the message is allocated; all that
1210Sstevel@tonic-gate  * matters is that the dblk has *some* attached mblk when it's freed.
1220Sstevel@tonic-gate  *
1230Sstevel@tonic-gate  * The sizes of the allocb() small-message caches are not magical.
1240Sstevel@tonic-gate  * They represent a good trade-off between internal and external
1250Sstevel@tonic-gate  * fragmentation for current workloads.  They should be reevaluated
1260Sstevel@tonic-gate  * periodically, especially if allocations larger than DBLK_MAX_CACHE
1270Sstevel@tonic-gate  * become common.  We use 64-byte alignment so that dblks don't
1280Sstevel@tonic-gate  * straddle cache lines unnecessarily.
1290Sstevel@tonic-gate  */
1300Sstevel@tonic-gate #define	DBLK_MAX_CACHE		73728
1310Sstevel@tonic-gate #define	DBLK_CACHE_ALIGN	64
1320Sstevel@tonic-gate #define	DBLK_MIN_SIZE		8
1330Sstevel@tonic-gate #define	DBLK_SIZE_SHIFT		3
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate #ifdef _BIG_ENDIAN
1360Sstevel@tonic-gate #define	DBLK_RTFU_SHIFT(field)	\
1370Sstevel@tonic-gate 	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
1380Sstevel@tonic-gate #else
1390Sstevel@tonic-gate #define	DBLK_RTFU_SHIFT(field)	\
1400Sstevel@tonic-gate 	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
1410Sstevel@tonic-gate #endif
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate #define	DBLK_RTFU(ref, type, flags, uioflag)	\
1440Sstevel@tonic-gate 	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
1450Sstevel@tonic-gate 	((type) << DBLK_RTFU_SHIFT(db_type)) | \
1460Sstevel@tonic-gate 	(((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
1470Sstevel@tonic-gate 	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
1480Sstevel@tonic-gate #define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
1490Sstevel@tonic-gate #define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
1500Sstevel@tonic-gate #define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
1510Sstevel@tonic-gate 
1520Sstevel@tonic-gate static size_t dblk_sizes[] = {
1530Sstevel@tonic-gate #ifdef _LP64
1540Sstevel@tonic-gate 	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3920,
1550Sstevel@tonic-gate 	8192, 12112, 16384, 20304, 24576, 28496, 32768, 36688,
1560Sstevel@tonic-gate 	40960, 44880, 49152, 53072, 57344, 61264, 65536, 69456,
1570Sstevel@tonic-gate #else
1580Sstevel@tonic-gate 	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3968,
1590Sstevel@tonic-gate 	8192, 12160, 16384, 20352, 24576, 28544, 32768, 36736,
1600Sstevel@tonic-gate 	40960, 44928, 49152, 53120, 57344, 61312, 65536, 69504,
1610Sstevel@tonic-gate #endif
1620Sstevel@tonic-gate 	DBLK_MAX_CACHE, 0
1630Sstevel@tonic-gate };
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
1660Sstevel@tonic-gate static struct kmem_cache *mblk_cache;
1670Sstevel@tonic-gate static struct kmem_cache *dblk_esb_cache;
1680Sstevel@tonic-gate static struct kmem_cache *fthdr_cache;
1690Sstevel@tonic-gate static struct kmem_cache *ftblk_cache;
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1720Sstevel@tonic-gate static mblk_t *allocb_oversize(size_t size, int flags);
1730Sstevel@tonic-gate static int allocb_tryhard_fails;
1740Sstevel@tonic-gate static void frnop_func(void *arg);
1750Sstevel@tonic-gate frtn_t frnop = { frnop_func };
1760Sstevel@tonic-gate static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1770Sstevel@tonic-gate 
1780Sstevel@tonic-gate static boolean_t rwnext_enter(queue_t *qp);
1790Sstevel@tonic-gate static void rwnext_exit(queue_t *qp);
1800Sstevel@tonic-gate 
1810Sstevel@tonic-gate /*
1820Sstevel@tonic-gate  * Patchable mblk/dblk kmem_cache flags.
1830Sstevel@tonic-gate  */
1840Sstevel@tonic-gate int dblk_kmem_flags = 0;
1850Sstevel@tonic-gate int mblk_kmem_flags = 0;
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate 
/*
 * kmem constructor for the fixed-size dblk caches.  cdrarg encodes the
 * message size this cache serves.  Builds the dblk's constructed state:
 * an attached mblk and, for non-page-multiple sizes, a data buffer
 * contiguous with the dblk itself.  Returns 0 on success, -1 if any
 * allocation fails.
 */
static int
dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;	/* cache's buffer size */
	size_t index;

	ASSERT(msg_size != 0);

	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;

	ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));

	/* Every dblk carries an attached mblk as part of constructed state. */
	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	if ((msg_size & PAGEOFFSET) == 0) {
		/* Page-multiple size: data buffer is allocated separately. */
		dbp->db_base = kmem_alloc(msg_size, kmflags);
		if (dbp->db_base == NULL) {
			kmem_cache_free(mblk_cache, dbp->db_mblk);
			return (-1);
		}
	} else {
		/* Otherwise the data immediately follows the dblk. */
		dbp->db_base = (unsigned char *)&dbp[1];
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_cache[index];
	dbp->db_lim = dbp->db_base + msg_size;
	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate /*ARGSUSED*/
2270Sstevel@tonic-gate static int
2280Sstevel@tonic-gate dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
2290Sstevel@tonic-gate {
2300Sstevel@tonic-gate 	dblk_t *dbp = buf;
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2330Sstevel@tonic-gate 		return (-1);
2340Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2350Sstevel@tonic-gate 	dbp->db_cache = dblk_esb_cache;
2360Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2370Sstevel@tonic-gate 	dbp->db_credp = NULL;
2380Sstevel@tonic-gate 	dbp->db_cpid = -1;
2390Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2400Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2410Sstevel@tonic-gate 	return (0);
2420Sstevel@tonic-gate }
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate static int
2450Sstevel@tonic-gate bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
2460Sstevel@tonic-gate {
2470Sstevel@tonic-gate 	dblk_t *dbp = buf;
2480Sstevel@tonic-gate 	bcache_t *bcp = (bcache_t *)cdrarg;
2490Sstevel@tonic-gate 
2500Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2510Sstevel@tonic-gate 		return (-1);
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	if ((dbp->db_base = (unsigned char *)kmem_cache_alloc(bcp->buffer_cache,
2540Sstevel@tonic-gate 	    kmflags)) == NULL) {
2550Sstevel@tonic-gate 		kmem_cache_free(mblk_cache, dbp->db_mblk);
2560Sstevel@tonic-gate 		return (-1);
2570Sstevel@tonic-gate 	}
2580Sstevel@tonic-gate 
2590Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2600Sstevel@tonic-gate 	dbp->db_cache = (void *)bcp;
2610Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + bcp->size;
2620Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
2630Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2640Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2650Sstevel@tonic-gate 	dbp->db_credp = NULL;
2660Sstevel@tonic-gate 	dbp->db_cpid = -1;
2670Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2680Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2690Sstevel@tonic-gate 	return (0);
2700Sstevel@tonic-gate }
2710Sstevel@tonic-gate 
2720Sstevel@tonic-gate /*ARGSUSED*/
2730Sstevel@tonic-gate static void
2740Sstevel@tonic-gate dblk_destructor(void *buf, void *cdrarg)
2750Sstevel@tonic-gate {
2760Sstevel@tonic-gate 	dblk_t *dbp = buf;
2770Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate 	ASSERT(msg_size != 0);
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
2840Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2870Sstevel@tonic-gate 		kmem_free(dbp->db_base, msg_size);
2880Sstevel@tonic-gate 	}
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
2910Sstevel@tonic-gate }
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate static void
2940Sstevel@tonic-gate bcache_dblk_destructor(void *buf, void *cdrarg)
2950Sstevel@tonic-gate {
2960Sstevel@tonic-gate 	dblk_t *dbp = buf;
2970Sstevel@tonic-gate 	bcache_t *bcp = (bcache_t *)cdrarg;
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 	kmem_cache_free(bcp->buffer_cache, dbp->db_base);
3000Sstevel@tonic-gate 
3010Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
3020Sstevel@tonic-gate 
3030Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
3040Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
3070Sstevel@tonic-gate }
3080Sstevel@tonic-gate 
/*
 * Create the kmem caches backing the STREAMS message allocator: the
 * common mblk cache, one dblk/data cache per size in dblk_sizes[],
 * the esballoc dblk cache, and the flow-trace header/block caches.
 * Also initializes the Multidata caches.  Called once at STREAMS
 * subsystem initialization.
 */
void
streams_msg_init(void)
{
	char name[40];
	size_t size;
	size_t lastsize = DBLK_MIN_SIZE;
	size_t *sizep;
	struct kmem_cache *cp;
	size_t tot_size;
	int offset;

	mblk_cache = kmem_cache_create("streams_mblk",
		sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
		mblk_kmem_flags);

	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {

		if ((offset = (size & PAGEOFFSET)) != 0) {
			/*
			 * We are in the middle of a page, dblk should
			 * be allocated on the same page
			 */
			tot_size = size + sizeof (dblk_t);
			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
								< PAGESIZE);
			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);

		} else {

			/*
			 * buf size is multiple of page size, dblk and
			 * buffer are allocated separately.
			 */

			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
			tot_size = sizeof (dblk_t);
		}

		(void) sprintf(name, "streams_dblk_%ld", size);
		cp = kmem_cache_create(name, tot_size,
			DBLK_CACHE_ALIGN, dblk_constructor,
			dblk_destructor, NULL,
			(void *)(size), NULL, dblk_kmem_flags);

		/*
		 * Point every size bucket up to and including 'size' at
		 * this cache, so allocb() can map a request size to its
		 * nearest cache with a single table lookup.
		 */
		while (lastsize <= size) {
			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
			lastsize += DBLK_MIN_SIZE;
		}
	}

	dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
			sizeof (dblk_t), DBLK_CACHE_ALIGN,
			dblk_esb_constructor, dblk_destructor, NULL,
			(void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
	fthdr_cache = kmem_cache_create("streams_fthdr",
		sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
	ftblk_cache = kmem_cache_create("streams_ftblk",
		sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);

	/* Initialize Multidata caches */
	mmd_init();
}
3710Sstevel@tonic-gate 
/*
 * Allocate a message of at least 'size' bytes from the nearest-size
 * dblk cache; requests larger than DBLK_MAX_CACHE are satisfied by
 * allocb_oversize().  'pri' is accepted for interface compatibility
 * but ignored.  Returns the new mblk, or NULL on allocation failure.
 */
/*ARGSUSED*/
mblk_t *
allocb(size_t size, uint_t pri)
{
	dblk_t *dbp;
	mblk_t *mp;
	size_t index;

	index =  (size - 1)  >> DBLK_SIZE_SHIFT;

	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
		if (size != 0) {
			mp = allocb_oversize(size, KM_NOSLEEP);
			goto out;
		}
		/* size == 0: (size - 1) wrapped around; use smallest cache. */
		index = 0;
	}

	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
		mp = NULL;
		goto out;
	}

	mp = dbp->db_mblk;
	/* Initialize ref, type, flags and uioflag in one 32-bit store. */
	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = dbp->db_base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;
	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
out:
	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);

	return (mp);
}
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate mblk_t *
4090Sstevel@tonic-gate allocb_tmpl(size_t size, const mblk_t *tmpl)
4100Sstevel@tonic-gate {
4110Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate 	if (mp != NULL) {
4140Sstevel@tonic-gate 		cred_t *cr = DB_CRED(tmpl);
4150Sstevel@tonic-gate 		if (cr != NULL)
4160Sstevel@tonic-gate 			crhold(mp->b_datap->db_credp = cr);
4170Sstevel@tonic-gate 		DB_CPID(mp) = DB_CPID(tmpl);
4180Sstevel@tonic-gate 		DB_TYPE(mp) = DB_TYPE(tmpl);
4190Sstevel@tonic-gate 	}
4200Sstevel@tonic-gate 	return (mp);
4210Sstevel@tonic-gate }
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate mblk_t *
4240Sstevel@tonic-gate allocb_cred(size_t size, cred_t *cr)
4250Sstevel@tonic-gate {
4260Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
4290Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
4300Sstevel@tonic-gate 
4310Sstevel@tonic-gate 	return (mp);
4320Sstevel@tonic-gate }
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate mblk_t *
4350Sstevel@tonic-gate allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr)
4360Sstevel@tonic-gate {
4370Sstevel@tonic-gate 	mblk_t *mp = allocb_wait(size, 0, flags, error);
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
4400Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
4410Sstevel@tonic-gate 
4420Sstevel@tonic-gate 	return (mp);
4430Sstevel@tonic-gate }
4440Sstevel@tonic-gate 
/*
 * Free a single message block (the b_cont chain is not followed) by
 * invoking the dblk's free method, which was installed at allocation
 * or dup time.  The block must not be linked on a queue.
 */
void
freeb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;

	ASSERT(dbp->db_ref > 0);
	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);

	dbp->db_free(mp, dbp);
}
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate void
4600Sstevel@tonic-gate freemsg(mblk_t *mp)
4610Sstevel@tonic-gate {
4620Sstevel@tonic-gate 	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
4630Sstevel@tonic-gate 	while (mp) {
4640Sstevel@tonic-gate 		dblk_t *dbp = mp->b_datap;
4650Sstevel@tonic-gate 		mblk_t *mp_cont = mp->b_cont;
4660Sstevel@tonic-gate 
4670Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
4680Sstevel@tonic-gate 		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
4710Sstevel@tonic-gate 
4720Sstevel@tonic-gate 		dbp->db_free(mp, dbp);
4730Sstevel@tonic-gate 		mp = mp_cont;
4740Sstevel@tonic-gate 	}
4750Sstevel@tonic-gate }
4760Sstevel@tonic-gate 
/*
 * Reallocate a block for another use.  Try hard to use the old block.
 * If the old data is wanted (copy), leave b_wptr at the end of the data,
 * otherwise return b_wptr = b_rptr.
 *
 * This routine is private and unstable.
 */
mblk_t	*
reallocb(mblk_t *mp, size_t size, uint_t copy)
{
	mblk_t		*mp1;
	unsigned char	*old_rptr;
	ptrdiff_t	cur_size;

	/* No original block: degenerate to a plain allocation. */
	if (mp == NULL)
		return (allocb(size, BPRI_HI));

	cur_size = mp->b_wptr - mp->b_rptr;
	old_rptr = mp->b_rptr;

	ASSERT(mp->b_datap->db_ref != 0);

	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
		/*
		 * If the data is wanted and it will fit where it is, no
		 * work is required.
		 */
		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
			return (mp);

		/* Sole reference and big enough: reuse from the base. */
		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
		mp1 = mp;
	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
		/* XXX other mp state could be copied too, db_flags ... ? */
		mp1->b_cont = mp->b_cont;
	} else {
		return (NULL);
	}

	if (copy) {
		/*
		 * NOTE(review): when the old buffer is reused (mp1 == mp),
		 * old_rptr and mp1->b_rptr can address overlapping regions;
		 * this relies on bcopy() handling a downward overlapping
		 * copy — confirm against bcopy(9F).
		 */
		bcopy(old_rptr, mp1->b_rptr, cur_size);
		mp1->b_wptr = mp1->b_rptr + cur_size;
	}

	if (mp != mp1)
		freeb(mp);

	return (mp1);
}
5260Sstevel@tonic-gate 
/*
 * Default db_lastfree method for cache-allocated dblks: release any
 * flow-trace data and credentials, restore the fields that constructed
 * state requires to be clear, and return the dblk (with its attached
 * mblk) to its cache.
 */
static void
dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;

	/* Reset the struioflag and the checksum flag fields */
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	/* and the COOKED flag */
	dbp->db_flags &= ~DBLK_COOKED;

	kmem_cache_free(dbp->db_cache, dbp);
}
5500Sstevel@tonic-gate 
/*
 * db_free method installed by dupb().  Decrements the dblk reference
 * count: an intermediate reference just frees its mblk; the last
 * reference re-attaches its mblk to the dblk (restoring constructed
 * state), restores db_free to db_lastfree, and invokes it.
 */
static void
dblk_decref(mblk_t *mp, dblk_t *dbp)
{
	if (dbp->db_ref != 1) {
		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
		/*
		 * atomic_add_32_nv() just decremented db_ref, so we no longer
		 * have a reference to the dblk, which means another thread
		 * could free it.  Therefore we cannot examine the dblk to
		 * determine whether ours was the last reference.  Instead,
		 * we extract the new and minimum reference counts from rtfu.
		 * Note that all we're really saying is "if (ref != refmin)".
		 */
		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
			kmem_cache_free(mblk_cache, mp);
			return;
		}
	}
	/* Last reference: this mblk becomes the dblk's attached mblk. */
	dbp->db_mblk = mp;
	dbp->db_free = dbp->db_lastfree;
	dbp->db_lastfree(mp, dbp);
}
5750Sstevel@tonic-gate 
/*
 * Create a new reference to message block 'mp': allocate a new mblk
 * that shares mp's dblk, increment the dblk reference count, and make
 * dblk_decref() the free method (preserving the original in
 * db_lastfree).  Returns NULL if no mblk can be allocated or if
 * db_ref is already at DBLK_REFMAX.
 */
mblk_t *
dupb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;
	mblk_t *new_mp;
	uint32_t oldrtfu, newrtfu;

	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
		goto out;

	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
	new_mp->b_rptr = mp->b_rptr;
	new_mp->b_wptr = mp->b_wptr;
	new_mp->b_datap = dbp;
	new_mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);

	/*
	 * First-dup optimization.  The enabling assumption is that there
	 * can never be a race (in correct code) to dup the first copy
	 * of a message.  Therefore we don't need to do it atomically.
	 */
	if (dbp->db_free != dblk_decref) {
		dbp->db_free = dblk_decref;
		dbp->db_ref++;
		goto out;
	}

	/* Subsequent dups may race with each other and with freeb(). */
	do {
		ASSERT(dbp->db_ref > 0);
		oldrtfu = DBLK_RTFU_WORD(dbp);
		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
		/*
		 * If db_ref is maxed out we can't dup this message anymore.
		 */
		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
			kmem_cache_free(mblk_cache, new_mp);
			new_mp = NULL;
			goto out;
		}
	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);

out:
	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
	return (new_mp);
}
6240Sstevel@tonic-gate 
/*
 * db_lastfree method for desballoc'ed messages: invoke the caller's
 * free routine for the external buffer, then clean up the dblk as in
 * dblk_lastfree() and return it to its cache.
 */
static void
dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
{
	frtn_t *frp = dbp->db_frtnp;

	ASSERT(dbp->db_mblk == mp);
	/* Caller-supplied free routine runs first, releasing the buffer. */
	frp->free_func(frp->free_arg);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	kmem_cache_free(dbp->db_cache, dbp);
}
6460Sstevel@tonic-gate 
/*
 * Do-nothing buffer free routine, for external buffers that need no
 * client-side cleanup (presumably referenced by the file-scope 'frnop'
 * frtn_t used by allocb_oversize() below — confirm at its definition).
 */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
}
6520Sstevel@tonic-gate 
/*
 * Generic esballoc used to implement the four flavors: [d]esballoc[a].
 *
 * Wraps the caller-supplied buffer [base, base + size) in a dblk taken
 * from dblk_esb_cache and initializes the paired mblk to point at it.
 *
 *	base:		externally owned data buffer
 *	size:		length of that buffer in bytes
 *	db_rtfu:	initial packed ref/type/flags/uioflag word
 *	frp:		client free-routine descriptor, run on last free
 *	lastfree:	dblk last-free routine (immediate or queued flavor)
 *	kmflags:	KM_SLEEP or KM_NOSLEEP
 *
 * Returns the new mblk, or NULL if the dblk allocation fails.
 */
static mblk_t *
gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
{
	dblk_t *dbp;
	mblk_t *mp;

	ASSERT(base != NULL && frp != NULL);

	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
		mp = NULL;
		goto out;
	}

	/* point the dblk at the external buffer rather than cache memory */
	mp = dbp->db_mblk;
	dbp->db_base = base;
	dbp->db_lim = base + size;
	dbp->db_free = dbp->db_lastfree = lastfree;
	dbp->db_frtnp = frp;
	DBLK_RTFU_WORD(dbp) = db_rtfu;
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;

out:
	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
	return (mp);
}
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate /*ARGSUSED*/
6870Sstevel@tonic-gate mblk_t *
6880Sstevel@tonic-gate esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
6890Sstevel@tonic-gate {
6900Sstevel@tonic-gate 	mblk_t *mp;
6910Sstevel@tonic-gate 
6920Sstevel@tonic-gate 	/*
6930Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
6940Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
6950Sstevel@tonic-gate 	 * call optimization.
6960Sstevel@tonic-gate 	 */
6970Sstevel@tonic-gate 	if (!str_ftnever) {
6980Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
6990Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 		if (mp != NULL)
7020Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
7030Sstevel@tonic-gate 		return (mp);
7040Sstevel@tonic-gate 	}
7050Sstevel@tonic-gate 
7060Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7070Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
7080Sstevel@tonic-gate }
7090Sstevel@tonic-gate 
7100Sstevel@tonic-gate /*
7110Sstevel@tonic-gate  * Same as esballoc() but sleeps waiting for memory.
7120Sstevel@tonic-gate  */
7130Sstevel@tonic-gate /*ARGSUSED*/
7140Sstevel@tonic-gate mblk_t *
7150Sstevel@tonic-gate esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7160Sstevel@tonic-gate {
7170Sstevel@tonic-gate 	mblk_t *mp;
7180Sstevel@tonic-gate 
7190Sstevel@tonic-gate 	/*
7200Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7210Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7220Sstevel@tonic-gate 	 * call optimization.
7230Sstevel@tonic-gate 	 */
7240Sstevel@tonic-gate 	if (!str_ftnever) {
7250Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7260Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_SLEEP);
7270Sstevel@tonic-gate 
7280Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
7290Sstevel@tonic-gate 		return (mp);
7300Sstevel@tonic-gate 	}
7310Sstevel@tonic-gate 
7320Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7330Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_SLEEP));
7340Sstevel@tonic-gate }
7350Sstevel@tonic-gate 
7360Sstevel@tonic-gate /*ARGSUSED*/
7370Sstevel@tonic-gate mblk_t *
7380Sstevel@tonic-gate desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7390Sstevel@tonic-gate {
7400Sstevel@tonic-gate 	mblk_t *mp;
7410Sstevel@tonic-gate 
7420Sstevel@tonic-gate 	/*
7430Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7440Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7450Sstevel@tonic-gate 	 * call optimization.
7460Sstevel@tonic-gate 	 */
7470Sstevel@tonic-gate 	if (!str_ftnever) {
7480Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7490Sstevel@tonic-gate 			frp, dblk_lastfree_desb, KM_NOSLEEP);
7500Sstevel@tonic-gate 
7510Sstevel@tonic-gate 		if (mp != NULL)
7520Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
7530Sstevel@tonic-gate 		return (mp);
7540Sstevel@tonic-gate 	}
7550Sstevel@tonic-gate 
7560Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7570Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
7580Sstevel@tonic-gate }
7590Sstevel@tonic-gate 
7600Sstevel@tonic-gate /*ARGSUSED*/
7610Sstevel@tonic-gate mblk_t *
7620Sstevel@tonic-gate esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7630Sstevel@tonic-gate {
7640Sstevel@tonic-gate 	mblk_t *mp;
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate 	/*
7670Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7680Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7690Sstevel@tonic-gate 	 * call optimization.
7700Sstevel@tonic-gate 	 */
7710Sstevel@tonic-gate 	if (!str_ftnever) {
7720Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7730Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
7740Sstevel@tonic-gate 
7750Sstevel@tonic-gate 		if (mp != NULL)
7760Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
7770Sstevel@tonic-gate 		return (mp);
7780Sstevel@tonic-gate 	}
7790Sstevel@tonic-gate 
7800Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7810Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
7820Sstevel@tonic-gate }
7830Sstevel@tonic-gate 
7840Sstevel@tonic-gate /*ARGSUSED*/
7850Sstevel@tonic-gate mblk_t *
7860Sstevel@tonic-gate desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7870Sstevel@tonic-gate {
7880Sstevel@tonic-gate 	mblk_t *mp;
7890Sstevel@tonic-gate 
7900Sstevel@tonic-gate 	/*
7910Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7920Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7930Sstevel@tonic-gate 	 * call optimization.
7940Sstevel@tonic-gate 	 */
7950Sstevel@tonic-gate 	if (!str_ftnever) {
7960Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7970Sstevel@tonic-gate 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
7980Sstevel@tonic-gate 
7990Sstevel@tonic-gate 		if (mp != NULL)
8000Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
8010Sstevel@tonic-gate 		return (mp);
8020Sstevel@tonic-gate 	}
8030Sstevel@tonic-gate 
8040Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
8050Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate 
/*
 * Last-reference free routine for dblks allocated from a bcache.
 * Resets the dblk to its "unspecified" state, returns it to the
 * bcache's dblk cache and, if a bcache_destroy() was deferred while
 * this dblk was outstanding and it was the last one, completes the
 * teardown of the bcache itself.
 */
static void
bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
	bcache_t *bcp = dbp->db_cache;

	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	mutex_enter(&bcp->mutex);
	kmem_cache_free(bcp->dblk_cache, dbp);
	bcp->alloc--;

	if (bcp->alloc == 0 && bcp->destroy != 0) {
		/*
		 * Deferred destroy (see bcache_destroy()): this was the
		 * last outstanding dblk, so tear down the caches now.
		 * The mutex must be released before it is destroyed.
		 */
		kmem_cache_destroy(bcp->dblk_cache);
		kmem_cache_destroy(bcp->buffer_cache);
		mutex_exit(&bcp->mutex);
		mutex_destroy(&bcp->mutex);
		kmem_free(bcp, sizeof (bcache_t));
	} else {
		mutex_exit(&bcp->mutex);
	}
}
8400Sstevel@tonic-gate 
8410Sstevel@tonic-gate bcache_t *
8420Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align)
8430Sstevel@tonic-gate {
8440Sstevel@tonic-gate 	bcache_t *bcp;
8450Sstevel@tonic-gate 	char buffer[255];
8460Sstevel@tonic-gate 
8470Sstevel@tonic-gate 	ASSERT((align & (align - 1)) == 0);
8480Sstevel@tonic-gate 
8490Sstevel@tonic-gate 	if ((bcp = (bcache_t *)kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) ==
8500Sstevel@tonic-gate 	    NULL) {
8510Sstevel@tonic-gate 		return (NULL);
8520Sstevel@tonic-gate 	}
8530Sstevel@tonic-gate 
8540Sstevel@tonic-gate 	bcp->size = size;
8550Sstevel@tonic-gate 	bcp->align = align;
8560Sstevel@tonic-gate 	bcp->alloc = 0;
8570Sstevel@tonic-gate 	bcp->destroy = 0;
8580Sstevel@tonic-gate 
8590Sstevel@tonic-gate 	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
8600Sstevel@tonic-gate 
8610Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_buffer_cache", name);
8620Sstevel@tonic-gate 	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
8630Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
8640Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_dblk_cache", name);
8650Sstevel@tonic-gate 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
8660Sstevel@tonic-gate 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
8670Sstevel@tonic-gate 						NULL, (void *)bcp, NULL, 0);
8680Sstevel@tonic-gate 
8690Sstevel@tonic-gate 	return (bcp);
8700Sstevel@tonic-gate }
8710Sstevel@tonic-gate 
8720Sstevel@tonic-gate void
8730Sstevel@tonic-gate bcache_destroy(bcache_t *bcp)
8740Sstevel@tonic-gate {
8750Sstevel@tonic-gate 	ASSERT(bcp != NULL);
8760Sstevel@tonic-gate 
8770Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8780Sstevel@tonic-gate 	if (bcp->alloc == 0) {
8790Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
8800Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
8810Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8820Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
8830Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
8840Sstevel@tonic-gate 	} else {
8850Sstevel@tonic-gate 		bcp->destroy++;
8860Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8870Sstevel@tonic-gate 	}
8880Sstevel@tonic-gate }
8890Sstevel@tonic-gate 
8900Sstevel@tonic-gate /*ARGSUSED*/
8910Sstevel@tonic-gate mblk_t *
8920Sstevel@tonic-gate bcache_allocb(bcache_t *bcp, uint_t pri)
8930Sstevel@tonic-gate {
8940Sstevel@tonic-gate 	dblk_t *dbp;
8950Sstevel@tonic-gate 	mblk_t *mp = NULL;
8960Sstevel@tonic-gate 
8970Sstevel@tonic-gate 	ASSERT(bcp != NULL);
8980Sstevel@tonic-gate 
8990Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
9000Sstevel@tonic-gate 	if (bcp->destroy != 0) {
9010Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
9020Sstevel@tonic-gate 		goto out;
9030Sstevel@tonic-gate 	}
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
9060Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
9070Sstevel@tonic-gate 		goto out;
9080Sstevel@tonic-gate 	}
9090Sstevel@tonic-gate 	bcp->alloc++;
9100Sstevel@tonic-gate 	mutex_exit(&bcp->mutex);
9110Sstevel@tonic-gate 
9120Sstevel@tonic-gate 	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
9130Sstevel@tonic-gate 
9140Sstevel@tonic-gate 	mp = dbp->db_mblk;
9150Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
9160Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
9170Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
9180Sstevel@tonic-gate 	mp->b_queue = NULL;
9190Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
9200Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
9210Sstevel@tonic-gate out:
9220Sstevel@tonic-gate 	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
9230Sstevel@tonic-gate 
9240Sstevel@tonic-gate 	return (mp);
9250Sstevel@tonic-gate }
9260Sstevel@tonic-gate 
/*
 * Last-reference free routine for oversize dblks (allocb_oversize()):
 * the data buffer was kmem_alloc'd separately from the dblk, so it is
 * freed explicitly before the reset dblk is returned to its cache.
 */
static void
dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
{
	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	/* the buffer is a separate kmem_alloc'd object — free it too */
	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
	kmem_cache_free(dbp->db_cache, dbp);
}
9460Sstevel@tonic-gate 
9470Sstevel@tonic-gate static mblk_t *
9480Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags)
9490Sstevel@tonic-gate {
9500Sstevel@tonic-gate 	mblk_t *mp;
9510Sstevel@tonic-gate 	void *buf;
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
9540Sstevel@tonic-gate 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
9550Sstevel@tonic-gate 		return (NULL);
9560Sstevel@tonic-gate 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
9570Sstevel@tonic-gate 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
9580Sstevel@tonic-gate 		kmem_free(buf, size);
9590Sstevel@tonic-gate 
9600Sstevel@tonic-gate 	if (mp != NULL)
9610Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate 	return (mp);
9640Sstevel@tonic-gate }
9650Sstevel@tonic-gate 
9660Sstevel@tonic-gate mblk_t *
9670Sstevel@tonic-gate allocb_tryhard(size_t target_size)
9680Sstevel@tonic-gate {
9690Sstevel@tonic-gate 	size_t size;
9700Sstevel@tonic-gate 	mblk_t *bp;
9710Sstevel@tonic-gate 
9720Sstevel@tonic-gate 	for (size = target_size; size < target_size + 512;
9730Sstevel@tonic-gate 	    size += DBLK_CACHE_ALIGN)
9740Sstevel@tonic-gate 		if ((bp = allocb(size, BPRI_HI)) != NULL)
9750Sstevel@tonic-gate 			return (bp);
9760Sstevel@tonic-gate 	allocb_tryhard_fails++;
9770Sstevel@tonic-gate 	return (NULL);
9780Sstevel@tonic-gate }
9790Sstevel@tonic-gate 
/*
 * This routine is consolidation private for STREAMS internal use
 * This routine may only be called from sync routines (i.e., not
 * from put or service procedures).  It is located here (rather
 * than strsubr.c) so that we don't have to expose all of the
 * allocb() implementation details in header files.
 *
 * Allocate a message block of at least 'size' bytes, waiting for
 * memory.  With STR_NOSIG in 'flags' the wait is uninterruptible
 * (KM_SLEEP); otherwise allocb() is retried around strwaitbuf(),
 * which can fail, in which case *error is set and NULL is returned.
 */
mblk_t *
allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
{
	dblk_t *dbp;
	mblk_t *mp;
	size_t index;

	/* map size to a dblk cache index (sizes are cached in steps) */
	index = (size -1) >> DBLK_SIZE_SHIFT;

	if (flags & STR_NOSIG) {
		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
			if (size != 0) {
				/* too large for any fixed cache */
				mp = allocb_oversize(size, KM_SLEEP);
				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
				    (uintptr_t)mp);
				return (mp);
			}
			/* size == 0 wrapped the index; use smallest cache */
			index = 0;
		}

		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
		mp = dbp->db_mblk;
		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
		mp->b_next = mp->b_prev = mp->b_cont = NULL;
		mp->b_rptr = mp->b_wptr = dbp->db_base;
		mp->b_queue = NULL;
		MBLK_BAND_FLAG_WORD(mp) = 0;
		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);

		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);

	} else {
		/* interruptible: retry until success or strwaitbuf() fails */
		while ((mp = allocb(size, pri)) == NULL) {
			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
				return (NULL);
		}
	}

	return (mp);
}
10270Sstevel@tonic-gate 
/*
 * Call function 'func' with 'arg' when a class zero block can
 * be allocated with priority 'pri'.
 * Implemented as a bufcall() for a one-byte block; returns the
 * bufcall id (0 on failure), usable with unbufcall().
 */
bufcall_id_t
esbbcall(uint_t pri, void (*func)(void *), void *arg)
{
	return (bufcall(1, pri, func, arg));
}
10370Sstevel@tonic-gate 
/*
 * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
 * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
 * This provides consistency for all internal allocators of ioctl.
 * Returns the M_IOCTL mblk, or NULL if allocation fails.
 */
mblk_t *
mkiocb(uint_t cmd)
{
	struct iocblk	*ioc;
	mblk_t		*mp;

	/*
	 * Allocate enough space for any of the ioctl related messages.
	 */
	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
		return (NULL);

	/* zeroing covers ioc_count, ioc_error and ioc_rval as well */
	bzero(mp->b_rptr, sizeof (union ioctypes));

	/*
	 * Set the mblk_t information and ptrs correctly.
	 */
	mp->b_wptr += sizeof (struct iocblk);
	mp->b_datap->db_type = M_IOCTL;

	/*
	 * Fill in the fields.  kcred needs no hold here since it is the
	 * permanent kernel credential.
	 */
	ioc		= (struct iocblk *)mp->b_rptr;
	ioc->ioc_cmd	= cmd;
	ioc->ioc_cr	= kcred;
	ioc->ioc_id	= getiocseqno();
	ioc->ioc_flag	= IOC_NATIVE;
	return (mp);
}
10730Sstevel@tonic-gate 
10740Sstevel@tonic-gate /*
10750Sstevel@tonic-gate  * test if block of given size can be allocated with a request of
10760Sstevel@tonic-gate  * the given priority.
10770Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
10780Sstevel@tonic-gate  */
10790Sstevel@tonic-gate /* ARGSUSED */
10800Sstevel@tonic-gate int
10810Sstevel@tonic-gate testb(size_t size, uint_t pri)
10820Sstevel@tonic-gate {
10830Sstevel@tonic-gate 	return ((size + sizeof (dblk_t)) <= kmem_avail());
10840Sstevel@tonic-gate }
10850Sstevel@tonic-gate 
/*
 * Call function 'func' with argument 'arg' when there is a reasonably
 * good chance that a block of size 'size' can be allocated.
 * 'pri' is no longer used, but is retained for compatibility.
 * Returns an id usable with unbufcall(), or 0 on allocation failure.
 */
/* ARGSUSED */
bufcall_id_t
bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
{
	static long bid = 1;	/* always odd to save checking for zero */
	bufcall_id_t bc_id;
	struct strbufcall *bcp;

	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
		return (0);

	bcp->bc_func = func;
	bcp->bc_arg = arg;
	bcp->bc_size = size;
	bcp->bc_next = NULL;
	/* bc_executor is set by the thread that eventually runs the call */
	bcp->bc_executor = NULL;

	mutex_enter(&strbcall_lock);
	/*
	 * After bcp is linked into strbcalls and strbcall_lock is dropped there
	 * should be no references to bcp since it may be freed by
	 * runbufcalls(). Since bcp_id field is returned, we save its value in
	 * the local var.
	 */
	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */

	/*
	 * add newly allocated stream event to existing
	 * linked list of events.
	 */
	if (strbcalls.bc_head == NULL) {
		strbcalls.bc_head = strbcalls.bc_tail = bcp;
	} else {
		strbcalls.bc_tail->bc_next = bcp;
		strbcalls.bc_tail = bcp;
	}

	/* wake the worker that services the bufcall list */
	cv_signal(&strbcall_cv);
	mutex_exit(&strbcall_lock);
	return (bc_id);
}
11320Sstevel@tonic-gate 
/*
 * Cancel a bufcall request.
 * If the request is currently being executed by another thread, wait
 * until that execution finishes and re-scan the list; if we ourselves
 * are the executor, return without waiting (avoids self-deadlock).
 * Otherwise unlink the entry and free it.  A no-op for unknown ids.
 */
void
unbufcall(bufcall_id_t id)
{
	strbufcall_t *bcp, *pbcp;

	mutex_enter(&strbcall_lock);
again:
	pbcp = NULL;
	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
		if (id == bcp->bc_id)
			break;
		pbcp = bcp;
	}
	if (bcp) {
		if (bcp->bc_executor != NULL) {
			if (bcp->bc_executor != curthread) {
				/* in progress elsewhere: wait and re-scan */
				cv_wait(&bcall_cv, &strbcall_lock);
				goto again;
			}
		} else {
			/* unlink from the singly-linked list and free */
			if (pbcp)
				pbcp->bc_next = bcp->bc_next;
			else
				strbcalls.bc_head = bcp->bc_next;
			if (bcp == strbcalls.bc_tail)
				strbcalls.bc_tail = pbcp;
			kmem_free(bcp, sizeof (strbufcall_t));
		}
	}
	mutex_exit(&strbcall_lock);
}
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate /*
11690Sstevel@tonic-gate  * Duplicate a message block by block (uses dupb), returning
11700Sstevel@tonic-gate  * a pointer to the duplicate message.
11710Sstevel@tonic-gate  * Returns a non-NULL value only if the entire message
11720Sstevel@tonic-gate  * was dup'd.
11730Sstevel@tonic-gate  */
11740Sstevel@tonic-gate mblk_t *
11750Sstevel@tonic-gate dupmsg(mblk_t *bp)
11760Sstevel@tonic-gate {
11770Sstevel@tonic-gate 	mblk_t *head, *nbp;
11780Sstevel@tonic-gate 
11790Sstevel@tonic-gate 	if (!bp || !(nbp = head = dupb(bp)))
11800Sstevel@tonic-gate 		return (NULL);
11810Sstevel@tonic-gate 
11820Sstevel@tonic-gate 	while (bp->b_cont) {
11830Sstevel@tonic-gate 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
11840Sstevel@tonic-gate 			freemsg(head);
11850Sstevel@tonic-gate 			return (NULL);
11860Sstevel@tonic-gate 		}
11870Sstevel@tonic-gate 		nbp = nbp->b_cont;
11880Sstevel@tonic-gate 		bp = bp->b_cont;
11890Sstevel@tonic-gate 	}
11900Sstevel@tonic-gate 	return (head);
11910Sstevel@tonic-gate }
11920Sstevel@tonic-gate 
/*
 * Dup a single mblk for dupmsg_noloan(): blocks marked STRUIO_ZC
 * (zero-copy) are deep-copied with copyb() — presumably so the
 * duplicate never shares the loaned buffer — while all others get a
 * normal reference dup via dupb().
 */
#define	DUPB_NOLOAN(bp) \
	((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
	copyb((bp)) : dupb((bp)))
11960Sstevel@tonic-gate 
11970Sstevel@tonic-gate mblk_t *
11980Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp)
11990Sstevel@tonic-gate {
12000Sstevel@tonic-gate 	mblk_t *head, *nbp;
12010Sstevel@tonic-gate 
12020Sstevel@tonic-gate 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
12030Sstevel@tonic-gate 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
12040Sstevel@tonic-gate 		return (NULL);
12050Sstevel@tonic-gate 
12060Sstevel@tonic-gate 	while (bp->b_cont) {
12070Sstevel@tonic-gate 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
12080Sstevel@tonic-gate 			freemsg(head);
12090Sstevel@tonic-gate 			return (NULL);
12100Sstevel@tonic-gate 		}
12110Sstevel@tonic-gate 		nbp = nbp->b_cont;
12120Sstevel@tonic-gate 		bp = bp->b_cont;
12130Sstevel@tonic-gate 	}
12140Sstevel@tonic-gate 	return (head);
12150Sstevel@tonic-gate }
12160Sstevel@tonic-gate 
/*
 * Copy data from message and data block to newly allocated message and
 * data block. Returns new message block pointer, or NULL if error.
 * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
 * as in the original even when db_base is not word aligned. (bug 1052877)
 */
mblk_t *
copyb(mblk_t *bp)
{
	mblk_t	*nbp;
	dblk_t	*dp, *ndp;
	uchar_t *base;
	size_t	size;
	size_t	unaligned;

	ASSERT(bp->b_wptr >= bp->b_rptr);

	dp = bp->b_datap;
	if (dp->db_fthdr != NULL)
		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);

	/*
	 * Special handling for Multidata message; this should be
	 * removed once a copy-callback routine is made available.
	 */
	if (dp->db_type == M_MULTIDATA) {
		cred_t *cr;

		if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
			return (NULL);

		nbp->b_flag = bp->b_flag;
		nbp->b_band = bp->b_band;
		ndp = nbp->b_datap;

		/* See comments below on potential issues. */
		STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);

		ASSERT(ndp->db_type == dp->db_type);
		/* carry the credentials (with a hold) and pid to the copy */
		cr = dp->db_credp;
		if (cr != NULL)
			crhold(ndp->db_credp = cr);
		ndp->db_cpid = dp->db_cpid;
		return (nbp);
	}

	/* copy the whole dblk, padded so rptr keeps its word phase */
	size = dp->db_lim - dp->db_base;
	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
		return (NULL);
	nbp->b_flag = bp->b_flag;
	nbp->b_band = bp->b_band;
	ndp = nbp->b_datap;

	/*
	 * Well, here is a potential issue.  If we are trying to
	 * trace a flow, and we copy the message, we might lose
	 * information about where this message might have been.
	 * So we should inherit the FT data.  On the other hand,
	 * a user might be interested only in alloc to free data.
	 * So I guess the real answer is to provide a tunable.
	 */
	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);

	/* offset by 'unaligned' so the copy has the same word phase */
	base = ndp->db_base + unaligned;
	bcopy(dp->db_base, ndp->db_base + unaligned, size);

	/* preserve the rptr offset and data length of the original */
	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);

	return (nbp);
}
12890Sstevel@tonic-gate 
12900Sstevel@tonic-gate /*
12910Sstevel@tonic-gate  * Copy data from message to newly allocated message using new
12920Sstevel@tonic-gate  * data blocks.  Returns a pointer to the new message, or NULL if error.
12930Sstevel@tonic-gate  */
12940Sstevel@tonic-gate mblk_t *
12950Sstevel@tonic-gate copymsg(mblk_t *bp)
12960Sstevel@tonic-gate {
12970Sstevel@tonic-gate 	mblk_t *head, *nbp;
12980Sstevel@tonic-gate 
12990Sstevel@tonic-gate 	if (!bp || !(nbp = head = copyb(bp)))
13000Sstevel@tonic-gate 		return (NULL);
13010Sstevel@tonic-gate 
13020Sstevel@tonic-gate 	while (bp->b_cont) {
13030Sstevel@tonic-gate 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
13040Sstevel@tonic-gate 			freemsg(head);
13050Sstevel@tonic-gate 			return (NULL);
13060Sstevel@tonic-gate 		}
13070Sstevel@tonic-gate 		nbp = nbp->b_cont;
13080Sstevel@tonic-gate 		bp = bp->b_cont;
13090Sstevel@tonic-gate 	}
13100Sstevel@tonic-gate 	return (head);
13110Sstevel@tonic-gate }
13120Sstevel@tonic-gate 
13130Sstevel@tonic-gate /*
13140Sstevel@tonic-gate  * link a message block to tail of message
13150Sstevel@tonic-gate  */
13160Sstevel@tonic-gate void
13170Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp)
13180Sstevel@tonic-gate {
13190Sstevel@tonic-gate 	ASSERT(mp && bp);
13200Sstevel@tonic-gate 
13210Sstevel@tonic-gate 	for (; mp->b_cont; mp = mp->b_cont)
13220Sstevel@tonic-gate 		;
13230Sstevel@tonic-gate 	mp->b_cont = bp;
13240Sstevel@tonic-gate }
13250Sstevel@tonic-gate 
13260Sstevel@tonic-gate /*
13270Sstevel@tonic-gate  * unlink a message block from head of message
13280Sstevel@tonic-gate  * return pointer to new message.
13290Sstevel@tonic-gate  * NULL if message becomes empty.
13300Sstevel@tonic-gate  */
13310Sstevel@tonic-gate mblk_t *
13320Sstevel@tonic-gate unlinkb(mblk_t *bp)
13330Sstevel@tonic-gate {
13340Sstevel@tonic-gate 	mblk_t *bp1;
13350Sstevel@tonic-gate 
13360Sstevel@tonic-gate 	bp1 = bp->b_cont;
13370Sstevel@tonic-gate 	bp->b_cont = NULL;
13380Sstevel@tonic-gate 	return (bp1);
13390Sstevel@tonic-gate }
13400Sstevel@tonic-gate 
13410Sstevel@tonic-gate /*
13420Sstevel@tonic-gate  * remove a message block "bp" from message "mp"
13430Sstevel@tonic-gate  *
13440Sstevel@tonic-gate  * Return pointer to new message or NULL if no message remains.
13450Sstevel@tonic-gate  * Return -1 if bp is not found in message.
13460Sstevel@tonic-gate  */
13470Sstevel@tonic-gate mblk_t *
13480Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp)
13490Sstevel@tonic-gate {
13500Sstevel@tonic-gate 	mblk_t *tmp;
13510Sstevel@tonic-gate 	mblk_t *lastp = NULL;
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 	ASSERT(mp && bp);
13540Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
13550Sstevel@tonic-gate 		if (tmp == bp) {
13560Sstevel@tonic-gate 			if (lastp)
13570Sstevel@tonic-gate 				lastp->b_cont = tmp->b_cont;
13580Sstevel@tonic-gate 			else
13590Sstevel@tonic-gate 				mp = tmp->b_cont;
13600Sstevel@tonic-gate 			tmp->b_cont = NULL;
13610Sstevel@tonic-gate 			return (mp);
13620Sstevel@tonic-gate 		}
13630Sstevel@tonic-gate 		lastp = tmp;
13640Sstevel@tonic-gate 	}
13650Sstevel@tonic-gate 	return ((mblk_t *)-1);
13660Sstevel@tonic-gate }
13670Sstevel@tonic-gate 
/*
 * Concatenate and align first len bytes of common
 * message type.  Len == -1, means concat everything.
 * Returns 1 on success, 0 on failure
 * After the pullup, mp points to the pulled up data.
 */
int
pullupmsg(mblk_t *mp, ssize_t len)
{
	mblk_t *bp, *b_cont;
	dblk_t *dbp;
	ssize_t n;
	/* Hardware checksum state carried on the message's leading dblk. */
	uint32_t start, stuff, end, value, flags;

	ASSERT(mp->b_datap->db_ref > 0);
	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (0);

	if (len == -1) {
		/* Already a single, aligned mblk: nothing to do. */
		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
			return (1);
		len = xmsgsize(mp);
	} else {
		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
		ASSERT(first_mblk_len >= 0);
		/*
		 * If the length is less than that of the first mblk,
		 * we want to pull up the message into an aligned mblk.
		 * Though not part of the spec, some callers assume it.
		 */
		if (len <= first_mblk_len) {
			if (str_aligned(mp->b_rptr))
				return (1);
			len = first_mblk_len;
		} else if (xmsgsize(mp) < len)
			return (0);	/* message too short for request */
	}

	if ((bp = allocb_tmpl(len, mp)) == NULL)
		return (0);

	/*
	 * Swap the dblks of mp and the freshly allocated bp so that mp
	 * (the caller's handle) heads the new, empty buffer while bp
	 * heads the old data chain that we are about to drain.
	 */
	dbp = bp->b_datap;
	*bp = *mp;		/* swap mblks so bp heads the old msg... */
	mp->b_datap = dbp;	/* ... and mp heads the new message */
	mp->b_datap->db_mblk = mp;
	bp->b_datap->db_mblk = bp;
	mp->b_rptr = mp->b_wptr = dbp->db_base;

	/*
	 * Need to preserve checksum information by copying them
	 * to mp which heads the pulluped message.
	 */
	hcksum_retrieve(bp, NULL, NULL, &start, &stuff, &end, &value, &flags);
	(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, value, flags, 0);

	/*
	 * Drain up to len bytes from the old chain into the new buffer,
	 * freeing each old mblk once it is fully consumed.
	 */
	do {
		ASSERT(bp->b_datap->db_ref > 0);
		ASSERT(bp->b_wptr >= bp->b_rptr);
		n = MIN(bp->b_wptr - bp->b_rptr, len);
		bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
		mp->b_wptr += n;
		bp->b_rptr += n;
		len -= n;
		/* Partially drained mblk: len bytes copied; keep the rest. */
		if (bp->b_rptr != bp->b_wptr)
			break;
		b_cont = bp->b_cont;
		freeb(bp);
		bp = b_cont;
	} while (len && bp);

	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */

	return (1);
}
14500Sstevel@tonic-gate 
/*
 * Concatenate and align at least the first len bytes of common message
 * type.  Len == -1 means concatenate everything.  The original message is
 * unaltered.  Returns a pointer to a new message on success, otherwise
 * returns NULL.
 */
mblk_t *
msgpullup(mblk_t *mp, ssize_t len)
{
	mblk_t	*newmp;
	ssize_t	totlen;
	ssize_t	n;
	/* Hardware checksum state carried on the message's leading dblk. */
	uint32_t start, stuff, end, value, flags;

	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (NULL);

	totlen = xmsgsize(mp);

	/* Fail if the caller asked for more bytes than are present. */
	if ((len > 0) && (len > totlen))
		return (NULL);

	/*
	 * Copy all of the first msg type into one new mblk, then dupmsg
	 * and link the rest onto this.
	 */

	/* "At least len": the entire leading run is always pulled up. */
	len = totlen;

	if ((newmp = allocb_tmpl(len, mp)) == NULL)
		return (NULL);

	newmp->b_flag = mp->b_flag;
	newmp->b_band = mp->b_band;

	/*
	 * Need to preserve checksum information by copying them
	 * to newmp which heads the pulluped message.
	 */
	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
	(void) hcksum_assoc(newmp, NULL, NULL, start, stuff, end,
	    value, flags, 0);

	while (len > 0) {
		n = mp->b_wptr - mp->b_rptr;
		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
		if (n > 0)
			bcopy(mp->b_rptr, newmp->b_wptr, n);
		newmp->b_wptr += n;
		len -= n;
		mp = mp->b_cont;
	}

	/*
	 * Anything left after len bytes were consumed (e.g. trailing
	 * zero-length mblks) is shared via dupmsg() rather than copied.
	 */
	if (mp != NULL) {
		newmp->b_cont = dupmsg(mp);
		if (newmp->b_cont == NULL) {
			freemsg(newmp);
			return (NULL);
		}
	}

	return (newmp);
}
15200Sstevel@tonic-gate 
/*
 * Trim bytes from message
 *  len > 0, trim from head
 *  len < 0, trim from tail
 * Returns 1 on success, 0 on failure.
 */
int
adjmsg(mblk_t *mp, ssize_t len)
{
	mblk_t *bp;
	mblk_t *save_bp = NULL;
	mblk_t *prev_bp;
	mblk_t *bcont;
	unsigned char type;
	ssize_t n;
	int fromhead;
	int first;

	ASSERT(mp != NULL);
	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (0);

	/* Normalize to a positive byte count plus a direction flag. */
	if (len < 0) {
		fromhead = 0;
		len = -len;
	} else {
		fromhead = 1;
	}

	/* Fail before touching anything if there isn't enough data. */
	if (xmsgsize(mp) < len)
		return (0);


	if (fromhead) {
		/*
		 * Head trim: advance b_rptr through the chain, freeing
		 * any mblk (other than the first) that becomes empty.
		 * The first mblk is kept so the caller's mp stays valid.
		 */
		first = 1;
		while (len) {
			ASSERT(mp->b_wptr >= mp->b_rptr);
			n = MIN(mp->b_wptr - mp->b_rptr, len);
			mp->b_rptr += n;
			len -= n;

			/*
			 * If this is not the first zero length
			 * message remove it
			 */
			if (!first && (mp->b_wptr == mp->b_rptr)) {
				bcont = mp->b_cont;
				freeb(mp);
				mp = save_bp->b_cont = bcont;
			} else {
				save_bp = mp;
				mp = mp->b_cont;
			}
			first = 0;
		}
	} else {
		/*
		 * Tail trim: only mblks in the leading run that share the
		 * first mblk's db_type are eligible for trimming.
		 */
		type = mp->b_datap->db_type;
		while (len) {
			bp = mp;
			save_bp = NULL;

			/*
			 * Find the last message of same type
			 */

			while (bp && bp->b_datap->db_type == type) {
				ASSERT(bp->b_wptr >= bp->b_rptr);
				/* prev_bp trails save_bp by one block. */
				prev_bp = save_bp;
				save_bp = bp;
				bp = bp->b_cont;
			}
			if (save_bp == NULL)
				break;
			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
			save_bp->b_wptr -= n;
			len -= n;

			/*
			 * If this is not the first message
			 * and we have taken away everything
			 * from this message, remove it
			 */

			/*
			 * save_bp != mp implies the scan above ran at
			 * least twice, so prev_bp is valid here.
			 */
			if ((save_bp != mp) &&
				(save_bp->b_wptr == save_bp->b_rptr)) {
				bcont = save_bp->b_cont;
				freeb(save_bp);
				prev_bp->b_cont = bcont;
			}
		}
	}
	return (1);
}
16200Sstevel@tonic-gate 
16210Sstevel@tonic-gate /*
16220Sstevel@tonic-gate  * get number of data bytes in message
16230Sstevel@tonic-gate  */
16240Sstevel@tonic-gate size_t
16250Sstevel@tonic-gate msgdsize(mblk_t *bp)
16260Sstevel@tonic-gate {
16270Sstevel@tonic-gate 	size_t count = 0;
16280Sstevel@tonic-gate 
16290Sstevel@tonic-gate 	for (; bp; bp = bp->b_cont)
16300Sstevel@tonic-gate 		if (bp->b_datap->db_type == M_DATA) {
16310Sstevel@tonic-gate 			ASSERT(bp->b_wptr >= bp->b_rptr);
16320Sstevel@tonic-gate 			count += bp->b_wptr - bp->b_rptr;
16330Sstevel@tonic-gate 		}
16340Sstevel@tonic-gate 	return (count);
16350Sstevel@tonic-gate }
16360Sstevel@tonic-gate 
16370Sstevel@tonic-gate /*
16380Sstevel@tonic-gate  * Get a message off head of queue
16390Sstevel@tonic-gate  *
16400Sstevel@tonic-gate  * If queue has no buffers then mark queue
16410Sstevel@tonic-gate  * with QWANTR. (queue wants to be read by
16420Sstevel@tonic-gate  * someone when data becomes available)
16430Sstevel@tonic-gate  *
16440Sstevel@tonic-gate  * If there is something to take off then do so.
16450Sstevel@tonic-gate  * If queue falls below hi water mark turn off QFULL
16460Sstevel@tonic-gate  * flag.  Decrement weighted count of queue.
16470Sstevel@tonic-gate  * Also turn off QWANTR because queue is being read.
16480Sstevel@tonic-gate  *
16490Sstevel@tonic-gate  * The queue count is maintained on a per-band basis.
16500Sstevel@tonic-gate  * Priority band 0 (normal messages) uses q_count,
16510Sstevel@tonic-gate  * q_lowat, etc.  Non-zero priority bands use the
16520Sstevel@tonic-gate  * fields in their respective qband structures
16530Sstevel@tonic-gate  * (qb_count, qb_lowat, etc.)  All messages appear
16540Sstevel@tonic-gate  * on the same list, linked via their b_next pointers.
16550Sstevel@tonic-gate  * q_first is the head of the list.  q_count does
16560Sstevel@tonic-gate  * not reflect the size of all the messages on the
16570Sstevel@tonic-gate  * queue.  It only reflects those messages in the
16580Sstevel@tonic-gate  * normal band of flow.  The one exception to this
16590Sstevel@tonic-gate  * deals with high priority messages.  They are in
16600Sstevel@tonic-gate  * their own conceptual "band", but are accounted
16610Sstevel@tonic-gate  * against q_count.
16620Sstevel@tonic-gate  *
16630Sstevel@tonic-gate  * If queue count is below the lo water mark and QWANTW
16640Sstevel@tonic-gate  * is set, enable the closest backq which has a service
16650Sstevel@tonic-gate  * procedure and turn off the QWANTW flag.
16660Sstevel@tonic-gate  *
16670Sstevel@tonic-gate  * getq could be built on top of rmvq, but isn't because
16680Sstevel@tonic-gate  * of performance considerations.
16690Sstevel@tonic-gate  *
16700Sstevel@tonic-gate  * A note on the use of q_count and q_mblkcnt:
16710Sstevel@tonic-gate  *   q_count is the traditional byte count for messages that
16720Sstevel@tonic-gate  *   have been put on a queue.  Documentation tells us that
16730Sstevel@tonic-gate  *   we shouldn't rely on that count, but some drivers/modules
16740Sstevel@tonic-gate  *   do.  What was needed, however, is a mechanism to prevent
16750Sstevel@tonic-gate  *   runaway streams from consuming all of the resources,
16760Sstevel@tonic-gate  *   and particularly be able to flow control zero-length
16770Sstevel@tonic-gate  *   messages.  q_mblkcnt is used for this purpose.  It
16780Sstevel@tonic-gate  *   counts the number of mblk's that are being put on
16790Sstevel@tonic-gate  *   the queue.  The intention here, is that each mblk should
16800Sstevel@tonic-gate  *   contain one byte of data and, for the purpose of
16810Sstevel@tonic-gate  *   flow-control, logically does.  A queue will become
16820Sstevel@tonic-gate  *   full when EITHER of these values (q_count and q_mblkcnt)
16830Sstevel@tonic-gate  *   reach the highwater mark.  It will clear when BOTH
16840Sstevel@tonic-gate  *   of them drop below the highwater mark.  And it will
16850Sstevel@tonic-gate  *   backenable when BOTH of them drop below the lowwater
16860Sstevel@tonic-gate  *   mark.
16870Sstevel@tonic-gate  *   With this algorithm, a driver/module might be able
16880Sstevel@tonic-gate  *   to find a reasonably accurate q_count, and the
16890Sstevel@tonic-gate  *   framework can still try and limit resource usage.
16900Sstevel@tonic-gate  */
16910Sstevel@tonic-gate mblk_t *
16920Sstevel@tonic-gate getq(queue_t *q)
16930Sstevel@tonic-gate {
16940Sstevel@tonic-gate 	mblk_t *bp;
1695235Smicheng 	uchar_t band = 0;
16960Sstevel@tonic-gate 
16970Sstevel@tonic-gate 	bp = getq_noenab(q);
16980Sstevel@tonic-gate 	if (bp != NULL)
16990Sstevel@tonic-gate 		band = bp->b_band;
17000Sstevel@tonic-gate 
17010Sstevel@tonic-gate 	/*
17020Sstevel@tonic-gate 	 * Inlined from qbackenable().
17030Sstevel@tonic-gate 	 * Quick check without holding the lock.
17040Sstevel@tonic-gate 	 */
17050Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
17060Sstevel@tonic-gate 		return (bp);
17070Sstevel@tonic-gate 
17080Sstevel@tonic-gate 	qbackenable(q, band);
17090Sstevel@tonic-gate 	return (bp);
17100Sstevel@tonic-gate }
17110Sstevel@tonic-gate 
/*
 * Calculate number of data bytes in a single data message block taking
 * multidata messages into account.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement; the bare if/else form composed unsafely with an unbraced
 * if/else at a call site (dangling-else hazard).
 */

#define	ADD_MBLK_SIZE(mp, size)						\
	do {								\
		if (DB_TYPE(mp) != M_MULTIDATA) {			\
			(size) += MBLKL(mp);				\
		} else {						\
			uint_t	pinuse;					\
									\
			mmd_getsize(mmd_getmultidata(mp), NULL, &pinuse); \
			(size) += pinuse;				\
		}							\
	} while (0)
1726741Smasputra 
/*
 * Like getq() but does not backenable.  This is used by the stream
 * head when a putback() is likely.  The caller must call qbackenable()
 * after it is done with accessing the queue.
 */
mblk_t *
getq_noenab(queue_t *q)
{
	mblk_t *bp;
	mblk_t *tmp;
	qband_t *qbp;
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	/* freezestr should allow its caller to call getq/putq */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		/* Stream is frozen by us: QLOCK is already held. */
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if ((bp = q->q_first) == 0) {
		/* Queue empty: note that a reader wants data. */
		q->q_flag |= QWANTR;
	} else {
		/* Unlink the head message from the q_first/q_last list. */
		if ((q->q_first = bp->b_next) == NULL)
			q->q_last = NULL;
		else
			q->q_first->b_prev = NULL;

		/* Get message byte count for q_count accounting */
		for (tmp = bp; tmp; tmp = tmp->b_cont) {
			ADD_MBLK_SIZE(tmp, bytecnt);
			mblkcnt++;
		}

		if (bp->b_band == 0) {
			/* Normal band: account against the queue itself. */
			q->q_count -= bytecnt;
			q->q_mblkcnt -= mblkcnt;
			/* QFULL clears only when BOTH counts drop. */
			if ((q->q_count < q->q_hiwat) &&
			    (q->q_mblkcnt < q->q_hiwat)) {
				q->q_flag &= ~QFULL;
			}
		} else {
			int i;

			/* Priority band: account against its qband_t. */
			ASSERT(bp->b_band <= q->q_nband);
			ASSERT(q->q_bandp != NULL);
			ASSERT(MUTEX_HELD(QLOCK(q)));
			qbp = q->q_bandp;
			i = bp->b_band;
			while (--i > 0)
				qbp = qbp->qb_next;
			if (qbp->qb_first == qbp->qb_last) {
				/* bp was the band's only message. */
				qbp->qb_first = NULL;
				qbp->qb_last = NULL;
			} else {
				qbp->qb_first = bp->b_next;
			}
			qbp->qb_count -= bytecnt;
			qbp->qb_mblkcnt -= mblkcnt;
			if ((qbp->qb_count < qbp->qb_hiwat) &&
			    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
				qbp->qb_flag &= ~QB_FULL;
			}
		}
		/* Queue is being read: the read request is satisfied. */
		q->q_flag &= ~QWANTR;
		bp->b_next = NULL;
		bp->b_prev = NULL;
	}
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);

	return (bp);
}
18040Sstevel@tonic-gate 
/*
 * Determine if a backenable is needed after removing a message in the
 * specified band.
 * NOTE: This routine assumes that something like getq_noenab() has been
 * already called.
 *
 * For the read side it is ok to hold sd_lock across calling this (and the
 * stream head often does).
 * But for the write side strwakeq might be invoked and it acquires sd_lock.
 */
void
qbackenable(queue_t *q, uchar_t band)
{
	int backenab = 0;
	qband_t *qbp;
	kthread_id_t freezer;

	ASSERT(q);
	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));

	/*
	 * Quick check without holding the lock.
	 * OK since after getq() has lowered the q_count these flags
	 * would not change unless either the qbackenable() is done by
	 * another thread (which is ok) or the queue has gotten QFULL
	 * in which case another backenable will take place when the queue
	 * drops below q_lowat.
	 */
	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
		return;

	/* freezestr should allow its caller to call getq/putq */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		/* Stream frozen by us: QLOCK already held. */
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if (band == 0) {
		/* Normal band: backenable once below q_lowat (or lowat 0). */
		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
		    q->q_mblkcnt < q->q_lowat)) {
			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
		}
	} else {
		int i;

		ASSERT((unsigned)band <= q->q_nband);
		ASSERT(q->q_bandp != NULL);

		/* Walk to the qband_t for this priority band. */
		qbp = q->q_bandp;
		i = band;
		while (--i > 0)
			qbp = qbp->qb_next;

		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
			backenab = qbp->qb_flag & QB_WANTW;
		}
	}

	if (backenab == 0) {
		if (freezer != curthread)
			mutex_exit(QLOCK(q));
		return;
	}

	/* Have to drop the lock across strwakeq and backenable */
	if (backenab & QWANTWSYNC)
		q->q_flag &= ~QWANTWSYNC;
	if (backenab & (QWANTW|QB_WANTW)) {
		if (band != 0)
			qbp->qb_flag &= ~QB_WANTW;
		else {
			q->q_flag &= ~QWANTW;
		}
	}

	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	/* Wakeups happen after the lock is dropped (they take sd_lock). */
	if (backenab & QWANTWSYNC)
		strwakeq(q, QWANTWSYNC);
	if (backenab & (QWANTW|QB_WANTW))
		backenable(q, band);
}
18910Sstevel@tonic-gate 
18920Sstevel@tonic-gate /*
18930Sstevel@tonic-gate  * Remove a message from a queue.  The queue count and other
18940Sstevel@tonic-gate  * flow control parameters are adjusted and the back queue
18950Sstevel@tonic-gate  * enabled if necessary.
18960Sstevel@tonic-gate  *
18970Sstevel@tonic-gate  * rmvq can be called with the stream frozen, but other utility functions
18980Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
18990Sstevel@tonic-gate  */
19000Sstevel@tonic-gate void
19010Sstevel@tonic-gate rmvq(queue_t *q, mblk_t *mp)
19020Sstevel@tonic-gate {
19030Sstevel@tonic-gate 	ASSERT(mp != NULL);
19040Sstevel@tonic-gate 
19050Sstevel@tonic-gate 	rmvq_noenab(q, mp);
19060Sstevel@tonic-gate 	if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
19070Sstevel@tonic-gate 		/*
19080Sstevel@tonic-gate 		 * qbackenable can handle a frozen stream but not a "random"
19090Sstevel@tonic-gate 		 * qlock being held. Drop lock across qbackenable.
19100Sstevel@tonic-gate 		 */
19110Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
19120Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
19130Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
19140Sstevel@tonic-gate 	} else {
19150Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
19160Sstevel@tonic-gate 	}
19170Sstevel@tonic-gate }
19180Sstevel@tonic-gate 
/*
 * Like rmvq() but without any backenabling.
 * This exists to handle SR_CONSOL_DATA in strrput().
 */
void
rmvq_noenab(queue_t *q, mblk_t *mp)
{
	mblk_t *tmp;
	int i;
	qband_t *qbp = NULL;
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		/* Stream frozen by us: QLOCK already held. */
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else if (MUTEX_HELD(QLOCK(q))) {
		/* Don't drop lock on exit */
		freezer = curthread;
	} else
		mutex_enter(QLOCK(q));

	ASSERT(mp->b_band <= q->q_nband);
	if (mp->b_band != 0) {		/* Adjust band pointers */
		ASSERT(q->q_bandp != NULL);
		qbp = q->q_bandp;
		i = mp->b_band;
		while (--i > 0)
			qbp = qbp->qb_next;
		/*
		 * qb_first/qb_last delimit this band's run within the
		 * single b_next-linked list; shrink the run if mp is
		 * at either boundary.
		 */
		if (mp == qbp->qb_first) {
			if (mp->b_next && mp->b_band == mp->b_next->b_band)
				qbp->qb_first = mp->b_next;
			else
				qbp->qb_first = NULL;
		}
		if (mp == qbp->qb_last) {
			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
				qbp->qb_last = mp->b_prev;
			else
				qbp->qb_last = NULL;
		}
	}

	/*
	 * Remove the message from the list.
	 */
	if (mp->b_prev)
		mp->b_prev->b_next = mp->b_next;
	else
		q->q_first = mp->b_next;
	if (mp->b_next)
		mp->b_next->b_prev = mp->b_prev;
	else
		q->q_last = mp->b_prev;
	mp->b_next = NULL;
	mp->b_prev = NULL;

	/* Get the size of the message for q_count accounting */
	for (tmp = mp; tmp; tmp = tmp->b_cont) {
		ADD_MBLK_SIZE(tmp, bytecnt);
		mblkcnt++;
	}

	if (mp->b_band == 0) {		/* Perform q_count accounting */
		q->q_count -= bytecnt;
		q->q_mblkcnt -= mblkcnt;
		/* QFULL clears only when BOTH counts drop below hiwat. */
		if ((q->q_count < q->q_hiwat) &&
		    (q->q_mblkcnt < q->q_hiwat)) {
			q->q_flag &= ~QFULL;
		}
	} else {			/* Perform qb_count accounting */
		qbp->qb_count -= bytecnt;
		qbp->qb_mblkcnt -= mblkcnt;
		if ((qbp->qb_count < qbp->qb_hiwat) &&
		    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
			qbp->qb_flag &= ~QB_FULL;
		}
	}
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
}
20030Sstevel@tonic-gate 
20040Sstevel@tonic-gate /*
20050Sstevel@tonic-gate  * Empty a queue.
20060Sstevel@tonic-gate  * If flag is set, remove all messages.  Otherwise, remove
20070Sstevel@tonic-gate  * only non-control messages.  If queue falls below its low
20080Sstevel@tonic-gate  * water mark, and QWANTW is set, enable the nearest upstream
20090Sstevel@tonic-gate  * service procedure.
20100Sstevel@tonic-gate  *
20110Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
20120Sstevel@tonic-gate  * code one difference was discovered. flushq did not have a check
20130Sstevel@tonic-gate  * for q_lowat == 0 in the backenabling test.
20140Sstevel@tonic-gate  *
20150Sstevel@tonic-gate  * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
20160Sstevel@tonic-gate  * if one exists on the queue.
20170Sstevel@tonic-gate  */
void
flushq_common(queue_t *q, int flag, int pcproto_flag)
{
	mblk_t *mp, *nmp;
	qband_t *qbp;
	int backenab = 0;	/* nonzero once any band may be backenabled */
	unsigned char bpri;
	unsigned char	qbf[NBAND];	/* band flushing backenable flags */

	/* Lockless peek: an empty queue needs no flushing. */
	if (q->q_first == NULL)
		return;

	/*
	 * Unlink the whole message chain and reset all byte/mblk counts
	 * and flow-control flags while holding QLOCK.  The chain is then
	 * walked with the lock dropped, because putq() (used below to
	 * retain messages) acquires QLOCK itself.
	 */
	mutex_enter(QLOCK(q));
	mp = q->q_first;
	q->q_first = NULL;
	q->q_last = NULL;
	q->q_count = 0;
	q->q_mblkcnt = 0;
	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
		qbp->qb_first = NULL;
		qbp->qb_last = NULL;
		qbp->qb_count = 0;
		qbp->qb_mblkcnt = 0;
		qbp->qb_flag &= ~QB_FULL;
	}
	q->q_flag &= ~QFULL;
	mutex_exit(QLOCK(q));
	/*
	 * Walk the detached chain.  M_PCPROTO messages are retained
	 * (requeued) when pcproto_flag is set; otherwise a message is
	 * freed if all messages are being flushed (flag != 0) or it is
	 * a data message, and retained if it is a non-data message
	 * during a data-only flush.
	 */
	while (mp) {
		nmp = mp->b_next;
		mp->b_next = mp->b_prev = NULL;

		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);

		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
			(void) putq(q, mp);
		else if (flag || datamsg(mp->b_datap->db_type))
			freemsg(mp);
		else
			(void) putq(q, mp);
		mp = nmp;
	}
	/*
	 * Record, per band, whether a blocked writer (QB_WANTW) can now
	 * proceed: the band is below its low water mark, or it has no
	 * low water mark at all (qb_lowat == 0).  qbf[0] covers the
	 * normal (band 0) flow; qbf[1..q_nband] cover the bands.
	 */
	bpri = 1;
	mutex_enter(QLOCK(q));
	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
		if ((qbp->qb_flag & QB_WANTW) &&
		    (((qbp->qb_count < qbp->qb_lowat) &&
		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
		    qbp->qb_lowat == 0)) {
			qbp->qb_flag &= ~QB_WANTW;
			backenab = 1;
			qbf[bpri] = 1;
		} else
			qbf[bpri] = 0;
		bpri++;
	}
	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
	if ((q->q_flag & QWANTW) &&
	    (((q->q_count < q->q_lowat) &&
	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
		q->q_flag &= ~QWANTW;
		backenab = 1;
		qbf[0] = 1;
	} else
		qbf[0] = 0;

	/*
	 * If any band can now be written to, and there is a writer
	 * for that band, then backenable the closest service procedure.
	 * backenable() is called with QLOCK dropped.
	 */
	if (backenab) {
		mutex_exit(QLOCK(q));
		for (bpri = q->q_nband; bpri != 0; bpri--)
			if (qbf[bpri])
				backenable(q, bpri);
		if (qbf[0])
			backenable(q, 0);
	} else
		mutex_exit(QLOCK(q));
}
20970Sstevel@tonic-gate 
20980Sstevel@tonic-gate /*
 * The real flushing takes place in flushq_common.  This allows the caller
 * to specify, via a flag, whether or not M_PCPROTO messages should be
 * flushed.  Currently the only place that uses this flag is the stream head.
21020Sstevel@tonic-gate  */
void
flushq(queue_t *q, int flag)
{
	/* pcproto_flag == 0: M_PCPROTO messages are not retained. */
	flushq_common(q, flag, 0);
}
21080Sstevel@tonic-gate 
21090Sstevel@tonic-gate /*
21100Sstevel@tonic-gate  * Flush the queue of messages of the given priority band.
21110Sstevel@tonic-gate  * There is some duplication of code between flushq and flushband.
21120Sstevel@tonic-gate  * This is because we want to optimize the code as much as possible.
21130Sstevel@tonic-gate  * The assumption is that there will be more messages in the normal
21140Sstevel@tonic-gate  * (priority 0) band than in any other.
21150Sstevel@tonic-gate  *
21160Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
 * code one difference was discovered: flushband had an extra check for
 * (mp->b_datap->db_type < QPCTL) in the band 0 case.  That check does not
 * match the man page for flushband and was not in the strrput flush code,
 * hence it was removed.
21210Sstevel@tonic-gate  */
void
flushband(queue_t *q, unsigned char pri, int flag)
{
	mblk_t *mp;
	mblk_t *nmp;
	mblk_t *last;
	qband_t *qbp;
	int band;

	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
	/* A band that was never allocated has nothing to flush. */
	if (pri > q->q_nband) {
		return;
	}
	mutex_enter(QLOCK(q));
	if (pri == 0) {
		/*
		 * Band 0: detach the entire queue and reset all counts
		 * and flow-control flags under QLOCK, then walk the
		 * chain with the lock dropped.  Band-0 messages subject
		 * to the flush are freed; everything else (messages in
		 * other bands, and control messages when flag ==
		 * FLUSHDATA) is put back via putq(), which acquires
		 * QLOCK itself.
		 */
		mp = q->q_first;
		q->q_first = NULL;
		q->q_last = NULL;
		q->q_count = 0;
		q->q_mblkcnt = 0;
		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
			qbp->qb_first = NULL;
			qbp->qb_last = NULL;
			qbp->qb_count = 0;
			qbp->qb_mblkcnt = 0;
			qbp->qb_flag &= ~QB_FULL;
		}
		q->q_flag &= ~QFULL;
		mutex_exit(QLOCK(q));
		while (mp) {
			nmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			if ((mp->b_band == 0) &&
				((flag == FLUSHALL) ||
				datamsg(mp->b_datap->db_type)))
				freemsg(mp);
			else
				(void) putq(q, mp);
			mp = nmp;
		}
		/*
		 * Backenable a blocked writer (QWANTW) if the queue is
		 * now below its low water mark, or has no low water mark
		 * at all.  backenable() is called with QLOCK dropped.
		 */
		mutex_enter(QLOCK(q));
		if ((q->q_flag & QWANTW) &&
		    (((q->q_count < q->q_lowat) &&
		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
			q->q_flag &= ~QWANTW;
			mutex_exit(QLOCK(q));

			backenable(q, pri);
		} else
			mutex_exit(QLOCK(q));
	} else {	/* pri != 0 */
		boolean_t flushed = B_FALSE;
		band = pri;

		/* Walking the qband list requires QLOCK. */
		ASSERT(MUTEX_HELD(QLOCK(q)));
		qbp = q->q_bandp;
		while (--band > 0)
			qbp = qbp->qb_next;
		mp = qbp->qb_first;
		if (mp == NULL) {
			mutex_exit(QLOCK(q));
			return;
		}
		/* One past the band's final message terminates the walk. */
		last = qbp->qb_last->b_next;
		/*
		 * rmvq_noenab() and freemsg() are called for each mblk that
		 * meets the criteria.  The loop is executed until the last
		 * mblk has been processed.
		 */
		while (mp != last) {
			ASSERT(mp->b_band == pri);
			nmp = mp->b_next;
			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
				rmvq_noenab(q, mp);
				freemsg(mp);
				flushed = B_TRUE;
			}
			mp = nmp;
		}
		mutex_exit(QLOCK(q));

		/*
		 * If any mblk(s) has been freed, we know that qbackenable()
		 * will need to be called.
		 */
		if (flushed)
			qbackenable(q, pri);
	}
}
22110Sstevel@tonic-gate 
22120Sstevel@tonic-gate /*
22130Sstevel@tonic-gate  * Return 1 if the queue is not full.  If the queue is full, return
22140Sstevel@tonic-gate  * 0 (may not put message) and set QWANTW flag (caller wants to write
22150Sstevel@tonic-gate  * to the queue).
22160Sstevel@tonic-gate  */
22170Sstevel@tonic-gate int
22180Sstevel@tonic-gate canput(queue_t *q)
22190Sstevel@tonic-gate {
22200Sstevel@tonic-gate 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
22210Sstevel@tonic-gate 
22220Sstevel@tonic-gate 	/* this is for loopback transports, they should not do a canput */
22230Sstevel@tonic-gate 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
22240Sstevel@tonic-gate 
22250Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
22260Sstevel@tonic-gate 	q = q->q_nfsrv;
22270Sstevel@tonic-gate 
22280Sstevel@tonic-gate 	if (!(q->q_flag & QFULL)) {
22290Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
22300Sstevel@tonic-gate 		return (1);
22310Sstevel@tonic-gate 	}
22320Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
22330Sstevel@tonic-gate 	if (q->q_flag & QFULL) {
22340Sstevel@tonic-gate 		q->q_flag |= QWANTW;
22350Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
22360Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
22370Sstevel@tonic-gate 		return (0);
22380Sstevel@tonic-gate 	}
22390Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
22400Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
22410Sstevel@tonic-gate 	return (1);
22420Sstevel@tonic-gate }
22430Sstevel@tonic-gate 
22440Sstevel@tonic-gate /*
22450Sstevel@tonic-gate  * This is the new canput for use with priority bands.  Return 1 if the
22460Sstevel@tonic-gate  * band is not full.  If the band is full, return 0 (may not put message)
22470Sstevel@tonic-gate  * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to
22480Sstevel@tonic-gate  * write to the queue).
22490Sstevel@tonic-gate  */
22500Sstevel@tonic-gate int
22510Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri)
22520Sstevel@tonic-gate {
22530Sstevel@tonic-gate 	qband_t *qbp;
22540Sstevel@tonic-gate 
22550Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
22560Sstevel@tonic-gate 	if (!q)
22570Sstevel@tonic-gate 		return (0);
22580Sstevel@tonic-gate 
22590Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
22600Sstevel@tonic-gate 	q = q->q_nfsrv;
22610Sstevel@tonic-gate 
22620Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
22630Sstevel@tonic-gate 	if (pri == 0) {
22640Sstevel@tonic-gate 		if (q->q_flag & QFULL) {
22650Sstevel@tonic-gate 			q->q_flag |= QWANTW;
22660Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22670Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22680Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
22690Sstevel@tonic-gate 			return (0);
22700Sstevel@tonic-gate 		}
22710Sstevel@tonic-gate 	} else {	/* pri != 0 */
22720Sstevel@tonic-gate 		if (pri > q->q_nband) {
22730Sstevel@tonic-gate 			/*
22740Sstevel@tonic-gate 			 * No band exists yet, so return success.
22750Sstevel@tonic-gate 			 */
22760Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22770Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22780Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 1);
22790Sstevel@tonic-gate 			return (1);
22800Sstevel@tonic-gate 		}
22810Sstevel@tonic-gate 		qbp = q->q_bandp;
22820Sstevel@tonic-gate 		while (--pri)
22830Sstevel@tonic-gate 			qbp = qbp->qb_next;
22840Sstevel@tonic-gate 		if (qbp->qb_flag & QB_FULL) {
22850Sstevel@tonic-gate 			qbp->qb_flag |= QB_WANTW;
22860Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22870Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22880Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
22890Sstevel@tonic-gate 			return (0);
22900Sstevel@tonic-gate 		}
22910Sstevel@tonic-gate 	}
22920Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
22930Sstevel@tonic-gate 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22940Sstevel@tonic-gate 		"bcanput:%p %X %d", q, pri, 1);
22950Sstevel@tonic-gate 	return (1);
22960Sstevel@tonic-gate }
22970Sstevel@tonic-gate 
22980Sstevel@tonic-gate /*
22990Sstevel@tonic-gate  * Put a message on a queue.
23000Sstevel@tonic-gate  *
23010Sstevel@tonic-gate  * Messages are enqueued on a priority basis.  The priority classes
23020Sstevel@tonic-gate  * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
23030Sstevel@tonic-gate  * and B_NORMAL (type < QPCTL && band == 0).
23040Sstevel@tonic-gate  *
23050Sstevel@tonic-gate  * Add appropriate weighted data block sizes to queue count.
23060Sstevel@tonic-gate  * If queue hits high water mark then set QFULL flag.
23070Sstevel@tonic-gate  *
23080Sstevel@tonic-gate  * If QNOENAB is not set (putq is allowed to enable the queue),
23090Sstevel@tonic-gate  * enable the queue only if the message is PRIORITY,
23100Sstevel@tonic-gate  * or the QWANTR flag is set (indicating that the service procedure
23110Sstevel@tonic-gate  * is ready to read the queue.  This implies that a service
23120Sstevel@tonic-gate  * procedure must NEVER put a high priority message back on its own
23130Sstevel@tonic-gate  * queue, as this would result in an infinite loop (!).
23140Sstevel@tonic-gate  */
int
putq(queue_t *q, mblk_t *bp)
{
	mblk_t *tmp;
	qband_t *qbp = NULL;
	int mcls = (int)queclass(bp);	/* queue class of bp */
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	/*
	 * If this thread has frozen the stream, it already holds QLOCK;
	 * otherwise the lock must be acquired here.
	 */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	/*
	 * Make sanity checks and if qband structure is not yet
	 * allocated, do so.
	 */
	if (mcls == QPCTL) {
		if (bp->b_band != 0)
			bp->b_band = 0;		/* force to be correct */
	} else if (bp->b_band != 0) {
		int i;
		qband_t **qbpp;

		if (bp->b_band > q->q_nband) {

			/*
			 * The qband structure for this priority band is
			 * not on the queue yet, so we have to allocate
			 * one on the fly.  It would be wasteful to
			 * associate the qband structures with every
			 * queue when the queues are allocated.  This is
			 * because most queues will only need the normal
			 * band of flow which can be described entirely
			 * by the queue itself.
			 */
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;
			while (bp->b_band > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					/* Allocation failed: message not queued. */
					if (freezer != curthread)
						mutex_exit(QLOCK(q));
					return (0);
				}
				/* New bands inherit the queue's water marks. */
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}
		ASSERT(MUTEX_HELD(QLOCK(q)));
		/* Locate the qband structure for bp's band. */
		qbp = q->q_bandp;
		i = bp->b_band;
		while (--i)
			qbp = qbp->qb_next;
	}

	/*
	 * If queue is empty, add the message and initialize the pointers.
	 * Otherwise, adjust message pointers and queue pointers based on
	 * the type of the message and where it belongs on the queue.  Some
	 * code is duplicated to minimize the number of conditionals and
	 * hopefully minimize the amount of time this routine takes.
	 */
	if (!q->q_first) {
		bp->b_next = NULL;
		bp->b_prev = NULL;
		q->q_first = bp;
		q->q_last = bp;
		if (qbp) {
			qbp->qb_first = bp;
			qbp->qb_last = bp;
		}
	} else if (!qbp) {	/* bp->b_band == 0 */

		/*
		 * If queue class of message is less than or equal to
		 * that of the last one on the queue, tack on to the end.
		 */
		tmp = q->q_last;
		if (mcls <= (int)queclass(tmp)) {
			bp->b_next = NULL;
			bp->b_prev = tmp;
			tmp->b_next = bp;
			q->q_last = bp;
		} else {
			/* bp outranks the tail: skip equal/higher classes. */
			tmp = q->q_first;
			while ((int)queclass(tmp) >= mcls)
				tmp = tmp->b_next;

			/*
			 * Insert bp before tmp.
			 */
			bp->b_next = tmp;
			bp->b_prev = tmp->b_prev;
			if (tmp->b_prev)
				tmp->b_prev->b_next = bp;
			else
				q->q_first = bp;
			tmp->b_prev = bp;
		}
	} else {		/* bp->b_band != 0 */
		if (qbp->qb_first) {
			tmp = qbp->qb_last;

			/*
			 * Insert bp after the last message in this band.
			 */
			bp->b_next = tmp->b_next;
			if (tmp->b_next)
				tmp->b_next->b_prev = bp;
			else
				q->q_last = bp;
			bp->b_prev = tmp;
			tmp->b_next = bp;
		} else {
			/*
			 * First message of this band on the queue: find
			 * its position relative to the other bands.
			 */
			tmp = q->q_last;
			if ((mcls < (int)queclass(tmp)) ||
			    (bp->b_band <= tmp->b_band)) {

				/*
				 * Tack bp on end of queue.
				 */
				bp->b_next = NULL;
				bp->b_prev = tmp;
				tmp->b_next = bp;
				q->q_last = bp;
			} else {
				/* Skip high priority, then higher bands. */
				tmp = q->q_first;
				while (tmp->b_datap->db_type >= QPCTL)
					tmp = tmp->b_next;
				while (tmp->b_band >= bp->b_band)
					tmp = tmp->b_next;

				/*
				 * Insert bp before tmp.
				 */
				bp->b_next = tmp;
				bp->b_prev = tmp->b_prev;
				if (tmp->b_prev)
					tmp->b_prev->b_next = bp;
				else
					q->q_first = bp;
				tmp->b_prev = bp;
			}
			qbp->qb_first = bp;
		}
		qbp->qb_last = bp;
	}

	/* Get message byte count for q_count accounting */
	for (tmp = bp; tmp; tmp = tmp->b_cont) {
		ADD_MBLK_SIZE(tmp, bytecnt);
		mblkcnt++;
	}

	/*
	 * Update flow-control counts on the band or the queue proper,
	 * and mark it full once either count reaches the high water mark.
	 */
	if (qbp) {
		qbp->qb_count += bytecnt;
		qbp->qb_mblkcnt += mblkcnt;
		if ((qbp->qb_count >= qbp->qb_hiwat) ||
		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
			qbp->qb_flag |= QB_FULL;
		}
	} else {
		q->q_count += bytecnt;
		q->q_mblkcnt += mblkcnt;
		if ((q->q_count >= q->q_hiwat) ||
		    (q->q_mblkcnt >= q->q_hiwat)) {
			q->q_flag |= QFULL;
		}
	}

	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);

	/*
	 * Schedule the service procedure: always for high priority
	 * messages; otherwise only when enabling is permitted
	 * (canenable) and either the service procedure wants to read
	 * (QWANTR) or the message is banded.
	 */
	if ((mcls > QNORM) ||
	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
		qenable_locked(q);
	ASSERT(MUTEX_HELD(QLOCK(q)));
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	return (1);
}
25020Sstevel@tonic-gate 
25030Sstevel@tonic-gate /*
25040Sstevel@tonic-gate  * Put stuff back at beginning of Q according to priority order.
25050Sstevel@tonic-gate  * See comment on putq above for details.
25060Sstevel@tonic-gate  */
int
putbq(queue_t *q, mblk_t *bp)
{
	mblk_t *tmp;
	qband_t *qbp = NULL;
	int mcls = (int)queclass(bp);	/* queue class of bp */
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	ASSERT(q && bp);
	ASSERT(bp->b_next == NULL);
	/*
	 * If this thread has frozen the stream, it already holds QLOCK;
	 * otherwise the lock must be acquired here.
	 */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	/*
	 * Make sanity checks and if qband structure is not yet
	 * allocated, do so.
	 */
	if (mcls == QPCTL) {
		if (bp->b_band != 0)
			bp->b_band = 0;		/* force to be correct */
	} else if (bp->b_band != 0) {
		int i;
		qband_t **qbpp;

		if (bp->b_band > q->q_nband) {
			/* Allocate qband structures on the fly; see putq(). */
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;
			while (bp->b_band > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					/* Allocation failed: message not queued. */
					if (freezer != curthread)
						mutex_exit(QLOCK(q));
					return (0);
				}
				/* New bands inherit the queue's water marks. */
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}
		/* Locate the qband structure for bp's band. */
		qbp = q->q_bandp;
		i = bp->b_band;
		while (--i)
			qbp = qbp->qb_next;
	}

	/*
	 * If queue is empty or if message is high priority,
	 * place on the front of the queue.
	 */
	tmp = q->q_first;
	if ((!tmp) || (mcls == QPCTL)) {
		bp->b_next = tmp;
		if (tmp)
			tmp->b_prev = bp;
		else
			q->q_last = bp;
		q->q_first = bp;
		bp->b_prev = NULL;
		if (qbp) {
			qbp->qb_first = bp;
			qbp->qb_last = bp;
		}
	} else if (qbp) {	/* bp->b_band != 0 */
		tmp = qbp->qb_first;
		if (tmp) {

			/*
			 * Insert bp before the first message in this band.
			 */
			bp->b_next = tmp;
			bp->b_prev = tmp->b_prev;
			if (tmp->b_prev)
				tmp->b_prev->b_next = bp;
			else
				q->q_first = bp;
			tmp->b_prev = bp;
		} else {
			/*
			 * Band is currently empty on the queue: position
			 * bp relative to the other bands.
			 */
			tmp = q->q_last;
			if ((mcls < (int)queclass(tmp)) ||
			    (bp->b_band < tmp->b_band)) {

				/*
				 * Tack bp on end of queue.
				 */
				bp->b_next = NULL;
				bp->b_prev = tmp;
				tmp->b_next = bp;
				q->q_last = bp;
			} else {
				/* Skip high priority, then higher bands. */
				tmp = q->q_first;
				while (tmp->b_datap->db_type >= QPCTL)
					tmp = tmp->b_next;
				while (tmp->b_band > bp->b_band)
					tmp = tmp->b_next;

				/*
				 * Insert bp before tmp.
				 */
				bp->b_next = tmp;
				bp->b_prev = tmp->b_prev;
				if (tmp->b_prev)
					tmp->b_prev->b_next = bp;
				else
					q->q_first = bp;
				tmp->b_prev = bp;
			}
			qbp->qb_last = bp;
		}
		qbp->qb_first = bp;
	} else {		/* bp->b_band == 0 && !QPCTL */

		/*
		 * If the queue class or band is less than that of the last
		 * message on the queue, tack bp on the end of the queue.
		 */
		tmp = q->q_last;
		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
			bp->b_next = NULL;
			bp->b_prev = tmp;
			tmp->b_next = bp;
			q->q_last = bp;
		} else {
			/* Skip high priority messages, then higher bands. */
			tmp = q->q_first;
			while (tmp->b_datap->db_type >= QPCTL)
				tmp = tmp->b_next;
			while (tmp->b_band > bp->b_band)
				tmp = tmp->b_next;

			/*
			 * Insert bp before tmp.
			 */
			bp->b_next = tmp;
			bp->b_prev = tmp->b_prev;
			if (tmp->b_prev)
				tmp->b_prev->b_next = bp;
			else
				q->q_first = bp;
			tmp->b_prev = bp;
		}
	}

	/* Get message byte count for q_count accounting */
	for (tmp = bp; tmp; tmp = tmp->b_cont) {
		ADD_MBLK_SIZE(tmp, bytecnt);
		mblkcnt++;
	}
	/*
	 * Update flow-control counts on the band or the queue proper,
	 * and mark it full once either count reaches the high water mark.
	 */
	if (qbp) {
		qbp->qb_count += bytecnt;
		qbp->qb_mblkcnt += mblkcnt;
		if ((qbp->qb_count >= qbp->qb_hiwat) ||
		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
			qbp->qb_flag |= QB_FULL;
		}
	} else {
		q->q_count += bytecnt;
		q->q_mblkcnt += mblkcnt;
		if ((q->q_count >= q->q_hiwat) ||
		    (q->q_mblkcnt >= q->q_hiwat)) {
			q->q_flag |= QFULL;
		}
	}

	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);

	/*
	 * Schedule the service procedure for high priority messages, or
	 * when enabling is permitted and it wants to read (QWANTR).
	 */
	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
		qenable_locked(q);
	ASSERT(MUTEX_HELD(QLOCK(q)));
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	return (1);
}
26850Sstevel@tonic-gate 
26860Sstevel@tonic-gate /*
26870Sstevel@tonic-gate  * Insert a message before an existing message on the queue.  If the
26880Sstevel@tonic-gate  * existing message is NULL, the new messages is placed on the end of
26890Sstevel@tonic-gate  * the queue.  The queue class of the new message is ignored.  However,
26900Sstevel@tonic-gate  * the priority band of the new message must adhere to the following
26910Sstevel@tonic-gate  * ordering:
26920Sstevel@tonic-gate  *
26930Sstevel@tonic-gate  *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
26940Sstevel@tonic-gate  *
26950Sstevel@tonic-gate  * All flow control parameters are updated.
26960Sstevel@tonic-gate  *
26970Sstevel@tonic-gate  * insq can be called with the stream frozen, but other utility functions
26980Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
26990Sstevel@tonic-gate  */
27000Sstevel@tonic-gate int
27010Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp)
27020Sstevel@tonic-gate {
27030Sstevel@tonic-gate 	mblk_t *tmp;
27040Sstevel@tonic-gate 	qband_t *qbp = NULL;
27050Sstevel@tonic-gate 	int mcls = (int)queclass(mp);
27060Sstevel@tonic-gate 	kthread_id_t freezer;
27070Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
27080Sstevel@tonic-gate 
27090Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
27100Sstevel@tonic-gate 	if (freezer == curthread) {
27110Sstevel@tonic-gate 		ASSERT(frozenstr(q));
27120Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
27130Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
27140Sstevel@tonic-gate 		/* Don't drop lock on exit */
27150Sstevel@tonic-gate 		freezer = curthread;
27160Sstevel@tonic-gate 	} else
27170Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
27180Sstevel@tonic-gate 
27190Sstevel@tonic-gate 	if (mcls == QPCTL) {
27200Sstevel@tonic-gate 		if (mp->b_band != 0)
27210Sstevel@tonic-gate 			mp->b_band = 0;		/* force to be correct */
27220Sstevel@tonic-gate 		if (emp && emp->b_prev &&
27230Sstevel@tonic-gate 		    (emp->b_prev->b_datap->db_type < QPCTL))
27240Sstevel@tonic-gate 			goto badord;
27250Sstevel@tonic-gate 	}
27260Sstevel@tonic-gate 	if (emp) {
27270Sstevel@tonic-gate 		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
27280Sstevel@tonic-gate 		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
27290Sstevel@tonic-gate 		    (emp->b_prev->b_band < mp->b_band))) {
27300Sstevel@tonic-gate 			goto badord;
27310Sstevel@tonic-gate 		}
27320Sstevel@tonic-gate 	} else {
27330Sstevel@tonic-gate 		tmp = q->q_last;
27340Sstevel@tonic-gate 		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
27350Sstevel@tonic-gate badord:
27360Sstevel@tonic-gate 			cmn_err(CE_WARN,
27370Sstevel@tonic-gate 			    "insq: attempt to insert message out of order "
27380Sstevel@tonic-gate 			    "on q %p", (void *)q);
27390Sstevel@tonic-gate 			if (freezer != curthread)
27400Sstevel@tonic-gate 				mutex_exit(QLOCK(q));
27410Sstevel@tonic-gate 			return (0);
27420Sstevel@tonic-gate 		}
27430Sstevel@tonic-gate 	}
27440Sstevel@tonic-gate 
27450Sstevel@tonic-gate 	if (mp->b_band != 0) {
27460Sstevel@tonic-gate 		int i;
27470Sstevel@tonic-gate 		qband_t **qbpp;
27480Sstevel@tonic-gate 
27490Sstevel@tonic-gate 		if (mp->b_band > q->q_nband) {
27500Sstevel@tonic-gate 			qbpp = &q->q_bandp;
27510Sstevel@tonic-gate 			while (*qbpp)
27520Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
27530Sstevel@tonic-gate 			while (mp->b_band > q->q_nband) {
27540Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
27550Sstevel@tonic-gate 					if (freezer != curthread)
27560Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
27570Sstevel@tonic-gate 					return (0);
27580Sstevel@tonic-gate 				}
27590Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
27600Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
27610Sstevel@tonic-gate 				q->q_nband++;
27620Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
27630Sstevel@tonic-gate 			}
27640Sstevel@tonic-gate 		}
27650Sstevel@tonic-gate 		qbp = q->q_bandp;
27660Sstevel@tonic-gate 		i = mp->b_band;
27670Sstevel@tonic-gate 		while (--i)
27680Sstevel@tonic-gate 			qbp = qbp->qb_next;
27690Sstevel@tonic-gate 	}
27700Sstevel@tonic-gate 
27710Sstevel@tonic-gate 	if ((mp->b_next = emp) != NULL) {
27720Sstevel@tonic-gate 		if ((mp->b_prev = emp->b_prev) != NULL)
27730Sstevel@tonic-gate 			emp->b_prev->b_next = mp;
27740Sstevel@tonic-gate 		else
27750Sstevel@tonic-gate 			q->q_first = mp;
27760Sstevel@tonic-gate 		emp->b_prev = mp;
27770Sstevel@tonic-gate 	} else {
27780Sstevel@tonic-gate 		if ((mp->b_prev = q->q_last) != NULL)
27790Sstevel@tonic-gate 			q->q_last->b_next = mp;
27800Sstevel@tonic-gate 		else
27810Sstevel@tonic-gate 			q->q_first = mp;
27820Sstevel@tonic-gate 		q->q_last = mp;
27830Sstevel@tonic-gate 	}
27840Sstevel@tonic-gate 
27850Sstevel@tonic-gate 	/* Get mblk and byte count for q_count accounting */
27860Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
2787741Smasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
27880Sstevel@tonic-gate 		mblkcnt++;
27890Sstevel@tonic-gate 	}
27900Sstevel@tonic-gate 
27910Sstevel@tonic-gate 	if (qbp) {	/* adjust qband pointers and count */
27920Sstevel@tonic-gate 		if (!qbp->qb_first) {
27930Sstevel@tonic-gate 			qbp->qb_first = mp;
27940Sstevel@tonic-gate 			qbp->qb_last = mp;
27950Sstevel@tonic-gate 		} else {
27960Sstevel@tonic-gate 			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
27970Sstevel@tonic-gate 			    (mp->b_prev->b_band != mp->b_band)))
27980Sstevel@tonic-gate 				qbp->qb_first = mp;
27990Sstevel@tonic-gate 			else if (mp->b_next == NULL || (mp->b_next != NULL &&
28000Sstevel@tonic-gate 			    (mp->b_next->b_band != mp->b_band)))
28010Sstevel@tonic-gate 				qbp->qb_last = mp;
28020Sstevel@tonic-gate 		}
28030Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
28040Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
28050Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
28060Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
28070Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
28080Sstevel@tonic-gate 		}
28090Sstevel@tonic-gate 	} else {
28100Sstevel@tonic-gate 		q->q_count += bytecnt;
28110Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
28120Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
28130Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
28140Sstevel@tonic-gate 			q->q_flag |= QFULL;
28150Sstevel@tonic-gate 		}
28160Sstevel@tonic-gate 	}
28170Sstevel@tonic-gate 
28180Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL);
28190Sstevel@tonic-gate 
28200Sstevel@tonic-gate 	if (canenable(q) && (q->q_flag & QWANTR))
28210Sstevel@tonic-gate 		qenable_locked(q);
28220Sstevel@tonic-gate 
28230Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
28240Sstevel@tonic-gate 	if (freezer != curthread)
28250Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
28260Sstevel@tonic-gate 
28270Sstevel@tonic-gate 	return (1);
28280Sstevel@tonic-gate }
28290Sstevel@tonic-gate 
28300Sstevel@tonic-gate /*
28310Sstevel@tonic-gate  * Create and put a control message on queue.
28320Sstevel@tonic-gate  */
28330Sstevel@tonic-gate int
28340Sstevel@tonic-gate putctl(queue_t *q, int type)
28350Sstevel@tonic-gate {
28360Sstevel@tonic-gate 	mblk_t *bp;
28370Sstevel@tonic-gate 
28380Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28390Sstevel@tonic-gate 	    (bp = allocb_tryhard(0)) == NULL)
28400Sstevel@tonic-gate 		return (0);
28410Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char) type;
28420Sstevel@tonic-gate 
28430Sstevel@tonic-gate 	put(q, bp);
28440Sstevel@tonic-gate 
28450Sstevel@tonic-gate 	return (1);
28460Sstevel@tonic-gate }
28470Sstevel@tonic-gate 
28480Sstevel@tonic-gate /*
28490Sstevel@tonic-gate  * Control message with a single-byte parameter
28500Sstevel@tonic-gate  */
28510Sstevel@tonic-gate int
28520Sstevel@tonic-gate putctl1(queue_t *q, int type, int param)
28530Sstevel@tonic-gate {
28540Sstevel@tonic-gate 	mblk_t *bp;
28550Sstevel@tonic-gate 
28560Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28570Sstevel@tonic-gate 	    (bp = allocb_tryhard(1)) == NULL)
28580Sstevel@tonic-gate 		return (0);
28590Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28600Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
28610Sstevel@tonic-gate 
28620Sstevel@tonic-gate 	put(q, bp);
28630Sstevel@tonic-gate 
28640Sstevel@tonic-gate 	return (1);
28650Sstevel@tonic-gate }
28660Sstevel@tonic-gate 
28670Sstevel@tonic-gate int
28680Sstevel@tonic-gate putnextctl1(queue_t *q, int type, int param)
28690Sstevel@tonic-gate {
28700Sstevel@tonic-gate 	mblk_t *bp;
28710Sstevel@tonic-gate 
28720Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28730Sstevel@tonic-gate 		((bp = allocb_tryhard(1)) == NULL))
28740Sstevel@tonic-gate 		return (0);
28750Sstevel@tonic-gate 
28760Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28770Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
28780Sstevel@tonic-gate 
28790Sstevel@tonic-gate 	putnext(q, bp);
28800Sstevel@tonic-gate 
28810Sstevel@tonic-gate 	return (1);
28820Sstevel@tonic-gate }
28830Sstevel@tonic-gate 
28840Sstevel@tonic-gate int
28850Sstevel@tonic-gate putnextctl(queue_t *q, int type)
28860Sstevel@tonic-gate {
28870Sstevel@tonic-gate 	mblk_t *bp;
28880Sstevel@tonic-gate 
28890Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28900Sstevel@tonic-gate 		((bp = allocb_tryhard(0)) == NULL))
28910Sstevel@tonic-gate 		return (0);
28920Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28930Sstevel@tonic-gate 
28940Sstevel@tonic-gate 	putnext(q, bp);
28950Sstevel@tonic-gate 
28960Sstevel@tonic-gate 	return (1);
28970Sstevel@tonic-gate }
28980Sstevel@tonic-gate 
28990Sstevel@tonic-gate /*
29000Sstevel@tonic-gate  * Return the queue upstream from this one
29010Sstevel@tonic-gate  */
29020Sstevel@tonic-gate queue_t *
29030Sstevel@tonic-gate backq(queue_t *q)
29040Sstevel@tonic-gate {
29050Sstevel@tonic-gate 	q = _OTHERQ(q);
29060Sstevel@tonic-gate 	if (q->q_next) {
29070Sstevel@tonic-gate 		q = q->q_next;
29080Sstevel@tonic-gate 		return (_OTHERQ(q));
29090Sstevel@tonic-gate 	}
29100Sstevel@tonic-gate 	return (NULL);
29110Sstevel@tonic-gate }
29120Sstevel@tonic-gate 
29130Sstevel@tonic-gate /*
29140Sstevel@tonic-gate  * Send a block back up the queue in reverse from this
29150Sstevel@tonic-gate  * one (e.g. to respond to ioctls)
29160Sstevel@tonic-gate  */
29170Sstevel@tonic-gate void
29180Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp)
29190Sstevel@tonic-gate {
29200Sstevel@tonic-gate 	ASSERT(q && bp);
29210Sstevel@tonic-gate 
29220Sstevel@tonic-gate 	putnext(_OTHERQ(q), bp);
29230Sstevel@tonic-gate }
29240Sstevel@tonic-gate 
29250Sstevel@tonic-gate /*
29260Sstevel@tonic-gate  * Streams Queue Scheduling
29270Sstevel@tonic-gate  *
29280Sstevel@tonic-gate  * Queues are enabled through qenable() when they have messages to
29290Sstevel@tonic-gate  * process.  They are serviced by queuerun(), which runs each enabled
29300Sstevel@tonic-gate  * queue's service procedure.  The call to queuerun() is processor
29310Sstevel@tonic-gate  * dependent - the general principle is that it be run whenever a queue
29320Sstevel@tonic-gate  * is enabled but before returning to user level.  For system calls,
29330Sstevel@tonic-gate  * the function runqueues() is called if their action causes a queue
29340Sstevel@tonic-gate  * to be enabled.  For device interrupts, queuerun() should be
29350Sstevel@tonic-gate  * called before returning from the last level of interrupt.  Beyond
29360Sstevel@tonic-gate  * this, no timing assumptions should be made about queue scheduling.
29370Sstevel@tonic-gate  */
29380Sstevel@tonic-gate 
29390Sstevel@tonic-gate /*
29400Sstevel@tonic-gate  * Enable a queue: put it on list of those whose service procedures are
29410Sstevel@tonic-gate  * ready to run and set up the scheduling mechanism.
29420Sstevel@tonic-gate  * The broadcast is done outside the mutex -> to avoid the woken thread
29430Sstevel@tonic-gate  * from contending with the mutex. This is OK 'cos the queue has been
29440Sstevel@tonic-gate  * enqueued on the runlist and flagged safely at this point.
29450Sstevel@tonic-gate  */
29460Sstevel@tonic-gate void
29470Sstevel@tonic-gate qenable(queue_t *q)
29480Sstevel@tonic-gate {
29490Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29500Sstevel@tonic-gate 	qenable_locked(q);
29510Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29520Sstevel@tonic-gate }
29530Sstevel@tonic-gate /*
29540Sstevel@tonic-gate  * Return number of messages on queue
29550Sstevel@tonic-gate  */
29560Sstevel@tonic-gate int
29570Sstevel@tonic-gate qsize(queue_t *qp)
29580Sstevel@tonic-gate {
29590Sstevel@tonic-gate 	int count = 0;
29600Sstevel@tonic-gate 	mblk_t *mp;
29610Sstevel@tonic-gate 
29620Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
29630Sstevel@tonic-gate 	for (mp = qp->q_first; mp; mp = mp->b_next)
29640Sstevel@tonic-gate 		count++;
29650Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
29660Sstevel@tonic-gate 	return (count);
29670Sstevel@tonic-gate }
29680Sstevel@tonic-gate 
29690Sstevel@tonic-gate /*
29700Sstevel@tonic-gate  * noenable - set queue so that putq() will not enable it.
29710Sstevel@tonic-gate  * enableok - set queue so that putq() can enable it.
29720Sstevel@tonic-gate  */
29730Sstevel@tonic-gate void
29740Sstevel@tonic-gate noenable(queue_t *q)
29750Sstevel@tonic-gate {
29760Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29770Sstevel@tonic-gate 	q->q_flag |= QNOENB;
29780Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29790Sstevel@tonic-gate }
29800Sstevel@tonic-gate 
29810Sstevel@tonic-gate void
29820Sstevel@tonic-gate enableok(queue_t *q)
29830Sstevel@tonic-gate {
29840Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29850Sstevel@tonic-gate 	q->q_flag &= ~QNOENB;
29860Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29870Sstevel@tonic-gate }
29880Sstevel@tonic-gate 
29890Sstevel@tonic-gate /*
29900Sstevel@tonic-gate  * Set queue fields.
29910Sstevel@tonic-gate  */
29920Sstevel@tonic-gate int
29930Sstevel@tonic-gate strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
29940Sstevel@tonic-gate {
29950Sstevel@tonic-gate 	qband_t *qbp = NULL;
29960Sstevel@tonic-gate 	queue_t	*wrq;
29970Sstevel@tonic-gate 	int error = 0;
29980Sstevel@tonic-gate 	kthread_id_t freezer;
29990Sstevel@tonic-gate 
30000Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
30010Sstevel@tonic-gate 	if (freezer == curthread) {
30020Sstevel@tonic-gate 		ASSERT(frozenstr(q));
30030Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
30040Sstevel@tonic-gate 	} else
30050Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
30060Sstevel@tonic-gate 
30070Sstevel@tonic-gate 	if (what >= QBAD) {
30080Sstevel@tonic-gate 		error = EINVAL;
30090Sstevel@tonic-gate 		goto done;
30100Sstevel@tonic-gate 	}
30110Sstevel@tonic-gate 	if (pri != 0) {
30120Sstevel@tonic-gate 		int i;
30130Sstevel@tonic-gate 		qband_t **qbpp;
30140Sstevel@tonic-gate 
30150Sstevel@tonic-gate 		if (pri > q->q_nband) {
30160Sstevel@tonic-gate 			qbpp = &q->q_bandp;
30170Sstevel@tonic-gate 			while (*qbpp)
30180Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
30190Sstevel@tonic-gate 			while (pri > q->q_nband) {
30200Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
30210Sstevel@tonic-gate 					error = EAGAIN;
30220Sstevel@tonic-gate 					goto done;
30230Sstevel@tonic-gate 				}
30240Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
30250Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
30260Sstevel@tonic-gate 				q->q_nband++;
30270Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
30280Sstevel@tonic-gate 			}
30290Sstevel@tonic-gate 		}
30300Sstevel@tonic-gate 		qbp = q->q_bandp;
30310Sstevel@tonic-gate 		i = pri;
30320Sstevel@tonic-gate 		while (--i)
30330Sstevel@tonic-gate 			qbp = qbp->qb_next;
30340Sstevel@tonic-gate 	}
30350Sstevel@tonic-gate 	switch (what) {
30360Sstevel@tonic-gate 
30370Sstevel@tonic-gate 	case QHIWAT:
30380Sstevel@tonic-gate 		if (qbp)
30390Sstevel@tonic-gate 			qbp->qb_hiwat = (size_t)val;
30400Sstevel@tonic-gate 		else
30410Sstevel@tonic-gate 			q->q_hiwat = (size_t)val;
30420Sstevel@tonic-gate 		break;
30430Sstevel@tonic-gate 
30440Sstevel@tonic-gate 	case QLOWAT:
30450Sstevel@tonic-gate 		if (qbp)
30460Sstevel@tonic-gate 			qbp->qb_lowat = (size_t)val;
30470Sstevel@tonic-gate 		else
30480Sstevel@tonic-gate 			q->q_lowat = (size_t)val;
30490Sstevel@tonic-gate 		break;
30500Sstevel@tonic-gate 
30510Sstevel@tonic-gate 	case QMAXPSZ:
30520Sstevel@tonic-gate 		if (qbp)
30530Sstevel@tonic-gate 			error = EINVAL;
30540Sstevel@tonic-gate 		else
30550Sstevel@tonic-gate 			q->q_maxpsz = (ssize_t)val;
30560Sstevel@tonic-gate 
30570Sstevel@tonic-gate 		/*
30580Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
30590Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
30600Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
30610Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
30620Sstevel@tonic-gate 		 */
30630Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
30640Sstevel@tonic-gate 		if (q != wrq->q_next)
30650Sstevel@tonic-gate 			break;
30660Sstevel@tonic-gate 
30670Sstevel@tonic-gate 		/*
30680Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
30690Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
30700Sstevel@tonic-gate 		 */
30710Sstevel@tonic-gate 		if (freezer != curthread) {
30720Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
30730Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
30740Sstevel@tonic-gate 		}
30750Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(wrq)));
30760Sstevel@tonic-gate 
30770Sstevel@tonic-gate 		if (strmsgsz != 0) {
30780Sstevel@tonic-gate 			if (val == INFPSZ)
30790Sstevel@tonic-gate 				val = strmsgsz;
30800Sstevel@tonic-gate 			else  {
30810Sstevel@tonic-gate 				if (STREAM(q)->sd_vnode->v_type == VFIFO)
30820Sstevel@tonic-gate 					val = MIN(PIPE_BUF, val);
30830Sstevel@tonic-gate 				else
30840Sstevel@tonic-gate 					val = MIN(strmsgsz, val);
30850Sstevel@tonic-gate 			}
30860Sstevel@tonic-gate 		}
30870Sstevel@tonic-gate 		STREAM(q)->sd_qn_maxpsz = val;
30880Sstevel@tonic-gate 		if (freezer != curthread) {
30890Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
30900Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
30910Sstevel@tonic-gate 		}
30920Sstevel@tonic-gate 		break;
30930Sstevel@tonic-gate 
30940Sstevel@tonic-gate 	case QMINPSZ:
30950Sstevel@tonic-gate 		if (qbp)
30960Sstevel@tonic-gate 			error = EINVAL;
30970Sstevel@tonic-gate 		else
30980Sstevel@tonic-gate 			q->q_minpsz = (ssize_t)val;
30990Sstevel@tonic-gate 
31000Sstevel@tonic-gate 		/*
31010Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
31020Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
31030Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
31040Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
31050Sstevel@tonic-gate 		 */
31060Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
31070Sstevel@tonic-gate 		if (q != wrq->q_next)
31080Sstevel@tonic-gate 			break;
31090Sstevel@tonic-gate 
31100Sstevel@tonic-gate 		/*
31110Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
31120Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
31130Sstevel@tonic-gate 		 */
31140Sstevel@tonic-gate 		if (freezer != curthread) {
31150Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
31160Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
31170Sstevel@tonic-gate 		}
31180Sstevel@tonic-gate 		STREAM(q)->sd_qn_minpsz = (ssize_t)val;
31190Sstevel@tonic-gate 
31200Sstevel@tonic-gate 		if (freezer != curthread) {
31210Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
31220Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
31230Sstevel@tonic-gate 		}
31240Sstevel@tonic-gate 		break;
31250Sstevel@tonic-gate 
31260Sstevel@tonic-gate 	case QSTRUIOT:
31270Sstevel@tonic-gate 		if (qbp)
31280Sstevel@tonic-gate 			error = EINVAL;
31290Sstevel@tonic-gate 		else
31300Sstevel@tonic-gate 			q->q_struiot = (ushort_t)val;
31310Sstevel@tonic-gate 		break;
31320Sstevel@tonic-gate 
31330Sstevel@tonic-gate 	case QCOUNT:
31340Sstevel@tonic-gate 	case QFIRST:
31350Sstevel@tonic-gate 	case QLAST:
31360Sstevel@tonic-gate 	case QFLAG:
31370Sstevel@tonic-gate 		error = EPERM;
31380Sstevel@tonic-gate 		break;
31390Sstevel@tonic-gate 
31400Sstevel@tonic-gate 	default:
31410Sstevel@tonic-gate 		error = EINVAL;
31420Sstevel@tonic-gate 		break;
31430Sstevel@tonic-gate 	}
31440Sstevel@tonic-gate done:
31450Sstevel@tonic-gate 	if (freezer != curthread)
31460Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
31470Sstevel@tonic-gate 	return (error);
31480Sstevel@tonic-gate }
31490Sstevel@tonic-gate 
31500Sstevel@tonic-gate /*
31510Sstevel@tonic-gate  * Get queue fields.
31520Sstevel@tonic-gate  */
31530Sstevel@tonic-gate int
31540Sstevel@tonic-gate strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
31550Sstevel@tonic-gate {
31560Sstevel@tonic-gate 	qband_t 	*qbp = NULL;
31570Sstevel@tonic-gate 	int 		error = 0;
31580Sstevel@tonic-gate 	kthread_id_t 	freezer;
31590Sstevel@tonic-gate 
31600Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
31610Sstevel@tonic-gate 	if (freezer == curthread) {
31620Sstevel@tonic-gate 		ASSERT(frozenstr(q));
31630Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
31640Sstevel@tonic-gate 	} else
31650Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
31660Sstevel@tonic-gate 	if (what >= QBAD) {
31670Sstevel@tonic-gate 		error = EINVAL;
31680Sstevel@tonic-gate 		goto done;
31690Sstevel@tonic-gate 	}
31700Sstevel@tonic-gate 	if (pri != 0) {
31710Sstevel@tonic-gate 		int i;
31720Sstevel@tonic-gate 		qband_t **qbpp;
31730Sstevel@tonic-gate 
31740Sstevel@tonic-gate 		if (pri > q->q_nband) {
31750Sstevel@tonic-gate 			qbpp = &q->q_bandp;
31760Sstevel@tonic-gate 			while (*qbpp)
31770Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
31780Sstevel@tonic-gate 			while (pri > q->q_nband) {
31790Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
31800Sstevel@tonic-gate 					error = EAGAIN;
31810Sstevel@tonic-gate 					goto done;
31820Sstevel@tonic-gate 				}
31830Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
31840Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
31850Sstevel@tonic-gate 				q->q_nband++;
31860Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
31870Sstevel@tonic-gate 			}
31880Sstevel@tonic-gate 		}
31890Sstevel@tonic-gate 		qbp = q->q_bandp;
31900Sstevel@tonic-gate 		i = pri;
31910Sstevel@tonic-gate 		while (--i)
31920Sstevel@tonic-gate 			qbp = qbp->qb_next;
31930Sstevel@tonic-gate 	}
31940Sstevel@tonic-gate 	switch (what) {
31950Sstevel@tonic-gate 	case QHIWAT:
31960Sstevel@tonic-gate 		if (qbp)
31970Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_hiwat;
31980Sstevel@tonic-gate 		else
31990Sstevel@tonic-gate 			*(size_t *)valp = q->q_hiwat;
32000Sstevel@tonic-gate 		break;
32010Sstevel@tonic-gate 
32020Sstevel@tonic-gate 	case QLOWAT:
32030Sstevel@tonic-gate 		if (qbp)
32040Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_lowat;
32050Sstevel@tonic-gate 		else
32060Sstevel@tonic-gate 			*(size_t *)valp = q->q_lowat;
32070Sstevel@tonic-gate 		break;
32080Sstevel@tonic-gate 
32090Sstevel@tonic-gate 	case QMAXPSZ:
32100Sstevel@tonic-gate 		if (qbp)
32110Sstevel@tonic-gate 			error = EINVAL;
32120Sstevel@tonic-gate 		else
32130Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_maxpsz;
32140Sstevel@tonic-gate 		break;
32150Sstevel@tonic-gate 
32160Sstevel@tonic-gate 	case QMINPSZ:
32170Sstevel@tonic-gate 		if (qbp)
32180Sstevel@tonic-gate 			error = EINVAL;
32190Sstevel@tonic-gate 		else
32200Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_minpsz;
32210Sstevel@tonic-gate 		break;
32220Sstevel@tonic-gate 
32230Sstevel@tonic-gate 	case QCOUNT:
32240Sstevel@tonic-gate 		if (qbp)
32250Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_count;
32260Sstevel@tonic-gate 		else
32270Sstevel@tonic-gate 			*(size_t *)valp = q->q_count;
32280Sstevel@tonic-gate 		break;
32290Sstevel@tonic-gate 
32300Sstevel@tonic-gate 	case QFIRST:
32310Sstevel@tonic-gate 		if (qbp)
32320Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_first;
32330Sstevel@tonic-gate 		else
32340Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_first;
32350Sstevel@tonic-gate 		break;
32360Sstevel@tonic-gate 
32370Sstevel@tonic-gate 	case QLAST:
32380Sstevel@tonic-gate 		if (qbp)
32390Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_last;
32400Sstevel@tonic-gate 		else
32410Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_last;
32420Sstevel@tonic-gate 		break;
32430Sstevel@tonic-gate 
32440Sstevel@tonic-gate 	case QFLAG:
32450Sstevel@tonic-gate 		if (qbp)
32460Sstevel@tonic-gate 			*(uint_t *)valp = qbp->qb_flag;
32470Sstevel@tonic-gate 		else
32480Sstevel@tonic-gate 			*(uint_t *)valp = q->q_flag;
32490Sstevel@tonic-gate 		break;
32500Sstevel@tonic-gate 
32510Sstevel@tonic-gate 	case QSTRUIOT:
32520Sstevel@tonic-gate 		if (qbp)
32530Sstevel@tonic-gate 			error = EINVAL;
32540Sstevel@tonic-gate 		else
32550Sstevel@tonic-gate 			*(short *)valp = q->q_struiot;
32560Sstevel@tonic-gate 		break;
32570Sstevel@tonic-gate 
32580Sstevel@tonic-gate 	default:
32590Sstevel@tonic-gate 		error = EINVAL;
32600Sstevel@tonic-gate 		break;
32610Sstevel@tonic-gate 	}
32620Sstevel@tonic-gate done:
32630Sstevel@tonic-gate 	if (freezer != curthread)
32640Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
32650Sstevel@tonic-gate 	return (error);
32660Sstevel@tonic-gate }
32670Sstevel@tonic-gate 
32680Sstevel@tonic-gate /*
32690Sstevel@tonic-gate  * Function awakes all in cvwait/sigwait/pollwait, on one of:
32700Sstevel@tonic-gate  *	QWANTWSYNC or QWANTR or QWANTW,
32710Sstevel@tonic-gate  *
32720Sstevel@tonic-gate  * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
32730Sstevel@tonic-gate  *	 deferred wakeup will be done. Also if strpoll() in progress then a
32740Sstevel@tonic-gate  *	 deferred pollwakeup will be done.
32750Sstevel@tonic-gate  */
32760Sstevel@tonic-gate void
32770Sstevel@tonic-gate strwakeq(queue_t *q, int flag)
32780Sstevel@tonic-gate {
32790Sstevel@tonic-gate 	stdata_t 	*stp = STREAM(q);
32800Sstevel@tonic-gate 	pollhead_t 	*pl;
32810Sstevel@tonic-gate 
32820Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
32830Sstevel@tonic-gate 	pl = &stp->sd_pollist;
32840Sstevel@tonic-gate 	if (flag & QWANTWSYNC) {
32850Sstevel@tonic-gate 		ASSERT(!(q->q_flag & QREADR));
32860Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
32870Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
32880Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
32890Sstevel@tonic-gate 		} else {
32900Sstevel@tonic-gate 			stp->sd_wakeq |= WSLEEP;
32910Sstevel@tonic-gate 		}
32920Sstevel@tonic-gate 
32930Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
32940Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
32950Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
32960Sstevel@tonic-gate 
32970Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
32980Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
32990Sstevel@tonic-gate 	} else if (flag & QWANTR) {
33000Sstevel@tonic-gate 		if (stp->sd_flag & RSLEEP) {
33010Sstevel@tonic-gate 			stp->sd_flag &= ~RSLEEP;
33020Sstevel@tonic-gate 			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
33030Sstevel@tonic-gate 		} else {
33040Sstevel@tonic-gate 			stp->sd_wakeq |= RSLEEP;
33050Sstevel@tonic-gate 		}
33060Sstevel@tonic-gate 
33070Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
33080Sstevel@tonic-gate 		pollwakeup(pl, POLLIN | POLLRDNORM);
33090Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
33100Sstevel@tonic-gate 
33110Sstevel@tonic-gate 		{
33120Sstevel@tonic-gate 			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);
33130Sstevel@tonic-gate 
33140Sstevel@tonic-gate 			if (events)
33150Sstevel@tonic-gate 				strsendsig(stp->sd_siglist, events, 0, 0);
33160Sstevel@tonic-gate 		}
33170Sstevel@tonic-gate 	} else {
33180Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
33190Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
33200Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
33210Sstevel@tonic-gate 		}
33220Sstevel@tonic-gate 
33230Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
33240Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
33250Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
33260Sstevel@tonic-gate 
33270Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
33280Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
33290Sstevel@tonic-gate 	}
33300Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
33310Sstevel@tonic-gate }
33320Sstevel@tonic-gate 
33330Sstevel@tonic-gate int
33340Sstevel@tonic-gate struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
33350Sstevel@tonic-gate {
33360Sstevel@tonic-gate 	stdata_t *stp = STREAM(q);
33370Sstevel@tonic-gate 	int typ  = STRUIOT_STANDARD;
33380Sstevel@tonic-gate 	uio_t	 *uiop = &dp->d_uio;
33390Sstevel@tonic-gate 	dblk_t	 *dbp;
33400Sstevel@tonic-gate 	ssize_t	 uiocnt;
33410Sstevel@tonic-gate 	ssize_t	 cnt;
33420Sstevel@tonic-gate 	unsigned char *ptr;
33430Sstevel@tonic-gate 	ssize_t	 resid;
33440Sstevel@tonic-gate 	int	 error = 0;
33450Sstevel@tonic-gate 	on_trap_data_t otd;
33460Sstevel@tonic-gate 	queue_t	*stwrq;
33470Sstevel@tonic-gate 
33480Sstevel@tonic-gate 	/*
33490Sstevel@tonic-gate 	 * Plumbing may change while taking the type so store the
33500Sstevel@tonic-gate 	 * queue in a temporary variable. It doesn't matter even
33510Sstevel@tonic-gate 	 * if the we take the type from the previous plumbing,
33520Sstevel@tonic-gate 	 * that's because if the plumbing has changed when we were
33530Sstevel@tonic-gate 	 * holding the queue in a temporary variable, we can continue
33540Sstevel@tonic-gate 	 * processing the message the way it would have been processed
33550Sstevel@tonic-gate 	 * in the old plumbing, without any side effects but a bit
33560Sstevel@tonic-gate 	 * extra processing for partial ip header checksum.
33570Sstevel@tonic-gate 	 *
33580Sstevel@tonic-gate 	 * This has been done to avoid holding the sd_lock which is
33590Sstevel@tonic-gate 	 * very hot.
33600Sstevel@tonic-gate 	 */
33610Sstevel@tonic-gate 
33620Sstevel@tonic-gate 	stwrq = stp->sd_struiowrq;
33630Sstevel@tonic-gate 	if (stwrq)
33640Sstevel@tonic-gate 		typ = stwrq->q_struiot;
33650Sstevel@tonic-gate 
33660Sstevel@tonic-gate 	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
33670Sstevel@tonic-gate 		dbp = mp->b_datap;
33680Sstevel@tonic-gate 		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
33690Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
33700Sstevel@tonic-gate 		cnt = MIN(uiocnt, uiop->uio_resid);
33710Sstevel@tonic-gate 		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
33720Sstevel@tonic-gate 		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
33730Sstevel@tonic-gate 			/*
33740Sstevel@tonic-gate 			 * Either this mblk has already been processed
33750Sstevel@tonic-gate 			 * or there is no more room in this mblk (?).
33760Sstevel@tonic-gate 			 */
33770Sstevel@tonic-gate 			continue;
33780Sstevel@tonic-gate 		}
33790Sstevel@tonic-gate 		switch (typ) {
33800Sstevel@tonic-gate 		case STRUIOT_STANDARD:
33810Sstevel@tonic-gate 			if (noblock) {
33820Sstevel@tonic-gate 				if (on_trap(&otd, OT_DATA_ACCESS)) {
33830Sstevel@tonic-gate 					no_trap();
33840Sstevel@tonic-gate 					error = EWOULDBLOCK;
33850Sstevel@tonic-gate 					goto out;
33860Sstevel@tonic-gate 				}
33870Sstevel@tonic-gate 			}
33880Sstevel@tonic-gate 			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
33890Sstevel@tonic-gate 				if (noblock)
33900Sstevel@tonic-gate 					no_trap();
33910Sstevel@tonic-gate 				goto out;
33920Sstevel@tonic-gate 			}
33930Sstevel@tonic-gate 			if (noblock)
33940Sstevel@tonic-gate 				no_trap();
33950Sstevel@tonic-gate 			break;
33960Sstevel@tonic-gate 
33970Sstevel@tonic-gate 		default:
33980Sstevel@tonic-gate 			error = EIO;
33990Sstevel@tonic-gate 			goto out;
34000Sstevel@tonic-gate 		}
34010Sstevel@tonic-gate 		dbp->db_struioflag |= STRUIO_DONE;
34020Sstevel@tonic-gate 		dbp->db_cksumstuff += cnt;
34030Sstevel@tonic-gate 	}
34040Sstevel@tonic-gate out:
34050Sstevel@tonic-gate 	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
		/*
		 * A fault has occurred and some bytes were moved to the
		 * current mblk, the uio_t has already been updated by
		 * the appropriate uio routine, so also update the mblk
		 * to reflect this in case this same mblk chain is used
		 * again (after the fault has been handled).
		 */
34130Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
34140Sstevel@tonic-gate 		if (uiocnt >= resid)
34150Sstevel@tonic-gate 			dbp->db_cksumstuff += resid;
34160Sstevel@tonic-gate 	}
34170Sstevel@tonic-gate 	return (error);
34180Sstevel@tonic-gate }
34190Sstevel@tonic-gate 
34200Sstevel@tonic-gate /*
34210Sstevel@tonic-gate  * Try to enter queue synchronously. Any attempt to enter a closing queue will
34220Sstevel@tonic-gate  * fails. The qp->q_rwcnt keeps track of the number of successful entries so
34230Sstevel@tonic-gate  * that removeq() will not try to close the queue while a thread is inside the
34240Sstevel@tonic-gate  * queue.
34250Sstevel@tonic-gate  */
34260Sstevel@tonic-gate static boolean_t
34270Sstevel@tonic-gate rwnext_enter(queue_t *qp)
34280Sstevel@tonic-gate {
34290Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
34300Sstevel@tonic-gate 	if (qp->q_flag & QWCLOSE) {
34310Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
34320Sstevel@tonic-gate 		return (B_FALSE);
34330Sstevel@tonic-gate 	}
34340Sstevel@tonic-gate 	qp->q_rwcnt++;
34350Sstevel@tonic-gate 	ASSERT(qp->q_rwcnt != 0);
34360Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
34370Sstevel@tonic-gate 	return (B_TRUE);
34380Sstevel@tonic-gate }
34390Sstevel@tonic-gate 
/*
 * Decrease the count of threads running in a sync stream queue and wake up
 * any thread blocked in removeq() waiting for that count to drain to zero.
 */
static void
rwnext_exit(queue_t *qp)
{
	mutex_enter(QLOCK(qp));
	qp->q_rwcnt--;
	if (qp->q_flag & QWANTRMQSYNC) {
		/* removeq() is waiting for all syncq entries to leave. */
		qp->q_flag &= ~QWANTRMQSYNC;
		cv_broadcast(&qp->q_wait);
	}
	mutex_exit(QLOCK(qp));
}
34550Sstevel@tonic-gate 
/*
 * The purpose of rwnext() is to call the rw procedure of the next
 * (downstream) module's queue.
 *
 * Treated as a put entrypoint for perimeter synchronization.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT
 * sync queues). If it is a CIPUT sync queue sq_count is incremented and it
 * does not matter if any regular put entrypoints have been already entered.
 * We can't increment one of the sq_putcounts (instead of sq_count) because
 * qwait_rw won't know which counter to decrement.
 *
 * It would be reasonable to add the lockless FASTPUT logic.
 */
int
rwnext(queue_t *qp, struiod_t *dp)
{
	queue_t		*nqp;
	syncq_t		*sq;
	uint16_t	count;
	uint16_t	flags;
	struct qinit	*qi;
	int		(*proc)();
	struct stdata	*stp;
	int		isread;
	int		rval;

	stp = STREAM(qp);
	/*
	 * Prevent q_next from changing by holding sd_lock until acquiring
	 * SQLOCK. Note that a read-side rwnext from the streamhead will
	 * already have sd_lock acquired. In either case sd_lock is always
	 * released after acquiring SQLOCK.
	 *
	 * The streamhead read-side holding sd_lock when calling rwnext is
	 * required to prevent a race condition where M_DATA mblks flowing
	 * up the read-side of the stream could be bypassed by a rwnext()
	 * down-call. In this case sd_lock acts as the streamhead perimeter.
	 */
	if ((nqp = _WR(qp)) == qp) {
		isread = 0;
		mutex_enter(&stp->sd_lock);
		qp = nqp->q_next;
	} else {
		isread = 1;
		if (nqp != stp->sd_wrq)
			/* Not streamhead */
			mutex_enter(&stp->sd_lock);
		qp = _RD(nqp->q_next);
	}
	qi = qp->q_qinfo;
	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
		/*
		 * Not a synchronous module or no r/w procedure for this
		 * queue, so just return EINVAL and let the caller handle it.
		 */
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}

	/* The queue is closing (QWCLOSE); do not enter it. */
	if (rwnext_enter(qp) == B_FALSE) {
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}

	sq = qp->q_syncq;
	mutex_enter(SQLOCK(sq));
	mutex_exit(&stp->sd_lock);
	count = sq->sq_count;
	flags = sq->sq_flags;
	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));

	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
		/*
		 * If this queue is being closed, return.
		 */
		if (qp->q_flag & QWCLOSE) {
			mutex_exit(SQLOCK(sq));
			rwnext_exit(qp);
			return (EINVAL);
		}

		/*
		 * Wait until we can enter the inner perimeter.
		 */
		sq->sq_flags = flags | SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
		count = sq->sq_count;
		flags = sq->sq_flags;
	}

	if (isread == 0 && stp->sd_struiowrq == NULL ||
	    isread == 1 && stp->sd_struiordq == NULL) {
		/*
		 * Stream plumbing changed while waiting for inner perimeter
		 * so just return EINVAL and let the caller handle it.
		 */
		mutex_exit(SQLOCK(sq));
		rwnext_exit(qp);
		return (EINVAL);
	}
	if (!(flags & SQ_CIPUT))
		sq->sq_flags = flags | SQ_EXCL;
	sq->sq_count = count + 1;
	ASSERT(sq->sq_count != 0);		/* Wraparound */
	/*
	 * Note: The only message ordering guarantee that rwnext() makes is
	 *	 for the write queue flow-control case. All others (r/w queue
	 *	 with q_count > 0 (or q_first != 0)) are the responsibility of
	 *	 the queue's rw procedure. This could be generalized here by
	 *	 running the queue's service procedure, but that wouldn't be
	 *	 the most efficient for all cases.
	 */
	mutex_exit(SQLOCK(sq));
	if (! isread && (qp->q_flag & QFULL)) {
		/*
		 * Write queue may be flow controlled. If so,
		 * mark the queue for wakeup when it's not.
		 * Recheck QFULL under QLOCK - the unlocked test above
		 * was only a hint.
		 */
		mutex_enter(QLOCK(qp));
		if (qp->q_flag & QFULL) {
			qp->q_flag |= QWANTWSYNC;
			mutex_exit(QLOCK(qp));
			rval = EWOULDBLOCK;
			goto out;
		}
		mutex_exit(QLOCK(qp));
	}

	if (! isread && dp->d_mp)
		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
		    dp->d_mp->b_datap->db_base);

	rval = (*proc)(qp, dp);

	if (isread && dp->d_mp)
		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
out:
	/*
	 * The queue is protected from being freed by sq_count, so it is
	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
	 */
	rwnext_exit(qp);

	mutex_enter(SQLOCK(sq));
	flags = sq->sq_flags;
	ASSERT(sq->sq_count != 0);
	sq->sq_count--;
	if (flags & SQ_TAIL) {
		putnext_tail(sq, qp, flags);
		/*
		 * The only purpose of this ASSERT is to preserve calling stack
		 * in DEBUG kernel.
		 */
		ASSERT(flags & SQ_TAIL);
		return (rval);
	}
	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
	/*
	 * Safe to always drop SQ_EXCL:
	 *	Not SQ_CIPUT means we set SQ_EXCL above
	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
	 *	did a qwriter(INNER) in which case nobody else
	 *	is in the inner perimeter and we are exiting.
	 *
	 * I would like to make the following assertion:
	 *
	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
	 *	sq->sq_count == 0);
	 *
	 * which indicates that if we are both putshared and exclusive,
	 * we became exclusive while executing the putproc, and the only
	 * claim on the syncq was the one we dropped a few lines above.
	 * But other threads that enter putnext while the syncq is exclusive
	 * need to make a claim as they may need to drop SQLOCK in the
	 * has_writers case to avoid deadlocks.  If these threads are
	 * delayed or preempted, it is possible that the writer thread can
	 * find out that there are other claims making the (sq_count == 0)
	 * test invalid.
	 */

	sq->sq_flags = flags & ~SQ_EXCL;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		sq->sq_flags &= ~SQ_WANTWAKEUP;
		cv_broadcast(&sq->sq_wait);
	}
	mutex_exit(SQLOCK(sq));
	return (rval);
}
36460Sstevel@tonic-gate 
/*
 * The purpose of infonext() is to call the info procedure of the next
 * (downstream) module's queue.
 *
 * Treated as a put entrypoint for perimeter synchronization.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT
 * sync queues). If it is a CIPUT sync queue the regular sq_count is
 * incremented and it does not matter if any regular put entrypoints have
 * been already entered.
 */
int
infonext(queue_t *qp, infod_t *idp)
{
	queue_t		*nqp;
	syncq_t		*sq;
	uint16_t	count;
	uint16_t 	flags;
	struct qinit	*qi;
	int		(*proc)();
	struct stdata	*stp;
	int		rval;

	stp = STREAM(qp);
	/*
	 * Prevent q_next from changing by holding sd_lock until
	 * acquiring SQLOCK.
	 */
	mutex_enter(&stp->sd_lock);
	if ((nqp = _WR(qp)) == qp) {
		qp = nqp->q_next;
	} else {
		qp = _RD(nqp->q_next);
	}
	qi = qp->q_qinfo;
	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
		/*
		 * Not a synchronous module or no info procedure for this
		 * queue; let the caller handle EINVAL.
		 */
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}
	sq = qp->q_syncq;
	mutex_enter(SQLOCK(sq));
	mutex_exit(&stp->sd_lock);
	count = sq->sq_count;
	flags = sq->sq_flags;
	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));

	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
		/*
		 * Wait until we can enter the inner perimeter.
		 */
		sq->sq_flags = flags | SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
		count = sq->sq_count;
		flags = sq->sq_flags;
	}

	if (! (flags & SQ_CIPUT))
		sq->sq_flags = flags | SQ_EXCL;
	sq->sq_count = count + 1;		/* Claim the syncq */
	ASSERT(sq->sq_count != 0);		/* Wraparound */
	mutex_exit(SQLOCK(sq));

	rval = (*proc)(qp, idp);

	mutex_enter(SQLOCK(sq));
	flags = sq->sq_flags;
	ASSERT(sq->sq_count != 0);
	sq->sq_count--;
	if (flags & SQ_TAIL) {
		putnext_tail(sq, qp, flags);
		/*
		 * The only purpose of this ASSERT is to preserve calling stack
		 * in DEBUG kernel.
		 */
		ASSERT(flags & SQ_TAIL);
		return (rval);
	}
	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
/*
 * XXXX
 * I am not certain the next comment is correct here.  I need to consider
 * why the infonext is called, and if dropping SQ_EXCL unless non-CIPUT
 * might cause other problems.  It just might be safer to drop it if
 * !SQ_CIPUT because that is when we set it.
 */
	/*
	 * Safe to always drop SQ_EXCL:
	 *	Not SQ_CIPUT means we set SQ_EXCL above
	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
	 *	did a qwriter(INNER) in which case nobody else
	 *	is in the inner perimeter and we are exiting.
	 *
	 * I would like to make the following assertion:
	 *
	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
	 *	sq->sq_count == 0);
	 *
	 * which indicates that if we are both putshared and exclusive,
	 * we became exclusive while executing the putproc, and the only
	 * claim on the syncq was the one we dropped a few lines above.
	 * But other threads that enter putnext while the syncq is exclusive
	 * need to make a claim as they may need to drop SQLOCK in the
	 * has_writers case to avoid deadlocks.  If these threads are
	 * delayed or preempted, it is possible that the writer thread can
	 * find out that there are other claims making the (sq_count == 0)
	 * test invalid.
	 */

	sq->sq_flags = flags & ~SQ_EXCL;
	mutex_exit(SQLOCK(sq));
	return (rval);
}
37590Sstevel@tonic-gate 
37600Sstevel@tonic-gate /*
37610Sstevel@tonic-gate  * Return nonzero if the queue is responsible for struio(), else return 0.
37620Sstevel@tonic-gate  */
37630Sstevel@tonic-gate int
37640Sstevel@tonic-gate isuioq(queue_t *q)
37650Sstevel@tonic-gate {
37660Sstevel@tonic-gate 	if (q->q_flag & QREADR)
37670Sstevel@tonic-gate 		return (STREAM(q)->sd_struiordq == q);
37680Sstevel@tonic-gate 	else
37690Sstevel@tonic-gate 		return (STREAM(q)->sd_struiowrq == q);
37700Sstevel@tonic-gate }
37710Sstevel@tonic-gate 
/*
 * Tunable: when nonzero, per-CPU putlocks (ciputctrl structures) are never
 * created - see create_putlocks() below. Enabled by default everywhere
 * except sparc.
 */
#if defined(__sparc)
int disable_putlocks = 0;
#else
int disable_putlocks = 1;
#endif
37770Sstevel@tonic-gate 
/*
 * Called by create_putlocks(): install a per-CPU ciputctrl array on the
 * syncq of q and, when the partner queue has its own syncq (QPERQ), on
 * that syncq as well. Only CIPUT syncqs get ciputctrl.
 */
static void
create_syncq_putlocks(queue_t *q)
{
	syncq_t	*sq = q->q_syncq;
	ciputctrl_t *cip;
	int i;

	ASSERT(sq != NULL);

	ASSERT(disable_putlocks == 0);
	ASSERT(n_ciputctrl >= min_n_ciputctrl);
	ASSERT(ciputctrl_cache != NULL);

	/* Only concurrent-inner-put syncqs use per-CPU putlocks. */
	if (!(sq->sq_type & SQ_CIPUT))
		return;

	/*
	 * First pass handles q's syncq; second pass handles the partner
	 * queue's syncq when the pair uses separate syncqs.
	 */
	for (i = 0; i <= 1; i++) {
		if (sq->sq_ciputctrl == NULL) {
			/*
			 * Allocate outside SQLOCK, then recheck under the
			 * lock in case another thread installed one first.
			 */
			cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
			SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
			mutex_enter(SQLOCK(sq));
			if (sq->sq_ciputctrl != NULL) {
				/* Lost the race; discard our copy. */
				mutex_exit(SQLOCK(sq));
				kmem_cache_free(ciputctrl_cache, cip);
			} else {
				ASSERT(sq->sq_nciputctrl == 0);
				sq->sq_nciputctrl = n_ciputctrl - 1;
				/*
				 * putnext checks sq_ciputctrl without holding
				 * SQLOCK. if it is not NULL putnext assumes
				 * sq_nciputctrl is initialized. membar below
				 * ensures that.
				 */
				membar_producer();
				sq->sq_ciputctrl = cip;
				mutex_exit(SQLOCK(sq));
			}
		}
		ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
		if (i == 1)
			break;
		q = _OTHERQ(q);
		if (!(q->q_flag & QPERQ)) {
			/* Partner shares the same syncq; nothing more to do. */
			ASSERT(sq == q->q_syncq);
			break;
		}
		ASSERT(q->q_syncq != NULL);
		ASSERT(sq != q->q_syncq);
		sq = q->q_syncq;
		ASSERT(sq->sq_type & SQ_CIPUT);
	}
}
38330Sstevel@tonic-gate 
/*
 * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for
 * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for
 * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
 * starting from q and down to the driver.
 *
 * This should be called after the affected queues are part of stream
 * geometry. It should be called from driver/module open routine after
 * qprocson() call. It is also called from nfs syscall where it is known that
 * stream is configured and won't change its geometry during create_putlock
 * call.
 *
 * The caller normally uses a 0 value for the stream argument to speed up MT
 * putnext into the perimeter of q, for example because its perimeter is per
 * module (e.g. IP).
 *
 * The caller normally uses a non 0 value for the stream argument to hint the
 * system that the stream of q is a very contended global system stream
 * (e.g. NFS/UDP) and the part of the stream from q to the driver is
 * particularly MT hot.
 *
 * The caller ensures stream plumbing won't happen while we are here and
 * therefore q_next can be safely used.
 */

void
create_putlocks(queue_t *q, int stream)
{
	ciputctrl_t	*cip;
	struct stdata	*stp = STREAM(q);

	q = _WR(q);
	ASSERT(stp != NULL);

	/* Nothing to do when putlocks are disabled or not configured. */
	if (disable_putlocks != 0)
		return;

	if (n_ciputctrl < min_n_ciputctrl)
		return;

	ASSERT(ciputctrl_cache != NULL);

	if (stream != 0 && stp->sd_ciputctrl == NULL) {
		/*
		 * Allocate outside sd_lock, then recheck under the lock in
		 * case another thread installed the array first.
		 */
		cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
		SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
		mutex_enter(&stp->sd_lock);
		if (stp->sd_ciputctrl != NULL) {
			/* Lost the race; discard our copy. */
			mutex_exit(&stp->sd_lock);
			kmem_cache_free(ciputctrl_cache, cip);
		} else {
			ASSERT(stp->sd_nciputctrl == 0);
			stp->sd_nciputctrl = n_ciputctrl - 1;
			/*
			 * putnext checks sd_ciputctrl without holding
			 * sd_lock. if it is not NULL putnext assumes
			 * sd_nciputctrl is initialized. membar below
			 * ensures that.
			 */
			membar_producer();
			stp->sd_ciputctrl = cip;
			mutex_exit(&stp->sd_lock);
		}
	}

	ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);

	/* Walk from q toward the driver end of this stream. */
	while (_SAMESTR(q)) {
		create_syncq_putlocks(q);
		if (stream == 0)
			return;
		q = q->q_next;
	}
	ASSERT(q != NULL);
	create_syncq_putlocks(q);
}
39090Sstevel@tonic-gate 
39100Sstevel@tonic-gate /*
39110Sstevel@tonic-gate  * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
39120Sstevel@tonic-gate  * through a stream.
39130Sstevel@tonic-gate  *
 * Data currently recorded per event is an hrtime stamp, queue address, event
 * type, and a per type datum.  Much of the STREAMS framework is instrumented
 * for automatic flow tracing (when enabled).  Events can be defined and used
 * by STREAMS modules and drivers.
39180Sstevel@tonic-gate  *
39190Sstevel@tonic-gate  * Global objects:
39200Sstevel@tonic-gate  *
39210Sstevel@tonic-gate  *	str_ftevent() - Add a flow-trace event to a dblk.
39220Sstevel@tonic-gate  *	str_ftfree() - Free flow-trace data
39230Sstevel@tonic-gate  *
39240Sstevel@tonic-gate  * Local objects:
39250Sstevel@tonic-gate  *
39260Sstevel@tonic-gate  *	fthdr_cache - pointer to the kmem cache for trace header.
39270Sstevel@tonic-gate  *	ftblk_cache - pointer to the kmem cache for trace data blocks.
39280Sstevel@tonic-gate  */
39290Sstevel@tonic-gate 
39300Sstevel@tonic-gate int str_ftnever = 1;	/* Don't do STREAMS flow tracing */
39310Sstevel@tonic-gate 
/*
 * Record one flow-trace event in the trace data chained off header hp.
 * p is a queue pointer when evnt carries FTEV_QMASK bits; otherwise it is
 * used directly as the event's module-id string pointer. Event slots are
 * claimed lock-free with compare-and-swap so concurrent tracers of the
 * same dblk never block each other.
 */
void
str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
{
	ftblk_t *bp = hp->tail;
	ftblk_t *nbp;
	ftevnt_t *ep;
	int ix, nix;

	ASSERT(hp != NULL);

	for (;;) {
		if ((ix = bp->ix) == FTBLK_EVNTS) {
			/*
			 * Tail doesn't have room, so need a new tail.
			 *
			 * To make this MT safe, first, allocate a new
			 * ftblk, and initialize it.  To make life a
			 * little easier, reserve the first slot (mostly
			 * by making ix = 1).  When we are finished with
			 * the initialization, CAS this pointer to the
			 * tail.  If this succeeds, this is the new
			 * "next" block.  Otherwise, another thread
			 * got here first, so free the block and start
			 * again.
			 */
			if (!(nbp = kmem_cache_alloc(ftblk_cache,
			    KM_NOSLEEP))) {
				/*
				 * no mem, so punt; bumping str_ftnever
				 * presumably shuts tracing off globally.
				 */
				str_ftnever++;
				/* free up all flow data? */
				return;
			}
			nbp->nxt = NULL;
			nbp->ix = 1;
			/*
			 * Just in case there is another thread about
			 * to get the next index, we need to make sure
			 * the value is there for it.
			 */
			membar_producer();
			if (casptr(&hp->tail, bp, nbp) == bp) {
				/* CAS was successful: slot 0 of nbp is ours */
				bp->nxt = nbp;
				membar_producer();
				bp = nbp;
				ix = 0;
				goto cas_good;
			} else {
				kmem_cache_free(ftblk_cache, nbp);
				bp = hp->tail;
				continue;
			}
		}
		nix = ix + 1;
		if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) {
		cas_good:
			/* Tag context/CPU switches since the last event. */
			if (curthread != hp->thread) {
				hp->thread = curthread;
				evnt |= FTEV_CS;
			}
			if (CPU->cpu_seqid != hp->cpu_seqid) {
				hp->cpu_seqid = CPU->cpu_seqid;
				evnt |= FTEV_PS;
			}
			ep = &bp->ev[ix];
			break;
		}
	}

	if (evnt & FTEV_QMASK) {
		queue_t *qp = p;

		/*
		 * It is possible that the module info is broken
		 * (as is logsubr.c at this comment's writing).
		 * Instead of panicking or doing other unmentionables,
		 * we shall put a dummy name as the mid, and continue.
		 */
		if (qp->q_qinfo == NULL)
			ep->mid = "NONAME";
		else
			ep->mid = qp->q_qinfo->qi_minfo->mi_idname;

		if (!(qp->q_flag & QREADR))
			evnt |= FTEV_ISWR;
	} else {
		ep->mid = (char *)p;
	}

	ep->ts = gethrtime();
	ep->evnt = evnt;
	ep->data = data;
	/* Fold the event into the header's running hash (hash * 513 + ...). */
	hp->hash = (hp->hash << 9) + hp->hash;
	hp->hash += (evnt << 16) | data;
	hp->hash += (uintptr_t)ep->mid;
}
40280Sstevel@tonic-gate 
40290Sstevel@tonic-gate /*
40300Sstevel@tonic-gate  * Free flow-trace data.
40310Sstevel@tonic-gate  */
40320Sstevel@tonic-gate void
40330Sstevel@tonic-gate str_ftfree(dblk_t *dbp)
40340Sstevel@tonic-gate {
40350Sstevel@tonic-gate 	fthdr_t *hp = dbp->db_fthdr;
40360Sstevel@tonic-gate 	ftblk_t *bp = &hp->first;
40370Sstevel@tonic-gate 	ftblk_t *nbp;
40380Sstevel@tonic-gate 
40390Sstevel@tonic-gate 	if (bp != hp->tail || bp->ix != 0) {
40400Sstevel@tonic-gate 		/*
40410Sstevel@tonic-gate 		 * Clear out the hash, have the tail point to itself, and free
40420Sstevel@tonic-gate 		 * any continuation blocks.
40430Sstevel@tonic-gate 		 */
40440Sstevel@tonic-gate 		bp = hp->first.nxt;
40450Sstevel@tonic-gate 		hp->tail = &hp->first;
40460Sstevel@tonic-gate 		hp->hash = 0;
40470Sstevel@tonic-gate 		hp->first.nxt = NULL;
40480Sstevel@tonic-gate 		hp->first.ix = 0;
40490Sstevel@tonic-gate 		while (bp != NULL) {
40500Sstevel@tonic-gate 			nbp = bp->nxt;
40510Sstevel@tonic-gate 			kmem_cache_free(ftblk_cache, bp);
40520Sstevel@tonic-gate 			bp = nbp;
40530Sstevel@tonic-gate 		}
40540Sstevel@tonic-gate 	}
40550Sstevel@tonic-gate 	kmem_cache_free(fthdr_cache, hp);
40560Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
40570Sstevel@tonic-gate }
4058