xref: /illumos-gate/usr/src/uts/common/io/stream.c (revision 8cfbb92d03aaa6d46404fec39826315ef745a557)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5381a2a9aSdr146992  * Common Development and Distribution License (the "License").
6381a2a9aSdr146992  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
227c478bd9Sstevel@tonic-gate /*	All Rights Reserved	*/
237c478bd9Sstevel@tonic-gate 
247c478bd9Sstevel@tonic-gate /*
25a45f3f93Smeem  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
267c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
272ab478d4SGordon Ross  *
282ab478d4SGordon Ross  * Copyright 2021 Tintri by DDN, Inc. All rights reserved.
299b664393SGarrett D'Amore  * Copyright 2022 Garrett D'Amore
30*8cfbb92dSKyle Simpson  * Copyright 2024 Oxide Computer Company
317c478bd9Sstevel@tonic-gate  */
327c478bd9Sstevel@tonic-gate 
337c478bd9Sstevel@tonic-gate #include <sys/types.h>
347c478bd9Sstevel@tonic-gate #include <sys/param.h>
357c478bd9Sstevel@tonic-gate #include <sys/thread.h>
367c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
377c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
387c478bd9Sstevel@tonic-gate #include <sys/stream.h>
397c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
407c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
417c478bd9Sstevel@tonic-gate #include <sys/conf.h>
427c478bd9Sstevel@tonic-gate #include <sys/debug.h>
437c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
447c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
457c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
467c478bd9Sstevel@tonic-gate #include <sys/errno.h>
477c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
487c478bd9Sstevel@tonic-gate #include <sys/ftrace.h>
497c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
507c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
51ae35285aSmeem #include <sys/strft.h>
527c478bd9Sstevel@tonic-gate 
537c478bd9Sstevel@tonic-gate #ifdef DEBUG
547c478bd9Sstevel@tonic-gate #include <sys/kmem_impl.h>
557c478bd9Sstevel@tonic-gate #endif
567c478bd9Sstevel@tonic-gate 
577c478bd9Sstevel@tonic-gate /*
587c478bd9Sstevel@tonic-gate  * This file contains all the STREAMS utility routines that may
597c478bd9Sstevel@tonic-gate  * be used by modules and drivers.
607c478bd9Sstevel@tonic-gate  */
617c478bd9Sstevel@tonic-gate 
627c478bd9Sstevel@tonic-gate /*
637c478bd9Sstevel@tonic-gate  * STREAMS message allocator: principles of operation
647c478bd9Sstevel@tonic-gate  *
657c478bd9Sstevel@tonic-gate  * The streams message allocator consists of all the routines that
667c478bd9Sstevel@tonic-gate  * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
677c478bd9Sstevel@tonic-gate  * dupb(), freeb() and freemsg().  What follows is a high-level view
687c478bd9Sstevel@tonic-gate  * of how the allocator works.
697c478bd9Sstevel@tonic-gate  *
707c478bd9Sstevel@tonic-gate  * Every streams message consists of one or more mblks, a dblk, and data.
717c478bd9Sstevel@tonic-gate  * All mblks for all types of messages come from a common mblk_cache.
727c478bd9Sstevel@tonic-gate  * The dblk and data come in several flavors, depending on how the
737c478bd9Sstevel@tonic-gate  * message is allocated:
747c478bd9Sstevel@tonic-gate  *
757c478bd9Sstevel@tonic-gate  * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
767c478bd9Sstevel@tonic-gate  *     fixed-size dblk/data caches. For message sizes that are multiples of
777c478bd9Sstevel@tonic-gate  *     PAGESIZE, dblks are allocated separately from the buffer.
787c478bd9Sstevel@tonic-gate  *     The associated buffer is allocated by the constructor using kmem_alloc().
797c478bd9Sstevel@tonic-gate  *     For all other message sizes, the dblk and its associated data are
807c478bd9Sstevel@tonic-gate  *     allocated as a single contiguous chunk of memory.
817c478bd9Sstevel@tonic-gate  *     Objects in these caches consist of a dblk plus its associated data.
827c478bd9Sstevel@tonic-gate  *     allocb() determines the nearest-size cache by table lookup:
837c478bd9Sstevel@tonic-gate  *     the dblk_cache[] array provides the mapping from size to dblk cache.
847c478bd9Sstevel@tonic-gate  *
857c478bd9Sstevel@tonic-gate  * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
867c478bd9Sstevel@tonic-gate  *     kmem_alloc()'ing a buffer for the data and supplying that
877c478bd9Sstevel@tonic-gate  *     buffer to gesballoc(), described below.
887c478bd9Sstevel@tonic-gate  *
897c478bd9Sstevel@tonic-gate  * (3) The four flavors of [d]esballoc[a] are all implemented by a
907c478bd9Sstevel@tonic-gate  *     common routine, gesballoc() ("generic esballoc").  gesballoc()
917c478bd9Sstevel@tonic-gate  *     allocates a dblk from the global dblk_esb_cache and sets db_base,
927c478bd9Sstevel@tonic-gate  *     db_lim and db_frtnp to describe the caller-supplied buffer.
937c478bd9Sstevel@tonic-gate  *
947c478bd9Sstevel@tonic-gate  * While there are several routines to allocate messages, there is only
957c478bd9Sstevel@tonic-gate  * one routine to free messages: freeb().  freeb() simply invokes the
967c478bd9Sstevel@tonic-gate  * dblk's free method, dbp->db_free(), which is set at allocation time.
977c478bd9Sstevel@tonic-gate  *
987c478bd9Sstevel@tonic-gate  * dupb() creates a new reference to a message by allocating a new mblk,
997c478bd9Sstevel@tonic-gate  * incrementing the dblk reference count and setting the dblk's free
1007c478bd9Sstevel@tonic-gate  * method to dblk_decref().  The dblk's original free method is retained
1017c478bd9Sstevel@tonic-gate  * in db_lastfree.  dblk_decref() decrements the reference count on each
1027c478bd9Sstevel@tonic-gate  * freeb().  If this is not the last reference it just frees the mblk;
1037c478bd9Sstevel@tonic-gate  * if this *is* the last reference, it restores db_free to db_lastfree,
1047c478bd9Sstevel@tonic-gate  * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
1057c478bd9Sstevel@tonic-gate  *
1067c478bd9Sstevel@tonic-gate  * The implementation makes aggressive use of kmem object caching for
1077c478bd9Sstevel@tonic-gate  * maximum performance.  This makes the code simple and compact, but
1087c478bd9Sstevel@tonic-gate  * also a bit abstruse in some places.  The invariants that constitute a
1097c478bd9Sstevel@tonic-gate  * message's constructed state, described below, are more subtle than usual.
1107c478bd9Sstevel@tonic-gate  *
1117c478bd9Sstevel@tonic-gate  * Every dblk has an "attached mblk" as part of its constructed state.
1127c478bd9Sstevel@tonic-gate  * The mblk is allocated by the dblk's constructor and remains attached
1137c478bd9Sstevel@tonic-gate  * until the message is either dup'ed or pulled up.  In the dupb() case
1147c478bd9Sstevel@tonic-gate  * the mblk association doesn't matter until the last free, at which time
1157c478bd9Sstevel@tonic-gate  * dblk_decref() attaches the last mblk to the dblk.  pullupmsg() affects
1167c478bd9Sstevel@tonic-gate  * the mblk association because it swaps the leading mblks of two messages,
1177c478bd9Sstevel@tonic-gate  * so it is responsible for swapping their db_mblk pointers accordingly.
1187c478bd9Sstevel@tonic-gate  * From a constructed-state viewpoint it doesn't matter that a dblk's
1197c478bd9Sstevel@tonic-gate  * attached mblk can change while the message is allocated; all that
1207c478bd9Sstevel@tonic-gate  * matters is that the dblk has *some* attached mblk when it's freed.
1217c478bd9Sstevel@tonic-gate  *
1227c478bd9Sstevel@tonic-gate  * The sizes of the allocb() small-message caches are not magical.
1237c478bd9Sstevel@tonic-gate  * They represent a good trade-off between internal and external
1247c478bd9Sstevel@tonic-gate  * fragmentation for current workloads.  They should be reevaluated
1257c478bd9Sstevel@tonic-gate  * periodically, especially if allocations larger than DBLK_MAX_CACHE
1267c478bd9Sstevel@tonic-gate  * become common.  We use 64-byte alignment so that dblks don't
1277c478bd9Sstevel@tonic-gate  * straddle cache lines unnecessarily.
1287c478bd9Sstevel@tonic-gate  */
/*
 * Sizing parameters for the fixed-size dblk caches.  dblk_cache[] has one
 * slot per DBLK_MIN_SIZE bytes of message size up to DBLK_MAX_CACHE, and a
 * size is converted to a slot with (size - 1) >> DBLK_SIZE_SHIFT.
 */
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64
#define	DBLK_MIN_SIZE		8
#define	DBLK_SIZE_SHIFT		3

/*
 * DBLK_RTFU_SHIFT(field) is the bit position of one of the byte-sized dblk
 * fields (db_ref, db_type, db_flags, db_struioflag) inside the 32-bit word
 * that starts at db_ref.  The position depends on byte order.
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * DBLK_RTFU() builds the combined ref/type/flags/uioflag word so that all
 * four fields can be stored with one 32-bit write through DBLK_RTFU_WORD().
 * The value (ref - 1) is OR'ed into the flags byte.
 * NOTE(review): presumably this seeds the minimum-reference bit kept in
 * db_flags -- confirm against the dblk flag definitions.
 *
 * Every argument, including ref in the (ref) - 1 term, is parenthesized so
 * that an argument containing a lower-precedence operator expands correctly.
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | ((ref) - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
/* The 32-bit word of an mblk that starts at b_band. */
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
1507c478bd9Sstevel@tonic-gate 
1517c478bd9Sstevel@tonic-gate static size_t dblk_sizes[] = {
1527c478bd9Sstevel@tonic-gate #ifdef _LP64
153b5fca8f8Stomee 	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856,
154b5fca8f8Stomee 	8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624,
155b5fca8f8Stomee 	40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392,
1567c478bd9Sstevel@tonic-gate #else
157b5fca8f8Stomee 	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904,
158b5fca8f8Stomee 	8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672,
159b5fca8f8Stomee 	40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440,
1607c478bd9Sstevel@tonic-gate #endif
1617c478bd9Sstevel@tonic-gate 	DBLK_MAX_CACHE, 0
1627c478bd9Sstevel@tonic-gate };
1637c478bd9Sstevel@tonic-gate 
1647c478bd9Sstevel@tonic-gate static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
1657c478bd9Sstevel@tonic-gate static struct kmem_cache *mblk_cache;
1667c478bd9Sstevel@tonic-gate static struct kmem_cache *dblk_esb_cache;
1677c478bd9Sstevel@tonic-gate static struct kmem_cache *fthdr_cache;
1687c478bd9Sstevel@tonic-gate static struct kmem_cache *ftblk_cache;
1697c478bd9Sstevel@tonic-gate 
1707c478bd9Sstevel@tonic-gate static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1717c478bd9Sstevel@tonic-gate static mblk_t *allocb_oversize(size_t size, int flags);
1727c478bd9Sstevel@tonic-gate static int allocb_tryhard_fails;
1737c478bd9Sstevel@tonic-gate static void frnop_func(void *arg);
1747c478bd9Sstevel@tonic-gate frtn_t frnop = { frnop_func };
1757c478bd9Sstevel@tonic-gate static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate static boolean_t rwnext_enter(queue_t *qp);
1787c478bd9Sstevel@tonic-gate static void rwnext_exit(queue_t *qp);
1797c478bd9Sstevel@tonic-gate 
1807c478bd9Sstevel@tonic-gate /*
1817c478bd9Sstevel@tonic-gate  * Patchable mblk/dblk kmem_cache flags.
1827c478bd9Sstevel@tonic-gate  */
1837c478bd9Sstevel@tonic-gate int dblk_kmem_flags = 0;
1847c478bd9Sstevel@tonic-gate int mblk_kmem_flags = 0;
1857c478bd9Sstevel@tonic-gate 
1867c478bd9Sstevel@tonic-gate static int
dblk_constructor(void * buf,void * cdrarg,int kmflags)1877c478bd9Sstevel@tonic-gate dblk_constructor(void *buf, void *cdrarg, int kmflags)
1887c478bd9Sstevel@tonic-gate {
1897c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
1907c478bd9Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
1917c478bd9Sstevel@tonic-gate 	size_t index;
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate 	ASSERT(msg_size != 0);
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;
1967c478bd9Sstevel@tonic-gate 
197e4506d67Smeem 	ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));
1987c478bd9Sstevel@tonic-gate 
1997c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2007c478bd9Sstevel@tonic-gate 		return (-1);
2017c478bd9Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2027c478bd9Sstevel@tonic-gate 		dbp->db_base = kmem_alloc(msg_size, kmflags);
2037c478bd9Sstevel@tonic-gate 		if (dbp->db_base == NULL) {
2047c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, dbp->db_mblk);
2057c478bd9Sstevel@tonic-gate 			return (-1);
2067c478bd9Sstevel@tonic-gate 		}
2077c478bd9Sstevel@tonic-gate 	} else {
2087c478bd9Sstevel@tonic-gate 		dbp->db_base = (unsigned char *)&dbp[1];
2097c478bd9Sstevel@tonic-gate 	}
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2127c478bd9Sstevel@tonic-gate 	dbp->db_cache = dblk_cache[index];
2137c478bd9Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + msg_size;
2147c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
2157c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2167c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2177c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2187c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2197c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2207c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2217c478bd9Sstevel@tonic-gate 	return (0);
2227c478bd9Sstevel@tonic-gate }
2237c478bd9Sstevel@tonic-gate 
2247c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2257c478bd9Sstevel@tonic-gate static int
dblk_esb_constructor(void * buf,void * cdrarg,int kmflags)2267c478bd9Sstevel@tonic-gate dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
2277c478bd9Sstevel@tonic-gate {
2287c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2317c478bd9Sstevel@tonic-gate 		return (-1);
2327c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2337c478bd9Sstevel@tonic-gate 	dbp->db_cache = dblk_esb_cache;
2347c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2357c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2367c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2377c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2387c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2397c478bd9Sstevel@tonic-gate 	return (0);
2407c478bd9Sstevel@tonic-gate }
2417c478bd9Sstevel@tonic-gate 
2427c478bd9Sstevel@tonic-gate static int
bcache_dblk_constructor(void * buf,void * cdrarg,int kmflags)2437c478bd9Sstevel@tonic-gate bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
2447c478bd9Sstevel@tonic-gate {
2457c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
246a45f3f93Smeem 	bcache_t *bcp = cdrarg;
2477c478bd9Sstevel@tonic-gate 
2487c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2497c478bd9Sstevel@tonic-gate 		return (-1);
2507c478bd9Sstevel@tonic-gate 
251a45f3f93Smeem 	dbp->db_base = kmem_cache_alloc(bcp->buffer_cache, kmflags);
252a45f3f93Smeem 	if (dbp->db_base == NULL) {
2537c478bd9Sstevel@tonic-gate 		kmem_cache_free(mblk_cache, dbp->db_mblk);
2547c478bd9Sstevel@tonic-gate 		return (-1);
2557c478bd9Sstevel@tonic-gate 	}
2567c478bd9Sstevel@tonic-gate 
2577c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2587c478bd9Sstevel@tonic-gate 	dbp->db_cache = (void *)bcp;
2597c478bd9Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + bcp->size;
2607c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
2617c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2627c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2637c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2647c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2657c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2667c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2677c478bd9Sstevel@tonic-gate 	return (0);
2687c478bd9Sstevel@tonic-gate }
2697c478bd9Sstevel@tonic-gate 
2707c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2717c478bd9Sstevel@tonic-gate static void
dblk_destructor(void * buf,void * cdrarg)2727c478bd9Sstevel@tonic-gate dblk_destructor(void *buf, void *cdrarg)
2737c478bd9Sstevel@tonic-gate {
2747c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2757c478bd9Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
2767c478bd9Sstevel@tonic-gate 
2777c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
2787c478bd9Sstevel@tonic-gate 	ASSERT(msg_size != 0);
2797c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
2807c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
2817c478bd9Sstevel@tonic-gate 
2827c478bd9Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2837c478bd9Sstevel@tonic-gate 		kmem_free(dbp->db_base, msg_size);
2847c478bd9Sstevel@tonic-gate 	}
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
2877c478bd9Sstevel@tonic-gate }
2887c478bd9Sstevel@tonic-gate 
2897c478bd9Sstevel@tonic-gate static void
bcache_dblk_destructor(void * buf,void * cdrarg)2907c478bd9Sstevel@tonic-gate bcache_dblk_destructor(void *buf, void *cdrarg)
2917c478bd9Sstevel@tonic-gate {
2927c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
293a45f3f93Smeem 	bcache_t *bcp = cdrarg;
2947c478bd9Sstevel@tonic-gate 
2957c478bd9Sstevel@tonic-gate 	kmem_cache_free(bcp->buffer_cache, dbp->db_base);
2967c478bd9Sstevel@tonic-gate 
2977c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
2987c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
2997c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
3027c478bd9Sstevel@tonic-gate }
3037c478bd9Sstevel@tonic-gate 
304a45f3f93Smeem /* ARGSUSED */
305a45f3f93Smeem static int
ftblk_constructor(void * buf,void * cdrarg,int kmflags)306a45f3f93Smeem ftblk_constructor(void *buf, void *cdrarg, int kmflags)
307a45f3f93Smeem {
308a45f3f93Smeem 	ftblk_t *fbp = buf;
309a45f3f93Smeem 	int i;
310a45f3f93Smeem 
311a45f3f93Smeem 	bzero(fbp, sizeof (ftblk_t));
312a45f3f93Smeem 	if (str_ftstack != 0) {
313a45f3f93Smeem 		for (i = 0; i < FTBLK_EVNTS; i++)
314a45f3f93Smeem 			fbp->ev[i].stk = kmem_alloc(sizeof (ftstk_t), kmflags);
315a45f3f93Smeem 	}
316a45f3f93Smeem 
317a45f3f93Smeem 	return (0);
318a45f3f93Smeem }
319a45f3f93Smeem 
320a45f3f93Smeem /* ARGSUSED */
321a45f3f93Smeem static void
ftblk_destructor(void * buf,void * cdrarg)322a45f3f93Smeem ftblk_destructor(void *buf, void *cdrarg)
323a45f3f93Smeem {
324a45f3f93Smeem 	ftblk_t *fbp = buf;
325a45f3f93Smeem 	int i;
326a45f3f93Smeem 
327a45f3f93Smeem 	if (str_ftstack != 0) {
328a45f3f93Smeem 		for (i = 0; i < FTBLK_EVNTS; i++) {
329a45f3f93Smeem 			if (fbp->ev[i].stk != NULL) {
330a45f3f93Smeem 				kmem_free(fbp->ev[i].stk, sizeof (ftstk_t));
331a45f3f93Smeem 				fbp->ev[i].stk = NULL;
332a45f3f93Smeem 			}
333a45f3f93Smeem 		}
334a45f3f93Smeem 	}
335a45f3f93Smeem }
336a45f3f93Smeem 
337a45f3f93Smeem static int
fthdr_constructor(void * buf,void * cdrarg,int kmflags)338a45f3f93Smeem fthdr_constructor(void *buf, void *cdrarg, int kmflags)
339a45f3f93Smeem {
340a45f3f93Smeem 	fthdr_t *fhp = buf;
341a45f3f93Smeem 
342a45f3f93Smeem 	return (ftblk_constructor(&fhp->first, cdrarg, kmflags));
343a45f3f93Smeem }
344a45f3f93Smeem 
345a45f3f93Smeem static void
fthdr_destructor(void * buf,void * cdrarg)346a45f3f93Smeem fthdr_destructor(void *buf, void *cdrarg)
347a45f3f93Smeem {
348a45f3f93Smeem 	fthdr_t *fhp = buf;
349a45f3f93Smeem 
350a45f3f93Smeem 	ftblk_destructor(&fhp->first, cdrarg);
351a45f3f93Smeem }
352a45f3f93Smeem 
3537c478bd9Sstevel@tonic-gate void
streams_msg_init(void)3547c478bd9Sstevel@tonic-gate streams_msg_init(void)
3557c478bd9Sstevel@tonic-gate {
3567c478bd9Sstevel@tonic-gate 	char name[40];
3577c478bd9Sstevel@tonic-gate 	size_t size;
3587c478bd9Sstevel@tonic-gate 	size_t lastsize = DBLK_MIN_SIZE;
3597c478bd9Sstevel@tonic-gate 	size_t *sizep;
3607c478bd9Sstevel@tonic-gate 	struct kmem_cache *cp;
3617c478bd9Sstevel@tonic-gate 	size_t tot_size;
3627c478bd9Sstevel@tonic-gate 	int offset;
3637c478bd9Sstevel@tonic-gate 
364a45f3f93Smeem 	mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32,
365a45f3f93Smeem 	    NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags);
3667c478bd9Sstevel@tonic-gate 
3677c478bd9Sstevel@tonic-gate 	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
3687c478bd9Sstevel@tonic-gate 
3697c478bd9Sstevel@tonic-gate 		if ((offset = (size & PAGEOFFSET)) != 0) {
3707c478bd9Sstevel@tonic-gate 			/*
3717c478bd9Sstevel@tonic-gate 			 * We are in the middle of a page, dblk should
3727c478bd9Sstevel@tonic-gate 			 * be allocated on the same page
3737c478bd9Sstevel@tonic-gate 			 */
3747c478bd9Sstevel@tonic-gate 			tot_size = size + sizeof (dblk_t);
3757c478bd9Sstevel@tonic-gate 			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
3767c478bd9Sstevel@tonic-gate 			    < PAGESIZE);
3777c478bd9Sstevel@tonic-gate 			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
3787c478bd9Sstevel@tonic-gate 
3797c478bd9Sstevel@tonic-gate 		} else {
3807c478bd9Sstevel@tonic-gate 
3817c478bd9Sstevel@tonic-gate 			/*
3827c478bd9Sstevel@tonic-gate 			 * buf size is multiple of page size, dblk and
3837c478bd9Sstevel@tonic-gate 			 * buffer are allocated separately.
3847c478bd9Sstevel@tonic-gate 			 */
3857c478bd9Sstevel@tonic-gate 
3867c478bd9Sstevel@tonic-gate 			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
3877c478bd9Sstevel@tonic-gate 			tot_size = sizeof (dblk_t);
3887c478bd9Sstevel@tonic-gate 		}
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 		(void) sprintf(name, "streams_dblk_%ld", size);
391a45f3f93Smeem 		cp = kmem_cache_create(name, tot_size, DBLK_CACHE_ALIGN,
392a45f3f93Smeem 		    dblk_constructor, dblk_destructor, NULL, (void *)(size),
393a45f3f93Smeem 		    NULL, dblk_kmem_flags);
3947c478bd9Sstevel@tonic-gate 
3957c478bd9Sstevel@tonic-gate 		while (lastsize <= size) {
3967c478bd9Sstevel@tonic-gate 			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
3977c478bd9Sstevel@tonic-gate 			lastsize += DBLK_MIN_SIZE;
3987c478bd9Sstevel@tonic-gate 		}
3997c478bd9Sstevel@tonic-gate 	}
4007c478bd9Sstevel@tonic-gate 
401a45f3f93Smeem 	dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t),
402a45f3f93Smeem 	    DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL,
4037c478bd9Sstevel@tonic-gate 	    (void *)sizeof (dblk_t), NULL, dblk_kmem_flags);
404a45f3f93Smeem 	fthdr_cache = kmem_cache_create("streams_fthdr", sizeof (fthdr_t), 32,
405a45f3f93Smeem 	    fthdr_constructor, fthdr_destructor, NULL, NULL, NULL, 0);
406a45f3f93Smeem 	ftblk_cache = kmem_cache_create("streams_ftblk", sizeof (ftblk_t), 32,
407a45f3f93Smeem 	    ftblk_constructor, ftblk_destructor, NULL, NULL, NULL, 0);
4087c478bd9Sstevel@tonic-gate 
409e7d4b76fSss146032 	/* initialize throttling queue for esballoc */
410e7d4b76fSss146032 	esballoc_queue_init();
4117c478bd9Sstevel@tonic-gate }
4127c478bd9Sstevel@tonic-gate 
4137c478bd9Sstevel@tonic-gate /*ARGSUSED*/
4147c478bd9Sstevel@tonic-gate mblk_t *
allocb(size_t size,uint_t pri)4157c478bd9Sstevel@tonic-gate allocb(size_t size, uint_t pri)
4167c478bd9Sstevel@tonic-gate {
4177c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
4187c478bd9Sstevel@tonic-gate 	mblk_t *mp;
4197c478bd9Sstevel@tonic-gate 	size_t index;
4207c478bd9Sstevel@tonic-gate 
4217c478bd9Sstevel@tonic-gate 	index =  (size - 1)  >> DBLK_SIZE_SHIFT;
4227c478bd9Sstevel@tonic-gate 
4237c478bd9Sstevel@tonic-gate 	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
4247c478bd9Sstevel@tonic-gate 		if (size != 0) {
4257c478bd9Sstevel@tonic-gate 			mp = allocb_oversize(size, KM_NOSLEEP);
4267c478bd9Sstevel@tonic-gate 			goto out;
4277c478bd9Sstevel@tonic-gate 		}
4287c478bd9Sstevel@tonic-gate 		index = 0;
4297c478bd9Sstevel@tonic-gate 	}
4307c478bd9Sstevel@tonic-gate 
4317c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
4327c478bd9Sstevel@tonic-gate 		mp = NULL;
4337c478bd9Sstevel@tonic-gate 		goto out;
4347c478bd9Sstevel@tonic-gate 	}
4357c478bd9Sstevel@tonic-gate 
4367c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
4377c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
4387c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
4397c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
4407c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
4417c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
4427c478bd9Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
4437c478bd9Sstevel@tonic-gate out:
4447c478bd9Sstevel@tonic-gate 	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);
4457c478bd9Sstevel@tonic-gate 
4467c478bd9Sstevel@tonic-gate 	return (mp);
4477c478bd9Sstevel@tonic-gate }
4487c478bd9Sstevel@tonic-gate 
449de8c4a14SErik Nordmark /*
450de8c4a14SErik Nordmark  * Allocate an mblk taking db_credp and db_cpid from the template.
451de8c4a14SErik Nordmark  * Allow the cred to be NULL.
452de8c4a14SErik Nordmark  */
4537c478bd9Sstevel@tonic-gate mblk_t *
allocb_tmpl(size_t size,const mblk_t * tmpl)4547c478bd9Sstevel@tonic-gate allocb_tmpl(size_t size, const mblk_t *tmpl)
4557c478bd9Sstevel@tonic-gate {
4567c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4577c478bd9Sstevel@tonic-gate 
4587c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
459de8c4a14SErik Nordmark 		dblk_t *src = tmpl->b_datap;
460de8c4a14SErik Nordmark 		dblk_t *dst = mp->b_datap;
461aa62bbf8Sken Powell - Sun Microsystem 		cred_t *cr;
462aa62bbf8Sken Powell - Sun Microsystem 		pid_t cpid;
463de8c4a14SErik Nordmark 
464aa62bbf8Sken Powell - Sun Microsystem 		cr = msg_getcred(tmpl, &cpid);
4657c478bd9Sstevel@tonic-gate 		if (cr != NULL)
466de8c4a14SErik Nordmark 			crhold(dst->db_credp = cr);
467aa62bbf8Sken Powell - Sun Microsystem 		dst->db_cpid = cpid;
468de8c4a14SErik Nordmark 		dst->db_type = src->db_type;
4697c478bd9Sstevel@tonic-gate 	}
4707c478bd9Sstevel@tonic-gate 	return (mp);
4717c478bd9Sstevel@tonic-gate }
4727c478bd9Sstevel@tonic-gate 
4737c478bd9Sstevel@tonic-gate mblk_t *
allocb_cred(size_t size,cred_t * cr,pid_t cpid)474de8c4a14SErik Nordmark allocb_cred(size_t size, cred_t *cr, pid_t cpid)
4757c478bd9Sstevel@tonic-gate {
4767c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4777c478bd9Sstevel@tonic-gate 
478de8c4a14SErik Nordmark 	ASSERT(cr != NULL);
479de8c4a14SErik Nordmark 	if (mp != NULL) {
480de8c4a14SErik Nordmark 		dblk_t *dbp = mp->b_datap;
4817c478bd9Sstevel@tonic-gate 
482de8c4a14SErik Nordmark 		crhold(dbp->db_credp = cr);
483de8c4a14SErik Nordmark 		dbp->db_cpid = cpid;
484de8c4a14SErik Nordmark 	}
4857c478bd9Sstevel@tonic-gate 	return (mp);
4867c478bd9Sstevel@tonic-gate }
4877c478bd9Sstevel@tonic-gate 
4887c478bd9Sstevel@tonic-gate mblk_t *
allocb_cred_wait(size_t size,uint_t flags,int * error,cred_t * cr,pid_t cpid)489de8c4a14SErik Nordmark allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr, pid_t cpid)
4907c478bd9Sstevel@tonic-gate {
4917c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb_wait(size, 0, flags, error);
4927c478bd9Sstevel@tonic-gate 
493de8c4a14SErik Nordmark 	ASSERT(cr != NULL);
494de8c4a14SErik Nordmark 	if (mp != NULL) {
495de8c4a14SErik Nordmark 		dblk_t *dbp = mp->b_datap;
496de8c4a14SErik Nordmark 
497de8c4a14SErik Nordmark 		crhold(dbp->db_credp = cr);
498de8c4a14SErik Nordmark 		dbp->db_cpid = cpid;
499de8c4a14SErik Nordmark 	}
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 	return (mp);
5027c478bd9Sstevel@tonic-gate }
5037c478bd9Sstevel@tonic-gate 
504de8c4a14SErik Nordmark /*
505de8c4a14SErik Nordmark  * Extract the db_cred (and optionally db_cpid) from a message.
506de8c4a14SErik Nordmark  * We find the first mblk which has a non-NULL db_cred and use that.
507de8c4a14SErik Nordmark  * If none found we return NULL.
508de8c4a14SErik Nordmark  * Does NOT get a hold on the cred.
509de8c4a14SErik Nordmark  */
510de8c4a14SErik Nordmark cred_t *
msg_getcred(const mblk_t * mp,pid_t * cpidp)511de8c4a14SErik Nordmark msg_getcred(const mblk_t *mp, pid_t *cpidp)
512de8c4a14SErik Nordmark {
513de8c4a14SErik Nordmark 	cred_t *cr = NULL;
514de8c4a14SErik Nordmark 	cred_t *cr2;
515aa62bbf8Sken Powell - Sun Microsystem 	mblk_t *mp2;
516de8c4a14SErik Nordmark 
517de8c4a14SErik Nordmark 	while (mp != NULL) {
518de8c4a14SErik Nordmark 		dblk_t *dbp = mp->b_datap;
519de8c4a14SErik Nordmark 
520de8c4a14SErik Nordmark 		cr = dbp->db_credp;
521de8c4a14SErik Nordmark 		if (cr == NULL) {
522de8c4a14SErik Nordmark 			mp = mp->b_cont;
523de8c4a14SErik Nordmark 			continue;
524de8c4a14SErik Nordmark 		}
525de8c4a14SErik Nordmark 		if (cpidp != NULL)
526de8c4a14SErik Nordmark 			*cpidp = dbp->db_cpid;
527de8c4a14SErik Nordmark 
528de8c4a14SErik Nordmark #ifdef DEBUG
529de8c4a14SErik Nordmark 		/*
530de8c4a14SErik Nordmark 		 * Normally there should at most one db_credp in a message.
531de8c4a14SErik Nordmark 		 * But if there are multiple (as in the case of some M_IOC*
532de8c4a14SErik Nordmark 		 * and some internal messages in TCP/IP bind logic) then
533de8c4a14SErik Nordmark 		 * they must be identical in the normal case.
534de8c4a14SErik Nordmark 		 * However, a socket can be shared between different uids
535de8c4a14SErik Nordmark 		 * in which case data queued in TCP would be from different
536de8c4a14SErik Nordmark 		 * creds. Thus we can only assert for the zoneid being the
537de8c4a14SErik Nordmark 		 * same. Due to Multi-level Level Ports for TX, some
538de8c4a14SErik Nordmark 		 * cred_t can have a NULL cr_zone, and we skip the comparison
539de8c4a14SErik Nordmark 		 * in that case.
540de8c4a14SErik Nordmark 		 */
541aa62bbf8Sken Powell - Sun Microsystem 		mp2 = mp->b_cont;
542aa62bbf8Sken Powell - Sun Microsystem 		while (mp2 != NULL) {
543aa62bbf8Sken Powell - Sun Microsystem 			cr2 = DB_CRED(mp2);
544de8c4a14SErik Nordmark 			if (cr2 != NULL) {
545de8c4a14SErik Nordmark 				DTRACE_PROBE2(msg__getcred,
546de8c4a14SErik Nordmark 				    cred_t *, cr, cred_t *, cr2);
547de8c4a14SErik Nordmark 				ASSERT(crgetzoneid(cr) == crgetzoneid(cr2) ||
548de8c4a14SErik Nordmark 				    crgetzone(cr) == NULL ||
549de8c4a14SErik Nordmark 				    crgetzone(cr2) == NULL);
550de8c4a14SErik Nordmark 			}
551aa62bbf8Sken Powell - Sun Microsystem 			mp2 = mp2->b_cont;
552aa62bbf8Sken Powell - Sun Microsystem 		}
553de8c4a14SErik Nordmark #endif
554de8c4a14SErik Nordmark 		return (cr);
555de8c4a14SErik Nordmark 	}
556de8c4a14SErik Nordmark 	if (cpidp != NULL)
557de8c4a14SErik Nordmark 		*cpidp = NOPID;
558de8c4a14SErik Nordmark 	return (NULL);
559de8c4a14SErik Nordmark }
560de8c4a14SErik Nordmark 
561de8c4a14SErik Nordmark /*
562de8c4a14SErik Nordmark  * Variant of msg_getcred which, when a cred is found
563de8c4a14SErik Nordmark  * 1. Returns with a hold on the cred
564de8c4a14SErik Nordmark  * 2. Clears the first cred in the mblk.
565de8c4a14SErik Nordmark  * This is more efficient to use than a msg_getcred() + crhold() when
566de8c4a14SErik Nordmark  * the message is freed after the cred has been extracted.
567de8c4a14SErik Nordmark  *
568de8c4a14SErik Nordmark  * The caller is responsible for ensuring that there is no other reference
569de8c4a14SErik Nordmark  * on the message since db_credp can not be cleared when there are other
570de8c4a14SErik Nordmark  * references.
571de8c4a14SErik Nordmark  */
572de8c4a14SErik Nordmark cred_t *
msg_extractcred(mblk_t * mp,pid_t * cpidp)573de8c4a14SErik Nordmark msg_extractcred(mblk_t *mp, pid_t *cpidp)
574de8c4a14SErik Nordmark {
575de8c4a14SErik Nordmark 	cred_t *cr = NULL;
576de8c4a14SErik Nordmark 	cred_t *cr2;
577aa62bbf8Sken Powell - Sun Microsystem 	mblk_t *mp2;
578de8c4a14SErik Nordmark 
579de8c4a14SErik Nordmark 	while (mp != NULL) {
580de8c4a14SErik Nordmark 		dblk_t *dbp = mp->b_datap;
581de8c4a14SErik Nordmark 
582de8c4a14SErik Nordmark 		cr = dbp->db_credp;
583de8c4a14SErik Nordmark 		if (cr == NULL) {
584de8c4a14SErik Nordmark 			mp = mp->b_cont;
585de8c4a14SErik Nordmark 			continue;
586de8c4a14SErik Nordmark 		}
587de8c4a14SErik Nordmark 		ASSERT(dbp->db_ref == 1);
588de8c4a14SErik Nordmark 		dbp->db_credp = NULL;
589de8c4a14SErik Nordmark 		if (cpidp != NULL)
590de8c4a14SErik Nordmark 			*cpidp = dbp->db_cpid;
591de8c4a14SErik Nordmark #ifdef DEBUG
592de8c4a14SErik Nordmark 		/*
593de8c4a14SErik Nordmark 		 * Normally there should at most one db_credp in a message.
594de8c4a14SErik Nordmark 		 * But if there are multiple (as in the case of some M_IOC*
595de8c4a14SErik Nordmark 		 * and some internal messages in TCP/IP bind logic) then
596de8c4a14SErik Nordmark 		 * they must be identical in the normal case.
597de8c4a14SErik Nordmark 		 * However, a socket can be shared between different uids
598de8c4a14SErik Nordmark 		 * in which case data queued in TCP would be from different
599de8c4a14SErik Nordmark 		 * creds. Thus we can only assert for the zoneid being the
600de8c4a14SErik Nordmark 		 * same. Due to Multi-level Level Ports for TX, some
601de8c4a14SErik Nordmark 		 * cred_t can have a NULL cr_zone, and we skip the comparison
602de8c4a14SErik Nordmark 		 * in that case.
603de8c4a14SErik Nordmark 		 */
604aa62bbf8Sken Powell - Sun Microsystem 		mp2 = mp->b_cont;
605aa62bbf8Sken Powell - Sun Microsystem 		while (mp2 != NULL) {
606aa62bbf8Sken Powell - Sun Microsystem 			cr2 = DB_CRED(mp2);
607de8c4a14SErik Nordmark 			if (cr2 != NULL) {
608de8c4a14SErik Nordmark 				DTRACE_PROBE2(msg__extractcred,
609de8c4a14SErik Nordmark 				    cred_t *, cr, cred_t *, cr2);
610de8c4a14SErik Nordmark 				ASSERT(crgetzoneid(cr) == crgetzoneid(cr2) ||
611de8c4a14SErik Nordmark 				    crgetzone(cr) == NULL ||
612de8c4a14SErik Nordmark 				    crgetzone(cr2) == NULL);
613de8c4a14SErik Nordmark 			}
614aa62bbf8Sken Powell - Sun Microsystem 			mp2 = mp2->b_cont;
615aa62bbf8Sken Powell - Sun Microsystem 		}
616de8c4a14SErik Nordmark #endif
617de8c4a14SErik Nordmark 		return (cr);
618de8c4a14SErik Nordmark 	}
619de8c4a14SErik Nordmark 	return (NULL);
620de8c4a14SErik Nordmark }
621de8c4a14SErik Nordmark /*
622de8c4a14SErik Nordmark  * Get the label for a message. Uses the first mblk in the message
623de8c4a14SErik Nordmark  * which has a non-NULL db_credp.
624de8c4a14SErik Nordmark  * Returns NULL if there is no credp.
625de8c4a14SErik Nordmark  */
626de8c4a14SErik Nordmark extern struct ts_label_s *
msg_getlabel(const mblk_t * mp)627de8c4a14SErik Nordmark msg_getlabel(const mblk_t *mp)
628de8c4a14SErik Nordmark {
629de8c4a14SErik Nordmark 	cred_t *cr = msg_getcred(mp, NULL);
630de8c4a14SErik Nordmark 
631de8c4a14SErik Nordmark 	if (cr == NULL)
632de8c4a14SErik Nordmark 		return (NULL);
633de8c4a14SErik Nordmark 
634de8c4a14SErik Nordmark 	return (crgetlabel(cr));
635de8c4a14SErik Nordmark }
636de8c4a14SErik Nordmark 
6377c478bd9Sstevel@tonic-gate void
freeb(mblk_t * mp)6387c478bd9Sstevel@tonic-gate freeb(mblk_t *mp)
6397c478bd9Sstevel@tonic-gate {
6407c478bd9Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
6417c478bd9Sstevel@tonic-gate 
6427c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_ref > 0);
6437c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
6447c478bd9Sstevel@tonic-gate 	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);
6457c478bd9Sstevel@tonic-gate 
6467c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
6477c478bd9Sstevel@tonic-gate 
6487c478bd9Sstevel@tonic-gate 	dbp->db_free(mp, dbp);
6497c478bd9Sstevel@tonic-gate }
6507c478bd9Sstevel@tonic-gate 
6517c478bd9Sstevel@tonic-gate void
freemsg(mblk_t * mp)6527c478bd9Sstevel@tonic-gate freemsg(mblk_t *mp)
6537c478bd9Sstevel@tonic-gate {
6547c478bd9Sstevel@tonic-gate 	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
6557c478bd9Sstevel@tonic-gate 	while (mp) {
6567c478bd9Sstevel@tonic-gate 		dblk_t *dbp = mp->b_datap;
6577c478bd9Sstevel@tonic-gate 		mblk_t *mp_cont = mp->b_cont;
6587c478bd9Sstevel@tonic-gate 
6597c478bd9Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
6607c478bd9Sstevel@tonic-gate 		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
6617c478bd9Sstevel@tonic-gate 
6627c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
6637c478bd9Sstevel@tonic-gate 
6647c478bd9Sstevel@tonic-gate 		dbp->db_free(mp, dbp);
6657c478bd9Sstevel@tonic-gate 		mp = mp_cont;
6667c478bd9Sstevel@tonic-gate 	}
6677c478bd9Sstevel@tonic-gate }
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate /*
6707c478bd9Sstevel@tonic-gate  * Reallocate a block for another use.  Try hard to use the old block.
6717c478bd9Sstevel@tonic-gate  * If the old data is wanted (copy), leave b_wptr at the end of the data,
6727c478bd9Sstevel@tonic-gate  * otherwise return b_wptr = b_rptr.
6737c478bd9Sstevel@tonic-gate  *
6747c478bd9Sstevel@tonic-gate  * This routine is private and unstable.
6757c478bd9Sstevel@tonic-gate  */
6767c478bd9Sstevel@tonic-gate mblk_t	*
reallocb(mblk_t * mp,size_t size,uint_t copy)6777c478bd9Sstevel@tonic-gate reallocb(mblk_t *mp, size_t size, uint_t copy)
6787c478bd9Sstevel@tonic-gate {
6797c478bd9Sstevel@tonic-gate 	mblk_t		*mp1;
6807c478bd9Sstevel@tonic-gate 	unsigned char	*old_rptr;
6817c478bd9Sstevel@tonic-gate 	ptrdiff_t	cur_size;
6827c478bd9Sstevel@tonic-gate 
6837c478bd9Sstevel@tonic-gate 	if (mp == NULL)
6847c478bd9Sstevel@tonic-gate 		return (allocb(size, BPRI_HI));
6857c478bd9Sstevel@tonic-gate 
6867c478bd9Sstevel@tonic-gate 	cur_size = mp->b_wptr - mp->b_rptr;
6877c478bd9Sstevel@tonic-gate 	old_rptr = mp->b_rptr;
6887c478bd9Sstevel@tonic-gate 
6897c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref != 0);
6907c478bd9Sstevel@tonic-gate 
6917c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
6927c478bd9Sstevel@tonic-gate 		/*
6937c478bd9Sstevel@tonic-gate 		 * If the data is wanted and it will fit where it is, no
6947c478bd9Sstevel@tonic-gate 		 * work is required.
6957c478bd9Sstevel@tonic-gate 		 */
6967c478bd9Sstevel@tonic-gate 		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
6977c478bd9Sstevel@tonic-gate 			return (mp);
6987c478bd9Sstevel@tonic-gate 
6997c478bd9Sstevel@tonic-gate 		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
7007c478bd9Sstevel@tonic-gate 		mp1 = mp;
7017c478bd9Sstevel@tonic-gate 	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
7027c478bd9Sstevel@tonic-gate 		/* XXX other mp state could be copied too, db_flags ... ? */
7037c478bd9Sstevel@tonic-gate 		mp1->b_cont = mp->b_cont;
7047c478bd9Sstevel@tonic-gate 	} else {
7057c478bd9Sstevel@tonic-gate 		return (NULL);
7067c478bd9Sstevel@tonic-gate 	}
7077c478bd9Sstevel@tonic-gate 
7087c478bd9Sstevel@tonic-gate 	if (copy) {
7097c478bd9Sstevel@tonic-gate 		bcopy(old_rptr, mp1->b_rptr, cur_size);
7107c478bd9Sstevel@tonic-gate 		mp1->b_wptr = mp1->b_rptr + cur_size;
7117c478bd9Sstevel@tonic-gate 	}
7127c478bd9Sstevel@tonic-gate 
7137c478bd9Sstevel@tonic-gate 	if (mp != mp1)
7147c478bd9Sstevel@tonic-gate 		freeb(mp);
7157c478bd9Sstevel@tonic-gate 
7167c478bd9Sstevel@tonic-gate 	return (mp1);
7177c478bd9Sstevel@tonic-gate }
7187c478bd9Sstevel@tonic-gate 
7197c478bd9Sstevel@tonic-gate static void
dblk_lastfree(mblk_t * mp,dblk_t * dbp)7207c478bd9Sstevel@tonic-gate dblk_lastfree(mblk_t *mp, dblk_t *dbp)
7217c478bd9Sstevel@tonic-gate {
7227c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
7237c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
7247c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
7257c478bd9Sstevel@tonic-gate 
7267c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
7277c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
7287c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
7297c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
7307c478bd9Sstevel@tonic-gate 	}
7317c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
7327c478bd9Sstevel@tonic-gate 
7337c478bd9Sstevel@tonic-gate 	/* Reset the struioflag and the checksum flag fields */
7347c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
7357c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
7367c478bd9Sstevel@tonic-gate 
73717169044Sbrutus 	/* and the COOKED and/or UIOA flag(s) */
73817169044Sbrutus 	dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);
739c28749e9Skais 
7407c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
7417c478bd9Sstevel@tonic-gate }
7427c478bd9Sstevel@tonic-gate 
7437c478bd9Sstevel@tonic-gate static void
dblk_decref(mblk_t * mp,dblk_t * dbp)7447c478bd9Sstevel@tonic-gate dblk_decref(mblk_t *mp, dblk_t *dbp)
7457c478bd9Sstevel@tonic-gate {
7467c478bd9Sstevel@tonic-gate 	if (dbp->db_ref != 1) {
7477c478bd9Sstevel@tonic-gate 		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
7487c478bd9Sstevel@tonic-gate 		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
7497c478bd9Sstevel@tonic-gate 		/*
7507c478bd9Sstevel@tonic-gate 		 * atomic_add_32_nv() just decremented db_ref, so we no longer
7517c478bd9Sstevel@tonic-gate 		 * have a reference to the dblk, which means another thread
7527c478bd9Sstevel@tonic-gate 		 * could free it.  Therefore we cannot examine the dblk to
7537c478bd9Sstevel@tonic-gate 		 * determine whether ours was the last reference.  Instead,
7547c478bd9Sstevel@tonic-gate 		 * we extract the new and minimum reference counts from rtfu.
7557c478bd9Sstevel@tonic-gate 		 * Note that all we're really saying is "if (ref != refmin)".
7567c478bd9Sstevel@tonic-gate 		 */
7577c478bd9Sstevel@tonic-gate 		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
7587c478bd9Sstevel@tonic-gate 		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
7597c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, mp);
7607c478bd9Sstevel@tonic-gate 			return;
7617c478bd9Sstevel@tonic-gate 		}
7627c478bd9Sstevel@tonic-gate 	}
7637c478bd9Sstevel@tonic-gate 	dbp->db_mblk = mp;
7647c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree;
7657c478bd9Sstevel@tonic-gate 	dbp->db_lastfree(mp, dbp);
7667c478bd9Sstevel@tonic-gate }
7677c478bd9Sstevel@tonic-gate 
7687c478bd9Sstevel@tonic-gate mblk_t *
dupb(mblk_t * mp)7697c478bd9Sstevel@tonic-gate dupb(mblk_t *mp)
7707c478bd9Sstevel@tonic-gate {
7717c478bd9Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
7727c478bd9Sstevel@tonic-gate 	mblk_t *new_mp;
7737c478bd9Sstevel@tonic-gate 	uint32_t oldrtfu, newrtfu;
7747c478bd9Sstevel@tonic-gate 
7757c478bd9Sstevel@tonic-gate 	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
7767c478bd9Sstevel@tonic-gate 		goto out;
7777c478bd9Sstevel@tonic-gate 
7787c478bd9Sstevel@tonic-gate 	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
7797c478bd9Sstevel@tonic-gate 	new_mp->b_rptr = mp->b_rptr;
7807c478bd9Sstevel@tonic-gate 	new_mp->b_wptr = mp->b_wptr;
7817c478bd9Sstevel@tonic-gate 	new_mp->b_datap = dbp;
7827c478bd9Sstevel@tonic-gate 	new_mp->b_queue = NULL;
7837c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);
7867c478bd9Sstevel@tonic-gate 
7877c478bd9Sstevel@tonic-gate 	dbp->db_free = dblk_decref;
7887c478bd9Sstevel@tonic-gate 	do {
7897c478bd9Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
7907c478bd9Sstevel@tonic-gate 		oldrtfu = DBLK_RTFU_WORD(dbp);
7917c478bd9Sstevel@tonic-gate 		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
7927c478bd9Sstevel@tonic-gate 		/*
7937c478bd9Sstevel@tonic-gate 		 * If db_ref is maxed out we can't dup this message anymore.
7947c478bd9Sstevel@tonic-gate 		 */
7957c478bd9Sstevel@tonic-gate 		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
7967c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, new_mp);
7977c478bd9Sstevel@tonic-gate 			new_mp = NULL;
7987c478bd9Sstevel@tonic-gate 			goto out;
7997c478bd9Sstevel@tonic-gate 		}
80075d94465SJosef 'Jeff' Sipek 	} while (atomic_cas_32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) !=
80175d94465SJosef 'Jeff' Sipek 	    oldrtfu);
8027c478bd9Sstevel@tonic-gate 
8037c478bd9Sstevel@tonic-gate out:
8047c478bd9Sstevel@tonic-gate 	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
8057c478bd9Sstevel@tonic-gate 	return (new_mp);
8067c478bd9Sstevel@tonic-gate }
8077c478bd9Sstevel@tonic-gate 
8087c478bd9Sstevel@tonic-gate static void
dblk_lastfree_desb(mblk_t * mp,dblk_t * dbp)8097c478bd9Sstevel@tonic-gate dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
8107c478bd9Sstevel@tonic-gate {
8117c478bd9Sstevel@tonic-gate 	frtn_t *frp = dbp->db_frtnp;
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
8147c478bd9Sstevel@tonic-gate 	frp->free_func(frp->free_arg);
8157c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
8167c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
8197c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
8207c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
8217c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
8227c478bd9Sstevel@tonic-gate 	}
8237c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
8247c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
8257c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
8267c478bd9Sstevel@tonic-gate 
8277c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
8287c478bd9Sstevel@tonic-gate }
8297c478bd9Sstevel@tonic-gate 
/*
 * Free routine that does nothing.  It matches the free_func(arg) call
 * shape and ignores its argument.
 */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
	/* Intentionally empty. */
}
8357c478bd9Sstevel@tonic-gate 
8367c478bd9Sstevel@tonic-gate /*
8377c478bd9Sstevel@tonic-gate  * Generic esballoc used to implement the four flavors: [d]esballoc[a].
8382ab478d4SGordon Ross  *
8392ab478d4SGordon Ross  * The variants with a 'd' prefix (desballoc, desballoca)
8402ab478d4SGordon Ross  *	directly free the mblk when it loses its last ref,
8412ab478d4SGordon Ross  *	where the other variants free asynchronously.
8422ab478d4SGordon Ross  * The variants with an 'a' suffix (esballoca, desballoca)
8432ab478d4SGordon Ross  *	add an extra ref, effectively letting the streams subsystem
8442ab478d4SGordon Ross  *	know that the message data should not be modified.
8452ab478d4SGordon Ross  *	(eg. see db_ref checks in reallocb and elsewhere)
8462ab478d4SGordon Ross  *
8472ab478d4SGordon Ross  * The method used by the 'a' suffix functions to keep the dblk
8482ab478d4SGordon Ross  * db_ref > 1 is non-obvious.  The macro DBLK_RTFU(2,...) passed to
8492ab478d4SGordon Ross  * gesballoc sets the initial db_ref = 2 and sets the DBLK_REFMIN
8502ab478d4SGordon Ross  * bit in db_flags.  In dblk_decref() that flag essentially means
8512ab478d4SGordon Ross  * the dblk has one extra ref, so the "last ref" is one, not zero.
8527c478bd9Sstevel@tonic-gate  */
8537c478bd9Sstevel@tonic-gate static mblk_t *
gesballoc(unsigned char * base,size_t size,uint32_t db_rtfu,frtn_t * frp,void (* lastfree)(mblk_t *,dblk_t *),int kmflags)8547c478bd9Sstevel@tonic-gate gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
8557c478bd9Sstevel@tonic-gate     void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
8567c478bd9Sstevel@tonic-gate {
8577c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
8587c478bd9Sstevel@tonic-gate 	mblk_t *mp;
8597c478bd9Sstevel@tonic-gate 
8607c478bd9Sstevel@tonic-gate 	ASSERT(base != NULL && frp != NULL);
8617c478bd9Sstevel@tonic-gate 
8627c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
8637c478bd9Sstevel@tonic-gate 		mp = NULL;
8647c478bd9Sstevel@tonic-gate 		goto out;
8657c478bd9Sstevel@tonic-gate 	}
8667c478bd9Sstevel@tonic-gate 
8677c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
8687c478bd9Sstevel@tonic-gate 	dbp->db_base = base;
8697c478bd9Sstevel@tonic-gate 	dbp->db_lim = base + size;
8707c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = lastfree;
8717c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = frp;
8727c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = db_rtfu;
8737c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
8747c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = base;
8757c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
8767c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
8777c478bd9Sstevel@tonic-gate 
8787c478bd9Sstevel@tonic-gate out:
8797c478bd9Sstevel@tonic-gate 	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
8807c478bd9Sstevel@tonic-gate 	return (mp);
8817c478bd9Sstevel@tonic-gate }
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8847c478bd9Sstevel@tonic-gate mblk_t *
esballoc(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)8857c478bd9Sstevel@tonic-gate esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
8867c478bd9Sstevel@tonic-gate {
8877c478bd9Sstevel@tonic-gate 	mblk_t *mp;
8887c478bd9Sstevel@tonic-gate 
8897c478bd9Sstevel@tonic-gate 	/*
8907c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
8917c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
8927c478bd9Sstevel@tonic-gate 	 * call optimization.
8937c478bd9Sstevel@tonic-gate 	 */
8947c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
8957c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
8967c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
8977c478bd9Sstevel@tonic-gate 
8987c478bd9Sstevel@tonic-gate 		if (mp != NULL)
8997c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
9007c478bd9Sstevel@tonic-gate 		return (mp);
9017c478bd9Sstevel@tonic-gate 	}
9027c478bd9Sstevel@tonic-gate 
9037c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9047c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
9057c478bd9Sstevel@tonic-gate }
9067c478bd9Sstevel@tonic-gate 
9077c478bd9Sstevel@tonic-gate /*
9087c478bd9Sstevel@tonic-gate  * Same as esballoc() but sleeps waiting for memory.
9097c478bd9Sstevel@tonic-gate  */
9107c478bd9Sstevel@tonic-gate /*ARGSUSED*/
9117c478bd9Sstevel@tonic-gate mblk_t *
esballoc_wait(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)9127c478bd9Sstevel@tonic-gate esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
9137c478bd9Sstevel@tonic-gate {
9147c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9157c478bd9Sstevel@tonic-gate 
9167c478bd9Sstevel@tonic-gate 	/*
9177c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
9187c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
9197c478bd9Sstevel@tonic-gate 	 * call optimization.
9207c478bd9Sstevel@tonic-gate 	 */
9217c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
9227c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9237c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_SLEEP);
9247c478bd9Sstevel@tonic-gate 
9257c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
9267c478bd9Sstevel@tonic-gate 		return (mp);
9277c478bd9Sstevel@tonic-gate 	}
9287c478bd9Sstevel@tonic-gate 
9297c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9307c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_SLEEP));
9317c478bd9Sstevel@tonic-gate }
9327c478bd9Sstevel@tonic-gate 
9337c478bd9Sstevel@tonic-gate /*ARGSUSED*/
9347c478bd9Sstevel@tonic-gate mblk_t *
desballoc(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)9357c478bd9Sstevel@tonic-gate desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
9367c478bd9Sstevel@tonic-gate {
9377c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9387c478bd9Sstevel@tonic-gate 
9397c478bd9Sstevel@tonic-gate 	/*
9407c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
9417c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
9427c478bd9Sstevel@tonic-gate 	 * call optimization.
9437c478bd9Sstevel@tonic-gate 	 */
9447c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
9457c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9467c478bd9Sstevel@tonic-gate 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
9477c478bd9Sstevel@tonic-gate 
9487c478bd9Sstevel@tonic-gate 		if (mp != NULL)
9497c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
9507c478bd9Sstevel@tonic-gate 		return (mp);
9517c478bd9Sstevel@tonic-gate 	}
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9547c478bd9Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
9557c478bd9Sstevel@tonic-gate }
9567c478bd9Sstevel@tonic-gate 
9577c478bd9Sstevel@tonic-gate /*ARGSUSED*/
9587c478bd9Sstevel@tonic-gate mblk_t *
esballoca(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)9597c478bd9Sstevel@tonic-gate esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
9607c478bd9Sstevel@tonic-gate {
9617c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9627c478bd9Sstevel@tonic-gate 
9637c478bd9Sstevel@tonic-gate 	/*
9647c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
9657c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
9667c478bd9Sstevel@tonic-gate 	 * call optimization.
9677c478bd9Sstevel@tonic-gate 	 */
9687c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
9697c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
9707c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
9717c478bd9Sstevel@tonic-gate 
9727c478bd9Sstevel@tonic-gate 		if (mp != NULL)
9737c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
9747c478bd9Sstevel@tonic-gate 		return (mp);
9757c478bd9Sstevel@tonic-gate 	}
9767c478bd9Sstevel@tonic-gate 
9777c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
9787c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
9797c478bd9Sstevel@tonic-gate }
9807c478bd9Sstevel@tonic-gate 
981f82c7503SGordon Ross /*
982f82c7503SGordon Ross  * Same as esballoca() but sleeps waiting for memory.
983f82c7503SGordon Ross  */
984f82c7503SGordon Ross mblk_t *
esballoca_wait(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)985f82c7503SGordon Ross esballoca_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
986f82c7503SGordon Ross {
987f82c7503SGordon Ross 	mblk_t *mp;
988f82c7503SGordon Ross 
989f82c7503SGordon Ross 	/*
990f82c7503SGordon Ross 	 * Note that this is structured to allow the common case (i.e.
991f82c7503SGordon Ross 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
992f82c7503SGordon Ross 	 * call optimization.
993f82c7503SGordon Ross 	 */
994f82c7503SGordon Ross 	if (!str_ftnever) {
995f82c7503SGordon Ross 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
996f82c7503SGordon Ross 		    frp, freebs_enqueue, KM_SLEEP);
997f82c7503SGordon Ross 
998f82c7503SGordon Ross 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
999f82c7503SGordon Ross 		return (mp);
1000f82c7503SGordon Ross 	}
1001f82c7503SGordon Ross 
1002f82c7503SGordon Ross 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
1003f82c7503SGordon Ross 	    frp, freebs_enqueue, KM_SLEEP));
1004f82c7503SGordon Ross }
1005f82c7503SGordon Ross 
10067c478bd9Sstevel@tonic-gate /*ARGSUSED*/
10077c478bd9Sstevel@tonic-gate mblk_t *
desballoca(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)10087c478bd9Sstevel@tonic-gate desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
10097c478bd9Sstevel@tonic-gate {
10107c478bd9Sstevel@tonic-gate 	mblk_t *mp;
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 	/*
10137c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
10147c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
10157c478bd9Sstevel@tonic-gate 	 * call optimization.
10167c478bd9Sstevel@tonic-gate 	 */
10177c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
10187c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
10197c478bd9Sstevel@tonic-gate 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
10207c478bd9Sstevel@tonic-gate 
10217c478bd9Sstevel@tonic-gate 		if (mp != NULL)
10227c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
10237c478bd9Sstevel@tonic-gate 		return (mp);
10247c478bd9Sstevel@tonic-gate 	}
10257c478bd9Sstevel@tonic-gate 
10267c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
10277c478bd9Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
10287c478bd9Sstevel@tonic-gate }
10297c478bd9Sstevel@tonic-gate 
10307c478bd9Sstevel@tonic-gate static void
bcache_dblk_lastfree(mblk_t * mp,dblk_t * dbp)10317c478bd9Sstevel@tonic-gate bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
10327c478bd9Sstevel@tonic-gate {
10337c478bd9Sstevel@tonic-gate 	bcache_t *bcp = dbp->db_cache;
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
10367c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
10377c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
10387c478bd9Sstevel@tonic-gate 
10397c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
10407c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
10417c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
10427c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
10437c478bd9Sstevel@tonic-gate 	}
10447c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
10457c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
10467c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
10477c478bd9Sstevel@tonic-gate 
10487c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
10497c478bd9Sstevel@tonic-gate 	kmem_cache_free(bcp->dblk_cache, dbp);
10507c478bd9Sstevel@tonic-gate 	bcp->alloc--;
10517c478bd9Sstevel@tonic-gate 
10527c478bd9Sstevel@tonic-gate 	if (bcp->alloc == 0 && bcp->destroy != 0) {
10537c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
10547c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
10557c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
10567c478bd9Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
10577c478bd9Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
10587c478bd9Sstevel@tonic-gate 	} else {
10597c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
10607c478bd9Sstevel@tonic-gate 	}
10617c478bd9Sstevel@tonic-gate }
10627c478bd9Sstevel@tonic-gate 
10637c478bd9Sstevel@tonic-gate bcache_t *
bcache_create(char * name,size_t size,uint_t align)10647c478bd9Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align)
10657c478bd9Sstevel@tonic-gate {
10667c478bd9Sstevel@tonic-gate 	bcache_t *bcp;
10677c478bd9Sstevel@tonic-gate 	char buffer[255];
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate 	ASSERT((align & (align - 1)) == 0);
10707c478bd9Sstevel@tonic-gate 
1071a45f3f93Smeem 	if ((bcp = kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == NULL)
10727c478bd9Sstevel@tonic-gate 		return (NULL);
10737c478bd9Sstevel@tonic-gate 
10747c478bd9Sstevel@tonic-gate 	bcp->size = size;
10757c478bd9Sstevel@tonic-gate 	bcp->align = align;
10767c478bd9Sstevel@tonic-gate 	bcp->alloc = 0;
10777c478bd9Sstevel@tonic-gate 	bcp->destroy = 0;
10787c478bd9Sstevel@tonic-gate 
10797c478bd9Sstevel@tonic-gate 	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
10807c478bd9Sstevel@tonic-gate 
10817c478bd9Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_buffer_cache", name);
10827c478bd9Sstevel@tonic-gate 	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
10837c478bd9Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
10847c478bd9Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_dblk_cache", name);
10857c478bd9Sstevel@tonic-gate 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
10867c478bd9Sstevel@tonic-gate 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
10877c478bd9Sstevel@tonic-gate 	    NULL, (void *)bcp, NULL, 0);
10887c478bd9Sstevel@tonic-gate 
10897c478bd9Sstevel@tonic-gate 	return (bcp);
10907c478bd9Sstevel@tonic-gate }
10917c478bd9Sstevel@tonic-gate 
10927c478bd9Sstevel@tonic-gate void
bcache_destroy(bcache_t * bcp)10937c478bd9Sstevel@tonic-gate bcache_destroy(bcache_t *bcp)
10947c478bd9Sstevel@tonic-gate {
10957c478bd9Sstevel@tonic-gate 	ASSERT(bcp != NULL);
10967c478bd9Sstevel@tonic-gate 
10977c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
10987c478bd9Sstevel@tonic-gate 	if (bcp->alloc == 0) {
10997c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
11007c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
11017c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
11027c478bd9Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
11037c478bd9Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
11047c478bd9Sstevel@tonic-gate 	} else {
11057c478bd9Sstevel@tonic-gate 		bcp->destroy++;
11067c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
11077c478bd9Sstevel@tonic-gate 	}
11087c478bd9Sstevel@tonic-gate }
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate /*ARGSUSED*/
11117c478bd9Sstevel@tonic-gate mblk_t *
bcache_allocb(bcache_t * bcp,uint_t pri)11127c478bd9Sstevel@tonic-gate bcache_allocb(bcache_t *bcp, uint_t pri)
11137c478bd9Sstevel@tonic-gate {
11147c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
11157c478bd9Sstevel@tonic-gate 	mblk_t *mp = NULL;
11167c478bd9Sstevel@tonic-gate 
11177c478bd9Sstevel@tonic-gate 	ASSERT(bcp != NULL);
11187c478bd9Sstevel@tonic-gate 
11197c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
11207c478bd9Sstevel@tonic-gate 	if (bcp->destroy != 0) {
11217c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
11227c478bd9Sstevel@tonic-gate 		goto out;
11237c478bd9Sstevel@tonic-gate 	}
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
11267c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
11277c478bd9Sstevel@tonic-gate 		goto out;
11287c478bd9Sstevel@tonic-gate 	}
11297c478bd9Sstevel@tonic-gate 	bcp->alloc++;
11307c478bd9Sstevel@tonic-gate 	mutex_exit(&bcp->mutex);
11317c478bd9Sstevel@tonic-gate 
11327c478bd9Sstevel@tonic-gate 	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
11337c478bd9Sstevel@tonic-gate 
11347c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
11357c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
11367c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
11377c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
11387c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
11397c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
11407c478bd9Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
11417c478bd9Sstevel@tonic-gate out:
11427c478bd9Sstevel@tonic-gate 	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
11437c478bd9Sstevel@tonic-gate 
11447c478bd9Sstevel@tonic-gate 	return (mp);
11457c478bd9Sstevel@tonic-gate }
11467c478bd9Sstevel@tonic-gate 
11477c478bd9Sstevel@tonic-gate static void
dblk_lastfree_oversize(mblk_t * mp,dblk_t * dbp)11487c478bd9Sstevel@tonic-gate dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
11497c478bd9Sstevel@tonic-gate {
11507c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
11517c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
11527c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
11537c478bd9Sstevel@tonic-gate 
11547c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
11557c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
11567c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
11577c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
11587c478bd9Sstevel@tonic-gate 	}
11597c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
11607c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
11617c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
11627c478bd9Sstevel@tonic-gate 
11637c478bd9Sstevel@tonic-gate 	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
11647c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
11657c478bd9Sstevel@tonic-gate }
11667c478bd9Sstevel@tonic-gate 
11677c478bd9Sstevel@tonic-gate static mblk_t *
allocb_oversize(size_t size,int kmflags)11687c478bd9Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags)
11697c478bd9Sstevel@tonic-gate {
11707c478bd9Sstevel@tonic-gate 	mblk_t *mp;
11717c478bd9Sstevel@tonic-gate 	void *buf;
11727c478bd9Sstevel@tonic-gate 
11737c478bd9Sstevel@tonic-gate 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
11747c478bd9Sstevel@tonic-gate 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
11757c478bd9Sstevel@tonic-gate 		return (NULL);
11767c478bd9Sstevel@tonic-gate 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
11777c478bd9Sstevel@tonic-gate 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
11787c478bd9Sstevel@tonic-gate 		kmem_free(buf, size);
11797c478bd9Sstevel@tonic-gate 
11807c478bd9Sstevel@tonic-gate 	if (mp != NULL)
11817c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
11827c478bd9Sstevel@tonic-gate 
11837c478bd9Sstevel@tonic-gate 	return (mp);
11847c478bd9Sstevel@tonic-gate }
11857c478bd9Sstevel@tonic-gate 
11867c478bd9Sstevel@tonic-gate mblk_t *
allocb_tryhard(size_t target_size)11877c478bd9Sstevel@tonic-gate allocb_tryhard(size_t target_size)
11887c478bd9Sstevel@tonic-gate {
11897c478bd9Sstevel@tonic-gate 	size_t size;
11907c478bd9Sstevel@tonic-gate 	mblk_t *bp;
11917c478bd9Sstevel@tonic-gate 
11927c478bd9Sstevel@tonic-gate 	for (size = target_size; size < target_size + 512;
11937c478bd9Sstevel@tonic-gate 	    size += DBLK_CACHE_ALIGN)
11947c478bd9Sstevel@tonic-gate 		if ((bp = allocb(size, BPRI_HI)) != NULL)
11957c478bd9Sstevel@tonic-gate 			return (bp);
11967c478bd9Sstevel@tonic-gate 	allocb_tryhard_fails++;
11977c478bd9Sstevel@tonic-gate 	return (NULL);
11987c478bd9Sstevel@tonic-gate }
11997c478bd9Sstevel@tonic-gate 
12007c478bd9Sstevel@tonic-gate /*
12017c478bd9Sstevel@tonic-gate  * This routine is consolidation private for STREAMS internal use
12027c478bd9Sstevel@tonic-gate  * This routine may only be called from sync routines (i.e., not
12037c478bd9Sstevel@tonic-gate  * from put or service procedures).  It is located here (rather
12047c478bd9Sstevel@tonic-gate  * than strsubr.c) so that we don't have to expose all of the
12057c478bd9Sstevel@tonic-gate  * allocb() implementation details in header files.
12067c478bd9Sstevel@tonic-gate  */
12077c478bd9Sstevel@tonic-gate mblk_t *
allocb_wait(size_t size,uint_t pri,uint_t flags,int * error)12087c478bd9Sstevel@tonic-gate allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
12097c478bd9Sstevel@tonic-gate {
12107c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
12117c478bd9Sstevel@tonic-gate 	mblk_t *mp;
12127c478bd9Sstevel@tonic-gate 	size_t index;
12137c478bd9Sstevel@tonic-gate 
12147c478bd9Sstevel@tonic-gate 	index = (size -1) >> DBLK_SIZE_SHIFT;
12157c478bd9Sstevel@tonic-gate 
12167c478bd9Sstevel@tonic-gate 	if (flags & STR_NOSIG) {
12177c478bd9Sstevel@tonic-gate 		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
12187c478bd9Sstevel@tonic-gate 			if (size != 0) {
12197c478bd9Sstevel@tonic-gate 				mp = allocb_oversize(size, KM_SLEEP);
12207c478bd9Sstevel@tonic-gate 				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
12217c478bd9Sstevel@tonic-gate 				    (uintptr_t)mp);
12227c478bd9Sstevel@tonic-gate 				return (mp);
12237c478bd9Sstevel@tonic-gate 			}
12247c478bd9Sstevel@tonic-gate 			index = 0;
12257c478bd9Sstevel@tonic-gate 		}
12267c478bd9Sstevel@tonic-gate 
12277c478bd9Sstevel@tonic-gate 		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
12287c478bd9Sstevel@tonic-gate 		mp = dbp->db_mblk;
12297c478bd9Sstevel@tonic-gate 		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
12307c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = mp->b_cont = NULL;
12317c478bd9Sstevel@tonic-gate 		mp->b_rptr = mp->b_wptr = dbp->db_base;
12327c478bd9Sstevel@tonic-gate 		mp->b_queue = NULL;
12337c478bd9Sstevel@tonic-gate 		MBLK_BAND_FLAG_WORD(mp) = 0;
12347c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
12357c478bd9Sstevel@tonic-gate 
12367c478bd9Sstevel@tonic-gate 		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
12377c478bd9Sstevel@tonic-gate 
12387c478bd9Sstevel@tonic-gate 	} else {
12397c478bd9Sstevel@tonic-gate 		while ((mp = allocb(size, pri)) == NULL) {
12407c478bd9Sstevel@tonic-gate 			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
12417c478bd9Sstevel@tonic-gate 				return (NULL);
12427c478bd9Sstevel@tonic-gate 		}
12437c478bd9Sstevel@tonic-gate 	}
12447c478bd9Sstevel@tonic-gate 
12457c478bd9Sstevel@tonic-gate 	return (mp);
12467c478bd9Sstevel@tonic-gate }
12477c478bd9Sstevel@tonic-gate 
12487c478bd9Sstevel@tonic-gate /*
12497c478bd9Sstevel@tonic-gate  * Call function 'func' with 'arg' when a class zero block can
12507c478bd9Sstevel@tonic-gate  * be allocated with priority 'pri'.
12517c478bd9Sstevel@tonic-gate  */
12527c478bd9Sstevel@tonic-gate bufcall_id_t
esbbcall(uint_t pri,void (* func)(void *),void * arg)12537c478bd9Sstevel@tonic-gate esbbcall(uint_t pri, void (*func)(void *), void *arg)
12547c478bd9Sstevel@tonic-gate {
12557c478bd9Sstevel@tonic-gate 	return (bufcall(1, pri, func, arg));
12567c478bd9Sstevel@tonic-gate }
12577c478bd9Sstevel@tonic-gate 
12587c478bd9Sstevel@tonic-gate /*
12597c478bd9Sstevel@tonic-gate  * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
12607c478bd9Sstevel@tonic-gate  * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
12617c478bd9Sstevel@tonic-gate  * This provides consistency for all internal allocators of ioctl.
12627c478bd9Sstevel@tonic-gate  */
12637c478bd9Sstevel@tonic-gate mblk_t *
mkiocb(uint_t cmd)12647c478bd9Sstevel@tonic-gate mkiocb(uint_t cmd)
12657c478bd9Sstevel@tonic-gate {
12667c478bd9Sstevel@tonic-gate 	struct iocblk	*ioc;
12677c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
12687c478bd9Sstevel@tonic-gate 
12697c478bd9Sstevel@tonic-gate 	/*
12707c478bd9Sstevel@tonic-gate 	 * Allocate enough space for any of the ioctl related messages.
12717c478bd9Sstevel@tonic-gate 	 */
12727c478bd9Sstevel@tonic-gate 	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
12737c478bd9Sstevel@tonic-gate 		return (NULL);
12747c478bd9Sstevel@tonic-gate 
12757c478bd9Sstevel@tonic-gate 	bzero(mp->b_rptr, sizeof (union ioctypes));
12767c478bd9Sstevel@tonic-gate 
12777c478bd9Sstevel@tonic-gate 	/*
12787c478bd9Sstevel@tonic-gate 	 * Set the mblk_t information and ptrs correctly.
12797c478bd9Sstevel@tonic-gate 	 */
12807c478bd9Sstevel@tonic-gate 	mp->b_wptr += sizeof (struct iocblk);
12817c478bd9Sstevel@tonic-gate 	mp->b_datap->db_type = M_IOCTL;
12827c478bd9Sstevel@tonic-gate 
12837c478bd9Sstevel@tonic-gate 	/*
12847c478bd9Sstevel@tonic-gate 	 * Fill in the fields.
12857c478bd9Sstevel@tonic-gate 	 */
12867c478bd9Sstevel@tonic-gate 	ioc		= (struct iocblk *)mp->b_rptr;
12877c478bd9Sstevel@tonic-gate 	ioc->ioc_cmd	= cmd;
12887c478bd9Sstevel@tonic-gate 	ioc->ioc_cr	= kcred;
12897c478bd9Sstevel@tonic-gate 	ioc->ioc_id	= getiocseqno();
12907c478bd9Sstevel@tonic-gate 	ioc->ioc_flag	= IOC_NATIVE;
12917c478bd9Sstevel@tonic-gate 	return (mp);
12927c478bd9Sstevel@tonic-gate }
12937c478bd9Sstevel@tonic-gate 
12947c478bd9Sstevel@tonic-gate /*
12957c478bd9Sstevel@tonic-gate  * test if block of given size can be allocated with a request of
12967c478bd9Sstevel@tonic-gate  * the given priority.
12977c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
12987c478bd9Sstevel@tonic-gate  */
12997c478bd9Sstevel@tonic-gate /* ARGSUSED */
13007c478bd9Sstevel@tonic-gate int
testb(size_t size,uint_t pri)13017c478bd9Sstevel@tonic-gate testb(size_t size, uint_t pri)
13027c478bd9Sstevel@tonic-gate {
13037c478bd9Sstevel@tonic-gate 	return ((size + sizeof (dblk_t)) <= kmem_avail());
13047c478bd9Sstevel@tonic-gate }
13057c478bd9Sstevel@tonic-gate 
13067c478bd9Sstevel@tonic-gate /*
13077c478bd9Sstevel@tonic-gate  * Call function 'func' with argument 'arg' when there is a reasonably
13087c478bd9Sstevel@tonic-gate  * good chance that a block of size 'size' can be allocated.
13097c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
13107c478bd9Sstevel@tonic-gate  */
13117c478bd9Sstevel@tonic-gate /* ARGSUSED */
13127c478bd9Sstevel@tonic-gate bufcall_id_t
bufcall(size_t size,uint_t pri,void (* func)(void *),void * arg)13137c478bd9Sstevel@tonic-gate bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
13147c478bd9Sstevel@tonic-gate {
13157c478bd9Sstevel@tonic-gate 	static long bid = 1;	/* always odd to save checking for zero */
13167c478bd9Sstevel@tonic-gate 	bufcall_id_t bc_id;
13177c478bd9Sstevel@tonic-gate 	struct strbufcall *bcp;
13187c478bd9Sstevel@tonic-gate 
13197c478bd9Sstevel@tonic-gate 	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
13207c478bd9Sstevel@tonic-gate 		return (0);
13217c478bd9Sstevel@tonic-gate 
13227c478bd9Sstevel@tonic-gate 	bcp->bc_func = func;
13237c478bd9Sstevel@tonic-gate 	bcp->bc_arg = arg;
13247c478bd9Sstevel@tonic-gate 	bcp->bc_size = size;
13257c478bd9Sstevel@tonic-gate 	bcp->bc_next = NULL;
13267c478bd9Sstevel@tonic-gate 	bcp->bc_executor = NULL;
13277c478bd9Sstevel@tonic-gate 
13287c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
13297c478bd9Sstevel@tonic-gate 	/*
13307c478bd9Sstevel@tonic-gate 	 * After bcp is linked into strbcalls and strbcall_lock is dropped there
13317c478bd9Sstevel@tonic-gate 	 * should be no references to bcp since it may be freed by
13327c478bd9Sstevel@tonic-gate 	 * runbufcalls(). Since bcp_id field is returned, we save its value in
13337c478bd9Sstevel@tonic-gate 	 * the local var.
13347c478bd9Sstevel@tonic-gate 	 */
13357c478bd9Sstevel@tonic-gate 	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */
13367c478bd9Sstevel@tonic-gate 
13377c478bd9Sstevel@tonic-gate 	/*
13387c478bd9Sstevel@tonic-gate 	 * add newly allocated stream event to existing
13397c478bd9Sstevel@tonic-gate 	 * linked list of events.
13407c478bd9Sstevel@tonic-gate 	 */
13417c478bd9Sstevel@tonic-gate 	if (strbcalls.bc_head == NULL) {
13427c478bd9Sstevel@tonic-gate 		strbcalls.bc_head = strbcalls.bc_tail = bcp;
13437c478bd9Sstevel@tonic-gate 	} else {
13447c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail->bc_next = bcp;
13457c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail = bcp;
13467c478bd9Sstevel@tonic-gate 	}
13477c478bd9Sstevel@tonic-gate 
13487c478bd9Sstevel@tonic-gate 	cv_signal(&strbcall_cv);
13497c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
13507c478bd9Sstevel@tonic-gate 	return (bc_id);
13517c478bd9Sstevel@tonic-gate }
13527c478bd9Sstevel@tonic-gate 
13537c478bd9Sstevel@tonic-gate /*
13547c478bd9Sstevel@tonic-gate  * Cancel a bufcall request.
13557c478bd9Sstevel@tonic-gate  */
13567c478bd9Sstevel@tonic-gate void
unbufcall(bufcall_id_t id)13577c478bd9Sstevel@tonic-gate unbufcall(bufcall_id_t id)
13587c478bd9Sstevel@tonic-gate {
13597c478bd9Sstevel@tonic-gate 	strbufcall_t *bcp, *pbcp;
13607c478bd9Sstevel@tonic-gate 
13617c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
13627c478bd9Sstevel@tonic-gate again:
13637c478bd9Sstevel@tonic-gate 	pbcp = NULL;
13647c478bd9Sstevel@tonic-gate 	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
13657c478bd9Sstevel@tonic-gate 		if (id == bcp->bc_id)
13667c478bd9Sstevel@tonic-gate 			break;
13677c478bd9Sstevel@tonic-gate 		pbcp = bcp;
13687c478bd9Sstevel@tonic-gate 	}
13697c478bd9Sstevel@tonic-gate 	if (bcp) {
13707c478bd9Sstevel@tonic-gate 		if (bcp->bc_executor != NULL) {
13717c478bd9Sstevel@tonic-gate 			if (bcp->bc_executor != curthread) {
13727c478bd9Sstevel@tonic-gate 				cv_wait(&bcall_cv, &strbcall_lock);
13737c478bd9Sstevel@tonic-gate 				goto again;
13747c478bd9Sstevel@tonic-gate 			}
13757c478bd9Sstevel@tonic-gate 		} else {
13767c478bd9Sstevel@tonic-gate 			if (pbcp)
13777c478bd9Sstevel@tonic-gate 				pbcp->bc_next = bcp->bc_next;
13787c478bd9Sstevel@tonic-gate 			else
13797c478bd9Sstevel@tonic-gate 				strbcalls.bc_head = bcp->bc_next;
13807c478bd9Sstevel@tonic-gate 			if (bcp == strbcalls.bc_tail)
13817c478bd9Sstevel@tonic-gate 				strbcalls.bc_tail = pbcp;
13827c478bd9Sstevel@tonic-gate 			kmem_free(bcp, sizeof (strbufcall_t));
13837c478bd9Sstevel@tonic-gate 		}
13847c478bd9Sstevel@tonic-gate 	}
13857c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
13867c478bd9Sstevel@tonic-gate }
13877c478bd9Sstevel@tonic-gate 
13887c478bd9Sstevel@tonic-gate /*
13897c478bd9Sstevel@tonic-gate  * Duplicate a message block by block (uses dupb), returning
13907c478bd9Sstevel@tonic-gate  * a pointer to the duplicate message.
13917c478bd9Sstevel@tonic-gate  * Returns a non-NULL value only if the entire message
13927c478bd9Sstevel@tonic-gate  * was dup'd.
13937c478bd9Sstevel@tonic-gate  */
13947c478bd9Sstevel@tonic-gate mblk_t *
dupmsg(mblk_t * bp)13957c478bd9Sstevel@tonic-gate dupmsg(mblk_t *bp)
13967c478bd9Sstevel@tonic-gate {
13977c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
13987c478bd9Sstevel@tonic-gate 
13997c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = dupb(bp)))
14007c478bd9Sstevel@tonic-gate 		return (NULL);
14017c478bd9Sstevel@tonic-gate 
14027c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
14037c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
14047c478bd9Sstevel@tonic-gate 			freemsg(head);
14057c478bd9Sstevel@tonic-gate 			return (NULL);
14067c478bd9Sstevel@tonic-gate 		}
14077c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
14087c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
14097c478bd9Sstevel@tonic-gate 	}
14107c478bd9Sstevel@tonic-gate 	return (head);
14117c478bd9Sstevel@tonic-gate }
14127c478bd9Sstevel@tonic-gate 
14137c478bd9Sstevel@tonic-gate #define	DUPB_NOLOAN(bp) \
14147c478bd9Sstevel@tonic-gate 	((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
14157c478bd9Sstevel@tonic-gate 	copyb((bp)) : dupb((bp)))
14167c478bd9Sstevel@tonic-gate 
14177c478bd9Sstevel@tonic-gate mblk_t *
dupmsg_noloan(mblk_t * bp)14187c478bd9Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp)
14197c478bd9Sstevel@tonic-gate {
14207c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
14217c478bd9Sstevel@tonic-gate 
14227c478bd9Sstevel@tonic-gate 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
14237c478bd9Sstevel@tonic-gate 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
14247c478bd9Sstevel@tonic-gate 		return (NULL);
14257c478bd9Sstevel@tonic-gate 
14267c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
14277c478bd9Sstevel@tonic-gate 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
14287c478bd9Sstevel@tonic-gate 			freemsg(head);
14297c478bd9Sstevel@tonic-gate 			return (NULL);
14307c478bd9Sstevel@tonic-gate 		}
14317c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
14327c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
14337c478bd9Sstevel@tonic-gate 	}
14347c478bd9Sstevel@tonic-gate 	return (head);
14357c478bd9Sstevel@tonic-gate }
14367c478bd9Sstevel@tonic-gate 
14377c478bd9Sstevel@tonic-gate /*
14387c478bd9Sstevel@tonic-gate  * Copy data from message and data block to newly allocated message and
14397c478bd9Sstevel@tonic-gate  * data block. Returns new message block pointer, or NULL if error.
14407c478bd9Sstevel@tonic-gate  * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
14417c478bd9Sstevel@tonic-gate  * as in the original even when db_base is not word aligned. (bug 1052877)
14427c478bd9Sstevel@tonic-gate  */
14437c478bd9Sstevel@tonic-gate mblk_t *
copyb(mblk_t * bp)14447c478bd9Sstevel@tonic-gate copyb(mblk_t *bp)
14457c478bd9Sstevel@tonic-gate {
14467c478bd9Sstevel@tonic-gate 	mblk_t	*nbp;
14477c478bd9Sstevel@tonic-gate 	dblk_t	*dp, *ndp;
14487c478bd9Sstevel@tonic-gate 	uchar_t *base;
14497c478bd9Sstevel@tonic-gate 	size_t	size;
14507c478bd9Sstevel@tonic-gate 	size_t	unaligned;
14517c478bd9Sstevel@tonic-gate 
14527c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_wptr >= bp->b_rptr);
14537c478bd9Sstevel@tonic-gate 
14547c478bd9Sstevel@tonic-gate 	dp = bp->b_datap;
14557c478bd9Sstevel@tonic-gate 	if (dp->db_fthdr != NULL)
14567c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
14577c478bd9Sstevel@tonic-gate 
14587c478bd9Sstevel@tonic-gate 	size = dp->db_lim - dp->db_base;
14597c478bd9Sstevel@tonic-gate 	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
14607c478bd9Sstevel@tonic-gate 	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
14617c478bd9Sstevel@tonic-gate 		return (NULL);
14627c478bd9Sstevel@tonic-gate 	nbp->b_flag = bp->b_flag;
14637c478bd9Sstevel@tonic-gate 	nbp->b_band = bp->b_band;
14647c478bd9Sstevel@tonic-gate 	ndp = nbp->b_datap;
1465c61a1653SRyan Zezeski 
1466c61a1653SRyan Zezeski 	/*
1467c61a1653SRyan Zezeski 	 * Copy the various checksum information that came in
1468c61a1653SRyan Zezeski 	 * originally.
1469c61a1653SRyan Zezeski 	 */
1470c61a1653SRyan Zezeski 	ndp->db_cksumstart = dp->db_cksumstart;
1471c61a1653SRyan Zezeski 	ndp->db_cksumend = dp->db_cksumend;
1472c61a1653SRyan Zezeski 	ndp->db_cksumstuff = dp->db_cksumstuff;
1473c61a1653SRyan Zezeski 	bcopy(dp->db_struioun.data, ndp->db_struioun.data,
1474c61a1653SRyan Zezeski 	    sizeof (dp->db_struioun.data));
14757c478bd9Sstevel@tonic-gate 
14767c478bd9Sstevel@tonic-gate 	/*
14777c478bd9Sstevel@tonic-gate 	 * Well, here is a potential issue.  If we are trying to
14787c478bd9Sstevel@tonic-gate 	 * trace a flow, and we copy the message, we might lose
14797c478bd9Sstevel@tonic-gate 	 * information about where this message might have been.
14807c478bd9Sstevel@tonic-gate 	 * So we should inherit the FT data.  On the other hand,
14817c478bd9Sstevel@tonic-gate 	 * a user might be interested only in alloc to free data.
14827c478bd9Sstevel@tonic-gate 	 * So I guess the real answer is to provide a tunable.
14837c478bd9Sstevel@tonic-gate 	 */
14847c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
14857c478bd9Sstevel@tonic-gate 
14867c478bd9Sstevel@tonic-gate 	base = ndp->db_base + unaligned;
14877c478bd9Sstevel@tonic-gate 	bcopy(dp->db_base, ndp->db_base + unaligned, size);
14887c478bd9Sstevel@tonic-gate 
14897c478bd9Sstevel@tonic-gate 	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
14907c478bd9Sstevel@tonic-gate 	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
14917c478bd9Sstevel@tonic-gate 
14927c478bd9Sstevel@tonic-gate 	return (nbp);
14937c478bd9Sstevel@tonic-gate }
14947c478bd9Sstevel@tonic-gate 
14957c478bd9Sstevel@tonic-gate /*
14967c478bd9Sstevel@tonic-gate  * Copy data from message to newly allocated message using new
14977c478bd9Sstevel@tonic-gate  * data blocks.  Returns a pointer to the new message, or NULL if error.
14987c478bd9Sstevel@tonic-gate  */
14997c478bd9Sstevel@tonic-gate mblk_t *
copymsg(mblk_t * bp)15007c478bd9Sstevel@tonic-gate copymsg(mblk_t *bp)
15017c478bd9Sstevel@tonic-gate {
15027c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
15037c478bd9Sstevel@tonic-gate 
15047c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = copyb(bp)))
15057c478bd9Sstevel@tonic-gate 		return (NULL);
15067c478bd9Sstevel@tonic-gate 
15077c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
15087c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
15097c478bd9Sstevel@tonic-gate 			freemsg(head);
15107c478bd9Sstevel@tonic-gate 			return (NULL);
15117c478bd9Sstevel@tonic-gate 		}
15127c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
15137c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
15147c478bd9Sstevel@tonic-gate 	}
15157c478bd9Sstevel@tonic-gate 	return (head);
15167c478bd9Sstevel@tonic-gate }
15177c478bd9Sstevel@tonic-gate 
15187c478bd9Sstevel@tonic-gate /*
15197c478bd9Sstevel@tonic-gate  * link a message block to tail of message
15207c478bd9Sstevel@tonic-gate  */
15217c478bd9Sstevel@tonic-gate void
linkb(mblk_t * mp,mblk_t * bp)15227c478bd9Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp)
15237c478bd9Sstevel@tonic-gate {
15247c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
15257c478bd9Sstevel@tonic-gate 
15267c478bd9Sstevel@tonic-gate 	for (; mp->b_cont; mp = mp->b_cont)
15277c478bd9Sstevel@tonic-gate 		;
15287c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;
15297c478bd9Sstevel@tonic-gate }
15307c478bd9Sstevel@tonic-gate 
15317c478bd9Sstevel@tonic-gate /*
15327c478bd9Sstevel@tonic-gate  * unlink a message block from head of message
15337c478bd9Sstevel@tonic-gate  * return pointer to new message.
15347c478bd9Sstevel@tonic-gate  * NULL if message becomes empty.
15357c478bd9Sstevel@tonic-gate  */
15367c478bd9Sstevel@tonic-gate mblk_t *
unlinkb(mblk_t * bp)15377c478bd9Sstevel@tonic-gate unlinkb(mblk_t *bp)
15387c478bd9Sstevel@tonic-gate {
15397c478bd9Sstevel@tonic-gate 	mblk_t *bp1;
15407c478bd9Sstevel@tonic-gate 
15417c478bd9Sstevel@tonic-gate 	bp1 = bp->b_cont;
15427c478bd9Sstevel@tonic-gate 	bp->b_cont = NULL;
15437c478bd9Sstevel@tonic-gate 	return (bp1);
15447c478bd9Sstevel@tonic-gate }
15457c478bd9Sstevel@tonic-gate 
15467c478bd9Sstevel@tonic-gate /*
15477c478bd9Sstevel@tonic-gate  * remove a message block "bp" from message "mp"
15487c478bd9Sstevel@tonic-gate  *
15497c478bd9Sstevel@tonic-gate  * Return pointer to new message or NULL if no message remains.
15507c478bd9Sstevel@tonic-gate  * Return -1 if bp is not found in message.
15517c478bd9Sstevel@tonic-gate  */
15527c478bd9Sstevel@tonic-gate mblk_t *
rmvb(mblk_t * mp,mblk_t * bp)15537c478bd9Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp)
15547c478bd9Sstevel@tonic-gate {
15557c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
15567c478bd9Sstevel@tonic-gate 	mblk_t *lastp = NULL;
15577c478bd9Sstevel@tonic-gate 
15587c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
15597c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
15607c478bd9Sstevel@tonic-gate 		if (tmp == bp) {
15617c478bd9Sstevel@tonic-gate 			if (lastp)
15627c478bd9Sstevel@tonic-gate 				lastp->b_cont = tmp->b_cont;
15637c478bd9Sstevel@tonic-gate 			else
15647c478bd9Sstevel@tonic-gate 				mp = tmp->b_cont;
15657c478bd9Sstevel@tonic-gate 			tmp->b_cont = NULL;
15667c478bd9Sstevel@tonic-gate 			return (mp);
15677c478bd9Sstevel@tonic-gate 		}
15687c478bd9Sstevel@tonic-gate 		lastp = tmp;
15697c478bd9Sstevel@tonic-gate 	}
15707c478bd9Sstevel@tonic-gate 	return ((mblk_t *)-1);
15717c478bd9Sstevel@tonic-gate }
15727c478bd9Sstevel@tonic-gate 
15737c478bd9Sstevel@tonic-gate /*
15747c478bd9Sstevel@tonic-gate  * Concatenate and align first len bytes of common
15757c478bd9Sstevel@tonic-gate  * message type.  Len == -1, means concat everything.
15767c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure
15777c478bd9Sstevel@tonic-gate  * After the pullup, mp points to the pulled up data.
15787c478bd9Sstevel@tonic-gate  */
15797c478bd9Sstevel@tonic-gate int
pullupmsg(mblk_t * mp,ssize_t len)15807c478bd9Sstevel@tonic-gate pullupmsg(mblk_t *mp, ssize_t len)
15817c478bd9Sstevel@tonic-gate {
15827c478bd9Sstevel@tonic-gate 	mblk_t *bp, *b_cont;
15837c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
15847c478bd9Sstevel@tonic-gate 	ssize_t n;
15857c478bd9Sstevel@tonic-gate 
15867c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref > 0);
15877c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
15887c478bd9Sstevel@tonic-gate 
15897c478bd9Sstevel@tonic-gate 	if (len == -1) {
15907c478bd9Sstevel@tonic-gate 		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
15917c478bd9Sstevel@tonic-gate 			return (1);
15927c478bd9Sstevel@tonic-gate 		len = xmsgsize(mp);
15937c478bd9Sstevel@tonic-gate 	} else {
15947c478bd9Sstevel@tonic-gate 		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
15957c478bd9Sstevel@tonic-gate 		ASSERT(first_mblk_len >= 0);
15967c478bd9Sstevel@tonic-gate 		/*
15977c478bd9Sstevel@tonic-gate 		 * If the length is less than that of the first mblk,
15987c478bd9Sstevel@tonic-gate 		 * we want to pull up the message into an aligned mblk.
15997c478bd9Sstevel@tonic-gate 		 * Though not part of the spec, some callers assume it.
16007c478bd9Sstevel@tonic-gate 		 */
16017c478bd9Sstevel@tonic-gate 		if (len <= first_mblk_len) {
16027c478bd9Sstevel@tonic-gate 			if (str_aligned(mp->b_rptr))
16037c478bd9Sstevel@tonic-gate 				return (1);
16047c478bd9Sstevel@tonic-gate 			len = first_mblk_len;
16057c478bd9Sstevel@tonic-gate 		} else if (xmsgsize(mp) < len)
16067c478bd9Sstevel@tonic-gate 			return (0);
16077c478bd9Sstevel@tonic-gate 	}
16087c478bd9Sstevel@tonic-gate 
16097c478bd9Sstevel@tonic-gate 	if ((bp = allocb_tmpl(len, mp)) == NULL)
16107c478bd9Sstevel@tonic-gate 		return (0);
16117c478bd9Sstevel@tonic-gate 
16127c478bd9Sstevel@tonic-gate 	dbp = bp->b_datap;
16137c478bd9Sstevel@tonic-gate 	*bp = *mp;		/* swap mblks so bp heads the old msg... */
16147c478bd9Sstevel@tonic-gate 	mp->b_datap = dbp;	/* ... and mp heads the new message */
16157c478bd9Sstevel@tonic-gate 	mp->b_datap->db_mblk = mp;
16167c478bd9Sstevel@tonic-gate 	bp->b_datap->db_mblk = bp;
16177c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
16187c478bd9Sstevel@tonic-gate 
16197c478bd9Sstevel@tonic-gate 	do {
16207c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_datap->db_ref > 0);
16217c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_wptr >= bp->b_rptr);
16227c478bd9Sstevel@tonic-gate 		n = MIN(bp->b_wptr - bp->b_rptr, len);
1623bd670b35SErik Nordmark 		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
1624bd670b35SErik Nordmark 		if (n > 0)
16257c478bd9Sstevel@tonic-gate 			bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
16267c478bd9Sstevel@tonic-gate 		mp->b_wptr += n;
16277c478bd9Sstevel@tonic-gate 		bp->b_rptr += n;
16287c478bd9Sstevel@tonic-gate 		len -= n;
16297c478bd9Sstevel@tonic-gate 		if (bp->b_rptr != bp->b_wptr)
16307c478bd9Sstevel@tonic-gate 			break;
16317c478bd9Sstevel@tonic-gate 		b_cont = bp->b_cont;
16327c478bd9Sstevel@tonic-gate 		freeb(bp);
16337c478bd9Sstevel@tonic-gate 		bp = b_cont;
16347c478bd9Sstevel@tonic-gate 	} while (len && bp);
16357c478bd9Sstevel@tonic-gate 
16367c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */
16377c478bd9Sstevel@tonic-gate 
16387c478bd9Sstevel@tonic-gate 	return (1);
16397c478bd9Sstevel@tonic-gate }
16407c478bd9Sstevel@tonic-gate 
16417c478bd9Sstevel@tonic-gate /*
16427c478bd9Sstevel@tonic-gate  * Concatenate and align at least the first len bytes of common message
16437c478bd9Sstevel@tonic-gate  * type.  Len == -1 means concatenate everything.  The original message is
16447c478bd9Sstevel@tonic-gate  * unaltered.  Returns a pointer to a new message on success, otherwise
16457c478bd9Sstevel@tonic-gate  * returns NULL.
16467c478bd9Sstevel@tonic-gate  */
16477c478bd9Sstevel@tonic-gate mblk_t *
msgpullup(mblk_t * mp,ssize_t len)16487c478bd9Sstevel@tonic-gate msgpullup(mblk_t *mp, ssize_t len)
16497c478bd9Sstevel@tonic-gate {
16507c478bd9Sstevel@tonic-gate 	mblk_t *newmp;
1651*8cfbb92dSKyle Simpson 	ssize_t totlen = xmsgsize(mp);
1652*8cfbb92dSKyle Simpson 	ssize_t offset = 0;
16537c478bd9Sstevel@tonic-gate 
1654*8cfbb92dSKyle Simpson 	if (len == -1)
16557c478bd9Sstevel@tonic-gate 		len = totlen;
16567c478bd9Sstevel@tonic-gate 
1657*8cfbb92dSKyle Simpson 	if (len < 0 || (len > 0 && len > totlen))
1658*8cfbb92dSKyle Simpson 		return (NULL);
1659*8cfbb92dSKyle Simpson 
16607c478bd9Sstevel@tonic-gate 	if ((newmp = allocb_tmpl(len, mp)) == NULL)
16617c478bd9Sstevel@tonic-gate 		return (NULL);
16627c478bd9Sstevel@tonic-gate 
16637c478bd9Sstevel@tonic-gate 	newmp->b_flag = mp->b_flag;
16647c478bd9Sstevel@tonic-gate 	newmp->b_band = mp->b_band;
16657c478bd9Sstevel@tonic-gate 
16667c478bd9Sstevel@tonic-gate 	while (len > 0) {
1667*8cfbb92dSKyle Simpson 		ssize_t seglen = MBLKL(mp);
1668*8cfbb92dSKyle Simpson 		ssize_t n = MIN(seglen, len);
1669*8cfbb92dSKyle Simpson 
1670*8cfbb92dSKyle Simpson 		ASSERT3P(mp, !=, NULL);	/* guaranteed by len <= totlen */
1671*8cfbb92dSKyle Simpson 		ASSERT3S(n, >=, 0);	/* allow zero-length mblk_t's */
16727c478bd9Sstevel@tonic-gate 		if (n > 0)
16737c478bd9Sstevel@tonic-gate 			bcopy(mp->b_rptr, newmp->b_wptr, n);
16747c478bd9Sstevel@tonic-gate 		newmp->b_wptr += n;
16757c478bd9Sstevel@tonic-gate 		len -= n;
1676*8cfbb92dSKyle Simpson 
1677*8cfbb92dSKyle Simpson 		if (n == seglen)
16787c478bd9Sstevel@tonic-gate 			mp = mp->b_cont;
1679*8cfbb92dSKyle Simpson 		else if (len == 0)
1680*8cfbb92dSKyle Simpson 			offset = n;
16817c478bd9Sstevel@tonic-gate 	}
1682*8cfbb92dSKyle Simpson 	ASSERT3S(len, ==, 0);
16837c478bd9Sstevel@tonic-gate 
16847c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
16857c478bd9Sstevel@tonic-gate 		newmp->b_cont = dupmsg(mp);
16867c478bd9Sstevel@tonic-gate 		if (newmp->b_cont == NULL) {
16877c478bd9Sstevel@tonic-gate 			freemsg(newmp);
16887c478bd9Sstevel@tonic-gate 			return (NULL);
16897c478bd9Sstevel@tonic-gate 		}
1690*8cfbb92dSKyle Simpson 		ASSERT3S(offset, >=, 0);
1691*8cfbb92dSKyle Simpson 		ASSERT3U(MBLKL(newmp->b_cont), >=, offset);
1692*8cfbb92dSKyle Simpson 		newmp->b_cont->b_rptr += offset;
16937c478bd9Sstevel@tonic-gate 	}
16947c478bd9Sstevel@tonic-gate 
16957c478bd9Sstevel@tonic-gate 	return (newmp);
16967c478bd9Sstevel@tonic-gate }
16977c478bd9Sstevel@tonic-gate 
16987c478bd9Sstevel@tonic-gate /*
16997c478bd9Sstevel@tonic-gate  * Trim bytes from message
17007c478bd9Sstevel@tonic-gate  *  len > 0, trim from head
17017c478bd9Sstevel@tonic-gate  *  len < 0, trim from tail
17027c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
17037c478bd9Sstevel@tonic-gate  */
17047c478bd9Sstevel@tonic-gate int
adjmsg(mblk_t * mp,ssize_t len)17057c478bd9Sstevel@tonic-gate adjmsg(mblk_t *mp, ssize_t len)
17067c478bd9Sstevel@tonic-gate {
17077c478bd9Sstevel@tonic-gate 	mblk_t *bp;
17087c478bd9Sstevel@tonic-gate 	mblk_t *save_bp = NULL;
17097c478bd9Sstevel@tonic-gate 	mblk_t *prev_bp;
17107c478bd9Sstevel@tonic-gate 	mblk_t *bcont;
17117c478bd9Sstevel@tonic-gate 	unsigned char type;
17127c478bd9Sstevel@tonic-gate 	ssize_t n;
17137c478bd9Sstevel@tonic-gate 	int fromhead;
17147c478bd9Sstevel@tonic-gate 	int first;
17157c478bd9Sstevel@tonic-gate 
17167c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
17177c478bd9Sstevel@tonic-gate 
17187c478bd9Sstevel@tonic-gate 	if (len < 0) {
17197c478bd9Sstevel@tonic-gate 		fromhead = 0;
17207c478bd9Sstevel@tonic-gate 		len = -len;
17217c478bd9Sstevel@tonic-gate 	} else {
17227c478bd9Sstevel@tonic-gate 		fromhead = 1;
17237c478bd9Sstevel@tonic-gate 	}
17247c478bd9Sstevel@tonic-gate 
17257c478bd9Sstevel@tonic-gate 	if (xmsgsize(mp) < len)
17267c478bd9Sstevel@tonic-gate 		return (0);
17277c478bd9Sstevel@tonic-gate 
17287c478bd9Sstevel@tonic-gate 	if (fromhead) {
17297c478bd9Sstevel@tonic-gate 		first = 1;
17307c478bd9Sstevel@tonic-gate 		while (len) {
17317c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_wptr >= mp->b_rptr);
17327c478bd9Sstevel@tonic-gate 			n = MIN(mp->b_wptr - mp->b_rptr, len);
17337c478bd9Sstevel@tonic-gate 			mp->b_rptr += n;
17347c478bd9Sstevel@tonic-gate 			len -= n;
17357c478bd9Sstevel@tonic-gate 
17367c478bd9Sstevel@tonic-gate 			/*
17377c478bd9Sstevel@tonic-gate 			 * If this is not the first zero length
17387c478bd9Sstevel@tonic-gate 			 * message remove it
17397c478bd9Sstevel@tonic-gate 			 */
17407c478bd9Sstevel@tonic-gate 			if (!first && (mp->b_wptr == mp->b_rptr)) {
17417c478bd9Sstevel@tonic-gate 				bcont = mp->b_cont;
17427c478bd9Sstevel@tonic-gate 				freeb(mp);
17437c478bd9Sstevel@tonic-gate 				mp = save_bp->b_cont = bcont;
17447c478bd9Sstevel@tonic-gate 			} else {
17457c478bd9Sstevel@tonic-gate 				save_bp = mp;
17467c478bd9Sstevel@tonic-gate 				mp = mp->b_cont;
17477c478bd9Sstevel@tonic-gate 			}
17487c478bd9Sstevel@tonic-gate 			first = 0;
17497c478bd9Sstevel@tonic-gate 		}
17507c478bd9Sstevel@tonic-gate 	} else {
17517c478bd9Sstevel@tonic-gate 		type = mp->b_datap->db_type;
17527c478bd9Sstevel@tonic-gate 		while (len) {
17537c478bd9Sstevel@tonic-gate 			bp = mp;
17547c478bd9Sstevel@tonic-gate 			save_bp = NULL;
17557c478bd9Sstevel@tonic-gate 
17567c478bd9Sstevel@tonic-gate 			/*
17577c478bd9Sstevel@tonic-gate 			 * Find the last message of same type
17587c478bd9Sstevel@tonic-gate 			 */
17597c478bd9Sstevel@tonic-gate 			while (bp && bp->b_datap->db_type == type) {
17607c478bd9Sstevel@tonic-gate 				ASSERT(bp->b_wptr >= bp->b_rptr);
17617c478bd9Sstevel@tonic-gate 				prev_bp = save_bp;
17627c478bd9Sstevel@tonic-gate 				save_bp = bp;
17637c478bd9Sstevel@tonic-gate 				bp = bp->b_cont;
17647c478bd9Sstevel@tonic-gate 			}
17657c478bd9Sstevel@tonic-gate 			if (save_bp == NULL)
17667c478bd9Sstevel@tonic-gate 				break;
17677c478bd9Sstevel@tonic-gate 			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
17687c478bd9Sstevel@tonic-gate 			save_bp->b_wptr -= n;
17697c478bd9Sstevel@tonic-gate 			len -= n;
17707c478bd9Sstevel@tonic-gate 
17717c478bd9Sstevel@tonic-gate 			/*
17727c478bd9Sstevel@tonic-gate 			 * If this is not the first message
17737c478bd9Sstevel@tonic-gate 			 * and we have taken away everything
17747c478bd9Sstevel@tonic-gate 			 * from this message, remove it
17757c478bd9Sstevel@tonic-gate 			 */
17767c478bd9Sstevel@tonic-gate 
17777c478bd9Sstevel@tonic-gate 			if ((save_bp != mp) &&
17787c478bd9Sstevel@tonic-gate 			    (save_bp->b_wptr == save_bp->b_rptr)) {
17797c478bd9Sstevel@tonic-gate 				bcont = save_bp->b_cont;
17807c478bd9Sstevel@tonic-gate 				freeb(save_bp);
17817c478bd9Sstevel@tonic-gate 				prev_bp->b_cont = bcont;
17827c478bd9Sstevel@tonic-gate 			}
17837c478bd9Sstevel@tonic-gate 		}
17847c478bd9Sstevel@tonic-gate 	}
17857c478bd9Sstevel@tonic-gate 	return (1);
17867c478bd9Sstevel@tonic-gate }
17877c478bd9Sstevel@tonic-gate 
17887c478bd9Sstevel@tonic-gate /*
17897c478bd9Sstevel@tonic-gate  * get number of data bytes in message
17907c478bd9Sstevel@tonic-gate  */
17917c478bd9Sstevel@tonic-gate size_t
msgdsize(mblk_t * bp)17927c478bd9Sstevel@tonic-gate msgdsize(mblk_t *bp)
17937c478bd9Sstevel@tonic-gate {
17947c478bd9Sstevel@tonic-gate 	size_t count = 0;
17957c478bd9Sstevel@tonic-gate 
17967c478bd9Sstevel@tonic-gate 	for (; bp; bp = bp->b_cont)
17977c478bd9Sstevel@tonic-gate 		if (bp->b_datap->db_type == M_DATA) {
17987c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_wptr >= bp->b_rptr);
17997c478bd9Sstevel@tonic-gate 			count += bp->b_wptr - bp->b_rptr;
18007c478bd9Sstevel@tonic-gate 		}
18017c478bd9Sstevel@tonic-gate 	return (count);
18027c478bd9Sstevel@tonic-gate }
18037c478bd9Sstevel@tonic-gate 
18047c478bd9Sstevel@tonic-gate /*
18057c478bd9Sstevel@tonic-gate  * Get a message off head of queue
18067c478bd9Sstevel@tonic-gate  *
18077c478bd9Sstevel@tonic-gate  * If queue has no buffers then mark queue
18087c478bd9Sstevel@tonic-gate  * with QWANTR. (queue wants to be read by
18097c478bd9Sstevel@tonic-gate  * someone when data becomes available)
18107c478bd9Sstevel@tonic-gate  *
18117c478bd9Sstevel@tonic-gate  * If there is something to take off then do so.
18127c478bd9Sstevel@tonic-gate  * If queue falls below hi water mark turn off QFULL
18137c478bd9Sstevel@tonic-gate  * flag.  Decrement weighted count of queue.
18147c478bd9Sstevel@tonic-gate  * Also turn off QWANTR because queue is being read.
18157c478bd9Sstevel@tonic-gate  *
18167c478bd9Sstevel@tonic-gate  * The queue count is maintained on a per-band basis.
18177c478bd9Sstevel@tonic-gate  * Priority band 0 (normal messages) uses q_count,
18187c478bd9Sstevel@tonic-gate  * q_lowat, etc.  Non-zero priority bands use the
18197c478bd9Sstevel@tonic-gate  * fields in their respective qband structures
18207c478bd9Sstevel@tonic-gate  * (qb_count, qb_lowat, etc.)  All messages appear
18217c478bd9Sstevel@tonic-gate  * on the same list, linked via their b_next pointers.
18227c478bd9Sstevel@tonic-gate  * q_first is the head of the list.  q_count does
18237c478bd9Sstevel@tonic-gate  * not reflect the size of all the messages on the
18247c478bd9Sstevel@tonic-gate  * queue.  It only reflects those messages in the
18257c478bd9Sstevel@tonic-gate  * normal band of flow.  The one exception to this
18267c478bd9Sstevel@tonic-gate  * deals with high priority messages.  They are in
18277c478bd9Sstevel@tonic-gate  * their own conceptual "band", but are accounted
18287c478bd9Sstevel@tonic-gate  * against q_count.
18297c478bd9Sstevel@tonic-gate  *
18307c478bd9Sstevel@tonic-gate  * If queue count is below the lo water mark and QWANTW
18317c478bd9Sstevel@tonic-gate  * is set, enable the closest backq which has a service
18327c478bd9Sstevel@tonic-gate  * procedure and turn off the QWANTW flag.
18337c478bd9Sstevel@tonic-gate  *
18347c478bd9Sstevel@tonic-gate  * getq could be built on top of rmvq, but isn't because
18357c478bd9Sstevel@tonic-gate  * of performance considerations.
18367c478bd9Sstevel@tonic-gate  *
18377c478bd9Sstevel@tonic-gate  * A note on the use of q_count and q_mblkcnt:
18387c478bd9Sstevel@tonic-gate  *   q_count is the traditional byte count for messages that
18397c478bd9Sstevel@tonic-gate  *   have been put on a queue.  Documentation tells us that
18407c478bd9Sstevel@tonic-gate  *   we shouldn't rely on that count, but some drivers/modules
18417c478bd9Sstevel@tonic-gate  *   do.  What was needed, however, is a mechanism to prevent
18427c478bd9Sstevel@tonic-gate  *   runaway streams from consuming all of the resources,
18437c478bd9Sstevel@tonic-gate  *   and particularly be able to flow control zero-length
18447c478bd9Sstevel@tonic-gate  *   messages.  q_mblkcnt is used for this purpose.  It
18457c478bd9Sstevel@tonic-gate  *   counts the number of mblk's that are being put on
18467c478bd9Sstevel@tonic-gate  *   the queue.  The intention here, is that each mblk should
18477c478bd9Sstevel@tonic-gate  *   contain one byte of data and, for the purpose of
18487c478bd9Sstevel@tonic-gate  *   flow-control, logically does.  A queue will become
18497c478bd9Sstevel@tonic-gate  *   full when EITHER of these values (q_count and q_mblkcnt)
18507c478bd9Sstevel@tonic-gate  *   reach the highwater mark.  It will clear when BOTH
18517c478bd9Sstevel@tonic-gate  *   of them drop below the highwater mark.  And it will
18527c478bd9Sstevel@tonic-gate  *   backenable when BOTH of them drop below the lowwater
18537c478bd9Sstevel@tonic-gate  *   mark.
18547c478bd9Sstevel@tonic-gate  *   With this algorithm, a driver/module might be able
18557c478bd9Sstevel@tonic-gate  *   to find a reasonably accurate q_count, and the
18567c478bd9Sstevel@tonic-gate  *   framework can still try and limit resource usage.
18577c478bd9Sstevel@tonic-gate  */
18587c478bd9Sstevel@tonic-gate mblk_t *
getq(queue_t * q)18597c478bd9Sstevel@tonic-gate getq(queue_t *q)
18607c478bd9Sstevel@tonic-gate {
18617c478bd9Sstevel@tonic-gate 	mblk_t *bp;
1862116094b2Smicheng 	uchar_t band = 0;
18637c478bd9Sstevel@tonic-gate 
1864301ce41fSja97890 	bp = getq_noenab(q, 0);
18657c478bd9Sstevel@tonic-gate 	if (bp != NULL)
18667c478bd9Sstevel@tonic-gate 		band = bp->b_band;
18677c478bd9Sstevel@tonic-gate 
18687c478bd9Sstevel@tonic-gate 	/*
18697c478bd9Sstevel@tonic-gate 	 * Inlined from qbackenable().
18707c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
18717c478bd9Sstevel@tonic-gate 	 */
18727c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
18737c478bd9Sstevel@tonic-gate 		return (bp);
18747c478bd9Sstevel@tonic-gate 
18757c478bd9Sstevel@tonic-gate 	qbackenable(q, band);
18767c478bd9Sstevel@tonic-gate 	return (bp);
18777c478bd9Sstevel@tonic-gate }
18787c478bd9Sstevel@tonic-gate 
18797c478bd9Sstevel@tonic-gate /*
1880301ce41fSja97890  * Returns the number of bytes in a message (a message is defined as a
1881301ce41fSja97890  * chain of mblks linked by b_cont). If a non-NULL mblkcnt is supplied we
1882301ce41fSja97890  * also return the number of distinct mblks in the message.
1883301ce41fSja97890  */
1884301ce41fSja97890 int
mp_cont_len(mblk_t * bp,int * mblkcnt)1885301ce41fSja97890 mp_cont_len(mblk_t *bp, int *mblkcnt)
1886301ce41fSja97890 {
1887301ce41fSja97890 	mblk_t	*mp;
1888301ce41fSja97890 	int	mblks = 0;
1889301ce41fSja97890 	int	bytes = 0;
1890301ce41fSja97890 
1891301ce41fSja97890 	for (mp = bp; mp != NULL; mp = mp->b_cont) {
18929b664393SGarrett D'Amore 		bytes += MBLKL(mp);
1893301ce41fSja97890 		mblks++;
1894301ce41fSja97890 	}
1895301ce41fSja97890 
1896301ce41fSja97890 	if (mblkcnt != NULL)
1897301ce41fSja97890 		*mblkcnt = mblks;
1898301ce41fSja97890 
1899301ce41fSja97890 	return (bytes);
1900301ce41fSja97890 }
1901301ce41fSja97890 
1902301ce41fSja97890 /*
19037c478bd9Sstevel@tonic-gate  * Like getq() but does not backenable.  This is used by the stream
19047c478bd9Sstevel@tonic-gate  * head when a putback() is likely.  The caller must call qbackenable()
19057c478bd9Sstevel@tonic-gate  * after it is done with accessing the queue.
1906301ce41fSja97890  * The rbytes arguments to getq_noneab() allows callers to specify a
1907301ce41fSja97890  * the maximum number of bytes to return. If the current amount on the
1908301ce41fSja97890  * queue is less than this then the entire message will be returned.
1909301ce41fSja97890  * A value of 0 returns the entire message and is equivalent to the old
1910301ce41fSja97890  * default behaviour prior to the addition of the rbytes argument.
19117c478bd9Sstevel@tonic-gate  */
19127c478bd9Sstevel@tonic-gate mblk_t *
getq_noenab(queue_t * q,ssize_t rbytes)1913301ce41fSja97890 getq_noenab(queue_t *q, ssize_t rbytes)
19147c478bd9Sstevel@tonic-gate {
1915301ce41fSja97890 	mblk_t *bp, *mp1;
1916301ce41fSja97890 	mblk_t *mp2 = NULL;
19177c478bd9Sstevel@tonic-gate 	qband_t *qbp;
19187c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
19197c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
19207c478bd9Sstevel@tonic-gate 
19217c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
19227c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
19237c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
19247c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
19257c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
19267c478bd9Sstevel@tonic-gate 	} else
19277c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
19287c478bd9Sstevel@tonic-gate 
19297c478bd9Sstevel@tonic-gate 	if ((bp = q->q_first) == 0) {
19307c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTR;
19317c478bd9Sstevel@tonic-gate 	} else {
1932301ce41fSja97890 		/*
1933301ce41fSja97890 		 * If the caller supplied a byte threshold and there is
1934301ce41fSja97890 		 * more than this amount on the queue then break up the
1935301ce41fSja97890 		 * the message appropriately.  We can only safely do
1936301ce41fSja97890 		 * this for M_DATA messages.
1937301ce41fSja97890 		 */
1938301ce41fSja97890 		if ((DB_TYPE(bp) == M_DATA) && (rbytes > 0) &&
1939301ce41fSja97890 		    (q->q_count > rbytes)) {
1940301ce41fSja97890 			/*
1941301ce41fSja97890 			 * Inline version of mp_cont_len() which terminates
1942301ce41fSja97890 			 * when we meet or exceed rbytes.
1943301ce41fSja97890 			 */
1944301ce41fSja97890 			for (mp1 = bp; mp1 != NULL; mp1 = mp1->b_cont) {
1945301ce41fSja97890 				mblkcnt++;
19469b664393SGarrett D'Amore 				bytecnt += MBLKL(mp1);
1947301ce41fSja97890 				if (bytecnt  >= rbytes)
1948301ce41fSja97890 					break;
1949301ce41fSja97890 			}
1950301ce41fSja97890 			/*
1951301ce41fSja97890 			 * We need to account for the following scenarios:
1952301ce41fSja97890 			 *
1953301ce41fSja97890 			 * 1) Too much data in the first message:
1954301ce41fSja97890 			 *	mp1 will be the mblk which puts us over our
1955301ce41fSja97890 			 *	byte limit.
1956301ce41fSja97890 			 * 2) Not enough data in the first message:
1957301ce41fSja97890 			 *	mp1 will be NULL.
1958301ce41fSja97890 			 * 3) Exactly the right amount of data contained within
1959301ce41fSja97890 			 *    whole mblks:
1960301ce41fSja97890 			 *	mp1->b_cont will be where we break the message.
1961301ce41fSja97890 			 */
1962301ce41fSja97890 			if (bytecnt > rbytes) {
1963301ce41fSja97890 				/*
1964301ce41fSja97890 				 * Dup/copy mp1 and put what we don't need
1965301ce41fSja97890 				 * back onto the queue. Adjust the read/write
1966301ce41fSja97890 				 * and continuation pointers appropriately
1967301ce41fSja97890 				 * and decrement the current mblk count to
1968301ce41fSja97890 				 * reflect we are putting an mblk back onto
1969301ce41fSja97890 				 * the queue.
1970301ce41fSja97890 				 * When adjusting the message pointers, it's
1971301ce41fSja97890 				 * OK to use the existing bytecnt and the
1972301ce41fSja97890 				 * requested amount (rbytes) to calculate the
1973301ce41fSja97890 				 * the new write offset (b_wptr) of what we
1974301ce41fSja97890 				 * are taking. However, we  cannot use these
1975301ce41fSja97890 				 * values when calculating the read offset of
1976301ce41fSja97890 				 * the mblk we are putting back on the queue.
1977301ce41fSja97890 				 * This is because the begining (b_rptr) of the
1978301ce41fSja97890 				 * mblk represents some arbitrary point within
1979301ce41fSja97890 				 * the message.
1980301ce41fSja97890 				 * It's simplest to do this by advancing b_rptr
1981301ce41fSja97890 				 * by the new length of mp1 as we don't have to
1982301ce41fSja97890 				 * remember any intermediate state.
1983301ce41fSja97890 				 */
1984301ce41fSja97890 				ASSERT(mp1 != NULL);
1985301ce41fSja97890 				mblkcnt--;
1986301ce41fSja97890 				if ((mp2 = dupb(mp1)) == NULL &&
1987301ce41fSja97890 				    (mp2 = copyb(mp1)) == NULL) {
1988301ce41fSja97890 					bytecnt = mblkcnt = 0;
1989301ce41fSja97890 					goto dup_failed;
1990301ce41fSja97890 				}
1991301ce41fSja97890 				mp2->b_cont = mp1->b_cont;
1992301ce41fSja97890 				mp1->b_wptr -= bytecnt - rbytes;
1993301ce41fSja97890 				mp2->b_rptr += mp1->b_wptr - mp1->b_rptr;
1994301ce41fSja97890 				mp1->b_cont = NULL;
1995301ce41fSja97890 				bytecnt = rbytes;
1996301ce41fSja97890 			} else {
1997301ce41fSja97890 				/*
1998301ce41fSja97890 				 * Either there is not enough data in the first
1999301ce41fSja97890 				 * message or there is no excess data to deal
2000301ce41fSja97890 				 * with. If mp1 is NULL, we are taking the
2001301ce41fSja97890 				 * whole message. No need to do anything.
2002301ce41fSja97890 				 * Otherwise we assign mp1->b_cont to mp2 as
2003301ce41fSja97890 				 * we will be putting this back onto the head of
2004301ce41fSja97890 				 * the queue.
2005301ce41fSja97890 				 */
2006301ce41fSja97890 				if (mp1 != NULL) {
2007301ce41fSja97890 					mp2 = mp1->b_cont;
2008301ce41fSja97890 					mp1->b_cont = NULL;
2009301ce41fSja97890 				}
2010301ce41fSja97890 			}
2011301ce41fSja97890 			/*
2012301ce41fSja97890 			 * If mp2 is not NULL then we have part of the message
2013301ce41fSja97890 			 * to put back onto the queue.
2014301ce41fSja97890 			 */
2015301ce41fSja97890 			if (mp2 != NULL) {
2016301ce41fSja97890 				if ((mp2->b_next = bp->b_next) == NULL)
2017301ce41fSja97890 					q->q_last = mp2;
2018301ce41fSja97890 				else
2019301ce41fSja97890 					bp->b_next->b_prev = mp2;
2020301ce41fSja97890 				q->q_first = mp2;
2021301ce41fSja97890 			} else {
20227c478bd9Sstevel@tonic-gate 				if ((q->q_first = bp->b_next) == NULL)
20237c478bd9Sstevel@tonic-gate 					q->q_last = NULL;
20247c478bd9Sstevel@tonic-gate 				else
20257c478bd9Sstevel@tonic-gate 					q->q_first->b_prev = NULL;
20267c478bd9Sstevel@tonic-gate 			}
2027301ce41fSja97890 		} else {
2028301ce41fSja97890 			/*
2029301ce41fSja97890 			 * Either no byte threshold was supplied, there is
2030301ce41fSja97890 			 * not enough on the queue or we failed to
2031301ce41fSja97890 			 * duplicate/copy a data block. In these cases we
2032301ce41fSja97890 			 * just take the entire first message.
2033301ce41fSja97890 			 */
2034301ce41fSja97890 dup_failed:
2035301ce41fSja97890 			bytecnt = mp_cont_len(bp, &mblkcnt);
2036301ce41fSja97890 			if ((q->q_first = bp->b_next) == NULL)
2037301ce41fSja97890 				q->q_last = NULL;
2038301ce41fSja97890 			else
2039301ce41fSja97890 				q->q_first->b_prev = NULL;
2040301ce41fSja97890 		}
20417c478bd9Sstevel@tonic-gate 		if (bp->b_band == 0) {
20427c478bd9Sstevel@tonic-gate 			q->q_count -= bytecnt;
20437c478bd9Sstevel@tonic-gate 			q->q_mblkcnt -= mblkcnt;
2044ba464308Srk129064 			if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
2045ba464308Srk129064 			    (q->q_mblkcnt < q->q_hiwat))) {
20467c478bd9Sstevel@tonic-gate 				q->q_flag &= ~QFULL;
20477c478bd9Sstevel@tonic-gate 			}
20487c478bd9Sstevel@tonic-gate 		} else {
20497c478bd9Sstevel@tonic-gate 			int i;
20507c478bd9Sstevel@tonic-gate 
20517c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_band <= q->q_nband);
20527c478bd9Sstevel@tonic-gate 			ASSERT(q->q_bandp != NULL);
20537c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(QLOCK(q)));
20547c478bd9Sstevel@tonic-gate 			qbp = q->q_bandp;
20557c478bd9Sstevel@tonic-gate 			i = bp->b_band;
20567c478bd9Sstevel@tonic-gate 			while (--i > 0)
20577c478bd9Sstevel@tonic-gate 				qbp = qbp->qb_next;
20587c478bd9Sstevel@tonic-gate 			if (qbp->qb_first == qbp->qb_last) {
20597c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
20607c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
20617c478bd9Sstevel@tonic-gate 			} else {
20627c478bd9Sstevel@tonic-gate 				qbp->qb_first = bp->b_next;
20637c478bd9Sstevel@tonic-gate 			}
20647c478bd9Sstevel@tonic-gate 			qbp->qb_count -= bytecnt;
20657c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt -= mblkcnt;
2066ba464308Srk129064 			if (qbp->qb_mblkcnt == 0 ||
2067ba464308Srk129064 			    ((qbp->qb_count < qbp->qb_hiwat) &&
2068ba464308Srk129064 			    (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
20697c478bd9Sstevel@tonic-gate 				qbp->qb_flag &= ~QB_FULL;
20707c478bd9Sstevel@tonic-gate 			}
20717c478bd9Sstevel@tonic-gate 		}
20727c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTR;
20737c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
20747c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
20757c478bd9Sstevel@tonic-gate 	}
20767c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
20777c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
20787c478bd9Sstevel@tonic-gate 
20797e12ceb3SToomas Soome 	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, 0);
20807c478bd9Sstevel@tonic-gate 
20817c478bd9Sstevel@tonic-gate 	return (bp);
20827c478bd9Sstevel@tonic-gate }
20837c478bd9Sstevel@tonic-gate 
20847c478bd9Sstevel@tonic-gate /*
20857c478bd9Sstevel@tonic-gate  * Determine if a backenable is needed after removing a message in the
20867c478bd9Sstevel@tonic-gate  * specified band.
20877c478bd9Sstevel@tonic-gate  * NOTE: This routine assumes that something like getq_noenab() has been
20887c478bd9Sstevel@tonic-gate  * already called.
20897c478bd9Sstevel@tonic-gate  *
20907c478bd9Sstevel@tonic-gate  * For the read side it is ok to hold sd_lock across calling this (and the
20917c478bd9Sstevel@tonic-gate  * stream head often does).
20927c478bd9Sstevel@tonic-gate  * But for the write side strwakeq might be invoked and it acquires sd_lock.
20937c478bd9Sstevel@tonic-gate  */
20947c478bd9Sstevel@tonic-gate void
qbackenable(queue_t * q,uchar_t band)2095116094b2Smicheng qbackenable(queue_t *q, uchar_t band)
20967c478bd9Sstevel@tonic-gate {
20977c478bd9Sstevel@tonic-gate 	int backenab = 0;
20987c478bd9Sstevel@tonic-gate 	qband_t *qbp;
20997c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
21007c478bd9Sstevel@tonic-gate 
21017c478bd9Sstevel@tonic-gate 	ASSERT(q);
21027c478bd9Sstevel@tonic-gate 	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));
21037c478bd9Sstevel@tonic-gate 
21047c478bd9Sstevel@tonic-gate 	/*
21057c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
21067c478bd9Sstevel@tonic-gate 	 * OK since after getq() has lowered the q_count these flags
21077c478bd9Sstevel@tonic-gate 	 * would not change unless either the qbackenable() is done by
21087c478bd9Sstevel@tonic-gate 	 * another thread (which is ok) or the queue has gotten QFULL
21097c478bd9Sstevel@tonic-gate 	 * in which case another backenable will take place when the queue
21107c478bd9Sstevel@tonic-gate 	 * drops below q_lowat.
21117c478bd9Sstevel@tonic-gate 	 */
21127c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
21137c478bd9Sstevel@tonic-gate 		return;
21147c478bd9Sstevel@tonic-gate 
21157c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
21167c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
21177c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
21187c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
21197c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
21207c478bd9Sstevel@tonic-gate 	} else
21217c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
21227c478bd9Sstevel@tonic-gate 
21237c478bd9Sstevel@tonic-gate 	if (band == 0) {
21247c478bd9Sstevel@tonic-gate 		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
21257c478bd9Sstevel@tonic-gate 		    q->q_mblkcnt < q->q_lowat)) {
21267c478bd9Sstevel@tonic-gate 			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
21277c478bd9Sstevel@tonic-gate 		}
21287c478bd9Sstevel@tonic-gate 	} else {
21297c478bd9Sstevel@tonic-gate 		int i;
21307c478bd9Sstevel@tonic-gate 
21317c478bd9Sstevel@tonic-gate 		ASSERT((unsigned)band <= q->q_nband);
21327c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
21337c478bd9Sstevel@tonic-gate 
21347c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
21357c478bd9Sstevel@tonic-gate 		i = band;
21367c478bd9Sstevel@tonic-gate 		while (--i > 0)
21377c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
21387c478bd9Sstevel@tonic-gate 
21397c478bd9Sstevel@tonic-gate 		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
21407c478bd9Sstevel@tonic-gate 		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
21417c478bd9Sstevel@tonic-gate 			backenab = qbp->qb_flag & QB_WANTW;
21427c478bd9Sstevel@tonic-gate 		}
21437c478bd9Sstevel@tonic-gate 	}
21447c478bd9Sstevel@tonic-gate 
21457c478bd9Sstevel@tonic-gate 	if (backenab == 0) {
21467c478bd9Sstevel@tonic-gate 		if (freezer != curthread)
21477c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21487c478bd9Sstevel@tonic-gate 		return;
21497c478bd9Sstevel@tonic-gate 	}
21507c478bd9Sstevel@tonic-gate 
21517c478bd9Sstevel@tonic-gate 	/* Have to drop the lock across strwakeq and backenable */
21527c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
21537c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTWSYNC;
21547c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW)) {
21557c478bd9Sstevel@tonic-gate 		if (band != 0)
21567c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
21577c478bd9Sstevel@tonic-gate 		else {
21587c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
21597c478bd9Sstevel@tonic-gate 		}
21607c478bd9Sstevel@tonic-gate 	}
21617c478bd9Sstevel@tonic-gate 
21627c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
21637c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
21647c478bd9Sstevel@tonic-gate 
21657c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
21667c478bd9Sstevel@tonic-gate 		strwakeq(q, QWANTWSYNC);
21677c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW))
21687c478bd9Sstevel@tonic-gate 		backenable(q, band);
21697c478bd9Sstevel@tonic-gate }
21707c478bd9Sstevel@tonic-gate 
21717c478bd9Sstevel@tonic-gate /*
21727c478bd9Sstevel@tonic-gate  * Remove a message from a queue.  The queue count and other
21737c478bd9Sstevel@tonic-gate  * flow control parameters are adjusted and the back queue
21747c478bd9Sstevel@tonic-gate  * enabled if necessary.
21757c478bd9Sstevel@tonic-gate  *
21767c478bd9Sstevel@tonic-gate  * rmvq can be called with the stream frozen, but other utility functions
21777c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
21787c478bd9Sstevel@tonic-gate  */
21797c478bd9Sstevel@tonic-gate void
rmvq(queue_t * q,mblk_t * mp)21807c478bd9Sstevel@tonic-gate rmvq(queue_t *q, mblk_t *mp)
21817c478bd9Sstevel@tonic-gate {
21827c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
21837c478bd9Sstevel@tonic-gate 
21847c478bd9Sstevel@tonic-gate 	rmvq_noenab(q, mp);
21857c478bd9Sstevel@tonic-gate 	if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
21867c478bd9Sstevel@tonic-gate 		/*
21877c478bd9Sstevel@tonic-gate 		 * qbackenable can handle a frozen stream but not a "random"
21887c478bd9Sstevel@tonic-gate 		 * qlock being held. Drop lock across qbackenable.
21897c478bd9Sstevel@tonic-gate 		 */
21907c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
21917c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
21927c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
21937c478bd9Sstevel@tonic-gate 	} else {
21947c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
21957c478bd9Sstevel@tonic-gate 	}
21967c478bd9Sstevel@tonic-gate }
21977c478bd9Sstevel@tonic-gate 
21987c478bd9Sstevel@tonic-gate /*
21997c478bd9Sstevel@tonic-gate  * Like rmvq() but without any backenabling.
22007c478bd9Sstevel@tonic-gate  * This exists to handle SR_CONSOL_DATA in strrput().
22017c478bd9Sstevel@tonic-gate  */
22027c478bd9Sstevel@tonic-gate void
rmvq_noenab(queue_t * q,mblk_t * mp)22037c478bd9Sstevel@tonic-gate rmvq_noenab(queue_t *q, mblk_t *mp)
22047c478bd9Sstevel@tonic-gate {
22057c478bd9Sstevel@tonic-gate 	int i;
22067c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
22077c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
22087c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
22097c478bd9Sstevel@tonic-gate 
22107c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
22117c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
22127c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
22137c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
22147c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
22157c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
22167c478bd9Sstevel@tonic-gate 		freezer = curthread;
22177c478bd9Sstevel@tonic-gate 	} else
22187c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
22197c478bd9Sstevel@tonic-gate 
22207c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_band <= q->q_nband);
22217c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {		/* Adjust band pointers */
22227c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
22237c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
22247c478bd9Sstevel@tonic-gate 		i = mp->b_band;
22257c478bd9Sstevel@tonic-gate 		while (--i > 0)
22267c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
22277c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_first) {
22287c478bd9Sstevel@tonic-gate 			if (mp->b_next && mp->b_band == mp->b_next->b_band)
22297c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp->b_next;
22307c478bd9Sstevel@tonic-gate 			else
22317c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
22327c478bd9Sstevel@tonic-gate 		}
22337c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_last) {
22347c478bd9Sstevel@tonic-gate 			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
22357c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp->b_prev;
22367c478bd9Sstevel@tonic-gate 			else
22377c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
22387c478bd9Sstevel@tonic-gate 		}
22397c478bd9Sstevel@tonic-gate 	}
22407c478bd9Sstevel@tonic-gate 
22417c478bd9Sstevel@tonic-gate 	/*
22427c478bd9Sstevel@tonic-gate 	 * Remove the message from the list.
22437c478bd9Sstevel@tonic-gate 	 */
22447c478bd9Sstevel@tonic-gate 	if (mp->b_prev)
22457c478bd9Sstevel@tonic-gate 		mp->b_prev->b_next = mp->b_next;
22467c478bd9Sstevel@tonic-gate 	else
22477c478bd9Sstevel@tonic-gate 		q->q_first = mp->b_next;
22487c478bd9Sstevel@tonic-gate 	if (mp->b_next)
22497c478bd9Sstevel@tonic-gate 		mp->b_next->b_prev = mp->b_prev;
22507c478bd9Sstevel@tonic-gate 	else
22517c478bd9Sstevel@tonic-gate 		q->q_last = mp->b_prev;
22527c478bd9Sstevel@tonic-gate 	mp->b_next = NULL;
22537c478bd9Sstevel@tonic-gate 	mp->b_prev = NULL;
22547c478bd9Sstevel@tonic-gate 
22557c478bd9Sstevel@tonic-gate 	/* Get the size of the message for q_count accounting */
2256301ce41fSja97890 	bytecnt = mp_cont_len(mp, &mblkcnt);
22577c478bd9Sstevel@tonic-gate 
22587c478bd9Sstevel@tonic-gate 	if (mp->b_band == 0) {		/* Perform q_count accounting */
22597c478bd9Sstevel@tonic-gate 		q->q_count -= bytecnt;
22607c478bd9Sstevel@tonic-gate 		q->q_mblkcnt -= mblkcnt;
2261ba464308Srk129064 		if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
2262ba464308Srk129064 		    (q->q_mblkcnt < q->q_hiwat))) {
22637c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QFULL;
22647c478bd9Sstevel@tonic-gate 		}
22657c478bd9Sstevel@tonic-gate 	} else {			/* Perform qb_count accounting */
22667c478bd9Sstevel@tonic-gate 		qbp->qb_count -= bytecnt;
22677c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt -= mblkcnt;
2268ba464308Srk129064 		if (qbp->qb_mblkcnt == 0 || ((qbp->qb_count < qbp->qb_hiwat) &&
2269ba464308Srk129064 		    (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
22707c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
22717c478bd9Sstevel@tonic-gate 		}
22727c478bd9Sstevel@tonic-gate 	}
22737c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
22747c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
22757c478bd9Sstevel@tonic-gate 
22767e12ceb3SToomas Soome 	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, 0);
22777c478bd9Sstevel@tonic-gate }
22787c478bd9Sstevel@tonic-gate 
22797c478bd9Sstevel@tonic-gate /*
22807c478bd9Sstevel@tonic-gate  * Empty a queue.
22817c478bd9Sstevel@tonic-gate  * If flag is set, remove all messages.  Otherwise, remove
22827c478bd9Sstevel@tonic-gate  * only non-control messages.  If queue falls below its low
22837c478bd9Sstevel@tonic-gate  * water mark, and QWANTW is set, enable the nearest upstream
22847c478bd9Sstevel@tonic-gate  * service procedure.
22857c478bd9Sstevel@tonic-gate  *
22867c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
22877c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushq did not have a check
22887c478bd9Sstevel@tonic-gate  * for q_lowat == 0 in the backenabling test.
22897c478bd9Sstevel@tonic-gate  *
22907c478bd9Sstevel@tonic-gate  * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
22917c478bd9Sstevel@tonic-gate  * if one exists on the queue.
22927c478bd9Sstevel@tonic-gate  */
22937c478bd9Sstevel@tonic-gate void
flushq_common(queue_t * q,int flag,int pcproto_flag)22947c478bd9Sstevel@tonic-gate flushq_common(queue_t *q, int flag, int pcproto_flag)
22957c478bd9Sstevel@tonic-gate {
22967c478bd9Sstevel@tonic-gate 	mblk_t *mp, *nmp;
22977c478bd9Sstevel@tonic-gate 	qband_t *qbp;
22987c478bd9Sstevel@tonic-gate 	int backenab = 0;
22997c478bd9Sstevel@tonic-gate 	unsigned char bpri;
23007c478bd9Sstevel@tonic-gate 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
23017c478bd9Sstevel@tonic-gate 
23027c478bd9Sstevel@tonic-gate 	if (q->q_first == NULL)
23037c478bd9Sstevel@tonic-gate 		return;
23047c478bd9Sstevel@tonic-gate 
23057c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
23067c478bd9Sstevel@tonic-gate 	mp = q->q_first;
23077c478bd9Sstevel@tonic-gate 	q->q_first = NULL;
23087c478bd9Sstevel@tonic-gate 	q->q_last = NULL;
23097c478bd9Sstevel@tonic-gate 	q->q_count = 0;
23107c478bd9Sstevel@tonic-gate 	q->q_mblkcnt = 0;
23117c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
23127c478bd9Sstevel@tonic-gate 		qbp->qb_first = NULL;
23137c478bd9Sstevel@tonic-gate 		qbp->qb_last = NULL;
23147c478bd9Sstevel@tonic-gate 		qbp->qb_count = 0;
23157c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt = 0;
23167c478bd9Sstevel@tonic-gate 		qbp->qb_flag &= ~QB_FULL;
23177c478bd9Sstevel@tonic-gate 	}
23187c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QFULL;
23197c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
23207c478bd9Sstevel@tonic-gate 	while (mp) {
23217c478bd9Sstevel@tonic-gate 		nmp = mp->b_next;
23227c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = NULL;
23237c478bd9Sstevel@tonic-gate 
23247e12ceb3SToomas Soome 		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, 0);
23257c478bd9Sstevel@tonic-gate 
23267c478bd9Sstevel@tonic-gate 		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
23277c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
23287c478bd9Sstevel@tonic-gate 		else if (flag || datamsg(mp->b_datap->db_type))
23297c478bd9Sstevel@tonic-gate 			freemsg(mp);
23307c478bd9Sstevel@tonic-gate 		else
23317c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
23327c478bd9Sstevel@tonic-gate 		mp = nmp;
23337c478bd9Sstevel@tonic-gate 	}
23347c478bd9Sstevel@tonic-gate 	bpri = 1;
23357c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
23367c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
23377c478bd9Sstevel@tonic-gate 		if ((qbp->qb_flag & QB_WANTW) &&
23387c478bd9Sstevel@tonic-gate 		    (((qbp->qb_count < qbp->qb_lowat) &&
23397c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
23407c478bd9Sstevel@tonic-gate 		    qbp->qb_lowat == 0)) {
23417c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
23427c478bd9Sstevel@tonic-gate 			backenab = 1;
23437c478bd9Sstevel@tonic-gate 			qbf[bpri] = 1;
23447c478bd9Sstevel@tonic-gate 		} else
23457c478bd9Sstevel@tonic-gate 			qbf[bpri] = 0;
23467c478bd9Sstevel@tonic-gate 		bpri++;
23477c478bd9Sstevel@tonic-gate 	}
23487c478bd9Sstevel@tonic-gate 	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
23497c478bd9Sstevel@tonic-gate 	if ((q->q_flag & QWANTW) &&
23507c478bd9Sstevel@tonic-gate 	    (((q->q_count < q->q_lowat) &&
23517c478bd9Sstevel@tonic-gate 	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
23527c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTW;
23537c478bd9Sstevel@tonic-gate 		backenab = 1;
23547c478bd9Sstevel@tonic-gate 		qbf[0] = 1;
23557c478bd9Sstevel@tonic-gate 	} else
23567c478bd9Sstevel@tonic-gate 		qbf[0] = 0;
23577c478bd9Sstevel@tonic-gate 
23587c478bd9Sstevel@tonic-gate 	/*
23597c478bd9Sstevel@tonic-gate 	 * If any band can now be written to, and there is a writer
23607c478bd9Sstevel@tonic-gate 	 * for that band, then backenable the closest service procedure.
23617c478bd9Sstevel@tonic-gate 	 */
23627c478bd9Sstevel@tonic-gate 	if (backenab) {
23637c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
23647c478bd9Sstevel@tonic-gate 		for (bpri = q->q_nband; bpri != 0; bpri--)
23657c478bd9Sstevel@tonic-gate 			if (qbf[bpri])
2366116094b2Smicheng 				backenable(q, bpri);
23677c478bd9Sstevel@tonic-gate 		if (qbf[0])
23687c478bd9Sstevel@tonic-gate 			backenable(q, 0);
23697c478bd9Sstevel@tonic-gate 	} else
23707c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
23717c478bd9Sstevel@tonic-gate }
23727c478bd9Sstevel@tonic-gate 
23737c478bd9Sstevel@tonic-gate /*
23747c478bd9Sstevel@tonic-gate  * The real flushing takes place in flushq_common. This is done so that
23757c478bd9Sstevel@tonic-gate  * a flag which specifies whether or not M_PCPROTO messages should be flushed
23767c478bd9Sstevel@tonic-gate  * or not. Currently the only place that uses this flag is the stream head.
23777c478bd9Sstevel@tonic-gate  */
23787c478bd9Sstevel@tonic-gate void
flushq(queue_t * q,int flag)23797c478bd9Sstevel@tonic-gate flushq(queue_t *q, int flag)
23807c478bd9Sstevel@tonic-gate {
23817c478bd9Sstevel@tonic-gate 	flushq_common(q, flag, 0);
23827c478bd9Sstevel@tonic-gate }
23837c478bd9Sstevel@tonic-gate 
23847c478bd9Sstevel@tonic-gate /*
23857c478bd9Sstevel@tonic-gate  * Flush the queue of messages of the given priority band.
23867c478bd9Sstevel@tonic-gate  * There is some duplication of code between flushq and flushband.
23877c478bd9Sstevel@tonic-gate  * This is because we want to optimize the code as much as possible.
23887c478bd9Sstevel@tonic-gate  * The assumption is that there will be more messages in the normal
23897c478bd9Sstevel@tonic-gate  * (priority 0) band than in any other.
23907c478bd9Sstevel@tonic-gate  *
23917c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
23927c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushband had an extra check for
23937c478bd9Sstevel@tonic-gate  * did not have a check for (mp->b_datap->db_type < QPCTL) in the band 0
23947c478bd9Sstevel@tonic-gate  * case. That check does not match the man page for flushband and was not
23957c478bd9Sstevel@tonic-gate  * in the strrput flush code hence it was removed.
23967c478bd9Sstevel@tonic-gate  */
23977c478bd9Sstevel@tonic-gate void
flushband(queue_t * q,unsigned char pri,int flag)23987c478bd9Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag)
23997c478bd9Sstevel@tonic-gate {
24007c478bd9Sstevel@tonic-gate 	mblk_t *mp;
24017c478bd9Sstevel@tonic-gate 	mblk_t *nmp;
24027c478bd9Sstevel@tonic-gate 	mblk_t *last;
24037c478bd9Sstevel@tonic-gate 	qband_t *qbp;
24047c478bd9Sstevel@tonic-gate 	int band;
24057c478bd9Sstevel@tonic-gate 
24067c478bd9Sstevel@tonic-gate 	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
24077c478bd9Sstevel@tonic-gate 	if (pri > q->q_nband) {
24087c478bd9Sstevel@tonic-gate 		return;
24097c478bd9Sstevel@tonic-gate 	}
24107c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
24117c478bd9Sstevel@tonic-gate 	if (pri == 0) {
24127c478bd9Sstevel@tonic-gate 		mp = q->q_first;
24137c478bd9Sstevel@tonic-gate 		q->q_first = NULL;
24147c478bd9Sstevel@tonic-gate 		q->q_last = NULL;
24157c478bd9Sstevel@tonic-gate 		q->q_count = 0;
24167c478bd9Sstevel@tonic-gate 		q->q_mblkcnt = 0;
24177c478bd9Sstevel@tonic-gate 		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
24187c478bd9Sstevel@tonic-gate 			qbp->qb_first = NULL;
24197c478bd9Sstevel@tonic-gate 			qbp->qb_last = NULL;
24207c478bd9Sstevel@tonic-gate 			qbp->qb_count = 0;
24217c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt = 0;
24227c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
24237c478bd9Sstevel@tonic-gate 		}
24247c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QFULL;
24257c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
24267c478bd9Sstevel@tonic-gate 		while (mp) {
24277c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
24287c478bd9Sstevel@tonic-gate 			mp->b_next = mp->b_prev = NULL;
24297c478bd9Sstevel@tonic-gate 			if ((mp->b_band == 0) &&
24307c478bd9Sstevel@tonic-gate 			    ((flag == FLUSHALL) ||
24317c478bd9Sstevel@tonic-gate 			    datamsg(mp->b_datap->db_type)))
24327c478bd9Sstevel@tonic-gate 				freemsg(mp);
24337c478bd9Sstevel@tonic-gate 			else
24347c478bd9Sstevel@tonic-gate 				(void) putq(q, mp);
24357c478bd9Sstevel@tonic-gate 			mp = nmp;
24367c478bd9Sstevel@tonic-gate 		}
24377c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
24387c478bd9Sstevel@tonic-gate 		if ((q->q_flag & QWANTW) &&
24397c478bd9Sstevel@tonic-gate 		    (((q->q_count < q->q_lowat) &&
24407c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
24417c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
24427c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
24437c478bd9Sstevel@tonic-gate 
2444116094b2Smicheng 			backenable(q, pri);
24457c478bd9Sstevel@tonic-gate 		} else
24467c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
24477c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
24487c478bd9Sstevel@tonic-gate 		boolean_t flushed = B_FALSE;
24497c478bd9Sstevel@tonic-gate 		band = pri;
24507c478bd9Sstevel@tonic-gate 
24517c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
24527c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
24537c478bd9Sstevel@tonic-gate 		while (--band > 0)
24547c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
24557c478bd9Sstevel@tonic-gate 		mp = qbp->qb_first;
24567c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
24577c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
24587c478bd9Sstevel@tonic-gate 			return;
24597c478bd9Sstevel@tonic-gate 		}
24607c478bd9Sstevel@tonic-gate 		last = qbp->qb_last->b_next;
24617c478bd9Sstevel@tonic-gate 		/*
24627c478bd9Sstevel@tonic-gate 		 * rmvq_noenab() and freemsg() are called for each mblk that
24637c478bd9Sstevel@tonic-gate 		 * meets the criteria.  The loop is executed until the last
24647c478bd9Sstevel@tonic-gate 		 * mblk has been processed.
24657c478bd9Sstevel@tonic-gate 		 */
24667c478bd9Sstevel@tonic-gate 		while (mp != last) {
24677c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_band == pri);
24687c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
24697c478bd9Sstevel@tonic-gate 			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
24707c478bd9Sstevel@tonic-gate 				rmvq_noenab(q, mp);
24717c478bd9Sstevel@tonic-gate 				freemsg(mp);
24727c478bd9Sstevel@tonic-gate 				flushed = B_TRUE;
24737c478bd9Sstevel@tonic-gate 			}
24747c478bd9Sstevel@tonic-gate 			mp = nmp;
24757c478bd9Sstevel@tonic-gate 		}
24767c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
24777c478bd9Sstevel@tonic-gate 
24787c478bd9Sstevel@tonic-gate 		/*
24797c478bd9Sstevel@tonic-gate 		 * If any mblk(s) has been freed, we know that qbackenable()
24807c478bd9Sstevel@tonic-gate 		 * will need to be called.
24817c478bd9Sstevel@tonic-gate 		 */
24827c478bd9Sstevel@tonic-gate 		if (flushed)
2483116094b2Smicheng 			qbackenable(q, pri);
24847c478bd9Sstevel@tonic-gate 	}
24857c478bd9Sstevel@tonic-gate }
24867c478bd9Sstevel@tonic-gate 
24877c478bd9Sstevel@tonic-gate /*
24887c478bd9Sstevel@tonic-gate  * Return 1 if the queue is not full.  If the queue is full, return
24897c478bd9Sstevel@tonic-gate  * 0 (may not put message) and set QWANTW flag (caller wants to write
24907c478bd9Sstevel@tonic-gate  * to the queue).
24917c478bd9Sstevel@tonic-gate  */
24927c478bd9Sstevel@tonic-gate int
canput(queue_t * q)24937c478bd9Sstevel@tonic-gate canput(queue_t *q)
24947c478bd9Sstevel@tonic-gate {
24957c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
24967c478bd9Sstevel@tonic-gate 
24977c478bd9Sstevel@tonic-gate 	/* this is for loopback transports, they should not do a canput */
24987c478bd9Sstevel@tonic-gate 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
24997c478bd9Sstevel@tonic-gate 
25007c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
25017c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
25027c478bd9Sstevel@tonic-gate 
25037c478bd9Sstevel@tonic-gate 	if (!(q->q_flag & QFULL)) {
25047c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
25057c478bd9Sstevel@tonic-gate 		return (1);
25067c478bd9Sstevel@tonic-gate 	}
25077c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
25087c478bd9Sstevel@tonic-gate 	if (q->q_flag & QFULL) {
25097c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTW;
25107c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
25117c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
25127c478bd9Sstevel@tonic-gate 		return (0);
25137c478bd9Sstevel@tonic-gate 	}
25147c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
25157c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
25167c478bd9Sstevel@tonic-gate 	return (1);
25177c478bd9Sstevel@tonic-gate }
25187c478bd9Sstevel@tonic-gate 
25197c478bd9Sstevel@tonic-gate /*
25207c478bd9Sstevel@tonic-gate  * This is the new canput for use with priority bands.  Return 1 if the
25217c478bd9Sstevel@tonic-gate  * band is not full.  If the band is full, return 0 (may not put message)
25227c478bd9Sstevel@tonic-gate  * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to
25237c478bd9Sstevel@tonic-gate  * write to the queue).
25247c478bd9Sstevel@tonic-gate  */
25257c478bd9Sstevel@tonic-gate int
bcanput(queue_t * q,unsigned char pri)25267c478bd9Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri)
25277c478bd9Sstevel@tonic-gate {
25287c478bd9Sstevel@tonic-gate 	qband_t *qbp;
25297c478bd9Sstevel@tonic-gate 
25307c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
25317c478bd9Sstevel@tonic-gate 	if (!q)
25327c478bd9Sstevel@tonic-gate 		return (0);
25337c478bd9Sstevel@tonic-gate 
25347c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
25357c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
25367c478bd9Sstevel@tonic-gate 
25377c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
25387c478bd9Sstevel@tonic-gate 	if (pri == 0) {
25397c478bd9Sstevel@tonic-gate 		if (q->q_flag & QFULL) {
25407c478bd9Sstevel@tonic-gate 			q->q_flag |= QWANTW;
25417c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
25427c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
25437c478bd9Sstevel@tonic-gate 			    "bcanput:%p %X %d", q, pri, 0);
25447c478bd9Sstevel@tonic-gate 			return (0);
25457c478bd9Sstevel@tonic-gate 		}
25467c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
25477c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
25487c478bd9Sstevel@tonic-gate 			/*
25497c478bd9Sstevel@tonic-gate 			 * No band exists yet, so return success.
25507c478bd9Sstevel@tonic-gate 			 */
25517c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
25527c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
25537c478bd9Sstevel@tonic-gate 			    "bcanput:%p %X %d", q, pri, 1);
25547c478bd9Sstevel@tonic-gate 			return (1);
25557c478bd9Sstevel@tonic-gate 		}
25567c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
25577c478bd9Sstevel@tonic-gate 		while (--pri)
25587c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
25597c478bd9Sstevel@tonic-gate 		if (qbp->qb_flag & QB_FULL) {
25607c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_WANTW;
25617c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
25627c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
25637c478bd9Sstevel@tonic-gate 			    "bcanput:%p %X %d", q, pri, 0);
25647c478bd9Sstevel@tonic-gate 			return (0);
25657c478bd9Sstevel@tonic-gate 		}
25667c478bd9Sstevel@tonic-gate 	}
25677c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
25687c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
25697c478bd9Sstevel@tonic-gate 	    "bcanput:%p %X %d", q, pri, 1);
25707c478bd9Sstevel@tonic-gate 	return (1);
25717c478bd9Sstevel@tonic-gate }
25727c478bd9Sstevel@tonic-gate 
25737c478bd9Sstevel@tonic-gate /*
25747c478bd9Sstevel@tonic-gate  * Put a message on a queue.
25757c478bd9Sstevel@tonic-gate  *
25767c478bd9Sstevel@tonic-gate  * Messages are enqueued on a priority basis.  The priority classes
25777c478bd9Sstevel@tonic-gate  * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
25787c478bd9Sstevel@tonic-gate  * and B_NORMAL (type < QPCTL && band == 0).
25797c478bd9Sstevel@tonic-gate  *
25807c478bd9Sstevel@tonic-gate  * Add appropriate weighted data block sizes to queue count.
25817c478bd9Sstevel@tonic-gate  * If queue hits high water mark then set QFULL flag.
25827c478bd9Sstevel@tonic-gate  *
25837c478bd9Sstevel@tonic-gate  * If QNOENAB is not set (putq is allowed to enable the queue),
25847c478bd9Sstevel@tonic-gate  * enable the queue only if the message is PRIORITY,
25857c478bd9Sstevel@tonic-gate  * or the QWANTR flag is set (indicating that the service procedure
25867c478bd9Sstevel@tonic-gate  * is ready to read the queue.  This implies that a service
25877c478bd9Sstevel@tonic-gate  * procedure must NEVER put a high priority message back on its own
25887c478bd9Sstevel@tonic-gate  * queue, as this would result in an infinite loop (!).
25897c478bd9Sstevel@tonic-gate  */
25907c478bd9Sstevel@tonic-gate int
putq(queue_t * q,mblk_t * bp)25917c478bd9Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp)
25927c478bd9Sstevel@tonic-gate {
25937c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
25947c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
25957c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
25967c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
25977c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
25987c478bd9Sstevel@tonic-gate 
25997c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
26007c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
26017c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
26027c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
26037c478bd9Sstevel@tonic-gate 	} else
26047c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
26057c478bd9Sstevel@tonic-gate 
26067c478bd9Sstevel@tonic-gate 	/*
26077c478bd9Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
26087c478bd9Sstevel@tonic-gate 	 * allocated, do so.
26097c478bd9Sstevel@tonic-gate 	 */
26107c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
26117c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
26127c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
26137c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
26147c478bd9Sstevel@tonic-gate 		int i;
26157c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
26167c478bd9Sstevel@tonic-gate 
26177c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
26187c478bd9Sstevel@tonic-gate 
26197c478bd9Sstevel@tonic-gate 			/*
26207c478bd9Sstevel@tonic-gate 			 * The qband structure for this priority band is
26217c478bd9Sstevel@tonic-gate 			 * not on the queue yet, so we have to allocate
26227c478bd9Sstevel@tonic-gate 			 * one on the fly.  It would be wasteful to
26237c478bd9Sstevel@tonic-gate 			 * associate the qband structures with every
26247c478bd9Sstevel@tonic-gate 			 * queue when the queues are allocated.  This is
26257c478bd9Sstevel@tonic-gate 			 * because most queues will only need the normal
26267c478bd9Sstevel@tonic-gate 			 * band of flow which can be described entirely
26277c478bd9Sstevel@tonic-gate 			 * by the queue itself.
26287c478bd9Sstevel@tonic-gate 			 */
26297c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
26307c478bd9Sstevel@tonic-gate 			while (*qbpp)
26317c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
26327c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
26337c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
26347c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
26357c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
26367c478bd9Sstevel@tonic-gate 					return (0);
26377c478bd9Sstevel@tonic-gate 				}
26387c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
26397c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
26407c478bd9Sstevel@tonic-gate 				q->q_nband++;
26417c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
26427c478bd9Sstevel@tonic-gate 			}
26437c478bd9Sstevel@tonic-gate 		}
26447c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
26457c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
26467c478bd9Sstevel@tonic-gate 		i = bp->b_band;
26477c478bd9Sstevel@tonic-gate 		while (--i)
26487c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
26497c478bd9Sstevel@tonic-gate 	}
26507c478bd9Sstevel@tonic-gate 
26517c478bd9Sstevel@tonic-gate 	/*
26527c478bd9Sstevel@tonic-gate 	 * If queue is empty, add the message and initialize the pointers.
26537c478bd9Sstevel@tonic-gate 	 * Otherwise, adjust message pointers and queue pointers based on
26547c478bd9Sstevel@tonic-gate 	 * the type of the message and where it belongs on the queue.  Some
26557c478bd9Sstevel@tonic-gate 	 * code is duplicated to minimize the number of conditionals and
26567c478bd9Sstevel@tonic-gate 	 * hopefully minimize the amount of time this routine takes.
26577c478bd9Sstevel@tonic-gate 	 */
26587c478bd9Sstevel@tonic-gate 	if (!q->q_first) {
26597c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
26607c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
26617c478bd9Sstevel@tonic-gate 		q->q_first = bp;
26627c478bd9Sstevel@tonic-gate 		q->q_last = bp;
26637c478bd9Sstevel@tonic-gate 		if (qbp) {
26647c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
26657c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
26667c478bd9Sstevel@tonic-gate 		}
26677c478bd9Sstevel@tonic-gate 	} else if (!qbp) {	/* bp->b_band == 0 */
26687c478bd9Sstevel@tonic-gate 
26697c478bd9Sstevel@tonic-gate 		/*
26707c478bd9Sstevel@tonic-gate 		 * If queue class of message is less than or equal to
26717c478bd9Sstevel@tonic-gate 		 * that of the last one on the queue, tack on to the end.
26727c478bd9Sstevel@tonic-gate 		 */
26737c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
26747c478bd9Sstevel@tonic-gate 		if (mcls <= (int)queclass(tmp)) {
26757c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
26767c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
26777c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
26787c478bd9Sstevel@tonic-gate 			q->q_last = bp;
26797c478bd9Sstevel@tonic-gate 		} else {
26807c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
26817c478bd9Sstevel@tonic-gate 			while ((int)queclass(tmp) >= mcls)
26827c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
26837c478bd9Sstevel@tonic-gate 
26847c478bd9Sstevel@tonic-gate 			/*
26857c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
26867c478bd9Sstevel@tonic-gate 			 */
26877c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
26887c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
26897c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
26907c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
26917c478bd9Sstevel@tonic-gate 			else
26927c478bd9Sstevel@tonic-gate 				q->q_first = bp;
26937c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
26947c478bd9Sstevel@tonic-gate 		}
26957c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band != 0 */
26967c478bd9Sstevel@tonic-gate 		if (qbp->qb_first) {
26977c478bd9Sstevel@tonic-gate 			tmp = qbp->qb_last;
26987c478bd9Sstevel@tonic-gate 
26997c478bd9Sstevel@tonic-gate 			/*
27007c478bd9Sstevel@tonic-gate 			 * Insert bp after the last message in this band.
27017c478bd9Sstevel@tonic-gate 			 */
27027c478bd9Sstevel@tonic-gate 			bp->b_next = tmp->b_next;
27037c478bd9Sstevel@tonic-gate 			if (tmp->b_next)
27047c478bd9Sstevel@tonic-gate 				tmp->b_next->b_prev = bp;
27057c478bd9Sstevel@tonic-gate 			else
27067c478bd9Sstevel@tonic-gate 				q->q_last = bp;
27077c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
27087c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
27097c478bd9Sstevel@tonic-gate 		} else {
27107c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
27117c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
27127c478bd9Sstevel@tonic-gate 			    (bp->b_band <= tmp->b_band)) {
27137c478bd9Sstevel@tonic-gate 
27147c478bd9Sstevel@tonic-gate 				/*
27157c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
27167c478bd9Sstevel@tonic-gate 				 */
27177c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
27187c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
27197c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
27207c478bd9Sstevel@tonic-gate 				q->q_last = bp;
27217c478bd9Sstevel@tonic-gate 			} else {
27227c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
27237c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
27247c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
27257c478bd9Sstevel@tonic-gate 				while (tmp->b_band >= bp->b_band)
27267c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
27277c478bd9Sstevel@tonic-gate 
27287c478bd9Sstevel@tonic-gate 				/*
27297c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
27307c478bd9Sstevel@tonic-gate 				 */
27317c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
27327c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
27337c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
27347c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
27357c478bd9Sstevel@tonic-gate 				else
27367c478bd9Sstevel@tonic-gate 					q->q_first = bp;
27377c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
27387c478bd9Sstevel@tonic-gate 			}
27397c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
27407c478bd9Sstevel@tonic-gate 		}
27417c478bd9Sstevel@tonic-gate 		qbp->qb_last = bp;
27427c478bd9Sstevel@tonic-gate 	}
27437c478bd9Sstevel@tonic-gate 
27447c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
2745301ce41fSja97890 	bytecnt = mp_cont_len(bp, &mblkcnt);
2746ff550d0eSmasputra 
27477c478bd9Sstevel@tonic-gate 	if (qbp) {
27487c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
27497c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
27507c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
27517c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
27527c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
27537c478bd9Sstevel@tonic-gate 		}
27547c478bd9Sstevel@tonic-gate 	} else {
27557c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
27567c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
27577c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
27587c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
27597c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
27607c478bd9Sstevel@tonic-gate 		}
27617c478bd9Sstevel@tonic-gate 	}
27627c478bd9Sstevel@tonic-gate 
27637e12ceb3SToomas Soome 	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, 0);
27647c478bd9Sstevel@tonic-gate 
27657c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) ||
27667c478bd9Sstevel@tonic-gate 	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
27677c478bd9Sstevel@tonic-gate 		qenable_locked(q);
27687c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
27697c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
27707c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
27717c478bd9Sstevel@tonic-gate 
27727c478bd9Sstevel@tonic-gate 	return (1);
27737c478bd9Sstevel@tonic-gate }
27747c478bd9Sstevel@tonic-gate 
27757c478bd9Sstevel@tonic-gate /*
27767c478bd9Sstevel@tonic-gate  * Put stuff back at beginning of Q according to priority order.
27777c478bd9Sstevel@tonic-gate  * See comment on putq above for details.
27787c478bd9Sstevel@tonic-gate  */
27797c478bd9Sstevel@tonic-gate int
putbq(queue_t * q,mblk_t * bp)27807c478bd9Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp)
27817c478bd9Sstevel@tonic-gate {
27827c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
27837c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
27847c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
27857c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
27867c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
27877c478bd9Sstevel@tonic-gate 
27887c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
27897c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_next == NULL);
27907c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
27917c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
27927c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
27937c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
27947c478bd9Sstevel@tonic-gate 	} else
27957c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
27967c478bd9Sstevel@tonic-gate 
27977c478bd9Sstevel@tonic-gate 	/*
27987c478bd9Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
27997c478bd9Sstevel@tonic-gate 	 * allocated, do so.
28007c478bd9Sstevel@tonic-gate 	 */
28017c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
28027c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
28037c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
28047c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
28057c478bd9Sstevel@tonic-gate 		int i;
28067c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
28077c478bd9Sstevel@tonic-gate 
28087c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
28097c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
28107c478bd9Sstevel@tonic-gate 			while (*qbpp)
28117c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
28127c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
28137c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
28147c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
28157c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
28167c478bd9Sstevel@tonic-gate 					return (0);
28177c478bd9Sstevel@tonic-gate 				}
28187c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
28197c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
28207c478bd9Sstevel@tonic-gate 				q->q_nband++;
28217c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
28227c478bd9Sstevel@tonic-gate 			}
28237c478bd9Sstevel@tonic-gate 		}
28247c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
28257c478bd9Sstevel@tonic-gate 		i = bp->b_band;
28267c478bd9Sstevel@tonic-gate 		while (--i)
28277c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
28287c478bd9Sstevel@tonic-gate 	}
28297c478bd9Sstevel@tonic-gate 
28307c478bd9Sstevel@tonic-gate 	/*
28317c478bd9Sstevel@tonic-gate 	 * If queue is empty or if message is high priority,
28327c478bd9Sstevel@tonic-gate 	 * place on the front of the queue.
28337c478bd9Sstevel@tonic-gate 	 */
28347c478bd9Sstevel@tonic-gate 	tmp = q->q_first;
28357c478bd9Sstevel@tonic-gate 	if ((!tmp) || (mcls == QPCTL)) {
28367c478bd9Sstevel@tonic-gate 		bp->b_next = tmp;
28377c478bd9Sstevel@tonic-gate 		if (tmp)
28387c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
28397c478bd9Sstevel@tonic-gate 		else
28407c478bd9Sstevel@tonic-gate 			q->q_last = bp;
28417c478bd9Sstevel@tonic-gate 		q->q_first = bp;
28427c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
28437c478bd9Sstevel@tonic-gate 		if (qbp) {
28447c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
28457c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
28467c478bd9Sstevel@tonic-gate 		}
28477c478bd9Sstevel@tonic-gate 	} else if (qbp) {	/* bp->b_band != 0 */
28487c478bd9Sstevel@tonic-gate 		tmp = qbp->qb_first;
28497c478bd9Sstevel@tonic-gate 		if (tmp) {
28507c478bd9Sstevel@tonic-gate 
28517c478bd9Sstevel@tonic-gate 			/*
28527c478bd9Sstevel@tonic-gate 			 * Insert bp before the first message in this band.
28537c478bd9Sstevel@tonic-gate 			 */
28547c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
28557c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
28567c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
28577c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
28587c478bd9Sstevel@tonic-gate 			else
28597c478bd9Sstevel@tonic-gate 				q->q_first = bp;
28607c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
28617c478bd9Sstevel@tonic-gate 		} else {
28627c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
28637c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
28647c478bd9Sstevel@tonic-gate 			    (bp->b_band < tmp->b_band)) {
28657c478bd9Sstevel@tonic-gate 
28667c478bd9Sstevel@tonic-gate 				/*
28677c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
28687c478bd9Sstevel@tonic-gate 				 */
28697c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
28707c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
28717c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
28727c478bd9Sstevel@tonic-gate 				q->q_last = bp;
28737c478bd9Sstevel@tonic-gate 			} else {
28747c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
28757c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
28767c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
28777c478bd9Sstevel@tonic-gate 				while (tmp->b_band > bp->b_band)
28787c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
28797c478bd9Sstevel@tonic-gate 
28807c478bd9Sstevel@tonic-gate 				/*
28817c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
28827c478bd9Sstevel@tonic-gate 				 */
28837c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
28847c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
28857c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
28867c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
28877c478bd9Sstevel@tonic-gate 				else
28887c478bd9Sstevel@tonic-gate 					q->q_first = bp;
28897c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
28907c478bd9Sstevel@tonic-gate 			}
28917c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
28927c478bd9Sstevel@tonic-gate 		}
28937c478bd9Sstevel@tonic-gate 		qbp->qb_first = bp;
28947c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band == 0 && !QPCTL */
28957c478bd9Sstevel@tonic-gate 
28967c478bd9Sstevel@tonic-gate 		/*
28977c478bd9Sstevel@tonic-gate 		 * If the queue class or band is less than that of the last
28987c478bd9Sstevel@tonic-gate 		 * message on the queue, tack bp on the end of the queue.
28997c478bd9Sstevel@tonic-gate 		 */
29007c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
29017c478bd9Sstevel@tonic-gate 		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
29027c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
29037c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
29047c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
29057c478bd9Sstevel@tonic-gate 			q->q_last = bp;
29067c478bd9Sstevel@tonic-gate 		} else {
29077c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
29087c478bd9Sstevel@tonic-gate 			while (tmp->b_datap->db_type >= QPCTL)
29097c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
29107c478bd9Sstevel@tonic-gate 			while (tmp->b_band > bp->b_band)
29117c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
29127c478bd9Sstevel@tonic-gate 
29137c478bd9Sstevel@tonic-gate 			/*
29147c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
29157c478bd9Sstevel@tonic-gate 			 */
29167c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
29177c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
29187c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
29197c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
29207c478bd9Sstevel@tonic-gate 			else
29217c478bd9Sstevel@tonic-gate 				q->q_first = bp;
29227c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
29237c478bd9Sstevel@tonic-gate 		}
29247c478bd9Sstevel@tonic-gate 	}
29257c478bd9Sstevel@tonic-gate 
29267c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
2927301ce41fSja97890 	bytecnt = mp_cont_len(bp, &mblkcnt);
2928301ce41fSja97890 
29297c478bd9Sstevel@tonic-gate 	if (qbp) {
29307c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
29317c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
29327c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
29337c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
29347c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
29357c478bd9Sstevel@tonic-gate 		}
29367c478bd9Sstevel@tonic-gate 	} else {
29377c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
29387c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
29397c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
29407c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
29417c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
29427c478bd9Sstevel@tonic-gate 		}
29437c478bd9Sstevel@tonic-gate 	}
29447c478bd9Sstevel@tonic-gate 
29457e12ceb3SToomas Soome 	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, 0);
29467c478bd9Sstevel@tonic-gate 
29477c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
29487c478bd9Sstevel@tonic-gate 		qenable_locked(q);
29497c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
29507c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
29517c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
29527c478bd9Sstevel@tonic-gate 
29537c478bd9Sstevel@tonic-gate 	return (1);
29547c478bd9Sstevel@tonic-gate }
29557c478bd9Sstevel@tonic-gate 
29567c478bd9Sstevel@tonic-gate /*
29577c478bd9Sstevel@tonic-gate  * Insert a message before an existing message on the queue.  If the
29587c478bd9Sstevel@tonic-gate  * existing message is NULL, the new messages is placed on the end of
29597c478bd9Sstevel@tonic-gate  * the queue.  The queue class of the new message is ignored.  However,
29607c478bd9Sstevel@tonic-gate  * the priority band of the new message must adhere to the following
29617c478bd9Sstevel@tonic-gate  * ordering:
29627c478bd9Sstevel@tonic-gate  *
29637c478bd9Sstevel@tonic-gate  *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
29647c478bd9Sstevel@tonic-gate  *
29657c478bd9Sstevel@tonic-gate  * All flow control parameters are updated.
29667c478bd9Sstevel@tonic-gate  *
29677c478bd9Sstevel@tonic-gate  * insq can be called with the stream frozen, but other utility functions
29687c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
29697c478bd9Sstevel@tonic-gate  */
29707c478bd9Sstevel@tonic-gate int
insq(queue_t * q,mblk_t * emp,mblk_t * mp)29717c478bd9Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp)
29727c478bd9Sstevel@tonic-gate {
29737c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
29747c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
29757c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(mp);
29767c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
29777c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
29787c478bd9Sstevel@tonic-gate 
29797c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
29807c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
29817c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
29827c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
29837c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
29847c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
29857c478bd9Sstevel@tonic-gate 		freezer = curthread;
29867c478bd9Sstevel@tonic-gate 	} else
29877c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
29887c478bd9Sstevel@tonic-gate 
29897c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
29907c478bd9Sstevel@tonic-gate 		if (mp->b_band != 0)
29917c478bd9Sstevel@tonic-gate 			mp->b_band = 0;		/* force to be correct */
29927c478bd9Sstevel@tonic-gate 		if (emp && emp->b_prev &&
29937c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_datap->db_type < QPCTL))
29947c478bd9Sstevel@tonic-gate 			goto badord;
29957c478bd9Sstevel@tonic-gate 	}
29967c478bd9Sstevel@tonic-gate 	if (emp) {
29977c478bd9Sstevel@tonic-gate 		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
29987c478bd9Sstevel@tonic-gate 		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
29997c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_band < mp->b_band))) {
30007c478bd9Sstevel@tonic-gate 			goto badord;
30017c478bd9Sstevel@tonic-gate 		}
30027c478bd9Sstevel@tonic-gate 	} else {
30037c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
30047c478bd9Sstevel@tonic-gate 		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
30057c478bd9Sstevel@tonic-gate badord:
30067c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
30077c478bd9Sstevel@tonic-gate 			    "insq: attempt to insert message out of order "
30087c478bd9Sstevel@tonic-gate 			    "on q %p", (void *)q);
30097c478bd9Sstevel@tonic-gate 			if (freezer != curthread)
30107c478bd9Sstevel@tonic-gate 				mutex_exit(QLOCK(q));
30117c478bd9Sstevel@tonic-gate 			return (0);
30127c478bd9Sstevel@tonic-gate 		}
30137c478bd9Sstevel@tonic-gate 	}
30147c478bd9Sstevel@tonic-gate 
30157c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {
30167c478bd9Sstevel@tonic-gate 		int i;
30177c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
30187c478bd9Sstevel@tonic-gate 
30197c478bd9Sstevel@tonic-gate 		if (mp->b_band > q->q_nband) {
30207c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
30217c478bd9Sstevel@tonic-gate 			while (*qbpp)
30227c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
30237c478bd9Sstevel@tonic-gate 			while (mp->b_band > q->q_nband) {
30247c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
30257c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
30267c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
30277c478bd9Sstevel@tonic-gate 					return (0);
30287c478bd9Sstevel@tonic-gate 				}
30297c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
30307c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
30317c478bd9Sstevel@tonic-gate 				q->q_nband++;
30327c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
30337c478bd9Sstevel@tonic-gate 			}
30347c478bd9Sstevel@tonic-gate 		}
30357c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
30367c478bd9Sstevel@tonic-gate 		i = mp->b_band;
30377c478bd9Sstevel@tonic-gate 		while (--i)
30387c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
30397c478bd9Sstevel@tonic-gate 	}
30407c478bd9Sstevel@tonic-gate 
30417c478bd9Sstevel@tonic-gate 	if ((mp->b_next = emp) != NULL) {
30427c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = emp->b_prev) != NULL)
30437c478bd9Sstevel@tonic-gate 			emp->b_prev->b_next = mp;
30447c478bd9Sstevel@tonic-gate 		else
30457c478bd9Sstevel@tonic-gate 			q->q_first = mp;
30467c478bd9Sstevel@tonic-gate 		emp->b_prev = mp;
30477c478bd9Sstevel@tonic-gate 	} else {
30487c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = q->q_last) != NULL)
30497c478bd9Sstevel@tonic-gate 			q->q_last->b_next = mp;
30507c478bd9Sstevel@tonic-gate 		else
30517c478bd9Sstevel@tonic-gate 			q->q_first = mp;
30527c478bd9Sstevel@tonic-gate 		q->q_last = mp;
30537c478bd9Sstevel@tonic-gate 	}
30547c478bd9Sstevel@tonic-gate 
30557c478bd9Sstevel@tonic-gate 	/* Get mblk and byte count for q_count accounting */
3056301ce41fSja97890 	bytecnt = mp_cont_len(mp, &mblkcnt);
30577c478bd9Sstevel@tonic-gate 
30587c478bd9Sstevel@tonic-gate 	if (qbp) {	/* adjust qband pointers and count */
30597c478bd9Sstevel@tonic-gate 		if (!qbp->qb_first) {
30607c478bd9Sstevel@tonic-gate 			qbp->qb_first = mp;
30617c478bd9Sstevel@tonic-gate 			qbp->qb_last = mp;
30627c478bd9Sstevel@tonic-gate 		} else {
30637c478bd9Sstevel@tonic-gate 			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
30647c478bd9Sstevel@tonic-gate 			    (mp->b_prev->b_band != mp->b_band)))
30657c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp;
30667c478bd9Sstevel@tonic-gate 			else if (mp->b_next == NULL || (mp->b_next != NULL &&
30677c478bd9Sstevel@tonic-gate 			    (mp->b_next->b_band != mp->b_band)))
30687c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp;
30697c478bd9Sstevel@tonic-gate 		}
30707c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
30717c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
30727c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
30737c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
30747c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
30757c478bd9Sstevel@tonic-gate 		}
30767c478bd9Sstevel@tonic-gate 	} else {
30777c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
30787c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
30797c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
30807c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
30817c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
30827c478bd9Sstevel@tonic-gate 		}
30837c478bd9Sstevel@tonic-gate 	}
30847c478bd9Sstevel@tonic-gate 
30857e12ceb3SToomas Soome 	STR_FTEVENT_MSG(mp, q, FTEV_INSQ, 0);
30867c478bd9Sstevel@tonic-gate 
30877c478bd9Sstevel@tonic-gate 	if (canenable(q) && (q->q_flag & QWANTR))
30887c478bd9Sstevel@tonic-gate 		qenable_locked(q);
30897c478bd9Sstevel@tonic-gate 
30907c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
30917c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
30927c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
30937c478bd9Sstevel@tonic-gate 
30947c478bd9Sstevel@tonic-gate 	return (1);
30957c478bd9Sstevel@tonic-gate }
30967c478bd9Sstevel@tonic-gate 
30977c478bd9Sstevel@tonic-gate /*
30987c478bd9Sstevel@tonic-gate  * Create and put a control message on queue.
30997c478bd9Sstevel@tonic-gate  */
31007c478bd9Sstevel@tonic-gate int
putctl(queue_t * q,int type)31017c478bd9Sstevel@tonic-gate putctl(queue_t *q, int type)
31027c478bd9Sstevel@tonic-gate {
31037c478bd9Sstevel@tonic-gate 	mblk_t *bp;
31047c478bd9Sstevel@tonic-gate 
31057c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
31067c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(0)) == NULL)
31077c478bd9Sstevel@tonic-gate 		return (0);
31087c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char) type;
31097c478bd9Sstevel@tonic-gate 
31107c478bd9Sstevel@tonic-gate 	put(q, bp);
31117c478bd9Sstevel@tonic-gate 
31127c478bd9Sstevel@tonic-gate 	return (1);
31137c478bd9Sstevel@tonic-gate }
31147c478bd9Sstevel@tonic-gate 
31157c478bd9Sstevel@tonic-gate /*
31167c478bd9Sstevel@tonic-gate  * Control message with a single-byte parameter
31177c478bd9Sstevel@tonic-gate  */
31187c478bd9Sstevel@tonic-gate int
putctl1(queue_t * q,int type,int param)31197c478bd9Sstevel@tonic-gate putctl1(queue_t *q, int type, int param)
31207c478bd9Sstevel@tonic-gate {
31217c478bd9Sstevel@tonic-gate 	mblk_t *bp;
31227c478bd9Sstevel@tonic-gate 
31237c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
31247c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(1)) == NULL)
31257c478bd9Sstevel@tonic-gate 		return (0);
31267c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
31277c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
31287c478bd9Sstevel@tonic-gate 
31297c478bd9Sstevel@tonic-gate 	put(q, bp);
31307c478bd9Sstevel@tonic-gate 
31317c478bd9Sstevel@tonic-gate 	return (1);
31327c478bd9Sstevel@tonic-gate }
31337c478bd9Sstevel@tonic-gate 
31347c478bd9Sstevel@tonic-gate int
putnextctl1(queue_t * q,int type,int param)31357c478bd9Sstevel@tonic-gate putnextctl1(queue_t *q, int type, int param)
31367c478bd9Sstevel@tonic-gate {
31377c478bd9Sstevel@tonic-gate 	mblk_t *bp;
31387c478bd9Sstevel@tonic-gate 
31397c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
31407c478bd9Sstevel@tonic-gate 	    ((bp = allocb_tryhard(1)) == NULL))
31417c478bd9Sstevel@tonic-gate 		return (0);
31427c478bd9Sstevel@tonic-gate 
31437c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
31447c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
31457c478bd9Sstevel@tonic-gate 
31467c478bd9Sstevel@tonic-gate 	putnext(q, bp);
31477c478bd9Sstevel@tonic-gate 
31487c478bd9Sstevel@tonic-gate 	return (1);
31497c478bd9Sstevel@tonic-gate }
31507c478bd9Sstevel@tonic-gate 
31517c478bd9Sstevel@tonic-gate int
putnextctl(queue_t * q,int type)31527c478bd9Sstevel@tonic-gate putnextctl(queue_t *q, int type)
31537c478bd9Sstevel@tonic-gate {
31547c478bd9Sstevel@tonic-gate 	mblk_t *bp;
31557c478bd9Sstevel@tonic-gate 
31567c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
31577c478bd9Sstevel@tonic-gate 	    ((bp = allocb_tryhard(0)) == NULL))
31587c478bd9Sstevel@tonic-gate 		return (0);
31597c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
31607c478bd9Sstevel@tonic-gate 
31617c478bd9Sstevel@tonic-gate 	putnext(q, bp);
31627c478bd9Sstevel@tonic-gate 
31637c478bd9Sstevel@tonic-gate 	return (1);
31647c478bd9Sstevel@tonic-gate }
31657c478bd9Sstevel@tonic-gate 
31667c478bd9Sstevel@tonic-gate /*
31677c478bd9Sstevel@tonic-gate  * Return the queue upstream from this one
31687c478bd9Sstevel@tonic-gate  */
31697c478bd9Sstevel@tonic-gate queue_t *
backq(queue_t * q)31707c478bd9Sstevel@tonic-gate backq(queue_t *q)
31717c478bd9Sstevel@tonic-gate {
31727c478bd9Sstevel@tonic-gate 	q = _OTHERQ(q);
31737c478bd9Sstevel@tonic-gate 	if (q->q_next) {
31747c478bd9Sstevel@tonic-gate 		q = q->q_next;
31757c478bd9Sstevel@tonic-gate 		return (_OTHERQ(q));
31767c478bd9Sstevel@tonic-gate 	}
31777c478bd9Sstevel@tonic-gate 	return (NULL);
31787c478bd9Sstevel@tonic-gate }
31797c478bd9Sstevel@tonic-gate 
31807c478bd9Sstevel@tonic-gate /*
31817c478bd9Sstevel@tonic-gate  * Send a block back up the queue in reverse from this
31827c478bd9Sstevel@tonic-gate  * one (e.g. to respond to ioctls)
31837c478bd9Sstevel@tonic-gate  */
31847c478bd9Sstevel@tonic-gate void
qreply(queue_t * q,mblk_t * bp)31857c478bd9Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp)
31867c478bd9Sstevel@tonic-gate {
31877c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
31887c478bd9Sstevel@tonic-gate 
31897c478bd9Sstevel@tonic-gate 	putnext(_OTHERQ(q), bp);
31907c478bd9Sstevel@tonic-gate }
31917c478bd9Sstevel@tonic-gate 
31927c478bd9Sstevel@tonic-gate /*
31937c478bd9Sstevel@tonic-gate  * Streams Queue Scheduling
31947c478bd9Sstevel@tonic-gate  *
31957c478bd9Sstevel@tonic-gate  * Queues are enabled through qenable() when they have messages to
31967c478bd9Sstevel@tonic-gate  * process.  They are serviced by queuerun(), which runs each enabled
31977c478bd9Sstevel@tonic-gate  * queue's service procedure.  The call to queuerun() is processor
31987c478bd9Sstevel@tonic-gate  * dependent - the general principle is that it be run whenever a queue
31997c478bd9Sstevel@tonic-gate  * is enabled but before returning to user level.  For system calls,
32007c478bd9Sstevel@tonic-gate  * the function runqueues() is called if their action causes a queue
32017c478bd9Sstevel@tonic-gate  * to be enabled.  For device interrupts, queuerun() should be
32027c478bd9Sstevel@tonic-gate  * called before returning from the last level of interrupt.  Beyond
32037c478bd9Sstevel@tonic-gate  * this, no timing assumptions should be made about queue scheduling.
32047c478bd9Sstevel@tonic-gate  */
32057c478bd9Sstevel@tonic-gate 
32067c478bd9Sstevel@tonic-gate /*
32077c478bd9Sstevel@tonic-gate  * Enable a queue: put it on list of those whose service procedures are
32087c478bd9Sstevel@tonic-gate  * ready to run and set up the scheduling mechanism.
32097c478bd9Sstevel@tonic-gate  * The broadcast is done outside the mutex -> to avoid the woken thread
32107c478bd9Sstevel@tonic-gate  * from contending with the mutex. This is OK 'cos the queue has been
32117c478bd9Sstevel@tonic-gate  * enqueued on the runlist and flagged safely at this point.
32127c478bd9Sstevel@tonic-gate  */
32137c478bd9Sstevel@tonic-gate void
qenable(queue_t * q)32147c478bd9Sstevel@tonic-gate qenable(queue_t *q)
32157c478bd9Sstevel@tonic-gate {
32167c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
32177c478bd9Sstevel@tonic-gate 	qenable_locked(q);
32187c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
32197c478bd9Sstevel@tonic-gate }
32207c478bd9Sstevel@tonic-gate /*
32217c478bd9Sstevel@tonic-gate  * Return number of messages on queue
32227c478bd9Sstevel@tonic-gate  */
32237c478bd9Sstevel@tonic-gate int
qsize(queue_t * qp)32247c478bd9Sstevel@tonic-gate qsize(queue_t *qp)
32257c478bd9Sstevel@tonic-gate {
32267c478bd9Sstevel@tonic-gate 	int count = 0;
32277c478bd9Sstevel@tonic-gate 	mblk_t *mp;
32287c478bd9Sstevel@tonic-gate 
32297c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
32307c478bd9Sstevel@tonic-gate 	for (mp = qp->q_first; mp; mp = mp->b_next)
32317c478bd9Sstevel@tonic-gate 		count++;
32327c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
32337c478bd9Sstevel@tonic-gate 	return (count);
32347c478bd9Sstevel@tonic-gate }
32357c478bd9Sstevel@tonic-gate 
32367c478bd9Sstevel@tonic-gate /*
32377c478bd9Sstevel@tonic-gate  * noenable - set queue so that putq() will not enable it.
32387c478bd9Sstevel@tonic-gate  * enableok - set queue so that putq() can enable it.
32397c478bd9Sstevel@tonic-gate  */
32407c478bd9Sstevel@tonic-gate void
noenable(queue_t * q)32417c478bd9Sstevel@tonic-gate noenable(queue_t *q)
32427c478bd9Sstevel@tonic-gate {
32437c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
32447c478bd9Sstevel@tonic-gate 	q->q_flag |= QNOENB;
32457c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
32467c478bd9Sstevel@tonic-gate }
32477c478bd9Sstevel@tonic-gate 
32487c478bd9Sstevel@tonic-gate void
enableok(queue_t * q)32497c478bd9Sstevel@tonic-gate enableok(queue_t *q)
32507c478bd9Sstevel@tonic-gate {
32517c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
32527c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QNOENB;
32537c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
32547c478bd9Sstevel@tonic-gate }
32557c478bd9Sstevel@tonic-gate 
32567c478bd9Sstevel@tonic-gate /*
32577c478bd9Sstevel@tonic-gate  * Set queue fields.
32587c478bd9Sstevel@tonic-gate  */
32597c478bd9Sstevel@tonic-gate int
strqset(queue_t * q,qfields_t what,unsigned char pri,intptr_t val)32607c478bd9Sstevel@tonic-gate strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
32617c478bd9Sstevel@tonic-gate {
32627c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
32637c478bd9Sstevel@tonic-gate 	queue_t	*wrq;
32647c478bd9Sstevel@tonic-gate 	int error = 0;
32657c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
32667c478bd9Sstevel@tonic-gate 
32677c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
32687c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
32697c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
32707c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
32717c478bd9Sstevel@tonic-gate 	} else
32727c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
32737c478bd9Sstevel@tonic-gate 
32747c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
32757c478bd9Sstevel@tonic-gate 		error = EINVAL;
32767c478bd9Sstevel@tonic-gate 		goto done;
32777c478bd9Sstevel@tonic-gate 	}
32787c478bd9Sstevel@tonic-gate 	if (pri != 0) {
32797c478bd9Sstevel@tonic-gate 		int i;
32807c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
32817c478bd9Sstevel@tonic-gate 
32827c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
32837c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
32847c478bd9Sstevel@tonic-gate 			while (*qbpp)
32857c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
32867c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
32877c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
32887c478bd9Sstevel@tonic-gate 					error = EAGAIN;
32897c478bd9Sstevel@tonic-gate 					goto done;
32907c478bd9Sstevel@tonic-gate 				}
32917c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
32927c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
32937c478bd9Sstevel@tonic-gate 				q->q_nband++;
32947c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
32957c478bd9Sstevel@tonic-gate 			}
32967c478bd9Sstevel@tonic-gate 		}
32977c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
32987c478bd9Sstevel@tonic-gate 		i = pri;
32997c478bd9Sstevel@tonic-gate 		while (--i)
33007c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
33017c478bd9Sstevel@tonic-gate 	}
33027c478bd9Sstevel@tonic-gate 	switch (what) {
33037c478bd9Sstevel@tonic-gate 
33047c478bd9Sstevel@tonic-gate 	case QHIWAT:
33057c478bd9Sstevel@tonic-gate 		if (qbp)
33067c478bd9Sstevel@tonic-gate 			qbp->qb_hiwat = (size_t)val;
33077c478bd9Sstevel@tonic-gate 		else
33087c478bd9Sstevel@tonic-gate 			q->q_hiwat = (size_t)val;
33097c478bd9Sstevel@tonic-gate 		break;
33107c478bd9Sstevel@tonic-gate 
33117c478bd9Sstevel@tonic-gate 	case QLOWAT:
33127c478bd9Sstevel@tonic-gate 		if (qbp)
33137c478bd9Sstevel@tonic-gate 			qbp->qb_lowat = (size_t)val;
33147c478bd9Sstevel@tonic-gate 		else
33157c478bd9Sstevel@tonic-gate 			q->q_lowat = (size_t)val;
33167c478bd9Sstevel@tonic-gate 		break;
33177c478bd9Sstevel@tonic-gate 
33187c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
33197c478bd9Sstevel@tonic-gate 		if (qbp)
33207c478bd9Sstevel@tonic-gate 			error = EINVAL;
33217c478bd9Sstevel@tonic-gate 		else
33227c478bd9Sstevel@tonic-gate 			q->q_maxpsz = (ssize_t)val;
33237c478bd9Sstevel@tonic-gate 
33247c478bd9Sstevel@tonic-gate 		/*
33257c478bd9Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
33267c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
33277c478bd9Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
33287c478bd9Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
33297c478bd9Sstevel@tonic-gate 		 */
33307c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
33317c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
33327c478bd9Sstevel@tonic-gate 			break;
33337c478bd9Sstevel@tonic-gate 
33347c478bd9Sstevel@tonic-gate 		/*
33357c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
33367c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
33377c478bd9Sstevel@tonic-gate 		 */
33387c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
33397c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
33407c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
33417c478bd9Sstevel@tonic-gate 		}
33427c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(wrq)));
33437c478bd9Sstevel@tonic-gate 
33447c478bd9Sstevel@tonic-gate 		if (strmsgsz != 0) {
33457c478bd9Sstevel@tonic-gate 			if (val == INFPSZ)
33467c478bd9Sstevel@tonic-gate 				val = strmsgsz;
33477c478bd9Sstevel@tonic-gate 			else  {
33487c478bd9Sstevel@tonic-gate 				if (STREAM(q)->sd_vnode->v_type == VFIFO)
33497c478bd9Sstevel@tonic-gate 					val = MIN(PIPE_BUF, val);
33507c478bd9Sstevel@tonic-gate 				else
33517c478bd9Sstevel@tonic-gate 					val = MIN(strmsgsz, val);
33527c478bd9Sstevel@tonic-gate 			}
33537c478bd9Sstevel@tonic-gate 		}
33547c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_maxpsz = val;
33557c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
33567c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
33577c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
33587c478bd9Sstevel@tonic-gate 		}
33597c478bd9Sstevel@tonic-gate 		break;
33607c478bd9Sstevel@tonic-gate 
33617c478bd9Sstevel@tonic-gate 	case QMINPSZ:
33627c478bd9Sstevel@tonic-gate 		if (qbp)
33637c478bd9Sstevel@tonic-gate 			error = EINVAL;
33647c478bd9Sstevel@tonic-gate 		else
33657c478bd9Sstevel@tonic-gate 			q->q_minpsz = (ssize_t)val;
33667c478bd9Sstevel@tonic-gate 
33677c478bd9Sstevel@tonic-gate 		/*
33687c478bd9Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
33697c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
33707c478bd9Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
33717c478bd9Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
33727c478bd9Sstevel@tonic-gate 		 */
33737c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
33747c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
33757c478bd9Sstevel@tonic-gate 			break;
33767c478bd9Sstevel@tonic-gate 
33777c478bd9Sstevel@tonic-gate 		/*
33787c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
33797c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
33807c478bd9Sstevel@tonic-gate 		 */
33817c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
33827c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
33837c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
33847c478bd9Sstevel@tonic-gate 		}
33857c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_minpsz = (ssize_t)val;
33867c478bd9Sstevel@tonic-gate 
33877c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
33887c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
33897c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
33907c478bd9Sstevel@tonic-gate 		}
33917c478bd9Sstevel@tonic-gate 		break;
33927c478bd9Sstevel@tonic-gate 
33937c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
33947c478bd9Sstevel@tonic-gate 		if (qbp)
33957c478bd9Sstevel@tonic-gate 			error = EINVAL;
33967c478bd9Sstevel@tonic-gate 		else
33977c478bd9Sstevel@tonic-gate 			q->q_struiot = (ushort_t)val;
33987c478bd9Sstevel@tonic-gate 		break;
33997c478bd9Sstevel@tonic-gate 
34007c478bd9Sstevel@tonic-gate 	case QCOUNT:
34017c478bd9Sstevel@tonic-gate 	case QFIRST:
34027c478bd9Sstevel@tonic-gate 	case QLAST:
34037c478bd9Sstevel@tonic-gate 	case QFLAG:
34047c478bd9Sstevel@tonic-gate 		error = EPERM;
34057c478bd9Sstevel@tonic-gate 		break;
34067c478bd9Sstevel@tonic-gate 
34077c478bd9Sstevel@tonic-gate 	default:
34087c478bd9Sstevel@tonic-gate 		error = EINVAL;
34097c478bd9Sstevel@tonic-gate 		break;
34107c478bd9Sstevel@tonic-gate 	}
34117c478bd9Sstevel@tonic-gate done:
34127c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
34137c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
34147c478bd9Sstevel@tonic-gate 	return (error);
34157c478bd9Sstevel@tonic-gate }
34167c478bd9Sstevel@tonic-gate 
34177c478bd9Sstevel@tonic-gate /*
34187c478bd9Sstevel@tonic-gate  * Get queue fields.
34197c478bd9Sstevel@tonic-gate  */
34207c478bd9Sstevel@tonic-gate int
strqget(queue_t * q,qfields_t what,unsigned char pri,void * valp)34217c478bd9Sstevel@tonic-gate strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
34227c478bd9Sstevel@tonic-gate {
34237c478bd9Sstevel@tonic-gate 	qband_t		*qbp = NULL;
34247c478bd9Sstevel@tonic-gate 	int		error = 0;
34257c478bd9Sstevel@tonic-gate 	kthread_id_t	freezer;
34267c478bd9Sstevel@tonic-gate 
34277c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
34287c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
34297c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
34307c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
34317c478bd9Sstevel@tonic-gate 	} else
34327c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
34337c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
34347c478bd9Sstevel@tonic-gate 		error = EINVAL;
34357c478bd9Sstevel@tonic-gate 		goto done;
34367c478bd9Sstevel@tonic-gate 	}
34377c478bd9Sstevel@tonic-gate 	if (pri != 0) {
34387c478bd9Sstevel@tonic-gate 		int i;
34397c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
34407c478bd9Sstevel@tonic-gate 
34417c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
34427c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
34437c478bd9Sstevel@tonic-gate 			while (*qbpp)
34447c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
34457c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
34467c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
34477c478bd9Sstevel@tonic-gate 					error = EAGAIN;
34487c478bd9Sstevel@tonic-gate 					goto done;
34497c478bd9Sstevel@tonic-gate 				}
34507c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
34517c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
34527c478bd9Sstevel@tonic-gate 				q->q_nband++;
34537c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
34547c478bd9Sstevel@tonic-gate 			}
34557c478bd9Sstevel@tonic-gate 		}
34567c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
34577c478bd9Sstevel@tonic-gate 		i = pri;
34587c478bd9Sstevel@tonic-gate 		while (--i)
34597c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
34607c478bd9Sstevel@tonic-gate 	}
34617c478bd9Sstevel@tonic-gate 	switch (what) {
34627c478bd9Sstevel@tonic-gate 	case QHIWAT:
34637c478bd9Sstevel@tonic-gate 		if (qbp)
34647c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_hiwat;
34657c478bd9Sstevel@tonic-gate 		else
34667c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_hiwat;
34677c478bd9Sstevel@tonic-gate 		break;
34687c478bd9Sstevel@tonic-gate 
34697c478bd9Sstevel@tonic-gate 	case QLOWAT:
34707c478bd9Sstevel@tonic-gate 		if (qbp)
34717c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_lowat;
34727c478bd9Sstevel@tonic-gate 		else
34737c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_lowat;
34747c478bd9Sstevel@tonic-gate 		break;
34757c478bd9Sstevel@tonic-gate 
34767c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
34777c478bd9Sstevel@tonic-gate 		if (qbp)
34787c478bd9Sstevel@tonic-gate 			error = EINVAL;
34797c478bd9Sstevel@tonic-gate 		else
34807c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_maxpsz;
34817c478bd9Sstevel@tonic-gate 		break;
34827c478bd9Sstevel@tonic-gate 
34837c478bd9Sstevel@tonic-gate 	case QMINPSZ:
34847c478bd9Sstevel@tonic-gate 		if (qbp)
34857c478bd9Sstevel@tonic-gate 			error = EINVAL;
34867c478bd9Sstevel@tonic-gate 		else
34877c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_minpsz;
34887c478bd9Sstevel@tonic-gate 		break;
34897c478bd9Sstevel@tonic-gate 
34907c478bd9Sstevel@tonic-gate 	case QCOUNT:
34917c478bd9Sstevel@tonic-gate 		if (qbp)
34927c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_count;
34937c478bd9Sstevel@tonic-gate 		else
34947c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_count;
34957c478bd9Sstevel@tonic-gate 		break;
34967c478bd9Sstevel@tonic-gate 
34977c478bd9Sstevel@tonic-gate 	case QFIRST:
34987c478bd9Sstevel@tonic-gate 		if (qbp)
34997c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_first;
35007c478bd9Sstevel@tonic-gate 		else
35017c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_first;
35027c478bd9Sstevel@tonic-gate 		break;
35037c478bd9Sstevel@tonic-gate 
35047c478bd9Sstevel@tonic-gate 	case QLAST:
35057c478bd9Sstevel@tonic-gate 		if (qbp)
35067c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_last;
35077c478bd9Sstevel@tonic-gate 		else
35087c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_last;
35097c478bd9Sstevel@tonic-gate 		break;
35107c478bd9Sstevel@tonic-gate 
35117c478bd9Sstevel@tonic-gate 	case QFLAG:
35127c478bd9Sstevel@tonic-gate 		if (qbp)
35137c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = qbp->qb_flag;
35147c478bd9Sstevel@tonic-gate 		else
35157c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = q->q_flag;
35167c478bd9Sstevel@tonic-gate 		break;
35177c478bd9Sstevel@tonic-gate 
35187c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
35197c478bd9Sstevel@tonic-gate 		if (qbp)
35207c478bd9Sstevel@tonic-gate 			error = EINVAL;
35217c478bd9Sstevel@tonic-gate 		else
35227c478bd9Sstevel@tonic-gate 			*(short *)valp = q->q_struiot;
35237c478bd9Sstevel@tonic-gate 		break;
35247c478bd9Sstevel@tonic-gate 
35257c478bd9Sstevel@tonic-gate 	default:
35267c478bd9Sstevel@tonic-gate 		error = EINVAL;
35277c478bd9Sstevel@tonic-gate 		break;
35287c478bd9Sstevel@tonic-gate 	}
35297c478bd9Sstevel@tonic-gate done:
35307c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
35317c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
35327c478bd9Sstevel@tonic-gate 	return (error);
35337c478bd9Sstevel@tonic-gate }
35347c478bd9Sstevel@tonic-gate 
35357c478bd9Sstevel@tonic-gate /*
35367c478bd9Sstevel@tonic-gate  * Function awakes all in cvwait/sigwait/pollwait, on one of:
35377c478bd9Sstevel@tonic-gate  *	QWANTWSYNC or QWANTR or QWANTW,
35387c478bd9Sstevel@tonic-gate  *
35397c478bd9Sstevel@tonic-gate  * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
35407c478bd9Sstevel@tonic-gate  *	 deferred wakeup will be done. Also if strpoll() in progress then a
35417c478bd9Sstevel@tonic-gate  *	 deferred pollwakeup will be done.
35427c478bd9Sstevel@tonic-gate  */
35437c478bd9Sstevel@tonic-gate void
strwakeq(queue_t * q,int flag)35447c478bd9Sstevel@tonic-gate strwakeq(queue_t *q, int flag)
35457c478bd9Sstevel@tonic-gate {
35467c478bd9Sstevel@tonic-gate 	stdata_t	*stp = STREAM(q);
35477c478bd9Sstevel@tonic-gate 	pollhead_t	*pl;
35487c478bd9Sstevel@tonic-gate 
35497c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
35507c478bd9Sstevel@tonic-gate 	pl = &stp->sd_pollist;
35517c478bd9Sstevel@tonic-gate 	if (flag & QWANTWSYNC) {
35527c478bd9Sstevel@tonic-gate 		ASSERT(!(q->q_flag & QREADR));
35537c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
35547c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
35557c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
35567c478bd9Sstevel@tonic-gate 		} else {
35577c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= WSLEEP;
35587c478bd9Sstevel@tonic-gate 		}
35597c478bd9Sstevel@tonic-gate 
35607c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
35617c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
35627c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
35637c478bd9Sstevel@tonic-gate 
35647c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
35657c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
35667c478bd9Sstevel@tonic-gate 	} else if (flag & QWANTR) {
35677c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & RSLEEP) {
35687c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~RSLEEP;
35697c478bd9Sstevel@tonic-gate 			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
35707c478bd9Sstevel@tonic-gate 		} else {
35717c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= RSLEEP;
35727c478bd9Sstevel@tonic-gate 		}
35737c478bd9Sstevel@tonic-gate 
35747c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
35757c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLIN | POLLRDNORM);
35767c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
35777c478bd9Sstevel@tonic-gate 
35787c478bd9Sstevel@tonic-gate 		{
35797c478bd9Sstevel@tonic-gate 			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);
35807c478bd9Sstevel@tonic-gate 
35817c478bd9Sstevel@tonic-gate 			if (events)
35827c478bd9Sstevel@tonic-gate 				strsendsig(stp->sd_siglist, events, 0, 0);
35837c478bd9Sstevel@tonic-gate 		}
35847c478bd9Sstevel@tonic-gate 	} else {
35857c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
35867c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
35877c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
35887c478bd9Sstevel@tonic-gate 		}
35897c478bd9Sstevel@tonic-gate 
35907c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
35917c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
35927c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
35937c478bd9Sstevel@tonic-gate 
35947c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
35957c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
35967c478bd9Sstevel@tonic-gate 	}
35977c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
35987c478bd9Sstevel@tonic-gate }
35997c478bd9Sstevel@tonic-gate 
36007c478bd9Sstevel@tonic-gate int
struioget(queue_t * q,mblk_t * mp,struiod_t * dp,int noblock)36017c478bd9Sstevel@tonic-gate struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
36027c478bd9Sstevel@tonic-gate {
36037c478bd9Sstevel@tonic-gate 	stdata_t *stp = STREAM(q);
36047c478bd9Sstevel@tonic-gate 	int typ  = STRUIOT_STANDARD;
36057c478bd9Sstevel@tonic-gate 	uio_t	 *uiop = &dp->d_uio;
36067c478bd9Sstevel@tonic-gate 	dblk_t	 *dbp;
36077c478bd9Sstevel@tonic-gate 	ssize_t	 uiocnt;
36087c478bd9Sstevel@tonic-gate 	ssize_t	 cnt;
36097c478bd9Sstevel@tonic-gate 	unsigned char *ptr;
36107c478bd9Sstevel@tonic-gate 	ssize_t	 resid;
36117c478bd9Sstevel@tonic-gate 	int	 error = 0;
36127c478bd9Sstevel@tonic-gate 	on_trap_data_t otd;
36137c478bd9Sstevel@tonic-gate 	queue_t	*stwrq;
36147c478bd9Sstevel@tonic-gate 
36157c478bd9Sstevel@tonic-gate 	/*
36167c478bd9Sstevel@tonic-gate 	 * Plumbing may change while taking the type so store the
36177c478bd9Sstevel@tonic-gate 	 * queue in a temporary variable. It doesn't matter even
36187c478bd9Sstevel@tonic-gate 	 * if the we take the type from the previous plumbing,
36197c478bd9Sstevel@tonic-gate 	 * that's because if the plumbing has changed when we were
36207c478bd9Sstevel@tonic-gate 	 * holding the queue in a temporary variable, we can continue
36217c478bd9Sstevel@tonic-gate 	 * processing the message the way it would have been processed
36227c478bd9Sstevel@tonic-gate 	 * in the old plumbing, without any side effects but a bit
36237c478bd9Sstevel@tonic-gate 	 * extra processing for partial ip header checksum.
36247c478bd9Sstevel@tonic-gate 	 *
36257c478bd9Sstevel@tonic-gate 	 * This has been done to avoid holding the sd_lock which is
36267c478bd9Sstevel@tonic-gate 	 * very hot.
36277c478bd9Sstevel@tonic-gate 	 */
36287c478bd9Sstevel@tonic-gate 
36297c478bd9Sstevel@tonic-gate 	stwrq = stp->sd_struiowrq;
36307c478bd9Sstevel@tonic-gate 	if (stwrq)
36317c478bd9Sstevel@tonic-gate 		typ = stwrq->q_struiot;
36327c478bd9Sstevel@tonic-gate 
36337c478bd9Sstevel@tonic-gate 	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
36347c478bd9Sstevel@tonic-gate 		dbp = mp->b_datap;
36357c478bd9Sstevel@tonic-gate 		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
36367c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
36377c478bd9Sstevel@tonic-gate 		cnt = MIN(uiocnt, uiop->uio_resid);
36387c478bd9Sstevel@tonic-gate 		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
36397c478bd9Sstevel@tonic-gate 		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
36407c478bd9Sstevel@tonic-gate 			/*
36417c478bd9Sstevel@tonic-gate 			 * Either this mblk has already been processed
36427c478bd9Sstevel@tonic-gate 			 * or there is no more room in this mblk (?).
36437c478bd9Sstevel@tonic-gate 			 */
36447c478bd9Sstevel@tonic-gate 			continue;
36457c478bd9Sstevel@tonic-gate 		}
36467c478bd9Sstevel@tonic-gate 		switch (typ) {
36477c478bd9Sstevel@tonic-gate 		case STRUIOT_STANDARD:
36487c478bd9Sstevel@tonic-gate 			if (noblock) {
36497c478bd9Sstevel@tonic-gate 				if (on_trap(&otd, OT_DATA_ACCESS)) {
36507c478bd9Sstevel@tonic-gate 					no_trap();
36517c478bd9Sstevel@tonic-gate 					error = EWOULDBLOCK;
36527c478bd9Sstevel@tonic-gate 					goto out;
36537c478bd9Sstevel@tonic-gate 				}
36547c478bd9Sstevel@tonic-gate 			}
36557c478bd9Sstevel@tonic-gate 			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
36567c478bd9Sstevel@tonic-gate 				if (noblock)
36577c478bd9Sstevel@tonic-gate 					no_trap();
36587c478bd9Sstevel@tonic-gate 				goto out;
36597c478bd9Sstevel@tonic-gate 			}
36607c478bd9Sstevel@tonic-gate 			if (noblock)
36617c478bd9Sstevel@tonic-gate 				no_trap();
36627c478bd9Sstevel@tonic-gate 			break;
36637c478bd9Sstevel@tonic-gate 
36647c478bd9Sstevel@tonic-gate 		default:
36657c478bd9Sstevel@tonic-gate 			error = EIO;
36667c478bd9Sstevel@tonic-gate 			goto out;
36677c478bd9Sstevel@tonic-gate 		}
36687c478bd9Sstevel@tonic-gate 		dbp->db_struioflag |= STRUIO_DONE;
36697c478bd9Sstevel@tonic-gate 		dbp->db_cksumstuff += cnt;
36707c478bd9Sstevel@tonic-gate 	}
36717c478bd9Sstevel@tonic-gate out:
36727c478bd9Sstevel@tonic-gate 	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
36737c478bd9Sstevel@tonic-gate 		/*
36747c478bd9Sstevel@tonic-gate 		 * A fault has occured and some bytes were moved to the
36757c478bd9Sstevel@tonic-gate 		 * current mblk, the uio_t has already been updated by
36767c478bd9Sstevel@tonic-gate 		 * the appropriate uio routine, so also update the mblk
36777c478bd9Sstevel@tonic-gate 		 * to reflect this in case this same mblk chain is used
36787c478bd9Sstevel@tonic-gate 		 * again (after the fault has been handled).
36797c478bd9Sstevel@tonic-gate 		 */
36807c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
36817c478bd9Sstevel@tonic-gate 		if (uiocnt >= resid)
36827c478bd9Sstevel@tonic-gate 			dbp->db_cksumstuff += resid;
36837c478bd9Sstevel@tonic-gate 	}
36847c478bd9Sstevel@tonic-gate 	return (error);
36857c478bd9Sstevel@tonic-gate }
36867c478bd9Sstevel@tonic-gate 
36877c478bd9Sstevel@tonic-gate /*
36887c478bd9Sstevel@tonic-gate  * Try to enter queue synchronously. Any attempt to enter a closing queue will
36897c478bd9Sstevel@tonic-gate  * fails. The qp->q_rwcnt keeps track of the number of successful entries so
36907c478bd9Sstevel@tonic-gate  * that removeq() will not try to close the queue while a thread is inside the
36917c478bd9Sstevel@tonic-gate  * queue.
36927c478bd9Sstevel@tonic-gate  */
36937c478bd9Sstevel@tonic-gate static boolean_t
rwnext_enter(queue_t * qp)36947c478bd9Sstevel@tonic-gate rwnext_enter(queue_t *qp)
36957c478bd9Sstevel@tonic-gate {
36967c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
36977c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWCLOSE) {
36987c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
36997c478bd9Sstevel@tonic-gate 		return (B_FALSE);
37007c478bd9Sstevel@tonic-gate 	}
37017c478bd9Sstevel@tonic-gate 	qp->q_rwcnt++;
37027c478bd9Sstevel@tonic-gate 	ASSERT(qp->q_rwcnt != 0);
37037c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
37047c478bd9Sstevel@tonic-gate 	return (B_TRUE);
37057c478bd9Sstevel@tonic-gate }
37067c478bd9Sstevel@tonic-gate 
37077c478bd9Sstevel@tonic-gate /*
37087c478bd9Sstevel@tonic-gate  * Decrease the count of threads running in sync stream queue and wake up any
37097c478bd9Sstevel@tonic-gate  * threads blocked in removeq().
37107c478bd9Sstevel@tonic-gate  */
37117c478bd9Sstevel@tonic-gate static void
rwnext_exit(queue_t * qp)37127c478bd9Sstevel@tonic-gate rwnext_exit(queue_t *qp)
37137c478bd9Sstevel@tonic-gate {
37147c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
37157c478bd9Sstevel@tonic-gate 	qp->q_rwcnt--;
37167c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWANTRMQSYNC) {
37177c478bd9Sstevel@tonic-gate 		qp->q_flag &= ~QWANTRMQSYNC;
37187c478bd9Sstevel@tonic-gate 		cv_broadcast(&qp->q_wait);
37197c478bd9Sstevel@tonic-gate 	}
37207c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
37217c478bd9Sstevel@tonic-gate }
37227c478bd9Sstevel@tonic-gate 
37237c478bd9Sstevel@tonic-gate /*
37247c478bd9Sstevel@tonic-gate  * The purpose of rwnext() is to call the rw procedure of the next
37257c478bd9Sstevel@tonic-gate  * (downstream) modules queue.
37267c478bd9Sstevel@tonic-gate  *
37277c478bd9Sstevel@tonic-gate  * treated as put entrypoint for perimeter syncronization.
37287c478bd9Sstevel@tonic-gate  *
37297c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
37307c478bd9Sstevel@tonic-gate  * sync queues). If it is CIPUT sync queue sq_count is incremented and it does
37317c478bd9Sstevel@tonic-gate  * not matter if any regular put entrypoints have been already entered. We
37327c478bd9Sstevel@tonic-gate  * can't increment one of the sq_putcounts (instead of sq_count) because
37337c478bd9Sstevel@tonic-gate  * qwait_rw won't know which counter to decrement.
37347c478bd9Sstevel@tonic-gate  *
37357c478bd9Sstevel@tonic-gate  * It would be reasonable to add the lockless FASTPUT logic.
37367c478bd9Sstevel@tonic-gate  */
37377c478bd9Sstevel@tonic-gate int
rwnext(queue_t * qp,struiod_t * dp)37387c478bd9Sstevel@tonic-gate rwnext(queue_t *qp, struiod_t *dp)
37397c478bd9Sstevel@tonic-gate {
37407c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
37417c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
37427c478bd9Sstevel@tonic-gate 	uint16_t	count;
37437c478bd9Sstevel@tonic-gate 	uint16_t	flags;
37447c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
37457c478bd9Sstevel@tonic-gate 	int		(*proc)();
37467c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
37477c478bd9Sstevel@tonic-gate 	int		isread;
37487c478bd9Sstevel@tonic-gate 	int		rval;
37497c478bd9Sstevel@tonic-gate 
37507c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
37517c478bd9Sstevel@tonic-gate 	/*
37527c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until acquiring
37537c478bd9Sstevel@tonic-gate 	 * SQLOCK. Note that a read-side rwnext from the streamhead will
37547c478bd9Sstevel@tonic-gate 	 * already have sd_lock acquired. In either case sd_lock is always
37557c478bd9Sstevel@tonic-gate 	 * released after acquiring SQLOCK.
37567c478bd9Sstevel@tonic-gate 	 *
37577c478bd9Sstevel@tonic-gate 	 * The streamhead read-side holding sd_lock when calling rwnext is
37587c478bd9Sstevel@tonic-gate 	 * required to prevent a race condition were M_DATA mblks flowing
37597c478bd9Sstevel@tonic-gate 	 * up the read-side of the stream could be bypassed by a rwnext()
37607c478bd9Sstevel@tonic-gate 	 * down-call. In this case sd_lock acts as the streamhead perimeter.
37617c478bd9Sstevel@tonic-gate 	 */
37627c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
37637c478bd9Sstevel@tonic-gate 		isread = 0;
37647c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
37657c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
37667c478bd9Sstevel@tonic-gate 	} else {
37677c478bd9Sstevel@tonic-gate 		isread = 1;
37687c478bd9Sstevel@tonic-gate 		if (nqp != stp->sd_wrq)
37697c478bd9Sstevel@tonic-gate 			/* Not streamhead */
37707c478bd9Sstevel@tonic-gate 			mutex_enter(&stp->sd_lock);
37717c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
37727c478bd9Sstevel@tonic-gate 	}
37737c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
37747c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
37757c478bd9Sstevel@tonic-gate 		/*
37767c478bd9Sstevel@tonic-gate 		 * Not a synchronous module or no r/w procedure for this
37777c478bd9Sstevel@tonic-gate 		 * queue, so just return EINVAL and let the caller handle it.
37787c478bd9Sstevel@tonic-gate 		 */
37797c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
37807c478bd9Sstevel@tonic-gate 		return (EINVAL);
37817c478bd9Sstevel@tonic-gate 	}
37827c478bd9Sstevel@tonic-gate 
37837c478bd9Sstevel@tonic-gate 	if (rwnext_enter(qp) == B_FALSE) {
37847c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
37857c478bd9Sstevel@tonic-gate 		return (EINVAL);
37867c478bd9Sstevel@tonic-gate 	}
37877c478bd9Sstevel@tonic-gate 
37887c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
37897c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
37907c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
37917c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
37927c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
37937c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
37947c478bd9Sstevel@tonic-gate 
37957c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
37967c478bd9Sstevel@tonic-gate 		/*
37977c478bd9Sstevel@tonic-gate 		 * if this queue is being closed, return.
37987c478bd9Sstevel@tonic-gate 		 */
37997c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QWCLOSE) {
38007c478bd9Sstevel@tonic-gate 			mutex_exit(SQLOCK(sq));
38017c478bd9Sstevel@tonic-gate 			rwnext_exit(qp);
38027c478bd9Sstevel@tonic-gate 			return (EINVAL);
38037c478bd9Sstevel@tonic-gate 		}
38047c478bd9Sstevel@tonic-gate 
38057c478bd9Sstevel@tonic-gate 		/*
38067c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
38077c478bd9Sstevel@tonic-gate 		 */
38087c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
38097c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
38107c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
38117c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
38127c478bd9Sstevel@tonic-gate 	}
38137c478bd9Sstevel@tonic-gate 
38147c478bd9Sstevel@tonic-gate 	if (isread == 0 && stp->sd_struiowrq == NULL ||
38157c478bd9Sstevel@tonic-gate 	    isread == 1 && stp->sd_struiordq == NULL) {
38167c478bd9Sstevel@tonic-gate 		/*
38177c478bd9Sstevel@tonic-gate 		 * Stream plumbing changed while waiting for inner perimeter
38187c478bd9Sstevel@tonic-gate 		 * so just return EINVAL and let the caller handle it.
38197c478bd9Sstevel@tonic-gate 		 */
38207c478bd9Sstevel@tonic-gate 		mutex_exit(SQLOCK(sq));
38217c478bd9Sstevel@tonic-gate 		rwnext_exit(qp);
38227c478bd9Sstevel@tonic-gate 		return (EINVAL);
38237c478bd9Sstevel@tonic-gate 	}
38247c478bd9Sstevel@tonic-gate 	if (!(flags & SQ_CIPUT))
38257c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
38267c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
38277c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
38287c478bd9Sstevel@tonic-gate 	/*
38297c478bd9Sstevel@tonic-gate 	 * Note: The only message ordering guarantee that rwnext() makes is
38307c478bd9Sstevel@tonic-gate 	 *	 for the write queue flow-control case. All others (r/w queue
38317c478bd9Sstevel@tonic-gate 	 *	 with q_count > 0 (or q_first != 0)) are the resposibilty of
38327c478bd9Sstevel@tonic-gate 	 *	 the queue's rw procedure. This could be genralized here buy
38337c478bd9Sstevel@tonic-gate 	 *	 running the queue's service procedure, but that wouldn't be
38347c478bd9Sstevel@tonic-gate 	 *	 the most efficent for all cases.
38357c478bd9Sstevel@tonic-gate 	 */
38367c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
38377c478bd9Sstevel@tonic-gate 	if (! isread && (qp->q_flag & QFULL)) {
38387c478bd9Sstevel@tonic-gate 		/*
38397c478bd9Sstevel@tonic-gate 		 * Write queue may be flow controlled. If so,
38407c478bd9Sstevel@tonic-gate 		 * mark the queue for wakeup when it's not.
38417c478bd9Sstevel@tonic-gate 		 */
38427c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(qp));
38437c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QFULL) {
38447c478bd9Sstevel@tonic-gate 			qp->q_flag |= QWANTWSYNC;
38457c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(qp));
38467c478bd9Sstevel@tonic-gate 			rval = EWOULDBLOCK;
38477c478bd9Sstevel@tonic-gate 			goto out;
38487c478bd9Sstevel@tonic-gate 		}
38497c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
38507c478bd9Sstevel@tonic-gate 	}
38517c478bd9Sstevel@tonic-gate 
38527c478bd9Sstevel@tonic-gate 	if (! isread && dp->d_mp)
38537c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
38547c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_datap->db_base);
38557c478bd9Sstevel@tonic-gate 
38567c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, dp);
38577c478bd9Sstevel@tonic-gate 
38587c478bd9Sstevel@tonic-gate 	if (isread && dp->d_mp)
38597c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
38607c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
38617c478bd9Sstevel@tonic-gate out:
38627c478bd9Sstevel@tonic-gate 	/*
38637c478bd9Sstevel@tonic-gate 	 * The queue is protected from being freed by sq_count, so it is
38647c478bd9Sstevel@tonic-gate 	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
38657c478bd9Sstevel@tonic-gate 	 */
38667c478bd9Sstevel@tonic-gate 	rwnext_exit(qp);
38677c478bd9Sstevel@tonic-gate 
38687c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
38697c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
38707c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
38717c478bd9Sstevel@tonic-gate 	sq->sq_count--;
38727c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
38737c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
38747c478bd9Sstevel@tonic-gate 		/*
38757c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
38767c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
38777c478bd9Sstevel@tonic-gate 		 */
38787c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
38797c478bd9Sstevel@tonic-gate 		return (rval);
38807c478bd9Sstevel@tonic-gate 	}
38817c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
38827c478bd9Sstevel@tonic-gate 	/*
38837c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
38847c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
38857c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
38867c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
38877c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
38887c478bd9Sstevel@tonic-gate 	 *
38897c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
38907c478bd9Sstevel@tonic-gate 	 *
38917c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
38927c478bd9Sstevel@tonic-gate 	 *	sq->sq_count == 0);
38937c478bd9Sstevel@tonic-gate 	 *
38947c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
38957c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
38967c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
38977c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
38987c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
38997c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
39007c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
39017c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
39027c478bd9Sstevel@tonic-gate 	 * test invalid.
39037c478bd9Sstevel@tonic-gate 	 */
39047c478bd9Sstevel@tonic-gate 
39057c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
39067c478bd9Sstevel@tonic-gate 	if (sq->sq_flags & SQ_WANTWAKEUP) {
39077c478bd9Sstevel@tonic-gate 		sq->sq_flags &= ~SQ_WANTWAKEUP;
39087c478bd9Sstevel@tonic-gate 		cv_broadcast(&sq->sq_wait);
39097c478bd9Sstevel@tonic-gate 	}
39107c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
39117c478bd9Sstevel@tonic-gate 	return (rval);
39127c478bd9Sstevel@tonic-gate }
39137c478bd9Sstevel@tonic-gate 
39147c478bd9Sstevel@tonic-gate /*
39157c478bd9Sstevel@tonic-gate  * The purpose of infonext() is to call the info procedure of the next
39167c478bd9Sstevel@tonic-gate  * (downstream) modules queue.
39177c478bd9Sstevel@tonic-gate  *
39187c478bd9Sstevel@tonic-gate  * treated as put entrypoint for perimeter syncronization.
39197c478bd9Sstevel@tonic-gate  *
39207c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
39217c478bd9Sstevel@tonic-gate  * sync queues). If it is CIPUT sync queue regular sq_count is incremented and
39227c478bd9Sstevel@tonic-gate  * it does not matter if any regular put entrypoints have been already
39237c478bd9Sstevel@tonic-gate  * entered.
39247c478bd9Sstevel@tonic-gate  */
39257c478bd9Sstevel@tonic-gate int
infonext(queue_t * qp,infod_t * idp)39267c478bd9Sstevel@tonic-gate infonext(queue_t *qp, infod_t *idp)
39277c478bd9Sstevel@tonic-gate {
39287c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
39297c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
39307c478bd9Sstevel@tonic-gate 	uint16_t	count;
39317c478bd9Sstevel@tonic-gate 	uint16_t	flags;
39327c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
39337c478bd9Sstevel@tonic-gate 	int		(*proc)();
39347c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
39357c478bd9Sstevel@tonic-gate 	int		rval;
39367c478bd9Sstevel@tonic-gate 
39377c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
39387c478bd9Sstevel@tonic-gate 	/*
39397c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until
39407c478bd9Sstevel@tonic-gate 	 * acquiring SQLOCK.
39417c478bd9Sstevel@tonic-gate 	 */
39427c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
39437c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
39447c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
39457c478bd9Sstevel@tonic-gate 	} else {
39467c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
39477c478bd9Sstevel@tonic-gate 	}
39487c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
39497c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
39507c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
39517c478bd9Sstevel@tonic-gate 		return (EINVAL);
39527c478bd9Sstevel@tonic-gate 	}
39537c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
39547c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
39557c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
39567c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
39577c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
39587c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
39597c478bd9Sstevel@tonic-gate 
39607c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
39617c478bd9Sstevel@tonic-gate 		/*
39627c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
39637c478bd9Sstevel@tonic-gate 		 */
39647c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
39657c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
39667c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
39677c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
39687c478bd9Sstevel@tonic-gate 	}
39697c478bd9Sstevel@tonic-gate 
39707c478bd9Sstevel@tonic-gate 	if (! (flags & SQ_CIPUT))
39717c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
39727c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
39737c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
39747c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
39757c478bd9Sstevel@tonic-gate 
39767c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, idp);
39777c478bd9Sstevel@tonic-gate 
39787c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
39797c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
39807c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
39817c478bd9Sstevel@tonic-gate 	sq->sq_count--;
39827c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
39837c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
39847c478bd9Sstevel@tonic-gate 		/*
39857c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
39867c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
39877c478bd9Sstevel@tonic-gate 		 */
39887c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
39897c478bd9Sstevel@tonic-gate 		return (rval);
39907c478bd9Sstevel@tonic-gate 	}
39917c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
39927c478bd9Sstevel@tonic-gate /*
39937c478bd9Sstevel@tonic-gate  * XXXX
39947c478bd9Sstevel@tonic-gate  * I am not certain the next comment is correct here.  I need to consider
39957c478bd9Sstevel@tonic-gate  * why the infonext is called, and if dropping SQ_EXCL unless non-CIPUT
39967c478bd9Sstevel@tonic-gate  * might cause other problems.  It just might be safer to drop it if
39977c478bd9Sstevel@tonic-gate  * !SQ_CIPUT because that is when we set it.
39987c478bd9Sstevel@tonic-gate  */
39997c478bd9Sstevel@tonic-gate 	/*
40007c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
40017c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
40027c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
40037c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
40047c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
40057c478bd9Sstevel@tonic-gate 	 *
40067c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
40077c478bd9Sstevel@tonic-gate 	 *
40087c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
40097c478bd9Sstevel@tonic-gate 	 *	sq->sq_count == 0);
40107c478bd9Sstevel@tonic-gate 	 *
40117c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
40127c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
40137c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
40147c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
40157c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
40167c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
40177c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
40187c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
40197c478bd9Sstevel@tonic-gate 	 * test invalid.
40207c478bd9Sstevel@tonic-gate 	 */
40217c478bd9Sstevel@tonic-gate 
40227c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
40237c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
40247c478bd9Sstevel@tonic-gate 	return (rval);
40257c478bd9Sstevel@tonic-gate }
40267c478bd9Sstevel@tonic-gate 
40277c478bd9Sstevel@tonic-gate /*
40287c478bd9Sstevel@tonic-gate  * Return nonzero if the queue is responsible for struio(), else return 0.
40297c478bd9Sstevel@tonic-gate  */
40307c478bd9Sstevel@tonic-gate int
isuioq(queue_t * q)40317c478bd9Sstevel@tonic-gate isuioq(queue_t *q)
40327c478bd9Sstevel@tonic-gate {
40337c478bd9Sstevel@tonic-gate 	if (q->q_flag & QREADR)
40347c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiordq == q);
40357c478bd9Sstevel@tonic-gate 	else
40367c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiowrq == q);
40377c478bd9Sstevel@tonic-gate }
40387c478bd9Sstevel@tonic-gate 
/*
 * Defaults to 0 on sparc and to 1 everywhere else.  create_syncq_putlocks()
 * asserts that this is 0, so it is only expected to run while the variable
 * is clear.
 */
#if !defined(__sparc)
int disable_putlocks = 1;
#else
int disable_putlocks = 0;
#endif
40447c478bd9Sstevel@tonic-gate 
40457c478bd9Sstevel@tonic-gate /*
40467c478bd9Sstevel@tonic-gate  * called by create_putlock.
40477c478bd9Sstevel@tonic-gate  */
40487c478bd9Sstevel@tonic-gate static void
create_syncq_putlocks(queue_t * q)40497c478bd9Sstevel@tonic-gate create_syncq_putlocks(queue_t *q)
40507c478bd9Sstevel@tonic-gate {
40517c478bd9Sstevel@tonic-gate 	syncq_t	*sq = q->q_syncq;
40527c478bd9Sstevel@tonic-gate 	ciputctrl_t *cip;
40537c478bd9Sstevel@tonic-gate 	int i;
40547c478bd9Sstevel@tonic-gate 
40557c478bd9Sstevel@tonic-gate 	ASSERT(sq != NULL);
40567c478bd9Sstevel@tonic-gate 
40577c478bd9Sstevel@tonic-gate 	ASSERT(disable_putlocks == 0);
40587c478bd9Sstevel@tonic-gate 	ASSERT(n_ciputctrl >= min_n_ciputctrl);
40597c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
40607c478bd9Sstevel@tonic-gate 
40617c478bd9Sstevel@tonic-gate 	if (!(sq->sq_type & SQ_CIPUT))
40627c478bd9Sstevel@tonic-gate 		return;
40637c478bd9Sstevel@tonic-gate 
40647c478bd9Sstevel@tonic-gate 	for (i = 0; i <= 1; i++) {
40657c478bd9Sstevel@tonic-gate 		if (sq->sq_ciputctrl == NULL) {
40667c478bd9Sstevel@tonic-gate 			cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
40677c478bd9Sstevel@tonic-gate 			SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
40687c478bd9Sstevel@tonic-gate 			mutex_enter(SQLOCK(sq));
40697c478bd9Sstevel@tonic-gate 			if (sq->sq_ciputctrl != NULL) {
40707c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
40717c478bd9Sstevel@tonic-gate 				kmem_cache_free(ciputctrl_cache, cip);
40727c478bd9Sstevel@tonic-gate 			} else {
40737c478bd9Sstevel@tonic-gate 				ASSERT(sq->sq_nciputctrl == 0);
40747c478bd9Sstevel@tonic-gate 				sq->sq_nciputctrl = n_ciputctrl - 1;
40757c478bd9Sstevel@tonic-gate 				/*
40767c478bd9Sstevel@tonic-gate 				 * putnext checks sq_ciputctrl without holding
40777c478bd9Sstevel@tonic-gate 				 * SQLOCK. if it is not NULL putnext assumes
40787c478bd9Sstevel@tonic-gate 				 * sq_nciputctrl is initialized. membar below
40797c478bd9Sstevel@tonic-gate 				 * insures that.
40807c478bd9Sstevel@tonic-gate 				 */
40817c478bd9Sstevel@tonic-gate 				membar_producer();
40827c478bd9Sstevel@tonic-gate 				sq->sq_ciputctrl = cip;
40837c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
40847c478bd9Sstevel@tonic-gate 			}
40857c478bd9Sstevel@tonic-gate 		}
40867c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
40877c478bd9Sstevel@tonic-gate 		if (i == 1)
40887c478bd9Sstevel@tonic-gate 			break;
40897c478bd9Sstevel@tonic-gate 		q = _OTHERQ(q);
40907c478bd9Sstevel@tonic-gate 		if (!(q->q_flag & QPERQ)) {
40917c478bd9Sstevel@tonic-gate 			ASSERT(sq == q->q_syncq);
40927c478bd9Sstevel@tonic-gate 			break;
40937c478bd9Sstevel@tonic-gate 		}
40947c478bd9Sstevel@tonic-gate 		ASSERT(q->q_syncq != NULL);
40957c478bd9Sstevel@tonic-gate 		ASSERT(sq != q->q_syncq);
40967c478bd9Sstevel@tonic-gate 		sq = q->q_syncq;
40977c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_type & SQ_CIPUT);
40987c478bd9Sstevel@tonic-gate 	}
40997c478bd9Sstevel@tonic-gate }
41007c478bd9Sstevel@tonic-gate 
41017c478bd9Sstevel@tonic-gate /*
41027c478bd9Sstevel@tonic-gate  * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for
41037c478bd9Sstevel@tonic-gate  * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for
41047c478bd9Sstevel@tonic-gate  * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
41057c478bd9Sstevel@tonic-gate  * starting from q and down to the driver.
41067c478bd9Sstevel@tonic-gate  *
41077c478bd9Sstevel@tonic-gate  * This should be called after the affected queues are part of stream
41087c478bd9Sstevel@tonic-gate  * geometry. It should be called from driver/module open routine after
41097c478bd9Sstevel@tonic-gate  * qprocson() call. It is also called from nfs syscall where it is known that
41107c478bd9Sstevel@tonic-gate  * stream is configured and won't change its geometry during create_putlock
41117c478bd9Sstevel@tonic-gate  * call.
41127c478bd9Sstevel@tonic-gate  *
41137c478bd9Sstevel@tonic-gate  * caller normally uses 0 value for the stream argument to speed up MT putnext
41147c478bd9Sstevel@tonic-gate  * into the perimeter of q for example because its perimeter is per module
41157c478bd9Sstevel@tonic-gate  * (e.g. IP).
41167c478bd9Sstevel@tonic-gate  *
41177c478bd9Sstevel@tonic-gate  * caller normally uses non 0 value for the stream argument to hint the system
41187c478bd9Sstevel@tonic-gate  * that the stream of q is a very contended global system stream
41197c478bd9Sstevel@tonic-gate  * (e.g. NFS/UDP) and the part of the stream from q to the driver is
41207c478bd9Sstevel@tonic-gate  * particularly MT hot.
41217c478bd9Sstevel@tonic-gate  *
41227c478bd9Sstevel@tonic-gate  * Caller insures stream plumbing won't happen while we are here and therefore
41237c478bd9Sstevel@tonic-gate  * q_next can be safely used.
41247c478bd9Sstevel@tonic-gate  */
41257c478bd9Sstevel@tonic-gate 
41267c478bd9Sstevel@tonic-gate void
create_putlocks(queue_t * q,int stream)41277c478bd9Sstevel@tonic-gate create_putlocks(queue_t *q, int stream)
41287c478bd9Sstevel@tonic-gate {
41297c478bd9Sstevel@tonic-gate 	ciputctrl_t	*cip;
41307c478bd9Sstevel@tonic-gate 	struct stdata	*stp = STREAM(q);
41317c478bd9Sstevel@tonic-gate 
41327c478bd9Sstevel@tonic-gate 	q = _WR(q);
41337c478bd9Sstevel@tonic-gate 	ASSERT(stp != NULL);
41347c478bd9Sstevel@tonic-gate 
41357c478bd9Sstevel@tonic-gate 	if (disable_putlocks != 0)
41367c478bd9Sstevel@tonic-gate 		return;
41377c478bd9Sstevel@tonic-gate 
41387c478bd9Sstevel@tonic-gate 	if (n_ciputctrl < min_n_ciputctrl)
41397c478bd9Sstevel@tonic-gate 		return;
41407c478bd9Sstevel@tonic-gate 
41417c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
41427c478bd9Sstevel@tonic-gate 
41437c478bd9Sstevel@tonic-gate 	if (stream != 0 && stp->sd_ciputctrl == NULL) {
41447c478bd9Sstevel@tonic-gate 		cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
41457c478bd9Sstevel@tonic-gate 		SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
41467c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
41477c478bd9Sstevel@tonic-gate 		if (stp->sd_ciputctrl != NULL) {
41487c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
41497c478bd9Sstevel@tonic-gate 			kmem_cache_free(ciputctrl_cache, cip);
41507c478bd9Sstevel@tonic-gate 		} else {
41517c478bd9Sstevel@tonic-gate 			ASSERT(stp->sd_nciputctrl == 0);
41527c478bd9Sstevel@tonic-gate 			stp->sd_nciputctrl = n_ciputctrl - 1;
41537c478bd9Sstevel@tonic-gate 			/*
41547c478bd9Sstevel@tonic-gate 			 * putnext checks sd_ciputctrl without holding
41557c478bd9Sstevel@tonic-gate 			 * sd_lock. if it is not NULL putnext assumes
41567c478bd9Sstevel@tonic-gate 			 * sd_nciputctrl is initialized. membar below
41577c478bd9Sstevel@tonic-gate 			 * insures that.
41587c478bd9Sstevel@tonic-gate 			 */
41597c478bd9Sstevel@tonic-gate 			membar_producer();
41607c478bd9Sstevel@tonic-gate 			stp->sd_ciputctrl = cip;
41617c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
41627c478bd9Sstevel@tonic-gate 		}
41637c478bd9Sstevel@tonic-gate 	}
41647c478bd9Sstevel@tonic-gate 
41657c478bd9Sstevel@tonic-gate 	ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);
41667c478bd9Sstevel@tonic-gate 
41677c478bd9Sstevel@tonic-gate 	while (_SAMESTR(q)) {
41687c478bd9Sstevel@tonic-gate 		create_syncq_putlocks(q);
41697c478bd9Sstevel@tonic-gate 		if (stream == 0)
41707c478bd9Sstevel@tonic-gate 			return;
41717c478bd9Sstevel@tonic-gate 		q = q->q_next;
41727c478bd9Sstevel@tonic-gate 	}
41737c478bd9Sstevel@tonic-gate 	ASSERT(q != NULL);
41747c478bd9Sstevel@tonic-gate 	create_syncq_putlocks(q);
41757c478bd9Sstevel@tonic-gate }
41767c478bd9Sstevel@tonic-gate 
/*
 * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
 * through a stream.
 *
 * Data currently recorded per event is a timestamp, module/driver name,
 * downstream module/driver name, optional callstack, event type and a per
 * type datum.  Much of the STREAMS framework is instrumented for automatic
 * flow tracing (when enabled).  Events can be defined and used by STREAMS
 * modules and drivers.
 *
 * Global objects:
 *
 *	str_ftevent() - Add a flow-trace event to a dblk.
 *	str_ftfree() - Free flow-trace data
 *
 * Local objects:
 *
 *	fthdr_cache - pointer to the kmem cache for trace header.
 *	ftblk_cache - pointer to the kmem cache for trace data blocks.
 */
41977c478bd9Sstevel@tonic-gate 
int str_ftnever = 1;	/* Don't do STREAMS flow tracing */
int str_ftstack = 0;	/* Don't record event call stacks */
42007c478bd9Sstevel@tonic-gate 
42017c478bd9Sstevel@tonic-gate void
str_ftevent(fthdr_t * hp,void * p,ushort_t evnt,ushort_t data)42027c478bd9Sstevel@tonic-gate str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
42037c478bd9Sstevel@tonic-gate {
42047c478bd9Sstevel@tonic-gate 	ftblk_t *bp = hp->tail;
42057c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
42067c478bd9Sstevel@tonic-gate 	ftevnt_t *ep;
42077c478bd9Sstevel@tonic-gate 	int ix, nix;
42087c478bd9Sstevel@tonic-gate 
42097c478bd9Sstevel@tonic-gate 	ASSERT(hp != NULL);
42107c478bd9Sstevel@tonic-gate 
42117c478bd9Sstevel@tonic-gate 	for (;;) {
42127c478bd9Sstevel@tonic-gate 		if ((ix = bp->ix) == FTBLK_EVNTS) {
42137c478bd9Sstevel@tonic-gate 			/*
42147c478bd9Sstevel@tonic-gate 			 * Tail doesn't have room, so need a new tail.
42157c478bd9Sstevel@tonic-gate 			 *
42167c478bd9Sstevel@tonic-gate 			 * To make this MT safe, first, allocate a new
42177c478bd9Sstevel@tonic-gate 			 * ftblk, and initialize it.  To make life a
42187c478bd9Sstevel@tonic-gate 			 * little easier, reserve the first slot (mostly
42197c478bd9Sstevel@tonic-gate 			 * by making ix = 1).  When we are finished with
42207c478bd9Sstevel@tonic-gate 			 * the initialization, CAS this pointer to the
42217c478bd9Sstevel@tonic-gate 			 * tail.  If this succeeds, this is the new
42227c478bd9Sstevel@tonic-gate 			 * "next" block.  Otherwise, another thread
42237c478bd9Sstevel@tonic-gate 			 * got here first, so free the block and start
42247c478bd9Sstevel@tonic-gate 			 * again.
42257c478bd9Sstevel@tonic-gate 			 */
4226a45f3f93Smeem 			nbp = kmem_cache_alloc(ftblk_cache, KM_NOSLEEP);
4227a45f3f93Smeem 			if (nbp == NULL) {
42287c478bd9Sstevel@tonic-gate 				/* no mem, so punt */
42297c478bd9Sstevel@tonic-gate 				str_ftnever++;
42307c478bd9Sstevel@tonic-gate 				/* free up all flow data? */
42317c478bd9Sstevel@tonic-gate 				return;
42327c478bd9Sstevel@tonic-gate 			}
42337c478bd9Sstevel@tonic-gate 			nbp->nxt = NULL;
42347c478bd9Sstevel@tonic-gate 			nbp->ix = 1;
42357c478bd9Sstevel@tonic-gate 			/*
42367c478bd9Sstevel@tonic-gate 			 * Just in case there is another thread about
42377c478bd9Sstevel@tonic-gate 			 * to get the next index, we need to make sure
42387c478bd9Sstevel@tonic-gate 			 * the value is there for it.
42397c478bd9Sstevel@tonic-gate 			 */
42407c478bd9Sstevel@tonic-gate 			membar_producer();
424175d94465SJosef 'Jeff' Sipek 			if (atomic_cas_ptr(&hp->tail, bp, nbp) == bp) {
42427c478bd9Sstevel@tonic-gate 				/* CAS was successful */
42437c478bd9Sstevel@tonic-gate 				bp->nxt = nbp;
42447c478bd9Sstevel@tonic-gate 				membar_producer();
42457c478bd9Sstevel@tonic-gate 				bp = nbp;
42467c478bd9Sstevel@tonic-gate 				ix = 0;
42477c478bd9Sstevel@tonic-gate 				goto cas_good;
42487c478bd9Sstevel@tonic-gate 			} else {
42497c478bd9Sstevel@tonic-gate 				kmem_cache_free(ftblk_cache, nbp);
42507c478bd9Sstevel@tonic-gate 				bp = hp->tail;
42517c478bd9Sstevel@tonic-gate 				continue;
42527c478bd9Sstevel@tonic-gate 			}
42537c478bd9Sstevel@tonic-gate 		}
42547c478bd9Sstevel@tonic-gate 		nix = ix + 1;
425575d94465SJosef 'Jeff' Sipek 		if (atomic_cas_32((uint32_t *)&bp->ix, ix, nix) == ix) {
42567c478bd9Sstevel@tonic-gate 		cas_good:
42577c478bd9Sstevel@tonic-gate 			if (curthread != hp->thread) {
42587c478bd9Sstevel@tonic-gate 				hp->thread = curthread;
42597c478bd9Sstevel@tonic-gate 				evnt |= FTEV_CS;
42607c478bd9Sstevel@tonic-gate 			}
42617c478bd9Sstevel@tonic-gate 			if (CPU->cpu_seqid != hp->cpu_seqid) {
42627c478bd9Sstevel@tonic-gate 				hp->cpu_seqid = CPU->cpu_seqid;
42637c478bd9Sstevel@tonic-gate 				evnt |= FTEV_PS;
42647c478bd9Sstevel@tonic-gate 			}
42657c478bd9Sstevel@tonic-gate 			ep = &bp->ev[ix];
42667c478bd9Sstevel@tonic-gate 			break;
42677c478bd9Sstevel@tonic-gate 		}
42687c478bd9Sstevel@tonic-gate 	}
42697c478bd9Sstevel@tonic-gate 
42707c478bd9Sstevel@tonic-gate 	if (evnt & FTEV_QMASK) {
42717c478bd9Sstevel@tonic-gate 		queue_t *qp = p;
42727c478bd9Sstevel@tonic-gate 
42737c478bd9Sstevel@tonic-gate 		if (!(qp->q_flag & QREADR))
42747c478bd9Sstevel@tonic-gate 			evnt |= FTEV_ISWR;
4275a45f3f93Smeem 
4276a45f3f93Smeem 		ep->mid = Q2NAME(qp);
4277a45f3f93Smeem 
4278a45f3f93Smeem 		/*
4279a45f3f93Smeem 		 * We only record the next queue name for FTEV_PUTNEXT since
4280a45f3f93Smeem 		 * that's the only time we *really* need it, and the putnext()
4281a45f3f93Smeem 		 * code ensures that qp->q_next won't vanish.  (We could use
4282a45f3f93Smeem 		 * claimstr()/releasestr() but at a performance cost.)
4283a45f3f93Smeem 		 */
4284a45f3f93Smeem 		if ((evnt & FTEV_MASK) == FTEV_PUTNEXT && qp->q_next != NULL)
4285a45f3f93Smeem 			ep->midnext = Q2NAME(qp->q_next);
4286a45f3f93Smeem 		else
4287a45f3f93Smeem 			ep->midnext = NULL;
42887c478bd9Sstevel@tonic-gate 	} else {
4289a45f3f93Smeem 		ep->mid = p;
4290a45f3f93Smeem 		ep->midnext = NULL;
42917c478bd9Sstevel@tonic-gate 	}
42927c478bd9Sstevel@tonic-gate 
4293a45f3f93Smeem 	if (ep->stk != NULL)
4294a45f3f93Smeem 		ep->stk->fs_depth = getpcstack(ep->stk->fs_stk, FTSTK_DEPTH);
4295a45f3f93Smeem 
42967c478bd9Sstevel@tonic-gate 	ep->ts = gethrtime();
42977c478bd9Sstevel@tonic-gate 	ep->evnt = evnt;
42987c478bd9Sstevel@tonic-gate 	ep->data = data;
42997c478bd9Sstevel@tonic-gate 	hp->hash = (hp->hash << 9) + hp->hash;
43007c478bd9Sstevel@tonic-gate 	hp->hash += (evnt << 16) | data;
43017c478bd9Sstevel@tonic-gate 	hp->hash += (uintptr_t)ep->mid;
43027c478bd9Sstevel@tonic-gate }
43037c478bd9Sstevel@tonic-gate 
43047c478bd9Sstevel@tonic-gate /*
43057c478bd9Sstevel@tonic-gate  * Free flow-trace data.
43067c478bd9Sstevel@tonic-gate  */
43077c478bd9Sstevel@tonic-gate void
str_ftfree(dblk_t * dbp)43087c478bd9Sstevel@tonic-gate str_ftfree(dblk_t *dbp)
43097c478bd9Sstevel@tonic-gate {
43107c478bd9Sstevel@tonic-gate 	fthdr_t *hp = dbp->db_fthdr;
43117c478bd9Sstevel@tonic-gate 	ftblk_t *bp = &hp->first;
43127c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
43137c478bd9Sstevel@tonic-gate 
43147c478bd9Sstevel@tonic-gate 	if (bp != hp->tail || bp->ix != 0) {
43157c478bd9Sstevel@tonic-gate 		/*
43167c478bd9Sstevel@tonic-gate 		 * Clear out the hash, have the tail point to itself, and free
43177c478bd9Sstevel@tonic-gate 		 * any continuation blocks.
43187c478bd9Sstevel@tonic-gate 		 */
43197c478bd9Sstevel@tonic-gate 		bp = hp->first.nxt;
43207c478bd9Sstevel@tonic-gate 		hp->tail = &hp->first;
43217c478bd9Sstevel@tonic-gate 		hp->hash = 0;
43227c478bd9Sstevel@tonic-gate 		hp->first.nxt = NULL;
43237c478bd9Sstevel@tonic-gate 		hp->first.ix = 0;
43247c478bd9Sstevel@tonic-gate 		while (bp != NULL) {
43257c478bd9Sstevel@tonic-gate 			nbp = bp->nxt;
43267c478bd9Sstevel@tonic-gate 			kmem_cache_free(ftblk_cache, bp);
43277c478bd9Sstevel@tonic-gate 			bp = nbp;
43287c478bd9Sstevel@tonic-gate 		}
43297c478bd9Sstevel@tonic-gate 	}
43307c478bd9Sstevel@tonic-gate 	kmem_cache_free(fthdr_cache, hp);
43317c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
43327c478bd9Sstevel@tonic-gate }
4333