xref: /onnv-gate/usr/src/uts/common/io/stream.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23*0Sstevel@tonic-gate /*	  All Rights Reserved  	*/
24*0Sstevel@tonic-gate 
25*0Sstevel@tonic-gate 
26*0Sstevel@tonic-gate /*
27*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28*0Sstevel@tonic-gate  * Use is subject to license terms.
29*0Sstevel@tonic-gate  */
30*0Sstevel@tonic-gate 
31*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
32*0Sstevel@tonic-gate 
33*0Sstevel@tonic-gate #include <sys/types.h>
34*0Sstevel@tonic-gate #include <sys/param.h>
35*0Sstevel@tonic-gate #include <sys/thread.h>
36*0Sstevel@tonic-gate #include <sys/sysmacros.h>
37*0Sstevel@tonic-gate #include <sys/stropts.h>
38*0Sstevel@tonic-gate #include <sys/stream.h>
39*0Sstevel@tonic-gate #include <sys/strsubr.h>
40*0Sstevel@tonic-gate #include <sys/strsun.h>
41*0Sstevel@tonic-gate #include <sys/conf.h>
42*0Sstevel@tonic-gate #include <sys/debug.h>
43*0Sstevel@tonic-gate #include <sys/cmn_err.h>
44*0Sstevel@tonic-gate #include <sys/kmem.h>
45*0Sstevel@tonic-gate #include <sys/atomic.h>
46*0Sstevel@tonic-gate #include <sys/errno.h>
47*0Sstevel@tonic-gate #include <sys/vtrace.h>
48*0Sstevel@tonic-gate #include <sys/ftrace.h>
49*0Sstevel@tonic-gate #include <sys/ontrap.h>
50*0Sstevel@tonic-gate #include <sys/multidata.h>
51*0Sstevel@tonic-gate #include <sys/multidata_impl.h>
52*0Sstevel@tonic-gate #include <sys/sdt.h>
53*0Sstevel@tonic-gate 
54*0Sstevel@tonic-gate #ifdef DEBUG
55*0Sstevel@tonic-gate #include <sys/kmem_impl.h>
56*0Sstevel@tonic-gate #endif
57*0Sstevel@tonic-gate 
58*0Sstevel@tonic-gate /*
59*0Sstevel@tonic-gate  * This file contains all the STREAMS utility routines that may
60*0Sstevel@tonic-gate  * be used by modules and drivers.
61*0Sstevel@tonic-gate  */
62*0Sstevel@tonic-gate 
63*0Sstevel@tonic-gate /*
64*0Sstevel@tonic-gate  * STREAMS message allocator: principles of operation
65*0Sstevel@tonic-gate  *
66*0Sstevel@tonic-gate  * The streams message allocator consists of all the routines that
67*0Sstevel@tonic-gate  * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
68*0Sstevel@tonic-gate  * dupb(), freeb() and freemsg().  What follows is a high-level view
69*0Sstevel@tonic-gate  * of how the allocator works.
70*0Sstevel@tonic-gate  *
71*0Sstevel@tonic-gate  * Every streams message consists of one or more mblks, a dblk, and data.
72*0Sstevel@tonic-gate  * All mblks for all types of messages come from a common mblk_cache.
73*0Sstevel@tonic-gate  * The dblk and data come in several flavors, depending on how the
74*0Sstevel@tonic-gate  * message is allocated:
75*0Sstevel@tonic-gate  *
76*0Sstevel@tonic-gate  * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
77*0Sstevel@tonic-gate  *     fixed-size dblk/data caches. For message sizes that are multiples of
78*0Sstevel@tonic-gate  *     PAGESIZE, dblks are allocated separately from the buffer.
79*0Sstevel@tonic-gate  *     The associated buffer is allocated by the constructor using kmem_alloc().
80*0Sstevel@tonic-gate  *     For all other message sizes, dblk and its associated data is allocated
81*0Sstevel@tonic-gate  *     as a single contiguous chunk of memory.
82*0Sstevel@tonic-gate  *     Objects in these caches consist of a dblk plus its associated data.
83*0Sstevel@tonic-gate  *     allocb() determines the nearest-size cache by table lookup:
84*0Sstevel@tonic-gate  *     the dblk_cache[] array provides the mapping from size to dblk cache.
85*0Sstevel@tonic-gate  *
86*0Sstevel@tonic-gate  * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
87*0Sstevel@tonic-gate  *     kmem_alloc()'ing a buffer for the data and supplying that
88*0Sstevel@tonic-gate  *     buffer to gesballoc(), described below.
89*0Sstevel@tonic-gate  *
90*0Sstevel@tonic-gate  * (3) The four flavors of [d]esballoc[a] are all implemented by a
91*0Sstevel@tonic-gate  *     common routine, gesballoc() ("generic esballoc").  gesballoc()
92*0Sstevel@tonic-gate  *     allocates a dblk from the global dblk_esb_cache and sets db_base,
93*0Sstevel@tonic-gate  *     db_lim and db_frtnp to describe the caller-supplied buffer.
94*0Sstevel@tonic-gate  *
95*0Sstevel@tonic-gate  * While there are several routines to allocate messages, there is only
96*0Sstevel@tonic-gate  * one routine to free messages: freeb().  freeb() simply invokes the
97*0Sstevel@tonic-gate  * dblk's free method, dbp->db_free(), which is set at allocation time.
98*0Sstevel@tonic-gate  *
99*0Sstevel@tonic-gate  * dupb() creates a new reference to a message by allocating a new mblk,
100*0Sstevel@tonic-gate  * incrementing the dblk reference count and setting the dblk's free
101*0Sstevel@tonic-gate  * method to dblk_decref().  The dblk's original free method is retained
102*0Sstevel@tonic-gate  * in db_lastfree.  dblk_decref() decrements the reference count on each
103*0Sstevel@tonic-gate  * freeb().  If this is not the last reference it just frees the mblk;
104*0Sstevel@tonic-gate  * if this *is* the last reference, it restores db_free to db_lastfree,
105*0Sstevel@tonic-gate  * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
106*0Sstevel@tonic-gate  *
107*0Sstevel@tonic-gate  * The implementation makes aggressive use of kmem object caching for
108*0Sstevel@tonic-gate  * maximum performance.  This makes the code simple and compact, but
109*0Sstevel@tonic-gate  * also a bit abstruse in some places.  The invariants that constitute a
110*0Sstevel@tonic-gate  * message's constructed state, described below, are more subtle than usual.
111*0Sstevel@tonic-gate  *
112*0Sstevel@tonic-gate  * Every dblk has an "attached mblk" as part of its constructed state.
113*0Sstevel@tonic-gate  * The mblk is allocated by the dblk's constructor and remains attached
114*0Sstevel@tonic-gate  * until the message is either dup'ed or pulled up.  In the dupb() case
115*0Sstevel@tonic-gate  * the mblk association doesn't matter until the last free, at which time
116*0Sstevel@tonic-gate  * dblk_decref() attaches the last mblk to the dblk.  pullupmsg() affects
117*0Sstevel@tonic-gate  * the mblk association because it swaps the leading mblks of two messages,
118*0Sstevel@tonic-gate  * so it is responsible for swapping their db_mblk pointers accordingly.
119*0Sstevel@tonic-gate  * From a constructed-state viewpoint it doesn't matter that a dblk's
120*0Sstevel@tonic-gate  * attached mblk can change while the message is allocated; all that
121*0Sstevel@tonic-gate  * matters is that the dblk has *some* attached mblk when it's freed.
122*0Sstevel@tonic-gate  *
123*0Sstevel@tonic-gate  * The sizes of the allocb() small-message caches are not magical.
124*0Sstevel@tonic-gate  * They represent a good trade-off between internal and external
125*0Sstevel@tonic-gate  * fragmentation for current workloads.  They should be reevaluated
126*0Sstevel@tonic-gate  * periodically, especially if allocations larger than DBLK_MAX_CACHE
127*0Sstevel@tonic-gate  * become common.  We use 64-byte alignment so that dblks don't
128*0Sstevel@tonic-gate  * straddle cache lines unnecessarily.
129*0Sstevel@tonic-gate  */
/*
 * Largest size served by the fixed-size dblk/data caches; anything
 * bigger goes through allocb_oversize()/gesballoc().
 */
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64	/* keep dblks off cache-line straddles */
#define	DBLK_MIN_SIZE		8	/* granularity of dblk_cache[] buckets */
#define	DBLK_SIZE_SHIFT		3	/* log2(DBLK_MIN_SIZE) */

/*
 * Byte shift of a dblk field within the 32-bit "RTFU" word that overlays
 * the db_ref, db_type, db_flags and db_struioflag bytes; the formula is
 * endian-dependent so the overlay means the same thing on both byte
 * orders.
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Compose the whole RTFU word from ref/type/flags/uioflag so it can be
 * set with a single 32-bit store.  Note the (ref - 1) folded into the
 * flags byte: it records the minimum reference count, which
 * dblk_decref() later compares against the current count extracted
 * from the same word.
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
/* Access the four overlaid bytes starting at db_ref as one 32-bit word */
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
/* Likewise for the 32-bit word starting at an mblk's b_band */
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
151*0Sstevel@tonic-gate 
/*
 * Sizes of the fixed dblk/data caches (see the allocator block comment
 * above).  Sizes that are multiples of PAGESIZE get their data buffer
 * allocated separately from the dblk; all others share one contiguous
 * chunk.  The list is zero-terminated.
 */
static size_t dblk_sizes[] = {
#ifdef _LP64
	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3920,
	8192, 12112, 16384, 20304, 24576, 28496, 32768, 36688,
	40960, 44880, 49152, 53072, 57344, 61264, 65536, 69456,
#else
	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3968,
	8192, 12160, 16384, 20352, 24576, 28544, 32768, 36736,
	40960, 44928, 49152, 53120, 57344, 61312, 65536, 69504,
#endif
	DBLK_MAX_CACHE, 0
};
164*0Sstevel@tonic-gate 
/* size-to-cache map, indexed by (size - 1) >> DBLK_SIZE_SHIFT */
static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
static struct kmem_cache *mblk_cache;		/* all mblks come from here */
static struct kmem_cache *dblk_esb_cache;	/* dblks for [d]esballoc[a] */
static struct kmem_cache *fthdr_cache;		/* flow-trace headers */
static struct kmem_cache *ftblk_cache;		/* flow-trace blocks */

static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
static mblk_t *allocb_oversize(size_t size, int flags);
static int allocb_tryhard_fails;	/* failure statistic (updated elsewhere in this file) */
static void frnop_func(void *arg);
frtn_t frnop = { frnop_func };		/* no-op free-routine descriptor */
static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);

static boolean_t rwnext_enter(queue_t *qp);
static void rwnext_exit(queue_t *qp);

/*
 * Patchable mblk/dblk kmem_cache flags (passed to kmem_cache_create()
 * in streams_msg_init(), e.g. to enable kmem debugging on these caches).
 */
int dblk_kmem_flags = 0;
int mblk_kmem_flags = 0;
186*0Sstevel@tonic-gate 
187*0Sstevel@tonic-gate 
/*
 * kmem constructor for the fixed-size dblk caches.  cdrarg is the
 * message size the cache serves.  Establishes the dblk's constructed
 * state: an attached mblk, a data buffer, and the free methods.
 * Returns 0 on success, -1 if any allocation fails.
 */
static int
dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;
	size_t index;

	ASSERT(msg_size != 0);

	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;

	ASSERT(index <= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));

	/* every dblk carries an attached mblk as part of constructed state */
	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	if ((msg_size & PAGEOFFSET) == 0) {
		/* page-multiple sizes: data buffer allocated separately */
		dbp->db_base = kmem_alloc(msg_size, kmflags);
		if (dbp->db_base == NULL) {
			kmem_cache_free(mblk_cache, dbp->db_mblk);
			return (-1);
		}
	} else {
		/* all other sizes: data follows the dblk in the same chunk */
		dbp->db_base = (unsigned char *)&dbp[1];
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_cache[index];
	dbp->db_lim = dbp->db_base + msg_size;
	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}
225*0Sstevel@tonic-gate 
/*
 * kmem constructor for dblk_esb_cache.  No data buffer is allocated
 * here: db_base, db_lim and db_frtnp are filled in later by
 * gesballoc() from the caller-supplied buffer.
 */
/*ARGSUSED*/
static int
dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_esb_cache;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}
243*0Sstevel@tonic-gate 
/*
 * kmem constructor for bcache dblks.  cdrarg is the bcache_t: the data
 * buffer comes from the bcache's own buffer cache, db_cache points at
 * the bcache itself, and the free method is bcache_dblk_lastfree().
 */
static int
bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	bcache_t *bcp = (bcache_t *)cdrarg;

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);

	if ((dbp->db_base = (unsigned char *)kmem_cache_alloc(bcp->buffer_cache,
	    kmflags)) == NULL) {
		kmem_cache_free(mblk_cache, dbp->db_mblk);
		return (-1);
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = (void *)bcp;	/* bcache, not a kmem cache */
	dbp->db_lim = dbp->db_base + bcp->size;
	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}
271*0Sstevel@tonic-gate 
/*
 * kmem destructor for the fixed-size dblk caches: undo
 * dblk_constructor() by freeing the separately-allocated data buffer
 * (page-multiple sizes only) and the attached mblk.
 */
/*ARGSUSED*/
static void
dblk_destructor(void *buf, void *cdrarg)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;

	ASSERT(dbp->db_mblk->b_datap == dbp);

	ASSERT(msg_size != 0);

	/* per-use fields must have been reset by the lastfree routine */
	ASSERT(dbp->db_struioflag == 0);
	ASSERT(dbp->db_struioun.cksum.flags == 0);

	if ((msg_size & PAGEOFFSET) == 0) {
		/* separate buffer case — mirrors dblk_constructor() */
		kmem_free(dbp->db_base, msg_size);
	}

	kmem_cache_free(mblk_cache, dbp->db_mblk);
}
292*0Sstevel@tonic-gate 
/*
 * kmem destructor for bcache dblks: return the data buffer to the
 * bcache's buffer cache and the attached mblk to mblk_cache.
 */
static void
bcache_dblk_destructor(void *buf, void *cdrarg)
{
	dblk_t *dbp = buf;
	bcache_t *bcp = (bcache_t *)cdrarg;

	kmem_cache_free(bcp->buffer_cache, dbp->db_base);

	ASSERT(dbp->db_mblk->b_datap == dbp);

	/* per-use fields must have been reset by the lastfree routine */
	ASSERT(dbp->db_struioflag == 0);
	ASSERT(dbp->db_struioun.cksum.flags == 0);

	kmem_cache_free(mblk_cache, dbp->db_mblk);
}
308*0Sstevel@tonic-gate 
/*
 * Create the kmem caches used by the STREAMS message allocator: the
 * common mblk cache, one dblk cache per entry in dblk_sizes[] (and the
 * dblk_cache[] size-to-cache map), the esballoc dblk cache, and the
 * flow-trace caches.
 */
void
streams_msg_init(void)
{
	char name[40];
	size_t size;
	size_t lastsize = DBLK_MIN_SIZE;
	size_t *sizep;
	struct kmem_cache *cp;
	size_t tot_size;
	int offset;

	mblk_cache = kmem_cache_create("streams_mblk",
		sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
		mblk_kmem_flags);

	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {

		if ((offset = (size & PAGEOFFSET)) != 0) {
			/*
			 * We are in the middle of a page, dblk should
			 * be allocated on the same page
			 */
			tot_size = size + sizeof (dblk_t);
			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
								< PAGESIZE);
			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);

		} else {

			/*
			 * buf size is multiple of page size, dblk and
			 * buffer are allocated separately.
			 */

			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
			tot_size = sizeof (dblk_t);
		}

		(void) sprintf(name, "streams_dblk_%ld", size);
		/* the cache size (cdrarg) is passed to the con/destructor */
		cp = kmem_cache_create(name, tot_size,
			DBLK_CACHE_ALIGN, dblk_constructor,
			dblk_destructor, NULL,
			(void *)(size), NULL, dblk_kmem_flags);

		/*
		 * Point every dblk_cache[] bucket from the previous cache
		 * size up through this one at the new cache, so allocb()
		 * maps any size to the nearest cache that can hold it.
		 */
		while (lastsize <= size) {
			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
			lastsize += DBLK_MIN_SIZE;
		}
	}

	dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
			sizeof (dblk_t), DBLK_CACHE_ALIGN,
			dblk_esb_constructor, dblk_destructor, NULL,
			(void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
	fthdr_cache = kmem_cache_create("streams_fthdr",
		sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
	ftblk_cache = kmem_cache_create("streams_ftblk",
		sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);

	/* Initialize Multidata caches */
	mmd_init();
}
371*0Sstevel@tonic-gate 
/*
 * Allocate a message of at least "size" bytes.  The pri argument is
 * unused.  Fast path: map size to a dblk cache via dblk_cache[], take
 * a fully-constructed dblk from it, and initialize only the per-message
 * fields of its attached mblk.  Sizes too big for any cache go through
 * allocb_oversize().  Returns NULL on allocation failure.
 */
/*ARGSUSED*/
mblk_t *
allocb(size_t size, uint_t pri)
{
	dblk_t *dbp;
	mblk_t *mp;
	size_t index;

	index =  (size - 1)  >> DBLK_SIZE_SHIFT;

	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
		if (size != 0) {
			/* larger than DBLK_MAX_CACHE: kmem_alloc + gesballoc */
			mp = allocb_oversize(size, KM_NOSLEEP);
			goto out;
		}
		/* size 0 wrapped the index computation; use smallest cache */
		index = 0;
	}

	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
		mp = NULL;
		goto out;
	}

	mp = dbp->db_mblk;
	/* one store sets db_ref=1, db_type=M_DATA, db_flags=0, uioflag=0 */
	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = dbp->db_base;
	mp->b_queue = NULL;
	/* clears b_band and b_flag together */
	MBLK_BAND_FLAG_WORD(mp) = 0;
	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
out:
	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);

	return (mp);
}
407*0Sstevel@tonic-gate 
408*0Sstevel@tonic-gate mblk_t *
409*0Sstevel@tonic-gate allocb_tmpl(size_t size, const mblk_t *tmpl)
410*0Sstevel@tonic-gate {
411*0Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
412*0Sstevel@tonic-gate 
413*0Sstevel@tonic-gate 	if (mp != NULL) {
414*0Sstevel@tonic-gate 		cred_t *cr = DB_CRED(tmpl);
415*0Sstevel@tonic-gate 		if (cr != NULL)
416*0Sstevel@tonic-gate 			crhold(mp->b_datap->db_credp = cr);
417*0Sstevel@tonic-gate 		DB_CPID(mp) = DB_CPID(tmpl);
418*0Sstevel@tonic-gate 		DB_TYPE(mp) = DB_TYPE(tmpl);
419*0Sstevel@tonic-gate 	}
420*0Sstevel@tonic-gate 	return (mp);
421*0Sstevel@tonic-gate }
422*0Sstevel@tonic-gate 
423*0Sstevel@tonic-gate mblk_t *
424*0Sstevel@tonic-gate allocb_cred(size_t size, cred_t *cr)
425*0Sstevel@tonic-gate {
426*0Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
427*0Sstevel@tonic-gate 
428*0Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
429*0Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
430*0Sstevel@tonic-gate 
431*0Sstevel@tonic-gate 	return (mp);
432*0Sstevel@tonic-gate }
433*0Sstevel@tonic-gate 
434*0Sstevel@tonic-gate mblk_t *
435*0Sstevel@tonic-gate allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr)
436*0Sstevel@tonic-gate {
437*0Sstevel@tonic-gate 	mblk_t *mp = allocb_wait(size, 0, flags, error);
438*0Sstevel@tonic-gate 
439*0Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
440*0Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
441*0Sstevel@tonic-gate 
442*0Sstevel@tonic-gate 	return (mp);
443*0Sstevel@tonic-gate }
444*0Sstevel@tonic-gate 
445*0Sstevel@tonic-gate void
446*0Sstevel@tonic-gate freeb(mblk_t *mp)
447*0Sstevel@tonic-gate {
448*0Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
449*0Sstevel@tonic-gate 
450*0Sstevel@tonic-gate 	ASSERT(dbp->db_ref > 0);
451*0Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
452*0Sstevel@tonic-gate 	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);
453*0Sstevel@tonic-gate 
454*0Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
455*0Sstevel@tonic-gate 
456*0Sstevel@tonic-gate 	dbp->db_free(mp, dbp);
457*0Sstevel@tonic-gate }
458*0Sstevel@tonic-gate 
459*0Sstevel@tonic-gate void
460*0Sstevel@tonic-gate freemsg(mblk_t *mp)
461*0Sstevel@tonic-gate {
462*0Sstevel@tonic-gate 	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
463*0Sstevel@tonic-gate 	while (mp) {
464*0Sstevel@tonic-gate 		dblk_t *dbp = mp->b_datap;
465*0Sstevel@tonic-gate 		mblk_t *mp_cont = mp->b_cont;
466*0Sstevel@tonic-gate 
467*0Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
468*0Sstevel@tonic-gate 		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
469*0Sstevel@tonic-gate 
470*0Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
471*0Sstevel@tonic-gate 
472*0Sstevel@tonic-gate 		dbp->db_free(mp, dbp);
473*0Sstevel@tonic-gate 		mp = mp_cont;
474*0Sstevel@tonic-gate 	}
475*0Sstevel@tonic-gate }
476*0Sstevel@tonic-gate 
/*
 * Reallocate a block for another use.  Try hard to use the old block.
 * If the old data is wanted (copy), leave b_wptr at the end of the data,
 * otherwise return b_wptr = b_rptr.
 *
 * Returns the (possibly new) mblk, or NULL if a needed allocation
 * failed.
 *
 * This routine is private and unstable.
 */
mblk_t	*
reallocb(mblk_t *mp, size_t size, uint_t copy)
{
	mblk_t		*mp1;
	unsigned char	*old_rptr;
	ptrdiff_t	cur_size;

	if (mp == NULL)
		return (allocb(size, BPRI_HI));

	cur_size = mp->b_wptr - mp->b_rptr;
	old_rptr = mp->b_rptr;

	ASSERT(mp->b_datap->db_ref != 0);

	/* reuse in place only if we hold the sole reference and it fits */
	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
		/*
		 * If the data is wanted and it will fit where it is, no
		 * work is required.
		 */
		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
			return (mp);

		/* rewind to db_base; old data (if wanted) is copied below */
		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
		mp1 = mp;
	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
		/* XXX other mp state could be copied too, db_flags ... ? */
		mp1->b_cont = mp->b_cont;
	} else {
		return (NULL);
	}

	if (copy) {
		/*
		 * NOTE(review): in the reuse-in-place case old_rptr and
		 * mp1->b_rptr can lie in the same buffer, with the
		 * destination (db_base) at or below the source — assumes
		 * bcopy handles that direction; confirm against bcopy(9F).
		 */
		bcopy(old_rptr, mp1->b_rptr, cur_size);
		mp1->b_wptr = mp1->b_rptr + cur_size;
	}

	if (mp != mp1)
		freeb(mp);

	return (mp1);
}
526*0Sstevel@tonic-gate 
/*
 * Default last-free method for dblks from the fixed-size caches:
 * release flow-trace data and credentials, reset per-use fields to
 * their constructed values, and return the dblk (with its attached
 * mblk) to its cache.
 */
static void
dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;

	/* Reset the struioflag and the checksum flag fields */
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	kmem_cache_free(dbp->db_cache, dbp);
}
547*0Sstevel@tonic-gate 
/*
 * Free method installed by dupb().  Drops one reference atomically:
 * a non-last reference frees only the mblk, while the last reference
 * re-attaches the mblk to the dblk, restores the original free method
 * from db_lastfree, and invokes it.
 */
static void
dblk_decref(mblk_t *mp, dblk_t *dbp)
{
	if (dbp->db_ref != 1) {
		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
		/*
		 * atomic_add_32_nv() just decremented db_ref, so we no longer
		 * have a reference to the dblk, which means another thread
		 * could free it.  Therefore we cannot examine the dblk to
		 * determine whether ours was the last reference.  Instead,
		 * we extract the new and minimum reference counts from rtfu.
		 * Note that all we're really saying is "if (ref != refmin)".
		 */
		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
			kmem_cache_free(mblk_cache, mp);
			return;
		}
	}
	/* last reference: restore constructed state and really free */
	dbp->db_mblk = mp;
	dbp->db_free = dbp->db_lastfree;
	dbp->db_lastfree(mp, dbp);
}
572*0Sstevel@tonic-gate 
/*
 * Create another reference to a message block: allocate a fresh mblk
 * sharing the same dblk and bump the dblk's reference count.  Returns
 * NULL if the mblk allocation fails or db_ref is already at DBLK_REFMAX.
 */
mblk_t *
dupb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;
	mblk_t *new_mp;
	uint32_t oldrtfu, newrtfu;

	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
		goto out;

	/* the new mblk mirrors mp except for the chain/queue linkage */
	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
	new_mp->b_rptr = mp->b_rptr;
	new_mp->b_wptr = mp->b_wptr;
	new_mp->b_datap = dbp;
	new_mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);

	/*
	 * First-dup optimization.  The enabling assumption is that there
	 * can never be a race (in correct code) to dup the first copy
	 * of a message.  Therefore we don't need to do it atomically.
	 */
	if (dbp->db_free != dblk_decref) {
		dbp->db_free = dblk_decref;
		dbp->db_ref++;
		goto out;
	}

	/* subsequent dups can race with decrefs, so use cas32 on RTFU */
	do {
		ASSERT(dbp->db_ref > 0);
		oldrtfu = DBLK_RTFU_WORD(dbp);
		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
		/*
		 * If db_ref is maxed out we can't dup this message anymore.
		 */
		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
			kmem_cache_free(mblk_cache, new_mp);
			new_mp = NULL;
			goto out;
		}
	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);

out:
	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
	return (new_mp);
}
621*0Sstevel@tonic-gate 
622*0Sstevel@tonic-gate static void
623*0Sstevel@tonic-gate dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
624*0Sstevel@tonic-gate {
625*0Sstevel@tonic-gate 	frtn_t *frp = dbp->db_frtnp;
626*0Sstevel@tonic-gate 
627*0Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
628*0Sstevel@tonic-gate 	frp->free_func(frp->free_arg);
629*0Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
630*0Sstevel@tonic-gate 		str_ftfree(dbp);
631*0Sstevel@tonic-gate 
632*0Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
633*0Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
634*0Sstevel@tonic-gate 		crfree(dbp->db_credp);
635*0Sstevel@tonic-gate 		dbp->db_credp = NULL;
636*0Sstevel@tonic-gate 	}
637*0Sstevel@tonic-gate 	dbp->db_cpid = -1;
638*0Sstevel@tonic-gate 	dbp->db_struioflag = 0;
639*0Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
640*0Sstevel@tonic-gate 
641*0Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
642*0Sstevel@tonic-gate }
643*0Sstevel@tonic-gate 
644*0Sstevel@tonic-gate /*ARGSUSED*/
645*0Sstevel@tonic-gate static void
646*0Sstevel@tonic-gate frnop_func(void *arg)
647*0Sstevel@tonic-gate {
648*0Sstevel@tonic-gate }
649*0Sstevel@tonic-gate 
650*0Sstevel@tonic-gate /*
651*0Sstevel@tonic-gate  * Generic esballoc used to implement the four flavors: [d]esballoc[a].
652*0Sstevel@tonic-gate  */
653*0Sstevel@tonic-gate static mblk_t *
654*0Sstevel@tonic-gate gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
655*0Sstevel@tonic-gate 	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
656*0Sstevel@tonic-gate {
657*0Sstevel@tonic-gate 	dblk_t *dbp;
658*0Sstevel@tonic-gate 	mblk_t *mp;
659*0Sstevel@tonic-gate 
660*0Sstevel@tonic-gate 	ASSERT(base != NULL && frp != NULL);
661*0Sstevel@tonic-gate 
662*0Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
663*0Sstevel@tonic-gate 		mp = NULL;
664*0Sstevel@tonic-gate 		goto out;
665*0Sstevel@tonic-gate 	}
666*0Sstevel@tonic-gate 
667*0Sstevel@tonic-gate 	mp = dbp->db_mblk;
668*0Sstevel@tonic-gate 	dbp->db_base = base;
669*0Sstevel@tonic-gate 	dbp->db_lim = base + size;
670*0Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = lastfree;
671*0Sstevel@tonic-gate 	dbp->db_frtnp = frp;
672*0Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = db_rtfu;
673*0Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
674*0Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = base;
675*0Sstevel@tonic-gate 	mp->b_queue = NULL;
676*0Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
677*0Sstevel@tonic-gate 
678*0Sstevel@tonic-gate out:
679*0Sstevel@tonic-gate 	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
680*0Sstevel@tonic-gate 	return (mp);
681*0Sstevel@tonic-gate }
682*0Sstevel@tonic-gate 
683*0Sstevel@tonic-gate /*ARGSUSED*/
684*0Sstevel@tonic-gate mblk_t *
685*0Sstevel@tonic-gate esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
686*0Sstevel@tonic-gate {
687*0Sstevel@tonic-gate 	mblk_t *mp;
688*0Sstevel@tonic-gate 
689*0Sstevel@tonic-gate 	/*
690*0Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
691*0Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
692*0Sstevel@tonic-gate 	 * call optimization.
693*0Sstevel@tonic-gate 	 */
694*0Sstevel@tonic-gate 	if (!str_ftnever) {
695*0Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
696*0Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
697*0Sstevel@tonic-gate 
698*0Sstevel@tonic-gate 		if (mp != NULL)
699*0Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
700*0Sstevel@tonic-gate 		return (mp);
701*0Sstevel@tonic-gate 	}
702*0Sstevel@tonic-gate 
703*0Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
704*0Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
705*0Sstevel@tonic-gate }
706*0Sstevel@tonic-gate 
707*0Sstevel@tonic-gate /*
708*0Sstevel@tonic-gate  * Same as esballoc() but sleeps waiting for memory.
709*0Sstevel@tonic-gate  */
710*0Sstevel@tonic-gate /*ARGSUSED*/
711*0Sstevel@tonic-gate mblk_t *
712*0Sstevel@tonic-gate esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
713*0Sstevel@tonic-gate {
714*0Sstevel@tonic-gate 	mblk_t *mp;
715*0Sstevel@tonic-gate 
716*0Sstevel@tonic-gate 	/*
717*0Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
718*0Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
719*0Sstevel@tonic-gate 	 * call optimization.
720*0Sstevel@tonic-gate 	 */
721*0Sstevel@tonic-gate 	if (!str_ftnever) {
722*0Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
723*0Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_SLEEP);
724*0Sstevel@tonic-gate 
725*0Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
726*0Sstevel@tonic-gate 		return (mp);
727*0Sstevel@tonic-gate 	}
728*0Sstevel@tonic-gate 
729*0Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
730*0Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_SLEEP));
731*0Sstevel@tonic-gate }
732*0Sstevel@tonic-gate 
733*0Sstevel@tonic-gate /*ARGSUSED*/
734*0Sstevel@tonic-gate mblk_t *
735*0Sstevel@tonic-gate desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
736*0Sstevel@tonic-gate {
737*0Sstevel@tonic-gate 	mblk_t *mp;
738*0Sstevel@tonic-gate 
739*0Sstevel@tonic-gate 	/*
740*0Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
741*0Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
742*0Sstevel@tonic-gate 	 * call optimization.
743*0Sstevel@tonic-gate 	 */
744*0Sstevel@tonic-gate 	if (!str_ftnever) {
745*0Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
746*0Sstevel@tonic-gate 			frp, dblk_lastfree_desb, KM_NOSLEEP);
747*0Sstevel@tonic-gate 
748*0Sstevel@tonic-gate 		if (mp != NULL)
749*0Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
750*0Sstevel@tonic-gate 		return (mp);
751*0Sstevel@tonic-gate 	}
752*0Sstevel@tonic-gate 
753*0Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
754*0Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
755*0Sstevel@tonic-gate }
756*0Sstevel@tonic-gate 
757*0Sstevel@tonic-gate /*ARGSUSED*/
758*0Sstevel@tonic-gate mblk_t *
759*0Sstevel@tonic-gate esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
760*0Sstevel@tonic-gate {
761*0Sstevel@tonic-gate 	mblk_t *mp;
762*0Sstevel@tonic-gate 
763*0Sstevel@tonic-gate 	/*
764*0Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
765*0Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
766*0Sstevel@tonic-gate 	 * call optimization.
767*0Sstevel@tonic-gate 	 */
768*0Sstevel@tonic-gate 	if (!str_ftnever) {
769*0Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
770*0Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
771*0Sstevel@tonic-gate 
772*0Sstevel@tonic-gate 		if (mp != NULL)
773*0Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
774*0Sstevel@tonic-gate 		return (mp);
775*0Sstevel@tonic-gate 	}
776*0Sstevel@tonic-gate 
777*0Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
778*0Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
779*0Sstevel@tonic-gate }
780*0Sstevel@tonic-gate 
781*0Sstevel@tonic-gate /*ARGSUSED*/
782*0Sstevel@tonic-gate mblk_t *
783*0Sstevel@tonic-gate desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
784*0Sstevel@tonic-gate {
785*0Sstevel@tonic-gate 	mblk_t *mp;
786*0Sstevel@tonic-gate 
787*0Sstevel@tonic-gate 	/*
788*0Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
789*0Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
790*0Sstevel@tonic-gate 	 * call optimization.
791*0Sstevel@tonic-gate 	 */
792*0Sstevel@tonic-gate 	if (!str_ftnever) {
793*0Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
794*0Sstevel@tonic-gate 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
795*0Sstevel@tonic-gate 
796*0Sstevel@tonic-gate 		if (mp != NULL)
797*0Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
798*0Sstevel@tonic-gate 		return (mp);
799*0Sstevel@tonic-gate 	}
800*0Sstevel@tonic-gate 
801*0Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
802*0Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
803*0Sstevel@tonic-gate }
804*0Sstevel@tonic-gate 
805*0Sstevel@tonic-gate static void
806*0Sstevel@tonic-gate bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
807*0Sstevel@tonic-gate {
808*0Sstevel@tonic-gate 	bcache_t *bcp = dbp->db_cache;
809*0Sstevel@tonic-gate 
810*0Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
811*0Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
812*0Sstevel@tonic-gate 		str_ftfree(dbp);
813*0Sstevel@tonic-gate 
814*0Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
815*0Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
816*0Sstevel@tonic-gate 		crfree(dbp->db_credp);
817*0Sstevel@tonic-gate 		dbp->db_credp = NULL;
818*0Sstevel@tonic-gate 	}
819*0Sstevel@tonic-gate 	dbp->db_cpid = -1;
820*0Sstevel@tonic-gate 	dbp->db_struioflag = 0;
821*0Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
822*0Sstevel@tonic-gate 
823*0Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
824*0Sstevel@tonic-gate 	kmem_cache_free(bcp->dblk_cache, dbp);
825*0Sstevel@tonic-gate 	bcp->alloc--;
826*0Sstevel@tonic-gate 
827*0Sstevel@tonic-gate 	if (bcp->alloc == 0 && bcp->destroy != 0) {
828*0Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
829*0Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
830*0Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
831*0Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
832*0Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
833*0Sstevel@tonic-gate 	} else {
834*0Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
835*0Sstevel@tonic-gate 	}
836*0Sstevel@tonic-gate }
837*0Sstevel@tonic-gate 
838*0Sstevel@tonic-gate bcache_t *
839*0Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align)
840*0Sstevel@tonic-gate {
841*0Sstevel@tonic-gate 	bcache_t *bcp;
842*0Sstevel@tonic-gate 	char buffer[255];
843*0Sstevel@tonic-gate 
844*0Sstevel@tonic-gate 	ASSERT((align & (align - 1)) == 0);
845*0Sstevel@tonic-gate 
846*0Sstevel@tonic-gate 	if ((bcp = (bcache_t *)kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) ==
847*0Sstevel@tonic-gate 	    NULL) {
848*0Sstevel@tonic-gate 		return (NULL);
849*0Sstevel@tonic-gate 	}
850*0Sstevel@tonic-gate 
851*0Sstevel@tonic-gate 	bcp->size = size;
852*0Sstevel@tonic-gate 	bcp->align = align;
853*0Sstevel@tonic-gate 	bcp->alloc = 0;
854*0Sstevel@tonic-gate 	bcp->destroy = 0;
855*0Sstevel@tonic-gate 
856*0Sstevel@tonic-gate 	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
857*0Sstevel@tonic-gate 
858*0Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_buffer_cache", name);
859*0Sstevel@tonic-gate 	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
860*0Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
861*0Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_dblk_cache", name);
862*0Sstevel@tonic-gate 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
863*0Sstevel@tonic-gate 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
864*0Sstevel@tonic-gate 						NULL, (void *)bcp, NULL, 0);
865*0Sstevel@tonic-gate 
866*0Sstevel@tonic-gate 	return (bcp);
867*0Sstevel@tonic-gate }
868*0Sstevel@tonic-gate 
869*0Sstevel@tonic-gate void
870*0Sstevel@tonic-gate bcache_destroy(bcache_t *bcp)
871*0Sstevel@tonic-gate {
872*0Sstevel@tonic-gate 	ASSERT(bcp != NULL);
873*0Sstevel@tonic-gate 
874*0Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
875*0Sstevel@tonic-gate 	if (bcp->alloc == 0) {
876*0Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
877*0Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
878*0Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
879*0Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
880*0Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
881*0Sstevel@tonic-gate 	} else {
882*0Sstevel@tonic-gate 		bcp->destroy++;
883*0Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
884*0Sstevel@tonic-gate 	}
885*0Sstevel@tonic-gate }
886*0Sstevel@tonic-gate 
887*0Sstevel@tonic-gate /*ARGSUSED*/
888*0Sstevel@tonic-gate mblk_t *
889*0Sstevel@tonic-gate bcache_allocb(bcache_t *bcp, uint_t pri)
890*0Sstevel@tonic-gate {
891*0Sstevel@tonic-gate 	dblk_t *dbp;
892*0Sstevel@tonic-gate 	mblk_t *mp = NULL;
893*0Sstevel@tonic-gate 
894*0Sstevel@tonic-gate 	ASSERT(bcp != NULL);
895*0Sstevel@tonic-gate 
896*0Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
897*0Sstevel@tonic-gate 	if (bcp->destroy != 0) {
898*0Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
899*0Sstevel@tonic-gate 		goto out;
900*0Sstevel@tonic-gate 	}
901*0Sstevel@tonic-gate 
902*0Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
903*0Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
904*0Sstevel@tonic-gate 		goto out;
905*0Sstevel@tonic-gate 	}
906*0Sstevel@tonic-gate 	bcp->alloc++;
907*0Sstevel@tonic-gate 	mutex_exit(&bcp->mutex);
908*0Sstevel@tonic-gate 
909*0Sstevel@tonic-gate 	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
910*0Sstevel@tonic-gate 
911*0Sstevel@tonic-gate 	mp = dbp->db_mblk;
912*0Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
913*0Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
914*0Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
915*0Sstevel@tonic-gate 	mp->b_queue = NULL;
916*0Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
917*0Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
918*0Sstevel@tonic-gate out:
919*0Sstevel@tonic-gate 	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
920*0Sstevel@tonic-gate 
921*0Sstevel@tonic-gate 	return (mp);
922*0Sstevel@tonic-gate }
923*0Sstevel@tonic-gate 
924*0Sstevel@tonic-gate static void
925*0Sstevel@tonic-gate dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
926*0Sstevel@tonic-gate {
927*0Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
928*0Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
929*0Sstevel@tonic-gate 		str_ftfree(dbp);
930*0Sstevel@tonic-gate 
931*0Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
932*0Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
933*0Sstevel@tonic-gate 		crfree(dbp->db_credp);
934*0Sstevel@tonic-gate 		dbp->db_credp = NULL;
935*0Sstevel@tonic-gate 	}
936*0Sstevel@tonic-gate 	dbp->db_cpid = -1;
937*0Sstevel@tonic-gate 	dbp->db_struioflag = 0;
938*0Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
939*0Sstevel@tonic-gate 
940*0Sstevel@tonic-gate 	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
941*0Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
942*0Sstevel@tonic-gate }
943*0Sstevel@tonic-gate 
944*0Sstevel@tonic-gate static mblk_t *
945*0Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags)
946*0Sstevel@tonic-gate {
947*0Sstevel@tonic-gate 	mblk_t *mp;
948*0Sstevel@tonic-gate 	void *buf;
949*0Sstevel@tonic-gate 
950*0Sstevel@tonic-gate 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
951*0Sstevel@tonic-gate 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
952*0Sstevel@tonic-gate 		return (NULL);
953*0Sstevel@tonic-gate 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
954*0Sstevel@tonic-gate 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
955*0Sstevel@tonic-gate 		kmem_free(buf, size);
956*0Sstevel@tonic-gate 
957*0Sstevel@tonic-gate 	if (mp != NULL)
958*0Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
959*0Sstevel@tonic-gate 
960*0Sstevel@tonic-gate 	return (mp);
961*0Sstevel@tonic-gate }
962*0Sstevel@tonic-gate 
963*0Sstevel@tonic-gate mblk_t *
964*0Sstevel@tonic-gate allocb_tryhard(size_t target_size)
965*0Sstevel@tonic-gate {
966*0Sstevel@tonic-gate 	size_t size;
967*0Sstevel@tonic-gate 	mblk_t *bp;
968*0Sstevel@tonic-gate 
969*0Sstevel@tonic-gate 	for (size = target_size; size < target_size + 512;
970*0Sstevel@tonic-gate 	    size += DBLK_CACHE_ALIGN)
971*0Sstevel@tonic-gate 		if ((bp = allocb(size, BPRI_HI)) != NULL)
972*0Sstevel@tonic-gate 			return (bp);
973*0Sstevel@tonic-gate 	allocb_tryhard_fails++;
974*0Sstevel@tonic-gate 	return (NULL);
975*0Sstevel@tonic-gate }
976*0Sstevel@tonic-gate 
977*0Sstevel@tonic-gate /*
978*0Sstevel@tonic-gate  * This routine is consolidation private for STREAMS internal use
979*0Sstevel@tonic-gate  * This routine may only be called from sync routines (i.e., not
980*0Sstevel@tonic-gate  * from put or service procedures).  It is located here (rather
981*0Sstevel@tonic-gate  * than strsubr.c) so that we don't have to expose all of the
982*0Sstevel@tonic-gate  * allocb() implementation details in header files.
983*0Sstevel@tonic-gate  */
984*0Sstevel@tonic-gate mblk_t *
985*0Sstevel@tonic-gate allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
986*0Sstevel@tonic-gate {
987*0Sstevel@tonic-gate 	dblk_t *dbp;
988*0Sstevel@tonic-gate 	mblk_t *mp;
989*0Sstevel@tonic-gate 	size_t index;
990*0Sstevel@tonic-gate 
991*0Sstevel@tonic-gate 	index = (size -1) >> DBLK_SIZE_SHIFT;
992*0Sstevel@tonic-gate 
993*0Sstevel@tonic-gate 	if (flags & STR_NOSIG) {
994*0Sstevel@tonic-gate 		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
995*0Sstevel@tonic-gate 			if (size != 0) {
996*0Sstevel@tonic-gate 				mp = allocb_oversize(size, KM_SLEEP);
997*0Sstevel@tonic-gate 				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
998*0Sstevel@tonic-gate 				    (uintptr_t)mp);
999*0Sstevel@tonic-gate 				return (mp);
1000*0Sstevel@tonic-gate 			}
1001*0Sstevel@tonic-gate 			index = 0;
1002*0Sstevel@tonic-gate 		}
1003*0Sstevel@tonic-gate 
1004*0Sstevel@tonic-gate 		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
1005*0Sstevel@tonic-gate 		mp = dbp->db_mblk;
1006*0Sstevel@tonic-gate 		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
1007*0Sstevel@tonic-gate 		mp->b_next = mp->b_prev = mp->b_cont = NULL;
1008*0Sstevel@tonic-gate 		mp->b_rptr = mp->b_wptr = dbp->db_base;
1009*0Sstevel@tonic-gate 		mp->b_queue = NULL;
1010*0Sstevel@tonic-gate 		MBLK_BAND_FLAG_WORD(mp) = 0;
1011*0Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
1012*0Sstevel@tonic-gate 
1013*0Sstevel@tonic-gate 		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
1014*0Sstevel@tonic-gate 
1015*0Sstevel@tonic-gate 	} else {
1016*0Sstevel@tonic-gate 		while ((mp = allocb(size, pri)) == NULL) {
1017*0Sstevel@tonic-gate 			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
1018*0Sstevel@tonic-gate 				return (NULL);
1019*0Sstevel@tonic-gate 		}
1020*0Sstevel@tonic-gate 	}
1021*0Sstevel@tonic-gate 
1022*0Sstevel@tonic-gate 	return (mp);
1023*0Sstevel@tonic-gate }
1024*0Sstevel@tonic-gate 
1025*0Sstevel@tonic-gate /*
1026*0Sstevel@tonic-gate  * Call function 'func' with 'arg' when a class zero block can
1027*0Sstevel@tonic-gate  * be allocated with priority 'pri'.
1028*0Sstevel@tonic-gate  */
1029*0Sstevel@tonic-gate bufcall_id_t
1030*0Sstevel@tonic-gate esbbcall(uint_t pri, void (*func)(void *), void *arg)
1031*0Sstevel@tonic-gate {
1032*0Sstevel@tonic-gate 	return (bufcall(1, pri, func, arg));
1033*0Sstevel@tonic-gate }
1034*0Sstevel@tonic-gate 
1035*0Sstevel@tonic-gate /*
1036*0Sstevel@tonic-gate  * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
1037*0Sstevel@tonic-gate  * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
1038*0Sstevel@tonic-gate  * This provides consistency for all internal allocators of ioctl.
1039*0Sstevel@tonic-gate  */
1040*0Sstevel@tonic-gate mblk_t *
1041*0Sstevel@tonic-gate mkiocb(uint_t cmd)
1042*0Sstevel@tonic-gate {
1043*0Sstevel@tonic-gate 	struct iocblk	*ioc;
1044*0Sstevel@tonic-gate 	mblk_t		*mp;
1045*0Sstevel@tonic-gate 
1046*0Sstevel@tonic-gate 	/*
1047*0Sstevel@tonic-gate 	 * Allocate enough space for any of the ioctl related messages.
1048*0Sstevel@tonic-gate 	 */
1049*0Sstevel@tonic-gate 	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
1050*0Sstevel@tonic-gate 		return (NULL);
1051*0Sstevel@tonic-gate 
1052*0Sstevel@tonic-gate 	bzero(mp->b_rptr, sizeof (union ioctypes));
1053*0Sstevel@tonic-gate 
1054*0Sstevel@tonic-gate 	/*
1055*0Sstevel@tonic-gate 	 * Set the mblk_t information and ptrs correctly.
1056*0Sstevel@tonic-gate 	 */
1057*0Sstevel@tonic-gate 	mp->b_wptr += sizeof (struct iocblk);
1058*0Sstevel@tonic-gate 	mp->b_datap->db_type = M_IOCTL;
1059*0Sstevel@tonic-gate 
1060*0Sstevel@tonic-gate 	/*
1061*0Sstevel@tonic-gate 	 * Fill in the fields.
1062*0Sstevel@tonic-gate 	 */
1063*0Sstevel@tonic-gate 	ioc		= (struct iocblk *)mp->b_rptr;
1064*0Sstevel@tonic-gate 	ioc->ioc_cmd	= cmd;
1065*0Sstevel@tonic-gate 	ioc->ioc_cr	= kcred;
1066*0Sstevel@tonic-gate 	ioc->ioc_id	= getiocseqno();
1067*0Sstevel@tonic-gate 	ioc->ioc_flag	= IOC_NATIVE;
1068*0Sstevel@tonic-gate 	return (mp);
1069*0Sstevel@tonic-gate }
1070*0Sstevel@tonic-gate 
1071*0Sstevel@tonic-gate /*
1072*0Sstevel@tonic-gate  * test if block of given size can be allocated with a request of
1073*0Sstevel@tonic-gate  * the given priority.
1074*0Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
1075*0Sstevel@tonic-gate  */
1076*0Sstevel@tonic-gate /* ARGSUSED */
1077*0Sstevel@tonic-gate int
1078*0Sstevel@tonic-gate testb(size_t size, uint_t pri)
1079*0Sstevel@tonic-gate {
1080*0Sstevel@tonic-gate 	return ((size + sizeof (dblk_t)) <= kmem_avail());
1081*0Sstevel@tonic-gate }
1082*0Sstevel@tonic-gate 
1083*0Sstevel@tonic-gate /*
1084*0Sstevel@tonic-gate  * Call function 'func' with argument 'arg' when there is a reasonably
1085*0Sstevel@tonic-gate  * good chance that a block of size 'size' can be allocated.
1086*0Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
1087*0Sstevel@tonic-gate  */
1088*0Sstevel@tonic-gate /* ARGSUSED */
1089*0Sstevel@tonic-gate bufcall_id_t
1090*0Sstevel@tonic-gate bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
1091*0Sstevel@tonic-gate {
1092*0Sstevel@tonic-gate 	static long bid = 1;	/* always odd to save checking for zero */
1093*0Sstevel@tonic-gate 	bufcall_id_t bc_id;
1094*0Sstevel@tonic-gate 	struct strbufcall *bcp;
1095*0Sstevel@tonic-gate 
1096*0Sstevel@tonic-gate 	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
1097*0Sstevel@tonic-gate 		return (0);
1098*0Sstevel@tonic-gate 
1099*0Sstevel@tonic-gate 	bcp->bc_func = func;
1100*0Sstevel@tonic-gate 	bcp->bc_arg = arg;
1101*0Sstevel@tonic-gate 	bcp->bc_size = size;
1102*0Sstevel@tonic-gate 	bcp->bc_next = NULL;
1103*0Sstevel@tonic-gate 	bcp->bc_executor = NULL;
1104*0Sstevel@tonic-gate 
1105*0Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
1106*0Sstevel@tonic-gate 	/*
1107*0Sstevel@tonic-gate 	 * After bcp is linked into strbcalls and strbcall_lock is dropped there
1108*0Sstevel@tonic-gate 	 * should be no references to bcp since it may be freed by
1109*0Sstevel@tonic-gate 	 * runbufcalls(). Since bcp_id field is returned, we save its value in
1110*0Sstevel@tonic-gate 	 * the local var.
1111*0Sstevel@tonic-gate 	 */
1112*0Sstevel@tonic-gate 	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */
1113*0Sstevel@tonic-gate 
1114*0Sstevel@tonic-gate 	/*
1115*0Sstevel@tonic-gate 	 * add newly allocated stream event to existing
1116*0Sstevel@tonic-gate 	 * linked list of events.
1117*0Sstevel@tonic-gate 	 */
1118*0Sstevel@tonic-gate 	if (strbcalls.bc_head == NULL) {
1119*0Sstevel@tonic-gate 		strbcalls.bc_head = strbcalls.bc_tail = bcp;
1120*0Sstevel@tonic-gate 	} else {
1121*0Sstevel@tonic-gate 		strbcalls.bc_tail->bc_next = bcp;
1122*0Sstevel@tonic-gate 		strbcalls.bc_tail = bcp;
1123*0Sstevel@tonic-gate 	}
1124*0Sstevel@tonic-gate 
1125*0Sstevel@tonic-gate 	cv_signal(&strbcall_cv);
1126*0Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
1127*0Sstevel@tonic-gate 	return (bc_id);
1128*0Sstevel@tonic-gate }
1129*0Sstevel@tonic-gate 
1130*0Sstevel@tonic-gate /*
1131*0Sstevel@tonic-gate  * Cancel a bufcall request.
1132*0Sstevel@tonic-gate  */
1133*0Sstevel@tonic-gate void
1134*0Sstevel@tonic-gate unbufcall(bufcall_id_t id)
1135*0Sstevel@tonic-gate {
1136*0Sstevel@tonic-gate 	strbufcall_t *bcp, *pbcp;
1137*0Sstevel@tonic-gate 
1138*0Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
1139*0Sstevel@tonic-gate again:
1140*0Sstevel@tonic-gate 	pbcp = NULL;
1141*0Sstevel@tonic-gate 	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
1142*0Sstevel@tonic-gate 		if (id == bcp->bc_id)
1143*0Sstevel@tonic-gate 			break;
1144*0Sstevel@tonic-gate 		pbcp = bcp;
1145*0Sstevel@tonic-gate 	}
1146*0Sstevel@tonic-gate 	if (bcp) {
1147*0Sstevel@tonic-gate 		if (bcp->bc_executor != NULL) {
1148*0Sstevel@tonic-gate 			if (bcp->bc_executor != curthread) {
1149*0Sstevel@tonic-gate 				cv_wait(&bcall_cv, &strbcall_lock);
1150*0Sstevel@tonic-gate 				goto again;
1151*0Sstevel@tonic-gate 			}
1152*0Sstevel@tonic-gate 		} else {
1153*0Sstevel@tonic-gate 			if (pbcp)
1154*0Sstevel@tonic-gate 				pbcp->bc_next = bcp->bc_next;
1155*0Sstevel@tonic-gate 			else
1156*0Sstevel@tonic-gate 				strbcalls.bc_head = bcp->bc_next;
1157*0Sstevel@tonic-gate 			if (bcp == strbcalls.bc_tail)
1158*0Sstevel@tonic-gate 				strbcalls.bc_tail = pbcp;
1159*0Sstevel@tonic-gate 			kmem_free(bcp, sizeof (strbufcall_t));
1160*0Sstevel@tonic-gate 		}
1161*0Sstevel@tonic-gate 	}
1162*0Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
1163*0Sstevel@tonic-gate }
1164*0Sstevel@tonic-gate 
1165*0Sstevel@tonic-gate /*
1166*0Sstevel@tonic-gate  * Duplicate a message block by block (uses dupb), returning
1167*0Sstevel@tonic-gate  * a pointer to the duplicate message.
1168*0Sstevel@tonic-gate  * Returns a non-NULL value only if the entire message
1169*0Sstevel@tonic-gate  * was dup'd.
1170*0Sstevel@tonic-gate  */
1171*0Sstevel@tonic-gate mblk_t *
1172*0Sstevel@tonic-gate dupmsg(mblk_t *bp)
1173*0Sstevel@tonic-gate {
1174*0Sstevel@tonic-gate 	mblk_t *head, *nbp;
1175*0Sstevel@tonic-gate 
1176*0Sstevel@tonic-gate 	if (!bp || !(nbp = head = dupb(bp)))
1177*0Sstevel@tonic-gate 		return (NULL);
1178*0Sstevel@tonic-gate 
1179*0Sstevel@tonic-gate 	while (bp->b_cont) {
1180*0Sstevel@tonic-gate 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
1181*0Sstevel@tonic-gate 			freemsg(head);
1182*0Sstevel@tonic-gate 			return (NULL);
1183*0Sstevel@tonic-gate 		}
1184*0Sstevel@tonic-gate 		nbp = nbp->b_cont;
1185*0Sstevel@tonic-gate 		bp = bp->b_cont;
1186*0Sstevel@tonic-gate 	}
1187*0Sstevel@tonic-gate 	return (head);
1188*0Sstevel@tonic-gate }
1189*0Sstevel@tonic-gate 
1190*0Sstevel@tonic-gate #define	DUPB_NOLOAN(bp) \
1191*0Sstevel@tonic-gate 	((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
1192*0Sstevel@tonic-gate 	copyb((bp)) : dupb((bp)))
1193*0Sstevel@tonic-gate 
1194*0Sstevel@tonic-gate mblk_t *
1195*0Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp)
1196*0Sstevel@tonic-gate {
1197*0Sstevel@tonic-gate 	mblk_t *head, *nbp;
1198*0Sstevel@tonic-gate 
1199*0Sstevel@tonic-gate 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
1200*0Sstevel@tonic-gate 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
1201*0Sstevel@tonic-gate 		return (NULL);
1202*0Sstevel@tonic-gate 
1203*0Sstevel@tonic-gate 	while (bp->b_cont) {
1204*0Sstevel@tonic-gate 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
1205*0Sstevel@tonic-gate 			freemsg(head);
1206*0Sstevel@tonic-gate 			return (NULL);
1207*0Sstevel@tonic-gate 		}
1208*0Sstevel@tonic-gate 		nbp = nbp->b_cont;
1209*0Sstevel@tonic-gate 		bp = bp->b_cont;
1210*0Sstevel@tonic-gate 	}
1211*0Sstevel@tonic-gate 	return (head);
1212*0Sstevel@tonic-gate }
1213*0Sstevel@tonic-gate 
1214*0Sstevel@tonic-gate /*
1215*0Sstevel@tonic-gate  * Copy data from message and data block to newly allocated message and
1216*0Sstevel@tonic-gate  * data block. Returns new message block pointer, or NULL if error.
1217*0Sstevel@tonic-gate  * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
1218*0Sstevel@tonic-gate  * as in the original even when db_base is not word aligned. (bug 1052877)
1219*0Sstevel@tonic-gate  */
1220*0Sstevel@tonic-gate mblk_t *
1221*0Sstevel@tonic-gate copyb(mblk_t *bp)
1222*0Sstevel@tonic-gate {
1223*0Sstevel@tonic-gate 	mblk_t	*nbp;
1224*0Sstevel@tonic-gate 	dblk_t	*dp, *ndp;
1225*0Sstevel@tonic-gate 	uchar_t *base;
1226*0Sstevel@tonic-gate 	size_t	size;
1227*0Sstevel@tonic-gate 	size_t	unaligned;
1228*0Sstevel@tonic-gate 
1229*0Sstevel@tonic-gate 	ASSERT(bp->b_wptr >= bp->b_rptr);
1230*0Sstevel@tonic-gate 
1231*0Sstevel@tonic-gate 	dp = bp->b_datap;
1232*0Sstevel@tonic-gate 	if (dp->db_fthdr != NULL)
1233*0Sstevel@tonic-gate 		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
1234*0Sstevel@tonic-gate 
1235*0Sstevel@tonic-gate 	/*
1236*0Sstevel@tonic-gate 	 * Special handling for Multidata message; this should be
1237*0Sstevel@tonic-gate 	 * removed once a copy-callback routine is made available.
1238*0Sstevel@tonic-gate 	 */
1239*0Sstevel@tonic-gate 	if (dp->db_type == M_MULTIDATA) {
1240*0Sstevel@tonic-gate 		cred_t *cr;
1241*0Sstevel@tonic-gate 
1242*0Sstevel@tonic-gate 		if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
1243*0Sstevel@tonic-gate 			return (NULL);
1244*0Sstevel@tonic-gate 
1245*0Sstevel@tonic-gate 		nbp->b_flag = bp->b_flag;
1246*0Sstevel@tonic-gate 		nbp->b_band = bp->b_band;
1247*0Sstevel@tonic-gate 		ndp = nbp->b_datap;
1248*0Sstevel@tonic-gate 
1249*0Sstevel@tonic-gate 		/* See comments below on potential issues. */
1250*0Sstevel@tonic-gate 		STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
1251*0Sstevel@tonic-gate 
1252*0Sstevel@tonic-gate 		ASSERT(ndp->db_type == dp->db_type);
1253*0Sstevel@tonic-gate 		cr = dp->db_credp;
1254*0Sstevel@tonic-gate 		if (cr != NULL)
1255*0Sstevel@tonic-gate 			crhold(ndp->db_credp = cr);
1256*0Sstevel@tonic-gate 		ndp->db_cpid = dp->db_cpid;
1257*0Sstevel@tonic-gate 		return (nbp);
1258*0Sstevel@tonic-gate 	}
1259*0Sstevel@tonic-gate 
1260*0Sstevel@tonic-gate 	size = dp->db_lim - dp->db_base;
1261*0Sstevel@tonic-gate 	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
1262*0Sstevel@tonic-gate 	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
1263*0Sstevel@tonic-gate 		return (NULL);
1264*0Sstevel@tonic-gate 	nbp->b_flag = bp->b_flag;
1265*0Sstevel@tonic-gate 	nbp->b_band = bp->b_band;
1266*0Sstevel@tonic-gate 	ndp = nbp->b_datap;
1267*0Sstevel@tonic-gate 
1268*0Sstevel@tonic-gate 	/*
1269*0Sstevel@tonic-gate 	 * Well, here is a potential issue.  If we are trying to
1270*0Sstevel@tonic-gate 	 * trace a flow, and we copy the message, we might lose
1271*0Sstevel@tonic-gate 	 * information about where this message might have been.
1272*0Sstevel@tonic-gate 	 * So we should inherit the FT data.  On the other hand,
1273*0Sstevel@tonic-gate 	 * a user might be interested only in alloc to free data.
1274*0Sstevel@tonic-gate 	 * So I guess the real answer is to provide a tunable.
1275*0Sstevel@tonic-gate 	 */
1276*0Sstevel@tonic-gate 	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
1277*0Sstevel@tonic-gate 
1278*0Sstevel@tonic-gate 	base = ndp->db_base + unaligned;
1279*0Sstevel@tonic-gate 	bcopy(dp->db_base, ndp->db_base + unaligned, size);
1280*0Sstevel@tonic-gate 
1281*0Sstevel@tonic-gate 	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
1282*0Sstevel@tonic-gate 	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
1283*0Sstevel@tonic-gate 
1284*0Sstevel@tonic-gate 	return (nbp);
1285*0Sstevel@tonic-gate }
1286*0Sstevel@tonic-gate 
1287*0Sstevel@tonic-gate /*
1288*0Sstevel@tonic-gate  * Copy data from message to newly allocated message using new
1289*0Sstevel@tonic-gate  * data blocks.  Returns a pointer to the new message, or NULL if error.
1290*0Sstevel@tonic-gate  */
1291*0Sstevel@tonic-gate mblk_t *
1292*0Sstevel@tonic-gate copymsg(mblk_t *bp)
1293*0Sstevel@tonic-gate {
1294*0Sstevel@tonic-gate 	mblk_t *head, *nbp;
1295*0Sstevel@tonic-gate 
1296*0Sstevel@tonic-gate 	if (!bp || !(nbp = head = copyb(bp)))
1297*0Sstevel@tonic-gate 		return (NULL);
1298*0Sstevel@tonic-gate 
1299*0Sstevel@tonic-gate 	while (bp->b_cont) {
1300*0Sstevel@tonic-gate 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
1301*0Sstevel@tonic-gate 			freemsg(head);
1302*0Sstevel@tonic-gate 			return (NULL);
1303*0Sstevel@tonic-gate 		}
1304*0Sstevel@tonic-gate 		nbp = nbp->b_cont;
1305*0Sstevel@tonic-gate 		bp = bp->b_cont;
1306*0Sstevel@tonic-gate 	}
1307*0Sstevel@tonic-gate 	return (head);
1308*0Sstevel@tonic-gate }
1309*0Sstevel@tonic-gate 
1310*0Sstevel@tonic-gate /*
1311*0Sstevel@tonic-gate  * link a message block to tail of message
1312*0Sstevel@tonic-gate  */
1313*0Sstevel@tonic-gate void
1314*0Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp)
1315*0Sstevel@tonic-gate {
1316*0Sstevel@tonic-gate 	ASSERT(mp && bp);
1317*0Sstevel@tonic-gate 
1318*0Sstevel@tonic-gate 	for (; mp->b_cont; mp = mp->b_cont)
1319*0Sstevel@tonic-gate 		;
1320*0Sstevel@tonic-gate 	mp->b_cont = bp;
1321*0Sstevel@tonic-gate }
1322*0Sstevel@tonic-gate 
1323*0Sstevel@tonic-gate /*
1324*0Sstevel@tonic-gate  * unlink a message block from head of message
1325*0Sstevel@tonic-gate  * return pointer to new message.
1326*0Sstevel@tonic-gate  * NULL if message becomes empty.
1327*0Sstevel@tonic-gate  */
1328*0Sstevel@tonic-gate mblk_t *
1329*0Sstevel@tonic-gate unlinkb(mblk_t *bp)
1330*0Sstevel@tonic-gate {
1331*0Sstevel@tonic-gate 	mblk_t *bp1;
1332*0Sstevel@tonic-gate 
1333*0Sstevel@tonic-gate 	bp1 = bp->b_cont;
1334*0Sstevel@tonic-gate 	bp->b_cont = NULL;
1335*0Sstevel@tonic-gate 	return (bp1);
1336*0Sstevel@tonic-gate }
1337*0Sstevel@tonic-gate 
1338*0Sstevel@tonic-gate /*
1339*0Sstevel@tonic-gate  * remove a message block "bp" from message "mp"
1340*0Sstevel@tonic-gate  *
1341*0Sstevel@tonic-gate  * Return pointer to new message or NULL if no message remains.
1342*0Sstevel@tonic-gate  * Return -1 if bp is not found in message.
1343*0Sstevel@tonic-gate  */
1344*0Sstevel@tonic-gate mblk_t *
1345*0Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp)
1346*0Sstevel@tonic-gate {
1347*0Sstevel@tonic-gate 	mblk_t *tmp;
1348*0Sstevel@tonic-gate 	mblk_t *lastp = NULL;
1349*0Sstevel@tonic-gate 
1350*0Sstevel@tonic-gate 	ASSERT(mp && bp);
1351*0Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
1352*0Sstevel@tonic-gate 		if (tmp == bp) {
1353*0Sstevel@tonic-gate 			if (lastp)
1354*0Sstevel@tonic-gate 				lastp->b_cont = tmp->b_cont;
1355*0Sstevel@tonic-gate 			else
1356*0Sstevel@tonic-gate 				mp = tmp->b_cont;
1357*0Sstevel@tonic-gate 			tmp->b_cont = NULL;
1358*0Sstevel@tonic-gate 			return (mp);
1359*0Sstevel@tonic-gate 		}
1360*0Sstevel@tonic-gate 		lastp = tmp;
1361*0Sstevel@tonic-gate 	}
1362*0Sstevel@tonic-gate 	return ((mblk_t *)-1);
1363*0Sstevel@tonic-gate }
1364*0Sstevel@tonic-gate 
1365*0Sstevel@tonic-gate /*
1366*0Sstevel@tonic-gate  * Concatenate and align first len bytes of common
1367*0Sstevel@tonic-gate  * message type.  Len == -1, means concat everything.
1368*0Sstevel@tonic-gate  * Returns 1 on success, 0 on failure
1369*0Sstevel@tonic-gate  * After the pullup, mp points to the pulled up data.
1370*0Sstevel@tonic-gate  */
int
pullupmsg(mblk_t *mp, ssize_t len)
{
	mblk_t *bp, *b_cont;
	dblk_t *dbp;
	ssize_t n;

	ASSERT(mp->b_datap->db_ref > 0);
	/* mp must not be on a queue while its mblk header is swapped below */
	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (0);

	if (len == -1) {
		/* Already a single aligned block: nothing to pull up. */
		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
			return (1);
		len = xmsgsize(mp);
	} else {
		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
		ASSERT(first_mblk_len >= 0);
		/*
		 * If the length is less than that of the first mblk,
		 * we want to pull up the message into an aligned mblk.
		 * Though not part of the spec, some callers assume it.
		 */
		if (len <= first_mblk_len) {
			if (str_aligned(mp->b_rptr))
				return (1);
			len = first_mblk_len;
		} else if (xmsgsize(mp) < len)
			return (0);	/* not enough data available */
	}

	if ((bp = allocb_tmpl(len, mp)) == NULL)
		return (0);

	dbp = bp->b_datap;
	*bp = *mp;		/* swap mblks so bp heads the old msg... */
	mp->b_datap = dbp;	/* ... and mp heads the new message */
	mp->b_datap->db_mblk = mp;
	bp->b_datap->db_mblk = bp;
	mp->b_rptr = mp->b_wptr = dbp->db_base;

	/*
	 * Drain the old chain (now headed by bp) into the new data
	 * block, freeing each source mblk once fully consumed.
	 */
	do {
		ASSERT(bp->b_datap->db_ref > 0);
		ASSERT(bp->b_wptr >= bp->b_rptr);
		n = MIN(bp->b_wptr - bp->b_rptr, len);
		bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
		mp->b_wptr += n;
		bp->b_rptr += n;
		len -= n;
		/* Partially-consumed block: stop; it stays on b_cont below. */
		if (bp->b_rptr != bp->b_wptr)
			break;
		b_cont = bp->b_cont;
		freeb(bp);
		bp = b_cont;
	} while (len && bp);

	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */

	return (1);
}
1439*0Sstevel@tonic-gate 
1440*0Sstevel@tonic-gate /*
1441*0Sstevel@tonic-gate  * Concatenate and align at least the first len bytes of common message
1442*0Sstevel@tonic-gate  * type.  Len == -1 means concatenate everything.  The original message is
1443*0Sstevel@tonic-gate  * unaltered.  Returns a pointer to a new message on success, otherwise
1444*0Sstevel@tonic-gate  * returns NULL.
1445*0Sstevel@tonic-gate  */
mblk_t *
msgpullup(mblk_t *mp, ssize_t len)
{
	mblk_t	*newmp;
	ssize_t	totlen;
	ssize_t	n;

	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (NULL);

	totlen = xmsgsize(mp);

	/* Fail if the caller asked for more bytes than xmsgsize() reports. */
	if ((len > 0) && (len > totlen))
		return (NULL);

	/*
	 * Copy all of the first msg type into one new mblk, then dupmsg
	 * and link the rest onto this.
	 */

	/* Everything counted by xmsgsize() is copied ("at least" len). */
	len = totlen;

	if ((newmp = allocb_tmpl(len, mp)) == NULL)
		return (NULL);

	newmp->b_flag = mp->b_flag;
	newmp->b_band = mp->b_band;

	while (len > 0) {
		n = mp->b_wptr - mp->b_rptr;
		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
		if (n > 0)
			bcopy(mp->b_rptr, newmp->b_wptr, n);
		newmp->b_wptr += n;
		len -= n;
		mp = mp->b_cont;
	}

	/*
	 * Share (not copy) any blocks remaining beyond what was pulled
	 * up; on dupmsg failure the whole new message is discarded.
	 */
	if (mp != NULL) {
		newmp->b_cont = dupmsg(mp);
		if (newmp->b_cont == NULL) {
			freemsg(newmp);
			return (NULL);
		}
	}

	return (newmp);
}
1500*0Sstevel@tonic-gate 
1501*0Sstevel@tonic-gate /*
1502*0Sstevel@tonic-gate  * Trim bytes from message
1503*0Sstevel@tonic-gate  *  len > 0, trim from head
1504*0Sstevel@tonic-gate  *  len < 0, trim from tail
1505*0Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
1506*0Sstevel@tonic-gate  */
int
adjmsg(mblk_t *mp, ssize_t len)
{
	mblk_t *bp;
	mblk_t *save_bp = NULL;
	mblk_t *prev_bp;
	mblk_t *bcont;
	unsigned char type;
	ssize_t n;
	int fromhead;
	int first;

	ASSERT(mp != NULL);
	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (0);

	/* Normalize: fromhead selects the end to trim, len the byte count. */
	if (len < 0) {
		fromhead = 0;
		len = -len;
	} else {
		fromhead = 1;
	}

	/* Fail if the request exceeds what xmsgsize() reports. */
	if (xmsgsize(mp) < len)
		return (0);


	if (fromhead) {
		first = 1;
		while (len) {
			ASSERT(mp->b_wptr >= mp->b_rptr);
			n = MIN(mp->b_wptr - mp->b_rptr, len);
			mp->b_rptr += n;
			len -= n;

			/*
			 * If this is not the first zero length
			 * message remove it
			 */
			if (!first && (mp->b_wptr == mp->b_rptr)) {
				bcont = mp->b_cont;
				freeb(mp);
				/* Re-link around the freed block. */
				mp = save_bp->b_cont = bcont;
			} else {
				save_bp = mp;
				mp = mp->b_cont;
			}
			first = 0;
		}
	} else {
		/* Tail trim only touches the leading run of this db_type. */
		type = mp->b_datap->db_type;
		while (len) {
			bp = mp;
			save_bp = NULL;

			/*
			 * Find the last message of same type
			 */

			while (bp && bp->b_datap->db_type == type) {
				ASSERT(bp->b_wptr >= bp->b_rptr);
				prev_bp = save_bp;
				save_bp = bp;
				bp = bp->b_cont;
			}
			if (save_bp == NULL)
				break;
			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
			save_bp->b_wptr -= n;
			len -= n;

			/*
			 * If this is not the first message
			 * and we have taken away everything
			 * from this message, remove it
			 */

			if ((save_bp != mp) &&
				(save_bp->b_wptr == save_bp->b_rptr)) {
				bcont = save_bp->b_cont;
				freeb(save_bp);
				prev_bp->b_cont = bcont;
			}
		}
	}
	return (1);
}
1600*0Sstevel@tonic-gate 
1601*0Sstevel@tonic-gate /*
1602*0Sstevel@tonic-gate  * get number of data bytes in message
1603*0Sstevel@tonic-gate  */
1604*0Sstevel@tonic-gate size_t
1605*0Sstevel@tonic-gate msgdsize(mblk_t *bp)
1606*0Sstevel@tonic-gate {
1607*0Sstevel@tonic-gate 	size_t count = 0;
1608*0Sstevel@tonic-gate 
1609*0Sstevel@tonic-gate 	for (; bp; bp = bp->b_cont)
1610*0Sstevel@tonic-gate 		if (bp->b_datap->db_type == M_DATA) {
1611*0Sstevel@tonic-gate 			ASSERT(bp->b_wptr >= bp->b_rptr);
1612*0Sstevel@tonic-gate 			count += bp->b_wptr - bp->b_rptr;
1613*0Sstevel@tonic-gate 		}
1614*0Sstevel@tonic-gate 	return (count);
1615*0Sstevel@tonic-gate }
1616*0Sstevel@tonic-gate 
1617*0Sstevel@tonic-gate /*
1618*0Sstevel@tonic-gate  * Get a message off head of queue
1619*0Sstevel@tonic-gate  *
1620*0Sstevel@tonic-gate  * If queue has no buffers then mark queue
1621*0Sstevel@tonic-gate  * with QWANTR. (queue wants to be read by
1622*0Sstevel@tonic-gate  * someone when data becomes available)
1623*0Sstevel@tonic-gate  *
1624*0Sstevel@tonic-gate  * If there is something to take off then do so.
1625*0Sstevel@tonic-gate  * If queue falls below hi water mark turn off QFULL
1626*0Sstevel@tonic-gate  * flag.  Decrement weighted count of queue.
1627*0Sstevel@tonic-gate  * Also turn off QWANTR because queue is being read.
1628*0Sstevel@tonic-gate  *
1629*0Sstevel@tonic-gate  * The queue count is maintained on a per-band basis.
1630*0Sstevel@tonic-gate  * Priority band 0 (normal messages) uses q_count,
1631*0Sstevel@tonic-gate  * q_lowat, etc.  Non-zero priority bands use the
1632*0Sstevel@tonic-gate  * fields in their respective qband structures
1633*0Sstevel@tonic-gate  * (qb_count, qb_lowat, etc.)  All messages appear
1634*0Sstevel@tonic-gate  * on the same list, linked via their b_next pointers.
1635*0Sstevel@tonic-gate  * q_first is the head of the list.  q_count does
1636*0Sstevel@tonic-gate  * not reflect the size of all the messages on the
1637*0Sstevel@tonic-gate  * queue.  It only reflects those messages in the
1638*0Sstevel@tonic-gate  * normal band of flow.  The one exception to this
1639*0Sstevel@tonic-gate  * deals with high priority messages.  They are in
1640*0Sstevel@tonic-gate  * their own conceptual "band", but are accounted
1641*0Sstevel@tonic-gate  * against q_count.
1642*0Sstevel@tonic-gate  *
1643*0Sstevel@tonic-gate  * If queue count is below the lo water mark and QWANTW
1644*0Sstevel@tonic-gate  * is set, enable the closest backq which has a service
1645*0Sstevel@tonic-gate  * procedure and turn off the QWANTW flag.
1646*0Sstevel@tonic-gate  *
1647*0Sstevel@tonic-gate  * getq could be built on top of rmvq, but isn't because
1648*0Sstevel@tonic-gate  * of performance considerations.
1649*0Sstevel@tonic-gate  *
1650*0Sstevel@tonic-gate  * A note on the use of q_count and q_mblkcnt:
1651*0Sstevel@tonic-gate  *   q_count is the traditional byte count for messages that
1652*0Sstevel@tonic-gate  *   have been put on a queue.  Documentation tells us that
1653*0Sstevel@tonic-gate  *   we shouldn't rely on that count, but some drivers/modules
1654*0Sstevel@tonic-gate  *   do.  What was needed, however, is a mechanism to prevent
1655*0Sstevel@tonic-gate  *   runaway streams from consuming all of the resources,
1656*0Sstevel@tonic-gate  *   and particularly be able to flow control zero-length
1657*0Sstevel@tonic-gate  *   messages.  q_mblkcnt is used for this purpose.  It
1658*0Sstevel@tonic-gate  *   counts the number of mblk's that are being put on
1659*0Sstevel@tonic-gate  *   the queue.  The intention here, is that each mblk should
1660*0Sstevel@tonic-gate  *   contain one byte of data and, for the purpose of
1661*0Sstevel@tonic-gate  *   flow-control, logically does.  A queue will become
1662*0Sstevel@tonic-gate  *   full when EITHER of these values (q_count and q_mblkcnt)
1663*0Sstevel@tonic-gate  *   reach the highwater mark.  It will clear when BOTH
1664*0Sstevel@tonic-gate  *   of them drop below the highwater mark.  And it will
1665*0Sstevel@tonic-gate  *   backenable when BOTH of them drop below the lowwater
1666*0Sstevel@tonic-gate  *   mark.
1667*0Sstevel@tonic-gate  *   With this algorithm, a driver/module might be able
1668*0Sstevel@tonic-gate  *   to find a reasonably accurate q_count, and the
1669*0Sstevel@tonic-gate  *   framework can still try and limit resource usage.
1670*0Sstevel@tonic-gate  */
1671*0Sstevel@tonic-gate mblk_t *
1672*0Sstevel@tonic-gate getq(queue_t *q)
1673*0Sstevel@tonic-gate {
1674*0Sstevel@tonic-gate 	mblk_t *bp;
1675*0Sstevel@tonic-gate 	int band = 0;
1676*0Sstevel@tonic-gate 
1677*0Sstevel@tonic-gate 	bp = getq_noenab(q);
1678*0Sstevel@tonic-gate 	if (bp != NULL)
1679*0Sstevel@tonic-gate 		band = bp->b_band;
1680*0Sstevel@tonic-gate 
1681*0Sstevel@tonic-gate 	/*
1682*0Sstevel@tonic-gate 	 * Inlined from qbackenable().
1683*0Sstevel@tonic-gate 	 * Quick check without holding the lock.
1684*0Sstevel@tonic-gate 	 */
1685*0Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
1686*0Sstevel@tonic-gate 		return (bp);
1687*0Sstevel@tonic-gate 
1688*0Sstevel@tonic-gate 	qbackenable(q, band);
1689*0Sstevel@tonic-gate 	return (bp);
1690*0Sstevel@tonic-gate }
1691*0Sstevel@tonic-gate 
1692*0Sstevel@tonic-gate /*
1693*0Sstevel@tonic-gate  * Like getq() but does not backenable.  This is used by the stream
1694*0Sstevel@tonic-gate  * head when a putback() is likely.  The caller must call qbackenable()
1695*0Sstevel@tonic-gate  * after it is done with accessing the queue.
1696*0Sstevel@tonic-gate  */
mblk_t *
getq_noenab(queue_t *q)
{
	mblk_t *bp;
	mblk_t *tmp;
	qband_t *qbp;
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	/* freezestr should allow its caller to call getq/putq */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		/* Stream frozen by us: caller already holds QLOCK. */
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if ((bp = q->q_first) == 0) {
		/* Empty queue: record that a reader wants data. */
		q->q_flag |= QWANTR;
	} else {
		/* Unlink the head message from the q_first/q_last list. */
		if ((q->q_first = bp->b_next) == NULL)
			q->q_last = NULL;
		else
			q->q_first->b_prev = NULL;

		/* Get message byte count for q_count accounting */
		for (tmp = bp; tmp; tmp = tmp->b_cont) {
			bytecnt += (tmp->b_wptr - tmp->b_rptr);
			mblkcnt++;
		}

		if (bp->b_band == 0) {
			/* Normal band: account against q_count/q_mblkcnt. */
			q->q_count -= bytecnt;
			q->q_mblkcnt -= mblkcnt;
			/* Clear QFULL only when BOTH counts drop below hiwat. */
			if ((q->q_count < q->q_hiwat) &&
			    (q->q_mblkcnt < q->q_hiwat)) {
				q->q_flag &= ~QFULL;
			}
		} else {
			int i;

			ASSERT(bp->b_band <= q->q_nband);
			ASSERT(q->q_bandp != NULL);
			ASSERT(MUTEX_HELD(QLOCK(q)));
			/* Walk the qband list to this message's band. */
			qbp = q->q_bandp;
			i = bp->b_band;
			while (--i > 0)
				qbp = qbp->qb_next;
			/* Adjust the band's first/last markers. */
			if (qbp->qb_first == qbp->qb_last) {
				qbp->qb_first = NULL;
				qbp->qb_last = NULL;
			} else {
				qbp->qb_first = bp->b_next;
			}
			qbp->qb_count -= bytecnt;
			qbp->qb_mblkcnt -= mblkcnt;
			if ((qbp->qb_count < qbp->qb_hiwat) &&
			    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
				qbp->qb_flag &= ~QB_FULL;
			}
		}
		/* Queue is being read; drop the want-read flag. */
		q->q_flag &= ~QWANTR;
		bp->b_next = NULL;
		bp->b_prev = NULL;
	}
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);

	return (bp);
}
1769*0Sstevel@tonic-gate 
1770*0Sstevel@tonic-gate /*
1771*0Sstevel@tonic-gate  * Determine if a backenable is needed after removing a message in the
1772*0Sstevel@tonic-gate  * specified band.
1773*0Sstevel@tonic-gate  * NOTE: This routine assumes that something like getq_noenab() has been
1774*0Sstevel@tonic-gate  * already called.
1775*0Sstevel@tonic-gate  *
1776*0Sstevel@tonic-gate  * For the read side it is ok to hold sd_lock across calling this (and the
1777*0Sstevel@tonic-gate  * stream head often does).
1778*0Sstevel@tonic-gate  * But for the write side strwakeq might be invoked and it acquires sd_lock.
1779*0Sstevel@tonic-gate  */
void
qbackenable(queue_t *q, int band)
{
	int backenab = 0;
	qband_t *qbp;
	kthread_id_t freezer;

	ASSERT(q);
	/* Write side must not hold sd_lock: strwakeq below acquires it. */
	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));

	/*
	 * Quick check without holding the lock.
	 * OK since after getq() has lowered the q_count these flags
	 * would not change unless either the qbackenable() is done by
	 * another thread (which is ok) or the queue has gotten QFULL
	 * in which case another backenable will take place when the queue
	 * drops below q_lowat.
	 */
	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
		return;

	/* freezestr should allow its caller to call getq/putq */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if (band == 0) {
		/* Backenable when below lowat, or when lowat is zero. */
		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
		    q->q_mblkcnt < q->q_lowat)) {
			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
		}
	} else {
		int i;

		ASSERT((unsigned)band <= q->q_nband);
		ASSERT(q->q_bandp != NULL);

		/* Walk the qband list to the requested band. */
		qbp = q->q_bandp;
		i = band;
		while (--i > 0)
			qbp = qbp->qb_next;

		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
			backenab = qbp->qb_flag & QB_WANTW;
		}
	}

	/* Nothing to wake up: just release the lock (if we took it). */
	if (backenab == 0) {
		if (freezer != curthread)
			mutex_exit(QLOCK(q));
		return;
	}

	/* Have to drop the lock across strwakeq and backenable */
	if (backenab & QWANTWSYNC)
		q->q_flag &= ~QWANTWSYNC;
	if (backenab & (QWANTW|QB_WANTW)) {
		if (band != 0)
			qbp->qb_flag &= ~QB_WANTW;
		else {
			q->q_flag &= ~QWANTW;
		}
	}

	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	/* Wake waiters after dropping QLOCK (see comment above). */
	if (backenab & QWANTWSYNC)
		strwakeq(q, QWANTWSYNC);
	if (backenab & (QWANTW|QB_WANTW))
		backenable(q, band);
}
1856*0Sstevel@tonic-gate 
1857*0Sstevel@tonic-gate /*
1858*0Sstevel@tonic-gate  * Remove a message from a queue.  The queue count and other
1859*0Sstevel@tonic-gate  * flow control parameters are adjusted and the back queue
1860*0Sstevel@tonic-gate  * enabled if necessary.
1861*0Sstevel@tonic-gate  *
1862*0Sstevel@tonic-gate  * rmvq can be called with the stream frozen, but other utility functions
1863*0Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
1864*0Sstevel@tonic-gate  */
void
rmvq(queue_t *q, mblk_t *mp)
{
	ASSERT(mp != NULL);

	/* mp is unlinked but not freed, so mp->b_band below is safe. */
	rmvq_noenab(q, mp);
	if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
		/*
		 * qbackenable can handle a frozen stream but not a "random"
		 * qlock being held. Drop lock across qbackenable.
		 */
		mutex_exit(QLOCK(q));
		qbackenable(q, mp->b_band);
		mutex_enter(QLOCK(q));
	} else {
		qbackenable(q, mp->b_band);
	}
}
1883*0Sstevel@tonic-gate 
1884*0Sstevel@tonic-gate /*
1885*0Sstevel@tonic-gate  * Like rmvq() but without any backenabling.
1886*0Sstevel@tonic-gate  * This exists to handle SR_CONSOL_DATA in strrput().
1887*0Sstevel@tonic-gate  */
void
rmvq_noenab(queue_t *q, mblk_t *mp)
{
	mblk_t *tmp;
	int i;
	qband_t *qbp = NULL;
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		/* Stream frozen by us: caller already holds QLOCK. */
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else if (MUTEX_HELD(QLOCK(q))) {
		/* Don't drop lock on exit */
		freezer = curthread;
	} else
		mutex_enter(QLOCK(q));

	ASSERT(mp->b_band <= q->q_nband);
	if (mp->b_band != 0) {		/* Adjust band pointers */
		ASSERT(q->q_bandp != NULL);
		qbp = q->q_bandp;
		i = mp->b_band;
		while (--i > 0)
			qbp = qbp->qb_next;
		/*
		 * A band is empty when its neighbor on either side
		 * belongs to a different band, so fix qb_first/qb_last
		 * before unlinking mp from the message list.
		 */
		if (mp == qbp->qb_first) {
			if (mp->b_next && mp->b_band == mp->b_next->b_band)
				qbp->qb_first = mp->b_next;
			else
				qbp->qb_first = NULL;
		}
		if (mp == qbp->qb_last) {
			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
				qbp->qb_last = mp->b_prev;
			else
				qbp->qb_last = NULL;
		}
	}

	/*
	 * Remove the message from the list.
	 */
	if (mp->b_prev)
		mp->b_prev->b_next = mp->b_next;
	else
		q->q_first = mp->b_next;
	if (mp->b_next)
		mp->b_next->b_prev = mp->b_prev;
	else
		q->q_last = mp->b_prev;
	mp->b_next = NULL;
	mp->b_prev = NULL;

	/* Get the size of the message for q_count accounting */
	for (tmp = mp; tmp; tmp = tmp->b_cont) {
		bytecnt += (tmp->b_wptr - tmp->b_rptr);
		mblkcnt++;
	}

	if (mp->b_band == 0) {		/* Perform q_count accounting */
		q->q_count -= bytecnt;
		q->q_mblkcnt -= mblkcnt;
		/* Clear QFULL only when BOTH counts drop below hiwat. */
		if ((q->q_count < q->q_hiwat) &&
		    (q->q_mblkcnt < q->q_hiwat)) {
			q->q_flag &= ~QFULL;
		}
	} else {			/* Perform qb_count accounting */
		qbp->qb_count -= bytecnt;
		qbp->qb_mblkcnt -= mblkcnt;
		if ((qbp->qb_count < qbp->qb_hiwat) &&
		    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
			qbp->qb_flag &= ~QB_FULL;
		}
	}
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
}
1968*0Sstevel@tonic-gate 
1969*0Sstevel@tonic-gate /*
1970*0Sstevel@tonic-gate  * Empty a queue.
1971*0Sstevel@tonic-gate  * If flag is set, remove all messages.  Otherwise, remove
1972*0Sstevel@tonic-gate  * only non-control messages.  If queue falls below its low
1973*0Sstevel@tonic-gate  * water mark, and QWANTW is set, enable the nearest upstream
1974*0Sstevel@tonic-gate  * service procedure.
1975*0Sstevel@tonic-gate  *
1976*0Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
1977*0Sstevel@tonic-gate  * code one difference was discovered. flushq did not have a check
1978*0Sstevel@tonic-gate  * for q_lowat == 0 in the backenabling test.
1979*0Sstevel@tonic-gate  *
1980*0Sstevel@tonic-gate  * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
1981*0Sstevel@tonic-gate  * if one exists on the queue.
1982*0Sstevel@tonic-gate  */
1983*0Sstevel@tonic-gate void
1984*0Sstevel@tonic-gate flushq_common(queue_t *q, int flag, int pcproto_flag)
1985*0Sstevel@tonic-gate {
1986*0Sstevel@tonic-gate 	mblk_t *mp, *nmp;
1987*0Sstevel@tonic-gate 	qband_t *qbp;
1988*0Sstevel@tonic-gate 	int backenab = 0;
1989*0Sstevel@tonic-gate 	unsigned char bpri;
1990*0Sstevel@tonic-gate 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
1991*0Sstevel@tonic-gate 
1992*0Sstevel@tonic-gate 	if (q->q_first == NULL)
1993*0Sstevel@tonic-gate 		return;
1994*0Sstevel@tonic-gate 
1995*0Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
1996*0Sstevel@tonic-gate 	mp = q->q_first;
1997*0Sstevel@tonic-gate 	q->q_first = NULL;
1998*0Sstevel@tonic-gate 	q->q_last = NULL;
1999*0Sstevel@tonic-gate 	q->q_count = 0;
2000*0Sstevel@tonic-gate 	q->q_mblkcnt = 0;
2001*0Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
2002*0Sstevel@tonic-gate 		qbp->qb_first = NULL;
2003*0Sstevel@tonic-gate 		qbp->qb_last = NULL;
2004*0Sstevel@tonic-gate 		qbp->qb_count = 0;
2005*0Sstevel@tonic-gate 		qbp->qb_mblkcnt = 0;
2006*0Sstevel@tonic-gate 		qbp->qb_flag &= ~QB_FULL;
2007*0Sstevel@tonic-gate 	}
2008*0Sstevel@tonic-gate 	q->q_flag &= ~QFULL;
2009*0Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2010*0Sstevel@tonic-gate 	while (mp) {
2011*0Sstevel@tonic-gate 		nmp = mp->b_next;
2012*0Sstevel@tonic-gate 		mp->b_next = mp->b_prev = NULL;
2013*0Sstevel@tonic-gate 
2014*0Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);
2015*0Sstevel@tonic-gate 
2016*0Sstevel@tonic-gate 		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
2017*0Sstevel@tonic-gate 			(void) putq(q, mp);
2018*0Sstevel@tonic-gate 		else if (flag || datamsg(mp->b_datap->db_type))
2019*0Sstevel@tonic-gate 			freemsg(mp);
2020*0Sstevel@tonic-gate 		else
2021*0Sstevel@tonic-gate 			(void) putq(q, mp);
2022*0Sstevel@tonic-gate 		mp = nmp;
2023*0Sstevel@tonic-gate 	}
2024*0Sstevel@tonic-gate 	bpri = 1;
2025*0Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2026*0Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
2027*0Sstevel@tonic-gate 		if ((qbp->qb_flag & QB_WANTW) &&
2028*0Sstevel@tonic-gate 		    (((qbp->qb_count < qbp->qb_lowat) &&
2029*0Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
2030*0Sstevel@tonic-gate 		    qbp->qb_lowat == 0)) {
2031*0Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
2032*0Sstevel@tonic-gate 			backenab = 1;
2033*0Sstevel@tonic-gate 			qbf[bpri] = 1;
2034*0Sstevel@tonic-gate 		} else
2035*0Sstevel@tonic-gate 			qbf[bpri] = 0;
2036*0Sstevel@tonic-gate 		bpri++;
2037*0Sstevel@tonic-gate 	}
2038*0Sstevel@tonic-gate 	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
2039*0Sstevel@tonic-gate 	if ((q->q_flag & QWANTW) &&
2040*0Sstevel@tonic-gate 	    (((q->q_count < q->q_lowat) &&
2041*0Sstevel@tonic-gate 	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
2042*0Sstevel@tonic-gate 		q->q_flag &= ~QWANTW;
2043*0Sstevel@tonic-gate 		backenab = 1;
2044*0Sstevel@tonic-gate 		qbf[0] = 1;
2045*0Sstevel@tonic-gate 	} else
2046*0Sstevel@tonic-gate 		qbf[0] = 0;
2047*0Sstevel@tonic-gate 
2048*0Sstevel@tonic-gate 	/*
2049*0Sstevel@tonic-gate 	 * If any band can now be written to, and there is a writer
2050*0Sstevel@tonic-gate 	 * for that band, then backenable the closest service procedure.
2051*0Sstevel@tonic-gate 	 */
2052*0Sstevel@tonic-gate 	if (backenab) {
2053*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2054*0Sstevel@tonic-gate 		for (bpri = q->q_nband; bpri != 0; bpri--)
2055*0Sstevel@tonic-gate 			if (qbf[bpri])
2056*0Sstevel@tonic-gate 				backenable(q, (int)bpri);
2057*0Sstevel@tonic-gate 		if (qbf[0])
2058*0Sstevel@tonic-gate 			backenable(q, 0);
2059*0Sstevel@tonic-gate 	} else
2060*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2061*0Sstevel@tonic-gate }
2062*0Sstevel@tonic-gate 
2063*0Sstevel@tonic-gate /*
2064*0Sstevel@tonic-gate  * The real flushing takes place in flushq_common. This is done so that
2065*0Sstevel@tonic-gate  * a flag which specifies whether or not M_PCPROTO messages should be flushed
2066*0Sstevel@tonic-gate  * or not. Currently the only place that uses this flag is the stream head.
2067*0Sstevel@tonic-gate  */
2068*0Sstevel@tonic-gate void
2069*0Sstevel@tonic-gate flushq(queue_t *q, int flag)
2070*0Sstevel@tonic-gate {
2071*0Sstevel@tonic-gate 	flushq_common(q, flag, 0);
2072*0Sstevel@tonic-gate }
2073*0Sstevel@tonic-gate 
2074*0Sstevel@tonic-gate /*
2075*0Sstevel@tonic-gate  * Flush the queue of messages of the given priority band.
2076*0Sstevel@tonic-gate  * There is some duplication of code between flushq and flushband.
2077*0Sstevel@tonic-gate  * This is because we want to optimize the code as much as possible.
2078*0Sstevel@tonic-gate  * The assumption is that there will be more messages in the normal
2079*0Sstevel@tonic-gate  * (priority 0) band than in any other.
2080*0Sstevel@tonic-gate  *
2081*0Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
2082*0Sstevel@tonic-gate  * code one difference was discovered. flushband had an extra check for
2083*0Sstevel@tonic-gate  * did not have a check for (mp->b_datap->db_type < QPCTL) in the band 0
2084*0Sstevel@tonic-gate  * case. That check does not match the man page for flushband and was not
2085*0Sstevel@tonic-gate  * in the strrput flush code hence it was removed.
2086*0Sstevel@tonic-gate  */
2087*0Sstevel@tonic-gate void
2088*0Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag)
2089*0Sstevel@tonic-gate {
2090*0Sstevel@tonic-gate 	mblk_t *mp;
2091*0Sstevel@tonic-gate 	mblk_t *nmp;
2092*0Sstevel@tonic-gate 	mblk_t *last;
2093*0Sstevel@tonic-gate 	qband_t *qbp;
2094*0Sstevel@tonic-gate 	int band;
2095*0Sstevel@tonic-gate 
2096*0Sstevel@tonic-gate 	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
2097*0Sstevel@tonic-gate 	if (pri > q->q_nband) {
2098*0Sstevel@tonic-gate 		return;
2099*0Sstevel@tonic-gate 	}
2100*0Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2101*0Sstevel@tonic-gate 	if (pri == 0) {
2102*0Sstevel@tonic-gate 		mp = q->q_first;
2103*0Sstevel@tonic-gate 		q->q_first = NULL;
2104*0Sstevel@tonic-gate 		q->q_last = NULL;
2105*0Sstevel@tonic-gate 		q->q_count = 0;
2106*0Sstevel@tonic-gate 		q->q_mblkcnt = 0;
2107*0Sstevel@tonic-gate 		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
2108*0Sstevel@tonic-gate 			qbp->qb_first = NULL;
2109*0Sstevel@tonic-gate 			qbp->qb_last = NULL;
2110*0Sstevel@tonic-gate 			qbp->qb_count = 0;
2111*0Sstevel@tonic-gate 			qbp->qb_mblkcnt = 0;
2112*0Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
2113*0Sstevel@tonic-gate 		}
2114*0Sstevel@tonic-gate 		q->q_flag &= ~QFULL;
2115*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2116*0Sstevel@tonic-gate 		while (mp) {
2117*0Sstevel@tonic-gate 			nmp = mp->b_next;
2118*0Sstevel@tonic-gate 			mp->b_next = mp->b_prev = NULL;
2119*0Sstevel@tonic-gate 			if ((mp->b_band == 0) &&
2120*0Sstevel@tonic-gate 				((flag == FLUSHALL) ||
2121*0Sstevel@tonic-gate 				datamsg(mp->b_datap->db_type)))
2122*0Sstevel@tonic-gate 				freemsg(mp);
2123*0Sstevel@tonic-gate 			else
2124*0Sstevel@tonic-gate 				(void) putq(q, mp);
2125*0Sstevel@tonic-gate 			mp = nmp;
2126*0Sstevel@tonic-gate 		}
2127*0Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2128*0Sstevel@tonic-gate 		if ((q->q_flag & QWANTW) &&
2129*0Sstevel@tonic-gate 		    (((q->q_count < q->q_lowat) &&
2130*0Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
2131*0Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
2132*0Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2133*0Sstevel@tonic-gate 
2134*0Sstevel@tonic-gate 			backenable(q, (int)pri);
2135*0Sstevel@tonic-gate 		} else
2136*0Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2137*0Sstevel@tonic-gate 	} else {	/* pri != 0 */
2138*0Sstevel@tonic-gate 		boolean_t flushed = B_FALSE;
2139*0Sstevel@tonic-gate 		band = pri;
2140*0Sstevel@tonic-gate 
2141*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2142*0Sstevel@tonic-gate 		qbp = q->q_bandp;
2143*0Sstevel@tonic-gate 		while (--band > 0)
2144*0Sstevel@tonic-gate 			qbp = qbp->qb_next;
2145*0Sstevel@tonic-gate 		mp = qbp->qb_first;
2146*0Sstevel@tonic-gate 		if (mp == NULL) {
2147*0Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2148*0Sstevel@tonic-gate 			return;
2149*0Sstevel@tonic-gate 		}
2150*0Sstevel@tonic-gate 		last = qbp->qb_last->b_next;
2151*0Sstevel@tonic-gate 		/*
2152*0Sstevel@tonic-gate 		 * rmvq_noenab() and freemsg() are called for each mblk that
2153*0Sstevel@tonic-gate 		 * meets the criteria.  The loop is executed until the last
2154*0Sstevel@tonic-gate 		 * mblk has been processed.
2155*0Sstevel@tonic-gate 		 */
2156*0Sstevel@tonic-gate 		while (mp != last) {
2157*0Sstevel@tonic-gate 			ASSERT(mp->b_band == pri);
2158*0Sstevel@tonic-gate 			nmp = mp->b_next;
2159*0Sstevel@tonic-gate 			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
2160*0Sstevel@tonic-gate 				rmvq_noenab(q, mp);
2161*0Sstevel@tonic-gate 				freemsg(mp);
2162*0Sstevel@tonic-gate 				flushed = B_TRUE;
2163*0Sstevel@tonic-gate 			}
2164*0Sstevel@tonic-gate 			mp = nmp;
2165*0Sstevel@tonic-gate 		}
2166*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2167*0Sstevel@tonic-gate 
2168*0Sstevel@tonic-gate 		/*
2169*0Sstevel@tonic-gate 		 * If any mblk(s) has been freed, we know that qbackenable()
2170*0Sstevel@tonic-gate 		 * will need to be called.
2171*0Sstevel@tonic-gate 		 */
2172*0Sstevel@tonic-gate 		if (flushed)
2173*0Sstevel@tonic-gate 			qbackenable(q, (int)pri);
2174*0Sstevel@tonic-gate 	}
2175*0Sstevel@tonic-gate }
2176*0Sstevel@tonic-gate 
2177*0Sstevel@tonic-gate /*
2178*0Sstevel@tonic-gate  * Return 1 if the queue is not full.  If the queue is full, return
2179*0Sstevel@tonic-gate  * 0 (may not put message) and set QWANTW flag (caller wants to write
2180*0Sstevel@tonic-gate  * to the queue).
2181*0Sstevel@tonic-gate  */
2182*0Sstevel@tonic-gate int
2183*0Sstevel@tonic-gate canput(queue_t *q)
2184*0Sstevel@tonic-gate {
2185*0Sstevel@tonic-gate 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
2186*0Sstevel@tonic-gate 
2187*0Sstevel@tonic-gate 	/* this is for loopback transports, they should not do a canput */
2188*0Sstevel@tonic-gate 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
2189*0Sstevel@tonic-gate 
2190*0Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
2191*0Sstevel@tonic-gate 	q = q->q_nfsrv;
2192*0Sstevel@tonic-gate 
2193*0Sstevel@tonic-gate 	if (!(q->q_flag & QFULL)) {
2194*0Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
2195*0Sstevel@tonic-gate 		return (1);
2196*0Sstevel@tonic-gate 	}
2197*0Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2198*0Sstevel@tonic-gate 	if (q->q_flag & QFULL) {
2199*0Sstevel@tonic-gate 		q->q_flag |= QWANTW;
2200*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2201*0Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
2202*0Sstevel@tonic-gate 		return (0);
2203*0Sstevel@tonic-gate 	}
2204*0Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2205*0Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
2206*0Sstevel@tonic-gate 	return (1);
2207*0Sstevel@tonic-gate }
2208*0Sstevel@tonic-gate 
2209*0Sstevel@tonic-gate /*
2210*0Sstevel@tonic-gate  * This is the new canput for use with priority bands.  Return 1 if the
2211*0Sstevel@tonic-gate  * band is not full.  If the band is full, return 0 (may not put message)
2212*0Sstevel@tonic-gate  * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to
2213*0Sstevel@tonic-gate  * write to the queue).
2214*0Sstevel@tonic-gate  */
2215*0Sstevel@tonic-gate int
2216*0Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri)
2217*0Sstevel@tonic-gate {
2218*0Sstevel@tonic-gate 	qband_t *qbp;
2219*0Sstevel@tonic-gate 
2220*0Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
2221*0Sstevel@tonic-gate 	if (!q)
2222*0Sstevel@tonic-gate 		return (0);
2223*0Sstevel@tonic-gate 
2224*0Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
2225*0Sstevel@tonic-gate 	q = q->q_nfsrv;
2226*0Sstevel@tonic-gate 
2227*0Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2228*0Sstevel@tonic-gate 	if (pri == 0) {
2229*0Sstevel@tonic-gate 		if (q->q_flag & QFULL) {
2230*0Sstevel@tonic-gate 			q->q_flag |= QWANTW;
2231*0Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2232*0Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
2233*0Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
2234*0Sstevel@tonic-gate 			return (0);
2235*0Sstevel@tonic-gate 		}
2236*0Sstevel@tonic-gate 	} else {	/* pri != 0 */
2237*0Sstevel@tonic-gate 		if (pri > q->q_nband) {
2238*0Sstevel@tonic-gate 			/*
2239*0Sstevel@tonic-gate 			 * No band exists yet, so return success.
2240*0Sstevel@tonic-gate 			 */
2241*0Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2242*0Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
2243*0Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 1);
2244*0Sstevel@tonic-gate 			return (1);
2245*0Sstevel@tonic-gate 		}
2246*0Sstevel@tonic-gate 		qbp = q->q_bandp;
2247*0Sstevel@tonic-gate 		while (--pri)
2248*0Sstevel@tonic-gate 			qbp = qbp->qb_next;
2249*0Sstevel@tonic-gate 		if (qbp->qb_flag & QB_FULL) {
2250*0Sstevel@tonic-gate 			qbp->qb_flag |= QB_WANTW;
2251*0Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2252*0Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
2253*0Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
2254*0Sstevel@tonic-gate 			return (0);
2255*0Sstevel@tonic-gate 		}
2256*0Sstevel@tonic-gate 	}
2257*0Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2258*0Sstevel@tonic-gate 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
2259*0Sstevel@tonic-gate 		"bcanput:%p %X %d", q, pri, 1);
2260*0Sstevel@tonic-gate 	return (1);
2261*0Sstevel@tonic-gate }
2262*0Sstevel@tonic-gate 
2263*0Sstevel@tonic-gate /*
2264*0Sstevel@tonic-gate  * Put a message on a queue.
2265*0Sstevel@tonic-gate  *
2266*0Sstevel@tonic-gate  * Messages are enqueued on a priority basis.  The priority classes
2267*0Sstevel@tonic-gate  * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
2268*0Sstevel@tonic-gate  * and B_NORMAL (type < QPCTL && band == 0).
2269*0Sstevel@tonic-gate  *
2270*0Sstevel@tonic-gate  * Add appropriate weighted data block sizes to queue count.
2271*0Sstevel@tonic-gate  * If queue hits high water mark then set QFULL flag.
2272*0Sstevel@tonic-gate  *
2273*0Sstevel@tonic-gate  * If QNOENAB is not set (putq is allowed to enable the queue),
2274*0Sstevel@tonic-gate  * enable the queue only if the message is PRIORITY,
2275*0Sstevel@tonic-gate  * or the QWANTR flag is set (indicating that the service procedure
2276*0Sstevel@tonic-gate  * is ready to read the queue.  This implies that a service
2277*0Sstevel@tonic-gate  * procedure must NEVER put a high priority message back on its own
2278*0Sstevel@tonic-gate  * queue, as this would result in an infinite loop (!).
2279*0Sstevel@tonic-gate  */
2280*0Sstevel@tonic-gate int
2281*0Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp)
2282*0Sstevel@tonic-gate {
2283*0Sstevel@tonic-gate 	mblk_t *tmp;
2284*0Sstevel@tonic-gate 	qband_t *qbp = NULL;
2285*0Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
2286*0Sstevel@tonic-gate 	kthread_id_t freezer;
2287*0Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
2288*0Sstevel@tonic-gate 
2289*0Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
2290*0Sstevel@tonic-gate 	if (freezer == curthread) {
2291*0Sstevel@tonic-gate 		ASSERT(frozenstr(q));
2292*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2293*0Sstevel@tonic-gate 	} else
2294*0Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2295*0Sstevel@tonic-gate 
2296*0Sstevel@tonic-gate 	/*
2297*0Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
2298*0Sstevel@tonic-gate 	 * allocated, do so.
2299*0Sstevel@tonic-gate 	 */
2300*0Sstevel@tonic-gate 	if (mcls == QPCTL) {
2301*0Sstevel@tonic-gate 		if (bp->b_band != 0)
2302*0Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
2303*0Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
2304*0Sstevel@tonic-gate 		int i;
2305*0Sstevel@tonic-gate 		qband_t **qbpp;
2306*0Sstevel@tonic-gate 
2307*0Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
2308*0Sstevel@tonic-gate 
2309*0Sstevel@tonic-gate 			/*
2310*0Sstevel@tonic-gate 			 * The qband structure for this priority band is
2311*0Sstevel@tonic-gate 			 * not on the queue yet, so we have to allocate
2312*0Sstevel@tonic-gate 			 * one on the fly.  It would be wasteful to
2313*0Sstevel@tonic-gate 			 * associate the qband structures with every
2314*0Sstevel@tonic-gate 			 * queue when the queues are allocated.  This is
2315*0Sstevel@tonic-gate 			 * because most queues will only need the normal
2316*0Sstevel@tonic-gate 			 * band of flow which can be described entirely
2317*0Sstevel@tonic-gate 			 * by the queue itself.
2318*0Sstevel@tonic-gate 			 */
2319*0Sstevel@tonic-gate 			qbpp = &q->q_bandp;
2320*0Sstevel@tonic-gate 			while (*qbpp)
2321*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2322*0Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
2323*0Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
2324*0Sstevel@tonic-gate 					if (freezer != curthread)
2325*0Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
2326*0Sstevel@tonic-gate 					return (0);
2327*0Sstevel@tonic-gate 				}
2328*0Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
2329*0Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
2330*0Sstevel@tonic-gate 				q->q_nband++;
2331*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2332*0Sstevel@tonic-gate 			}
2333*0Sstevel@tonic-gate 		}
2334*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2335*0Sstevel@tonic-gate 		qbp = q->q_bandp;
2336*0Sstevel@tonic-gate 		i = bp->b_band;
2337*0Sstevel@tonic-gate 		while (--i)
2338*0Sstevel@tonic-gate 			qbp = qbp->qb_next;
2339*0Sstevel@tonic-gate 	}
2340*0Sstevel@tonic-gate 
2341*0Sstevel@tonic-gate 	/*
2342*0Sstevel@tonic-gate 	 * If queue is empty, add the message and initialize the pointers.
2343*0Sstevel@tonic-gate 	 * Otherwise, adjust message pointers and queue pointers based on
2344*0Sstevel@tonic-gate 	 * the type of the message and where it belongs on the queue.  Some
2345*0Sstevel@tonic-gate 	 * code is duplicated to minimize the number of conditionals and
2346*0Sstevel@tonic-gate 	 * hopefully minimize the amount of time this routine takes.
2347*0Sstevel@tonic-gate 	 */
2348*0Sstevel@tonic-gate 	if (!q->q_first) {
2349*0Sstevel@tonic-gate 		bp->b_next = NULL;
2350*0Sstevel@tonic-gate 		bp->b_prev = NULL;
2351*0Sstevel@tonic-gate 		q->q_first = bp;
2352*0Sstevel@tonic-gate 		q->q_last = bp;
2353*0Sstevel@tonic-gate 		if (qbp) {
2354*0Sstevel@tonic-gate 			qbp->qb_first = bp;
2355*0Sstevel@tonic-gate 			qbp->qb_last = bp;
2356*0Sstevel@tonic-gate 		}
2357*0Sstevel@tonic-gate 	} else if (!qbp) {	/* bp->b_band == 0 */
2358*0Sstevel@tonic-gate 
2359*0Sstevel@tonic-gate 		/*
2360*0Sstevel@tonic-gate 		 * If queue class of message is less than or equal to
2361*0Sstevel@tonic-gate 		 * that of the last one on the queue, tack on to the end.
2362*0Sstevel@tonic-gate 		 */
2363*0Sstevel@tonic-gate 		tmp = q->q_last;
2364*0Sstevel@tonic-gate 		if (mcls <= (int)queclass(tmp)) {
2365*0Sstevel@tonic-gate 			bp->b_next = NULL;
2366*0Sstevel@tonic-gate 			bp->b_prev = tmp;
2367*0Sstevel@tonic-gate 			tmp->b_next = bp;
2368*0Sstevel@tonic-gate 			q->q_last = bp;
2369*0Sstevel@tonic-gate 		} else {
2370*0Sstevel@tonic-gate 			tmp = q->q_first;
2371*0Sstevel@tonic-gate 			while ((int)queclass(tmp) >= mcls)
2372*0Sstevel@tonic-gate 				tmp = tmp->b_next;
2373*0Sstevel@tonic-gate 
2374*0Sstevel@tonic-gate 			/*
2375*0Sstevel@tonic-gate 			 * Insert bp before tmp.
2376*0Sstevel@tonic-gate 			 */
2377*0Sstevel@tonic-gate 			bp->b_next = tmp;
2378*0Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
2379*0Sstevel@tonic-gate 			if (tmp->b_prev)
2380*0Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
2381*0Sstevel@tonic-gate 			else
2382*0Sstevel@tonic-gate 				q->q_first = bp;
2383*0Sstevel@tonic-gate 			tmp->b_prev = bp;
2384*0Sstevel@tonic-gate 		}
2385*0Sstevel@tonic-gate 	} else {		/* bp->b_band != 0 */
2386*0Sstevel@tonic-gate 		if (qbp->qb_first) {
2387*0Sstevel@tonic-gate 			tmp = qbp->qb_last;
2388*0Sstevel@tonic-gate 
2389*0Sstevel@tonic-gate 			/*
2390*0Sstevel@tonic-gate 			 * Insert bp after the last message in this band.
2391*0Sstevel@tonic-gate 			 */
2392*0Sstevel@tonic-gate 			bp->b_next = tmp->b_next;
2393*0Sstevel@tonic-gate 			if (tmp->b_next)
2394*0Sstevel@tonic-gate 				tmp->b_next->b_prev = bp;
2395*0Sstevel@tonic-gate 			else
2396*0Sstevel@tonic-gate 				q->q_last = bp;
2397*0Sstevel@tonic-gate 			bp->b_prev = tmp;
2398*0Sstevel@tonic-gate 			tmp->b_next = bp;
2399*0Sstevel@tonic-gate 		} else {
2400*0Sstevel@tonic-gate 			tmp = q->q_last;
2401*0Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
2402*0Sstevel@tonic-gate 			    (bp->b_band <= tmp->b_band)) {
2403*0Sstevel@tonic-gate 
2404*0Sstevel@tonic-gate 				/*
2405*0Sstevel@tonic-gate 				 * Tack bp on end of queue.
2406*0Sstevel@tonic-gate 				 */
2407*0Sstevel@tonic-gate 				bp->b_next = NULL;
2408*0Sstevel@tonic-gate 				bp->b_prev = tmp;
2409*0Sstevel@tonic-gate 				tmp->b_next = bp;
2410*0Sstevel@tonic-gate 				q->q_last = bp;
2411*0Sstevel@tonic-gate 			} else {
2412*0Sstevel@tonic-gate 				tmp = q->q_first;
2413*0Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
2414*0Sstevel@tonic-gate 					tmp = tmp->b_next;
2415*0Sstevel@tonic-gate 				while (tmp->b_band >= bp->b_band)
2416*0Sstevel@tonic-gate 					tmp = tmp->b_next;
2417*0Sstevel@tonic-gate 
2418*0Sstevel@tonic-gate 				/*
2419*0Sstevel@tonic-gate 				 * Insert bp before tmp.
2420*0Sstevel@tonic-gate 				 */
2421*0Sstevel@tonic-gate 				bp->b_next = tmp;
2422*0Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
2423*0Sstevel@tonic-gate 				if (tmp->b_prev)
2424*0Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
2425*0Sstevel@tonic-gate 				else
2426*0Sstevel@tonic-gate 					q->q_first = bp;
2427*0Sstevel@tonic-gate 				tmp->b_prev = bp;
2428*0Sstevel@tonic-gate 			}
2429*0Sstevel@tonic-gate 			qbp->qb_first = bp;
2430*0Sstevel@tonic-gate 		}
2431*0Sstevel@tonic-gate 		qbp->qb_last = bp;
2432*0Sstevel@tonic-gate 	}
2433*0Sstevel@tonic-gate 
2434*0Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
2435*0Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
2436*0Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
2437*0Sstevel@tonic-gate 		mblkcnt++;
2438*0Sstevel@tonic-gate 	}
2439*0Sstevel@tonic-gate 	if (qbp) {
2440*0Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
2441*0Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
2442*0Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
2443*0Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
2444*0Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
2445*0Sstevel@tonic-gate 		}
2446*0Sstevel@tonic-gate 	} else {
2447*0Sstevel@tonic-gate 		q->q_count += bytecnt;
2448*0Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
2449*0Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
2450*0Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
2451*0Sstevel@tonic-gate 			q->q_flag |= QFULL;
2452*0Sstevel@tonic-gate 		}
2453*0Sstevel@tonic-gate 	}
2454*0Sstevel@tonic-gate 
2455*0Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);
2456*0Sstevel@tonic-gate 
2457*0Sstevel@tonic-gate 	if ((mcls > QNORM) ||
2458*0Sstevel@tonic-gate 	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
2459*0Sstevel@tonic-gate 		qenable_locked(q);
2460*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
2461*0Sstevel@tonic-gate 	if (freezer != curthread)
2462*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2463*0Sstevel@tonic-gate 
2464*0Sstevel@tonic-gate 	return (1);
2465*0Sstevel@tonic-gate }
2466*0Sstevel@tonic-gate 
2467*0Sstevel@tonic-gate /*
2468*0Sstevel@tonic-gate  * Put stuff back at beginning of Q according to priority order.
2469*0Sstevel@tonic-gate  * See comment on putq above for details.
2470*0Sstevel@tonic-gate  */
2471*0Sstevel@tonic-gate int
2472*0Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp)
2473*0Sstevel@tonic-gate {
2474*0Sstevel@tonic-gate 	mblk_t *tmp;
2475*0Sstevel@tonic-gate 	qband_t *qbp = NULL;
2476*0Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
2477*0Sstevel@tonic-gate 	kthread_id_t freezer;
2478*0Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
2479*0Sstevel@tonic-gate 
2480*0Sstevel@tonic-gate 	ASSERT(q && bp);
2481*0Sstevel@tonic-gate 	ASSERT(bp->b_next == NULL);
2482*0Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
2483*0Sstevel@tonic-gate 	if (freezer == curthread) {
2484*0Sstevel@tonic-gate 		ASSERT(frozenstr(q));
2485*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2486*0Sstevel@tonic-gate 	} else
2487*0Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2488*0Sstevel@tonic-gate 
2489*0Sstevel@tonic-gate 	/*
2490*0Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
2491*0Sstevel@tonic-gate 	 * allocated, do so.
2492*0Sstevel@tonic-gate 	 */
2493*0Sstevel@tonic-gate 	if (mcls == QPCTL) {
2494*0Sstevel@tonic-gate 		if (bp->b_band != 0)
2495*0Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
2496*0Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
2497*0Sstevel@tonic-gate 		int i;
2498*0Sstevel@tonic-gate 		qband_t **qbpp;
2499*0Sstevel@tonic-gate 
2500*0Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
2501*0Sstevel@tonic-gate 			qbpp = &q->q_bandp;
2502*0Sstevel@tonic-gate 			while (*qbpp)
2503*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2504*0Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
2505*0Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
2506*0Sstevel@tonic-gate 					if (freezer != curthread)
2507*0Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
2508*0Sstevel@tonic-gate 					return (0);
2509*0Sstevel@tonic-gate 				}
2510*0Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
2511*0Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
2512*0Sstevel@tonic-gate 				q->q_nband++;
2513*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2514*0Sstevel@tonic-gate 			}
2515*0Sstevel@tonic-gate 		}
2516*0Sstevel@tonic-gate 		qbp = q->q_bandp;
2517*0Sstevel@tonic-gate 		i = bp->b_band;
2518*0Sstevel@tonic-gate 		while (--i)
2519*0Sstevel@tonic-gate 			qbp = qbp->qb_next;
2520*0Sstevel@tonic-gate 	}
2521*0Sstevel@tonic-gate 
2522*0Sstevel@tonic-gate 	/*
2523*0Sstevel@tonic-gate 	 * If queue is empty or if message is high priority,
2524*0Sstevel@tonic-gate 	 * place on the front of the queue.
2525*0Sstevel@tonic-gate 	 */
2526*0Sstevel@tonic-gate 	tmp = q->q_first;
2527*0Sstevel@tonic-gate 	if ((!tmp) || (mcls == QPCTL)) {
2528*0Sstevel@tonic-gate 		bp->b_next = tmp;
2529*0Sstevel@tonic-gate 		if (tmp)
2530*0Sstevel@tonic-gate 			tmp->b_prev = bp;
2531*0Sstevel@tonic-gate 		else
2532*0Sstevel@tonic-gate 			q->q_last = bp;
2533*0Sstevel@tonic-gate 		q->q_first = bp;
2534*0Sstevel@tonic-gate 		bp->b_prev = NULL;
2535*0Sstevel@tonic-gate 		if (qbp) {
2536*0Sstevel@tonic-gate 			qbp->qb_first = bp;
2537*0Sstevel@tonic-gate 			qbp->qb_last = bp;
2538*0Sstevel@tonic-gate 		}
2539*0Sstevel@tonic-gate 	} else if (qbp) {	/* bp->b_band != 0 */
2540*0Sstevel@tonic-gate 		tmp = qbp->qb_first;
2541*0Sstevel@tonic-gate 		if (tmp) {
2542*0Sstevel@tonic-gate 
2543*0Sstevel@tonic-gate 			/*
2544*0Sstevel@tonic-gate 			 * Insert bp before the first message in this band.
2545*0Sstevel@tonic-gate 			 */
2546*0Sstevel@tonic-gate 			bp->b_next = tmp;
2547*0Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
2548*0Sstevel@tonic-gate 			if (tmp->b_prev)
2549*0Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
2550*0Sstevel@tonic-gate 			else
2551*0Sstevel@tonic-gate 				q->q_first = bp;
2552*0Sstevel@tonic-gate 			tmp->b_prev = bp;
2553*0Sstevel@tonic-gate 		} else {
2554*0Sstevel@tonic-gate 			tmp = q->q_last;
2555*0Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
2556*0Sstevel@tonic-gate 			    (bp->b_band < tmp->b_band)) {
2557*0Sstevel@tonic-gate 
2558*0Sstevel@tonic-gate 				/*
2559*0Sstevel@tonic-gate 				 * Tack bp on end of queue.
2560*0Sstevel@tonic-gate 				 */
2561*0Sstevel@tonic-gate 				bp->b_next = NULL;
2562*0Sstevel@tonic-gate 				bp->b_prev = tmp;
2563*0Sstevel@tonic-gate 				tmp->b_next = bp;
2564*0Sstevel@tonic-gate 				q->q_last = bp;
2565*0Sstevel@tonic-gate 			} else {
2566*0Sstevel@tonic-gate 				tmp = q->q_first;
2567*0Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
2568*0Sstevel@tonic-gate 					tmp = tmp->b_next;
2569*0Sstevel@tonic-gate 				while (tmp->b_band > bp->b_band)
2570*0Sstevel@tonic-gate 					tmp = tmp->b_next;
2571*0Sstevel@tonic-gate 
2572*0Sstevel@tonic-gate 				/*
2573*0Sstevel@tonic-gate 				 * Insert bp before tmp.
2574*0Sstevel@tonic-gate 				 */
2575*0Sstevel@tonic-gate 				bp->b_next = tmp;
2576*0Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
2577*0Sstevel@tonic-gate 				if (tmp->b_prev)
2578*0Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
2579*0Sstevel@tonic-gate 				else
2580*0Sstevel@tonic-gate 					q->q_first = bp;
2581*0Sstevel@tonic-gate 				tmp->b_prev = bp;
2582*0Sstevel@tonic-gate 			}
2583*0Sstevel@tonic-gate 			qbp->qb_last = bp;
2584*0Sstevel@tonic-gate 		}
2585*0Sstevel@tonic-gate 		qbp->qb_first = bp;
2586*0Sstevel@tonic-gate 	} else {		/* bp->b_band == 0 && !QPCTL */
2587*0Sstevel@tonic-gate 
2588*0Sstevel@tonic-gate 		/*
2589*0Sstevel@tonic-gate 		 * If the queue class or band is less than that of the last
2590*0Sstevel@tonic-gate 		 * message on the queue, tack bp on the end of the queue.
2591*0Sstevel@tonic-gate 		 */
2592*0Sstevel@tonic-gate 		tmp = q->q_last;
2593*0Sstevel@tonic-gate 		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
2594*0Sstevel@tonic-gate 			bp->b_next = NULL;
2595*0Sstevel@tonic-gate 			bp->b_prev = tmp;
2596*0Sstevel@tonic-gate 			tmp->b_next = bp;
2597*0Sstevel@tonic-gate 			q->q_last = bp;
2598*0Sstevel@tonic-gate 		} else {
2599*0Sstevel@tonic-gate 			tmp = q->q_first;
2600*0Sstevel@tonic-gate 			while (tmp->b_datap->db_type >= QPCTL)
2601*0Sstevel@tonic-gate 				tmp = tmp->b_next;
2602*0Sstevel@tonic-gate 			while (tmp->b_band > bp->b_band)
2603*0Sstevel@tonic-gate 				tmp = tmp->b_next;
2604*0Sstevel@tonic-gate 
2605*0Sstevel@tonic-gate 			/*
2606*0Sstevel@tonic-gate 			 * Insert bp before tmp.
2607*0Sstevel@tonic-gate 			 */
2608*0Sstevel@tonic-gate 			bp->b_next = tmp;
2609*0Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
2610*0Sstevel@tonic-gate 			if (tmp->b_prev)
2611*0Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
2612*0Sstevel@tonic-gate 			else
2613*0Sstevel@tonic-gate 				q->q_first = bp;
2614*0Sstevel@tonic-gate 			tmp->b_prev = bp;
2615*0Sstevel@tonic-gate 		}
2616*0Sstevel@tonic-gate 	}
2617*0Sstevel@tonic-gate 
2618*0Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
2619*0Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
2620*0Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
2621*0Sstevel@tonic-gate 		mblkcnt++;
2622*0Sstevel@tonic-gate 	}
2623*0Sstevel@tonic-gate 	if (qbp) {
2624*0Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
2625*0Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
2626*0Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
2627*0Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
2628*0Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
2629*0Sstevel@tonic-gate 		}
2630*0Sstevel@tonic-gate 	} else {
2631*0Sstevel@tonic-gate 		q->q_count += bytecnt;
2632*0Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
2633*0Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
2634*0Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
2635*0Sstevel@tonic-gate 			q->q_flag |= QFULL;
2636*0Sstevel@tonic-gate 		}
2637*0Sstevel@tonic-gate 	}
2638*0Sstevel@tonic-gate 
2639*0Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);
2640*0Sstevel@tonic-gate 
2641*0Sstevel@tonic-gate 	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
2642*0Sstevel@tonic-gate 		qenable_locked(q);
2643*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
2644*0Sstevel@tonic-gate 	if (freezer != curthread)
2645*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2646*0Sstevel@tonic-gate 
2647*0Sstevel@tonic-gate 	return (1);
2648*0Sstevel@tonic-gate }
2649*0Sstevel@tonic-gate 
2650*0Sstevel@tonic-gate /*
2651*0Sstevel@tonic-gate  * Insert a message before an existing message on the queue.  If the
2652*0Sstevel@tonic-gate  * existing message is NULL, the new messages is placed on the end of
2653*0Sstevel@tonic-gate  * the queue.  The queue class of the new message is ignored.  However,
2654*0Sstevel@tonic-gate  * the priority band of the new message must adhere to the following
2655*0Sstevel@tonic-gate  * ordering:
2656*0Sstevel@tonic-gate  *
2657*0Sstevel@tonic-gate  *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
2658*0Sstevel@tonic-gate  *
2659*0Sstevel@tonic-gate  * All flow control parameters are updated.
2660*0Sstevel@tonic-gate  *
2661*0Sstevel@tonic-gate  * insq can be called with the stream frozen, but other utility functions
2662*0Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
2663*0Sstevel@tonic-gate  */
2664*0Sstevel@tonic-gate int
2665*0Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp)
2666*0Sstevel@tonic-gate {
2667*0Sstevel@tonic-gate 	mblk_t *tmp;
2668*0Sstevel@tonic-gate 	qband_t *qbp = NULL;
2669*0Sstevel@tonic-gate 	int mcls = (int)queclass(mp);
2670*0Sstevel@tonic-gate 	kthread_id_t freezer;
2671*0Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
2672*0Sstevel@tonic-gate 
2673*0Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
2674*0Sstevel@tonic-gate 	if (freezer == curthread) {
2675*0Sstevel@tonic-gate 		ASSERT(frozenstr(q));
2676*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2677*0Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
2678*0Sstevel@tonic-gate 		/* Don't drop lock on exit */
2679*0Sstevel@tonic-gate 		freezer = curthread;
2680*0Sstevel@tonic-gate 	} else
2681*0Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2682*0Sstevel@tonic-gate 
2683*0Sstevel@tonic-gate 	if (mcls == QPCTL) {
2684*0Sstevel@tonic-gate 		if (mp->b_band != 0)
2685*0Sstevel@tonic-gate 			mp->b_band = 0;		/* force to be correct */
2686*0Sstevel@tonic-gate 		if (emp && emp->b_prev &&
2687*0Sstevel@tonic-gate 		    (emp->b_prev->b_datap->db_type < QPCTL))
2688*0Sstevel@tonic-gate 			goto badord;
2689*0Sstevel@tonic-gate 	}
2690*0Sstevel@tonic-gate 	if (emp) {
2691*0Sstevel@tonic-gate 		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
2692*0Sstevel@tonic-gate 		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
2693*0Sstevel@tonic-gate 		    (emp->b_prev->b_band < mp->b_band))) {
2694*0Sstevel@tonic-gate 			goto badord;
2695*0Sstevel@tonic-gate 		}
2696*0Sstevel@tonic-gate 	} else {
2697*0Sstevel@tonic-gate 		tmp = q->q_last;
2698*0Sstevel@tonic-gate 		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
2699*0Sstevel@tonic-gate badord:
2700*0Sstevel@tonic-gate 			cmn_err(CE_WARN,
2701*0Sstevel@tonic-gate 			    "insq: attempt to insert message out of order "
2702*0Sstevel@tonic-gate 			    "on q %p", (void *)q);
2703*0Sstevel@tonic-gate 			if (freezer != curthread)
2704*0Sstevel@tonic-gate 				mutex_exit(QLOCK(q));
2705*0Sstevel@tonic-gate 			return (0);
2706*0Sstevel@tonic-gate 		}
2707*0Sstevel@tonic-gate 	}
2708*0Sstevel@tonic-gate 
2709*0Sstevel@tonic-gate 	if (mp->b_band != 0) {
2710*0Sstevel@tonic-gate 		int i;
2711*0Sstevel@tonic-gate 		qband_t **qbpp;
2712*0Sstevel@tonic-gate 
2713*0Sstevel@tonic-gate 		if (mp->b_band > q->q_nband) {
2714*0Sstevel@tonic-gate 			qbpp = &q->q_bandp;
2715*0Sstevel@tonic-gate 			while (*qbpp)
2716*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2717*0Sstevel@tonic-gate 			while (mp->b_band > q->q_nband) {
2718*0Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
2719*0Sstevel@tonic-gate 					if (freezer != curthread)
2720*0Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
2721*0Sstevel@tonic-gate 					return (0);
2722*0Sstevel@tonic-gate 				}
2723*0Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
2724*0Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
2725*0Sstevel@tonic-gate 				q->q_nband++;
2726*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2727*0Sstevel@tonic-gate 			}
2728*0Sstevel@tonic-gate 		}
2729*0Sstevel@tonic-gate 		qbp = q->q_bandp;
2730*0Sstevel@tonic-gate 		i = mp->b_band;
2731*0Sstevel@tonic-gate 		while (--i)
2732*0Sstevel@tonic-gate 			qbp = qbp->qb_next;
2733*0Sstevel@tonic-gate 	}
2734*0Sstevel@tonic-gate 
2735*0Sstevel@tonic-gate 	if ((mp->b_next = emp) != NULL) {
2736*0Sstevel@tonic-gate 		if ((mp->b_prev = emp->b_prev) != NULL)
2737*0Sstevel@tonic-gate 			emp->b_prev->b_next = mp;
2738*0Sstevel@tonic-gate 		else
2739*0Sstevel@tonic-gate 			q->q_first = mp;
2740*0Sstevel@tonic-gate 		emp->b_prev = mp;
2741*0Sstevel@tonic-gate 	} else {
2742*0Sstevel@tonic-gate 		if ((mp->b_prev = q->q_last) != NULL)
2743*0Sstevel@tonic-gate 			q->q_last->b_next = mp;
2744*0Sstevel@tonic-gate 		else
2745*0Sstevel@tonic-gate 			q->q_first = mp;
2746*0Sstevel@tonic-gate 		q->q_last = mp;
2747*0Sstevel@tonic-gate 	}
2748*0Sstevel@tonic-gate 
2749*0Sstevel@tonic-gate 	/* Get mblk and byte count for q_count accounting */
2750*0Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
2751*0Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
2752*0Sstevel@tonic-gate 		mblkcnt++;
2753*0Sstevel@tonic-gate 	}
2754*0Sstevel@tonic-gate 
2755*0Sstevel@tonic-gate 	if (qbp) {	/* adjust qband pointers and count */
2756*0Sstevel@tonic-gate 		if (!qbp->qb_first) {
2757*0Sstevel@tonic-gate 			qbp->qb_first = mp;
2758*0Sstevel@tonic-gate 			qbp->qb_last = mp;
2759*0Sstevel@tonic-gate 		} else {
2760*0Sstevel@tonic-gate 			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
2761*0Sstevel@tonic-gate 			    (mp->b_prev->b_band != mp->b_band)))
2762*0Sstevel@tonic-gate 				qbp->qb_first = mp;
2763*0Sstevel@tonic-gate 			else if (mp->b_next == NULL || (mp->b_next != NULL &&
2764*0Sstevel@tonic-gate 			    (mp->b_next->b_band != mp->b_band)))
2765*0Sstevel@tonic-gate 				qbp->qb_last = mp;
2766*0Sstevel@tonic-gate 		}
2767*0Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
2768*0Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
2769*0Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
2770*0Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
2771*0Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
2772*0Sstevel@tonic-gate 		}
2773*0Sstevel@tonic-gate 	} else {
2774*0Sstevel@tonic-gate 		q->q_count += bytecnt;
2775*0Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
2776*0Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
2777*0Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
2778*0Sstevel@tonic-gate 			q->q_flag |= QFULL;
2779*0Sstevel@tonic-gate 		}
2780*0Sstevel@tonic-gate 	}
2781*0Sstevel@tonic-gate 
2782*0Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL);
2783*0Sstevel@tonic-gate 
2784*0Sstevel@tonic-gate 	if (canenable(q) && (q->q_flag & QWANTR))
2785*0Sstevel@tonic-gate 		qenable_locked(q);
2786*0Sstevel@tonic-gate 
2787*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
2788*0Sstevel@tonic-gate 	if (freezer != curthread)
2789*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2790*0Sstevel@tonic-gate 
2791*0Sstevel@tonic-gate 	return (1);
2792*0Sstevel@tonic-gate }
2793*0Sstevel@tonic-gate 
2794*0Sstevel@tonic-gate /*
2795*0Sstevel@tonic-gate  * Create and put a control message on queue.
2796*0Sstevel@tonic-gate  */
2797*0Sstevel@tonic-gate int
2798*0Sstevel@tonic-gate putctl(queue_t *q, int type)
2799*0Sstevel@tonic-gate {
2800*0Sstevel@tonic-gate 	mblk_t *bp;
2801*0Sstevel@tonic-gate 
2802*0Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
2803*0Sstevel@tonic-gate 	    (bp = allocb_tryhard(0)) == NULL)
2804*0Sstevel@tonic-gate 		return (0);
2805*0Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char) type;
2806*0Sstevel@tonic-gate 
2807*0Sstevel@tonic-gate 	put(q, bp);
2808*0Sstevel@tonic-gate 
2809*0Sstevel@tonic-gate 	return (1);
2810*0Sstevel@tonic-gate }
2811*0Sstevel@tonic-gate 
2812*0Sstevel@tonic-gate /*
2813*0Sstevel@tonic-gate  * Control message with a single-byte parameter
2814*0Sstevel@tonic-gate  */
2815*0Sstevel@tonic-gate int
2816*0Sstevel@tonic-gate putctl1(queue_t *q, int type, int param)
2817*0Sstevel@tonic-gate {
2818*0Sstevel@tonic-gate 	mblk_t *bp;
2819*0Sstevel@tonic-gate 
2820*0Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
2821*0Sstevel@tonic-gate 	    (bp = allocb_tryhard(1)) == NULL)
2822*0Sstevel@tonic-gate 		return (0);
2823*0Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
2824*0Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
2825*0Sstevel@tonic-gate 
2826*0Sstevel@tonic-gate 	put(q, bp);
2827*0Sstevel@tonic-gate 
2828*0Sstevel@tonic-gate 	return (1);
2829*0Sstevel@tonic-gate }
2830*0Sstevel@tonic-gate 
2831*0Sstevel@tonic-gate int
2832*0Sstevel@tonic-gate putnextctl1(queue_t *q, int type, int param)
2833*0Sstevel@tonic-gate {
2834*0Sstevel@tonic-gate 	mblk_t *bp;
2835*0Sstevel@tonic-gate 
2836*0Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
2837*0Sstevel@tonic-gate 		((bp = allocb_tryhard(1)) == NULL))
2838*0Sstevel@tonic-gate 		return (0);
2839*0Sstevel@tonic-gate 
2840*0Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
2841*0Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
2842*0Sstevel@tonic-gate 
2843*0Sstevel@tonic-gate 	putnext(q, bp);
2844*0Sstevel@tonic-gate 
2845*0Sstevel@tonic-gate 	return (1);
2846*0Sstevel@tonic-gate }
2847*0Sstevel@tonic-gate 
2848*0Sstevel@tonic-gate int
2849*0Sstevel@tonic-gate putnextctl(queue_t *q, int type)
2850*0Sstevel@tonic-gate {
2851*0Sstevel@tonic-gate 	mblk_t *bp;
2852*0Sstevel@tonic-gate 
2853*0Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
2854*0Sstevel@tonic-gate 		((bp = allocb_tryhard(0)) == NULL))
2855*0Sstevel@tonic-gate 		return (0);
2856*0Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
2857*0Sstevel@tonic-gate 
2858*0Sstevel@tonic-gate 	putnext(q, bp);
2859*0Sstevel@tonic-gate 
2860*0Sstevel@tonic-gate 	return (1);
2861*0Sstevel@tonic-gate }
2862*0Sstevel@tonic-gate 
2863*0Sstevel@tonic-gate /*
2864*0Sstevel@tonic-gate  * Return the queue upstream from this one
2865*0Sstevel@tonic-gate  */
2866*0Sstevel@tonic-gate queue_t *
2867*0Sstevel@tonic-gate backq(queue_t *q)
2868*0Sstevel@tonic-gate {
2869*0Sstevel@tonic-gate 	q = _OTHERQ(q);
2870*0Sstevel@tonic-gate 	if (q->q_next) {
2871*0Sstevel@tonic-gate 		q = q->q_next;
2872*0Sstevel@tonic-gate 		return (_OTHERQ(q));
2873*0Sstevel@tonic-gate 	}
2874*0Sstevel@tonic-gate 	return (NULL);
2875*0Sstevel@tonic-gate }
2876*0Sstevel@tonic-gate 
2877*0Sstevel@tonic-gate /*
2878*0Sstevel@tonic-gate  * Send a block back up the queue in reverse from this
2879*0Sstevel@tonic-gate  * one (e.g. to respond to ioctls)
2880*0Sstevel@tonic-gate  */
2881*0Sstevel@tonic-gate void
2882*0Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp)
2883*0Sstevel@tonic-gate {
2884*0Sstevel@tonic-gate 	ASSERT(q && bp);
2885*0Sstevel@tonic-gate 
2886*0Sstevel@tonic-gate 	putnext(_OTHERQ(q), bp);
2887*0Sstevel@tonic-gate }
2888*0Sstevel@tonic-gate 
2889*0Sstevel@tonic-gate /*
2890*0Sstevel@tonic-gate  * Streams Queue Scheduling
2891*0Sstevel@tonic-gate  *
2892*0Sstevel@tonic-gate  * Queues are enabled through qenable() when they have messages to
2893*0Sstevel@tonic-gate  * process.  They are serviced by queuerun(), which runs each enabled
2894*0Sstevel@tonic-gate  * queue's service procedure.  The call to queuerun() is processor
2895*0Sstevel@tonic-gate  * dependent - the general principle is that it be run whenever a queue
2896*0Sstevel@tonic-gate  * is enabled but before returning to user level.  For system calls,
2897*0Sstevel@tonic-gate  * the function runqueues() is called if their action causes a queue
2898*0Sstevel@tonic-gate  * to be enabled.  For device interrupts, queuerun() should be
2899*0Sstevel@tonic-gate  * called before returning from the last level of interrupt.  Beyond
2900*0Sstevel@tonic-gate  * this, no timing assumptions should be made about queue scheduling.
2901*0Sstevel@tonic-gate  */
2902*0Sstevel@tonic-gate 
2903*0Sstevel@tonic-gate /*
2904*0Sstevel@tonic-gate  * Enable a queue: put it on list of those whose service procedures are
2905*0Sstevel@tonic-gate  * ready to run and set up the scheduling mechanism.
2906*0Sstevel@tonic-gate  * The broadcast is done outside the mutex -> to avoid the woken thread
2907*0Sstevel@tonic-gate  * from contending with the mutex. This is OK 'cos the queue has been
2908*0Sstevel@tonic-gate  * enqueued on the runlist and flagged safely at this point.
2909*0Sstevel@tonic-gate  */
2910*0Sstevel@tonic-gate void
2911*0Sstevel@tonic-gate qenable(queue_t *q)
2912*0Sstevel@tonic-gate {
2913*0Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2914*0Sstevel@tonic-gate 	qenable_locked(q);
2915*0Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2916*0Sstevel@tonic-gate }
2917*0Sstevel@tonic-gate /*
2918*0Sstevel@tonic-gate  * Return number of messages on queue
2919*0Sstevel@tonic-gate  */
2920*0Sstevel@tonic-gate int
2921*0Sstevel@tonic-gate qsize(queue_t *qp)
2922*0Sstevel@tonic-gate {
2923*0Sstevel@tonic-gate 	int count = 0;
2924*0Sstevel@tonic-gate 	mblk_t *mp;
2925*0Sstevel@tonic-gate 
2926*0Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
2927*0Sstevel@tonic-gate 	for (mp = qp->q_first; mp; mp = mp->b_next)
2928*0Sstevel@tonic-gate 		count++;
2929*0Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
2930*0Sstevel@tonic-gate 	return (count);
2931*0Sstevel@tonic-gate }
2932*0Sstevel@tonic-gate 
2933*0Sstevel@tonic-gate /*
2934*0Sstevel@tonic-gate  * noenable - set queue so that putq() will not enable it.
2935*0Sstevel@tonic-gate  * enableok - set queue so that putq() can enable it.
2936*0Sstevel@tonic-gate  */
2937*0Sstevel@tonic-gate void
2938*0Sstevel@tonic-gate noenable(queue_t *q)
2939*0Sstevel@tonic-gate {
2940*0Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2941*0Sstevel@tonic-gate 	q->q_flag |= QNOENB;
2942*0Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2943*0Sstevel@tonic-gate }
2944*0Sstevel@tonic-gate 
2945*0Sstevel@tonic-gate void
2946*0Sstevel@tonic-gate enableok(queue_t *q)
2947*0Sstevel@tonic-gate {
2948*0Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2949*0Sstevel@tonic-gate 	q->q_flag &= ~QNOENB;
2950*0Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2951*0Sstevel@tonic-gate }
2952*0Sstevel@tonic-gate 
2953*0Sstevel@tonic-gate /*
2954*0Sstevel@tonic-gate  * Set queue fields.
2955*0Sstevel@tonic-gate  */
2956*0Sstevel@tonic-gate int
2957*0Sstevel@tonic-gate strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
2958*0Sstevel@tonic-gate {
2959*0Sstevel@tonic-gate 	qband_t *qbp = NULL;
2960*0Sstevel@tonic-gate 	queue_t	*wrq;
2961*0Sstevel@tonic-gate 	int error = 0;
2962*0Sstevel@tonic-gate 	kthread_id_t freezer;
2963*0Sstevel@tonic-gate 
2964*0Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
2965*0Sstevel@tonic-gate 	if (freezer == curthread) {
2966*0Sstevel@tonic-gate 		ASSERT(frozenstr(q));
2967*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2968*0Sstevel@tonic-gate 	} else
2969*0Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2970*0Sstevel@tonic-gate 
2971*0Sstevel@tonic-gate 	if (what >= QBAD) {
2972*0Sstevel@tonic-gate 		error = EINVAL;
2973*0Sstevel@tonic-gate 		goto done;
2974*0Sstevel@tonic-gate 	}
2975*0Sstevel@tonic-gate 	if (pri != 0) {
2976*0Sstevel@tonic-gate 		int i;
2977*0Sstevel@tonic-gate 		qband_t **qbpp;
2978*0Sstevel@tonic-gate 
2979*0Sstevel@tonic-gate 		if (pri > q->q_nband) {
2980*0Sstevel@tonic-gate 			qbpp = &q->q_bandp;
2981*0Sstevel@tonic-gate 			while (*qbpp)
2982*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2983*0Sstevel@tonic-gate 			while (pri > q->q_nband) {
2984*0Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
2985*0Sstevel@tonic-gate 					error = EAGAIN;
2986*0Sstevel@tonic-gate 					goto done;
2987*0Sstevel@tonic-gate 				}
2988*0Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
2989*0Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
2990*0Sstevel@tonic-gate 				q->q_nband++;
2991*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2992*0Sstevel@tonic-gate 			}
2993*0Sstevel@tonic-gate 		}
2994*0Sstevel@tonic-gate 		qbp = q->q_bandp;
2995*0Sstevel@tonic-gate 		i = pri;
2996*0Sstevel@tonic-gate 		while (--i)
2997*0Sstevel@tonic-gate 			qbp = qbp->qb_next;
2998*0Sstevel@tonic-gate 	}
2999*0Sstevel@tonic-gate 	switch (what) {
3000*0Sstevel@tonic-gate 
3001*0Sstevel@tonic-gate 	case QHIWAT:
3002*0Sstevel@tonic-gate 		if (qbp)
3003*0Sstevel@tonic-gate 			qbp->qb_hiwat = (size_t)val;
3004*0Sstevel@tonic-gate 		else
3005*0Sstevel@tonic-gate 			q->q_hiwat = (size_t)val;
3006*0Sstevel@tonic-gate 		break;
3007*0Sstevel@tonic-gate 
3008*0Sstevel@tonic-gate 	case QLOWAT:
3009*0Sstevel@tonic-gate 		if (qbp)
3010*0Sstevel@tonic-gate 			qbp->qb_lowat = (size_t)val;
3011*0Sstevel@tonic-gate 		else
3012*0Sstevel@tonic-gate 			q->q_lowat = (size_t)val;
3013*0Sstevel@tonic-gate 		break;
3014*0Sstevel@tonic-gate 
3015*0Sstevel@tonic-gate 	case QMAXPSZ:
3016*0Sstevel@tonic-gate 		if (qbp)
3017*0Sstevel@tonic-gate 			error = EINVAL;
3018*0Sstevel@tonic-gate 		else
3019*0Sstevel@tonic-gate 			q->q_maxpsz = (ssize_t)val;
3020*0Sstevel@tonic-gate 
3021*0Sstevel@tonic-gate 		/*
3022*0Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
3023*0Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
3024*0Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
3025*0Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
3026*0Sstevel@tonic-gate 		 */
3027*0Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
3028*0Sstevel@tonic-gate 		if (q != wrq->q_next)
3029*0Sstevel@tonic-gate 			break;
3030*0Sstevel@tonic-gate 
3031*0Sstevel@tonic-gate 		/*
3032*0Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
3033*0Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
3034*0Sstevel@tonic-gate 		 */
3035*0Sstevel@tonic-gate 		if (freezer != curthread) {
3036*0Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
3037*0Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
3038*0Sstevel@tonic-gate 		}
3039*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(wrq)));
3040*0Sstevel@tonic-gate 
3041*0Sstevel@tonic-gate 		if (strmsgsz != 0) {
3042*0Sstevel@tonic-gate 			if (val == INFPSZ)
3043*0Sstevel@tonic-gate 				val = strmsgsz;
3044*0Sstevel@tonic-gate 			else  {
3045*0Sstevel@tonic-gate 				if (STREAM(q)->sd_vnode->v_type == VFIFO)
3046*0Sstevel@tonic-gate 					val = MIN(PIPE_BUF, val);
3047*0Sstevel@tonic-gate 				else
3048*0Sstevel@tonic-gate 					val = MIN(strmsgsz, val);
3049*0Sstevel@tonic-gate 			}
3050*0Sstevel@tonic-gate 		}
3051*0Sstevel@tonic-gate 		STREAM(q)->sd_qn_maxpsz = val;
3052*0Sstevel@tonic-gate 		if (freezer != curthread) {
3053*0Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
3054*0Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
3055*0Sstevel@tonic-gate 		}
3056*0Sstevel@tonic-gate 		break;
3057*0Sstevel@tonic-gate 
3058*0Sstevel@tonic-gate 	case QMINPSZ:
3059*0Sstevel@tonic-gate 		if (qbp)
3060*0Sstevel@tonic-gate 			error = EINVAL;
3061*0Sstevel@tonic-gate 		else
3062*0Sstevel@tonic-gate 			q->q_minpsz = (ssize_t)val;
3063*0Sstevel@tonic-gate 
3064*0Sstevel@tonic-gate 		/*
3065*0Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
3066*0Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
3067*0Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
3068*0Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
3069*0Sstevel@tonic-gate 		 */
3070*0Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
3071*0Sstevel@tonic-gate 		if (q != wrq->q_next)
3072*0Sstevel@tonic-gate 			break;
3073*0Sstevel@tonic-gate 
3074*0Sstevel@tonic-gate 		/*
3075*0Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
3076*0Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
3077*0Sstevel@tonic-gate 		 */
3078*0Sstevel@tonic-gate 		if (freezer != curthread) {
3079*0Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
3080*0Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
3081*0Sstevel@tonic-gate 		}
3082*0Sstevel@tonic-gate 		STREAM(q)->sd_qn_minpsz = (ssize_t)val;
3083*0Sstevel@tonic-gate 
3084*0Sstevel@tonic-gate 		if (freezer != curthread) {
3085*0Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
3086*0Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
3087*0Sstevel@tonic-gate 		}
3088*0Sstevel@tonic-gate 		break;
3089*0Sstevel@tonic-gate 
3090*0Sstevel@tonic-gate 	case QSTRUIOT:
3091*0Sstevel@tonic-gate 		if (qbp)
3092*0Sstevel@tonic-gate 			error = EINVAL;
3093*0Sstevel@tonic-gate 		else
3094*0Sstevel@tonic-gate 			q->q_struiot = (ushort_t)val;
3095*0Sstevel@tonic-gate 		break;
3096*0Sstevel@tonic-gate 
3097*0Sstevel@tonic-gate 	case QCOUNT:
3098*0Sstevel@tonic-gate 	case QFIRST:
3099*0Sstevel@tonic-gate 	case QLAST:
3100*0Sstevel@tonic-gate 	case QFLAG:
3101*0Sstevel@tonic-gate 		error = EPERM;
3102*0Sstevel@tonic-gate 		break;
3103*0Sstevel@tonic-gate 
3104*0Sstevel@tonic-gate 	default:
3105*0Sstevel@tonic-gate 		error = EINVAL;
3106*0Sstevel@tonic-gate 		break;
3107*0Sstevel@tonic-gate 	}
3108*0Sstevel@tonic-gate done:
3109*0Sstevel@tonic-gate 	if (freezer != curthread)
3110*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
3111*0Sstevel@tonic-gate 	return (error);
3112*0Sstevel@tonic-gate }
3113*0Sstevel@tonic-gate 
3114*0Sstevel@tonic-gate /*
3115*0Sstevel@tonic-gate  * Get queue fields.
3116*0Sstevel@tonic-gate  */
3117*0Sstevel@tonic-gate int
3118*0Sstevel@tonic-gate strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
3119*0Sstevel@tonic-gate {
3120*0Sstevel@tonic-gate 	qband_t 	*qbp = NULL;
3121*0Sstevel@tonic-gate 	int 		error = 0;
3122*0Sstevel@tonic-gate 	kthread_id_t 	freezer;
3123*0Sstevel@tonic-gate 
3124*0Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
3125*0Sstevel@tonic-gate 	if (freezer == curthread) {
3126*0Sstevel@tonic-gate 		ASSERT(frozenstr(q));
3127*0Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
3128*0Sstevel@tonic-gate 	} else
3129*0Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
3130*0Sstevel@tonic-gate 	if (what >= QBAD) {
3131*0Sstevel@tonic-gate 		error = EINVAL;
3132*0Sstevel@tonic-gate 		goto done;
3133*0Sstevel@tonic-gate 	}
3134*0Sstevel@tonic-gate 	if (pri != 0) {
3135*0Sstevel@tonic-gate 		int i;
3136*0Sstevel@tonic-gate 		qband_t **qbpp;
3137*0Sstevel@tonic-gate 
3138*0Sstevel@tonic-gate 		if (pri > q->q_nband) {
3139*0Sstevel@tonic-gate 			qbpp = &q->q_bandp;
3140*0Sstevel@tonic-gate 			while (*qbpp)
3141*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
3142*0Sstevel@tonic-gate 			while (pri > q->q_nband) {
3143*0Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
3144*0Sstevel@tonic-gate 					error = EAGAIN;
3145*0Sstevel@tonic-gate 					goto done;
3146*0Sstevel@tonic-gate 				}
3147*0Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
3148*0Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
3149*0Sstevel@tonic-gate 				q->q_nband++;
3150*0Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
3151*0Sstevel@tonic-gate 			}
3152*0Sstevel@tonic-gate 		}
3153*0Sstevel@tonic-gate 		qbp = q->q_bandp;
3154*0Sstevel@tonic-gate 		i = pri;
3155*0Sstevel@tonic-gate 		while (--i)
3156*0Sstevel@tonic-gate 			qbp = qbp->qb_next;
3157*0Sstevel@tonic-gate 	}
3158*0Sstevel@tonic-gate 	switch (what) {
3159*0Sstevel@tonic-gate 	case QHIWAT:
3160*0Sstevel@tonic-gate 		if (qbp)
3161*0Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_hiwat;
3162*0Sstevel@tonic-gate 		else
3163*0Sstevel@tonic-gate 			*(size_t *)valp = q->q_hiwat;
3164*0Sstevel@tonic-gate 		break;
3165*0Sstevel@tonic-gate 
3166*0Sstevel@tonic-gate 	case QLOWAT:
3167*0Sstevel@tonic-gate 		if (qbp)
3168*0Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_lowat;
3169*0Sstevel@tonic-gate 		else
3170*0Sstevel@tonic-gate 			*(size_t *)valp = q->q_lowat;
3171*0Sstevel@tonic-gate 		break;
3172*0Sstevel@tonic-gate 
3173*0Sstevel@tonic-gate 	case QMAXPSZ:
3174*0Sstevel@tonic-gate 		if (qbp)
3175*0Sstevel@tonic-gate 			error = EINVAL;
3176*0Sstevel@tonic-gate 		else
3177*0Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_maxpsz;
3178*0Sstevel@tonic-gate 		break;
3179*0Sstevel@tonic-gate 
3180*0Sstevel@tonic-gate 	case QMINPSZ:
3181*0Sstevel@tonic-gate 		if (qbp)
3182*0Sstevel@tonic-gate 			error = EINVAL;
3183*0Sstevel@tonic-gate 		else
3184*0Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_minpsz;
3185*0Sstevel@tonic-gate 		break;
3186*0Sstevel@tonic-gate 
3187*0Sstevel@tonic-gate 	case QCOUNT:
3188*0Sstevel@tonic-gate 		if (qbp)
3189*0Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_count;
3190*0Sstevel@tonic-gate 		else
3191*0Sstevel@tonic-gate 			*(size_t *)valp = q->q_count;
3192*0Sstevel@tonic-gate 		break;
3193*0Sstevel@tonic-gate 
3194*0Sstevel@tonic-gate 	case QFIRST:
3195*0Sstevel@tonic-gate 		if (qbp)
3196*0Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_first;
3197*0Sstevel@tonic-gate 		else
3198*0Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_first;
3199*0Sstevel@tonic-gate 		break;
3200*0Sstevel@tonic-gate 
3201*0Sstevel@tonic-gate 	case QLAST:
3202*0Sstevel@tonic-gate 		if (qbp)
3203*0Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_last;
3204*0Sstevel@tonic-gate 		else
3205*0Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_last;
3206*0Sstevel@tonic-gate 		break;
3207*0Sstevel@tonic-gate 
3208*0Sstevel@tonic-gate 	case QFLAG:
3209*0Sstevel@tonic-gate 		if (qbp)
3210*0Sstevel@tonic-gate 			*(uint_t *)valp = qbp->qb_flag;
3211*0Sstevel@tonic-gate 		else
3212*0Sstevel@tonic-gate 			*(uint_t *)valp = q->q_flag;
3213*0Sstevel@tonic-gate 		break;
3214*0Sstevel@tonic-gate 
3215*0Sstevel@tonic-gate 	case QSTRUIOT:
3216*0Sstevel@tonic-gate 		if (qbp)
3217*0Sstevel@tonic-gate 			error = EINVAL;
3218*0Sstevel@tonic-gate 		else
3219*0Sstevel@tonic-gate 			*(short *)valp = q->q_struiot;
3220*0Sstevel@tonic-gate 		break;
3221*0Sstevel@tonic-gate 
3222*0Sstevel@tonic-gate 	default:
3223*0Sstevel@tonic-gate 		error = EINVAL;
3224*0Sstevel@tonic-gate 		break;
3225*0Sstevel@tonic-gate 	}
3226*0Sstevel@tonic-gate done:
3227*0Sstevel@tonic-gate 	if (freezer != curthread)
3228*0Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
3229*0Sstevel@tonic-gate 	return (error);
3230*0Sstevel@tonic-gate }
3231*0Sstevel@tonic-gate 
3232*0Sstevel@tonic-gate /*
3233*0Sstevel@tonic-gate  * Function awakes all in cvwait/sigwait/pollwait, on one of:
3234*0Sstevel@tonic-gate  *	QWANTWSYNC or QWANTR or QWANTW,
3235*0Sstevel@tonic-gate  *
3236*0Sstevel@tonic-gate  * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
3237*0Sstevel@tonic-gate  *	 deferred wakeup will be done. Also if strpoll() in progress then a
3238*0Sstevel@tonic-gate  *	 deferred pollwakeup will be done.
3239*0Sstevel@tonic-gate  */
3240*0Sstevel@tonic-gate void
3241*0Sstevel@tonic-gate strwakeq(queue_t *q, int flag)
3242*0Sstevel@tonic-gate {
3243*0Sstevel@tonic-gate 	stdata_t 	*stp = STREAM(q);
3244*0Sstevel@tonic-gate 	pollhead_t 	*pl;
3245*0Sstevel@tonic-gate 
3246*0Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
3247*0Sstevel@tonic-gate 	pl = &stp->sd_pollist;
3248*0Sstevel@tonic-gate 	if (flag & QWANTWSYNC) {
3249*0Sstevel@tonic-gate 		ASSERT(!(q->q_flag & QREADR));
3250*0Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
3251*0Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
3252*0Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
3253*0Sstevel@tonic-gate 		} else {
3254*0Sstevel@tonic-gate 			stp->sd_wakeq |= WSLEEP;
3255*0Sstevel@tonic-gate 		}
3256*0Sstevel@tonic-gate 
3257*0Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3258*0Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
3259*0Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
3260*0Sstevel@tonic-gate 
3261*0Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
3262*0Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
3263*0Sstevel@tonic-gate 	} else if (flag & QWANTR) {
3264*0Sstevel@tonic-gate 		if (stp->sd_flag & RSLEEP) {
3265*0Sstevel@tonic-gate 			stp->sd_flag &= ~RSLEEP;
3266*0Sstevel@tonic-gate 			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
3267*0Sstevel@tonic-gate 		} else {
3268*0Sstevel@tonic-gate 			stp->sd_wakeq |= RSLEEP;
3269*0Sstevel@tonic-gate 		}
3270*0Sstevel@tonic-gate 
3271*0Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3272*0Sstevel@tonic-gate 		pollwakeup(pl, POLLIN | POLLRDNORM);
3273*0Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
3274*0Sstevel@tonic-gate 
3275*0Sstevel@tonic-gate 		{
3276*0Sstevel@tonic-gate 			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);
3277*0Sstevel@tonic-gate 
3278*0Sstevel@tonic-gate 			if (events)
3279*0Sstevel@tonic-gate 				strsendsig(stp->sd_siglist, events, 0, 0);
3280*0Sstevel@tonic-gate 		}
3281*0Sstevel@tonic-gate 	} else {
3282*0Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
3283*0Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
3284*0Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
3285*0Sstevel@tonic-gate 		}
3286*0Sstevel@tonic-gate 
3287*0Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3288*0Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
3289*0Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
3290*0Sstevel@tonic-gate 
3291*0Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
3292*0Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
3293*0Sstevel@tonic-gate 	}
3294*0Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
3295*0Sstevel@tonic-gate }
3296*0Sstevel@tonic-gate 
/*
 * struioget: copy user data from the uio in dp into the STRUIO_SPEC-marked
 * mblks of the chain mp (synchronous-STREAMS write-side data placement).
 * For each eligible mblk, up to (db_cksumend - db_cksumstuff) bytes are
 * copied starting at b_rptr + db_cksumstuff; the mblk is then marked
 * STRUIO_DONE and db_cksumstuff advanced.  Already-done or unmarked mblks
 * are skipped.
 *
 * If noblock is set, faults taken while touching the user address space are
 * caught with on_trap()/no_trap() and reported as EWOULDBLOCK rather than
 * letting uiomove() sleep on a page-in.
 *
 * Returns 0 on success, EWOULDBLOCK on a caught fault, EIO for an unknown
 * struio type, or an error from uiomove().
 */
int
struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
{
	stdata_t *stp = STREAM(q);
	int typ  = STRUIOT_STANDARD;
	uio_t	 *uiop = &dp->d_uio;
	dblk_t	 *dbp;
	ssize_t	 uiocnt;
	ssize_t	 cnt;
	unsigned char *ptr;
	ssize_t	 resid;
	int	 error = 0;
	on_trap_data_t otd;
	queue_t	*stwrq;

	/*
	 * Plumbing may change while taking the type so store the
	 * queue in a temporary variable. It doesn't matter even
	 * if we take the type from the previous plumbing; that's
	 * because if the plumbing has changed while we were
	 * holding the queue in a temporary variable, we can continue
	 * processing the message the way it would have been processed
	 * in the old plumbing, without any side effects but a bit
	 * extra processing for partial ip header checksum.
	 *
	 * This has been done to avoid holding the sd_lock which is
	 * very hot.
	 */

	stwrq = stp->sd_struiowrq;
	if (stwrq)
		typ = stwrq->q_struiot;

	/* resid is (re)sampled each iteration; loop ends when uio is drained */
	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
		dbp = mp->b_datap;
		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
		cnt = MIN(uiocnt, uiop->uio_resid);
		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
			/*
			 * Either this mblk has already been processed
			 * or there is no more room in this mblk (?).
			 */
			continue;
		}
		switch (typ) {
		case STRUIOT_STANDARD:
			if (noblock) {
				/* Catch user-space faults instead of sleeping */
				if (on_trap(&otd, OT_DATA_ACCESS)) {
					no_trap();
					error = EWOULDBLOCK;
					goto out;
				}
			}
			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
				if (noblock)
					no_trap();
				goto out;
			}
			if (noblock)
				no_trap();
			break;

		default:
			error = EIO;
			goto out;
		}
		dbp->db_struioflag |= STRUIO_DONE;
		dbp->db_cksumstuff += cnt;
	}
out:
	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
		/*
		 * A fault has occurred and some bytes were moved to the
		 * current mblk; the uio_t has already been updated by
		 * the appropriate uio routine, so also update the mblk
		 * to reflect this in case this same mblk chain is used
		 * again (after the fault has been handled).
		 * (dbp is valid here: error is only set inside the loop.)
		 */
		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
		if (uiocnt >= resid)
			dbp->db_cksumstuff += resid;
	}
	return (error);
}
3383*0Sstevel@tonic-gate 
/*
 * Try to enter the queue synchronously. Any attempt to enter a closing queue
 * fails. The qp->q_rwcnt keeps track of the number of successful entries so
 * that removeq() will not try to close the queue while a thread is inside the
 * queue.
 */
3390*0Sstevel@tonic-gate static boolean_t
3391*0Sstevel@tonic-gate rwnext_enter(queue_t *qp)
3392*0Sstevel@tonic-gate {
3393*0Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
3394*0Sstevel@tonic-gate 	if (qp->q_flag & QWCLOSE) {
3395*0Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
3396*0Sstevel@tonic-gate 		return (B_FALSE);
3397*0Sstevel@tonic-gate 	}
3398*0Sstevel@tonic-gate 	qp->q_rwcnt++;
3399*0Sstevel@tonic-gate 	ASSERT(qp->q_rwcnt != 0);
3400*0Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
3401*0Sstevel@tonic-gate 	return (B_TRUE);
3402*0Sstevel@tonic-gate }
3403*0Sstevel@tonic-gate 
3404*0Sstevel@tonic-gate /*
3405*0Sstevel@tonic-gate  * Decrease the count of threads running in sync stream queue and wake up any
3406*0Sstevel@tonic-gate  * threads blocked in removeq().
3407*0Sstevel@tonic-gate  */
3408*0Sstevel@tonic-gate static void
3409*0Sstevel@tonic-gate rwnext_exit(queue_t *qp)
3410*0Sstevel@tonic-gate {
3411*0Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
3412*0Sstevel@tonic-gate 	qp->q_rwcnt--;
3413*0Sstevel@tonic-gate 	if (qp->q_flag & QWANTRMQSYNC) {
3414*0Sstevel@tonic-gate 		qp->q_flag &= ~QWANTRMQSYNC;
3415*0Sstevel@tonic-gate 		cv_broadcast(&qp->q_wait);
3416*0Sstevel@tonic-gate 	}
3417*0Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
3418*0Sstevel@tonic-gate }
3419*0Sstevel@tonic-gate 
/*
 * The purpose of rwnext() is to call the rw procedure of the next
 * (downstream) module's queue.
 *
 * Treated as a put entrypoint for perimeter synchronization.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT
 * sync queues). If it is a CIPUT sync queue, sq_count is incremented and it
 * does not matter if any regular put entrypoints have already been entered.
 * We can't increment one of the sq_putcounts (instead of sq_count) because
 * qwait_rw won't know which counter to decrement.
 *
 * It would be reasonable to add the lockless FASTPUT logic.
 */
int
rwnext(queue_t *qp, struiod_t *dp)
{
	queue_t		*nqp;
	syncq_t		*sq;
	uint16_t	count;
	uint16_t	flags;
	struct qinit	*qi;
	int		(*proc)();
	struct stdata	*stp;
	int		isread;
	int		rval;

	stp = STREAM(qp);
	/*
	 * Prevent q_next from changing by holding sd_lock until acquiring
	 * SQLOCK. Note that a read-side rwnext from the streamhead will
	 * already have sd_lock acquired. In either case sd_lock is always
	 * released after acquiring SQLOCK.
	 *
	 * The streamhead read-side holding sd_lock when calling rwnext is
	 * required to prevent a race condition where M_DATA mblks flowing
	 * up the read-side of the stream could be bypassed by a rwnext()
	 * down-call. In this case sd_lock acts as the streamhead perimeter.
	 */
	/* Determine direction and step to the next (downstream) queue. */
	if ((nqp = _WR(qp)) == qp) {
		/* qp is a write queue: move to the downstream write queue */
		isread = 0;
		mutex_enter(&stp->sd_lock);
		qp = nqp->q_next;
	} else {
		/* qp is a read queue: move to the downstream pair's read queue */
		isread = 1;
		if (nqp != stp->sd_wrq)
			/* Not streamhead */
			mutex_enter(&stp->sd_lock);
		qp = _RD(nqp->q_next);
	}
	qi = qp->q_qinfo;
	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
		/*
		 * Not a synchronous module or no r/w procedure for this
		 * queue, so just return EINVAL and let the caller handle it.
		 */
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}

	/* Take a q_rwcnt claim; fails if the queue is closing. */
	if (rwnext_enter(qp) == B_FALSE) {
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}

	sq = qp->q_syncq;
	mutex_enter(SQLOCK(sq));
	mutex_exit(&stp->sd_lock);
	count = sq->sq_count;
	flags = sq->sq_flags;
	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));

	/* Spin/sleep until the inner perimeter can be entered. */
	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
		/*
		 * if this queue is being closed, return.
		 */
		if (qp->q_flag & QWCLOSE) {
			mutex_exit(SQLOCK(sq));
			rwnext_exit(qp);
			return (EINVAL);
		}

		/*
		 * Wait until we can enter the inner perimeter.
		 */
		sq->sq_flags = flags | SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
		count = sq->sq_count;
		flags = sq->sq_flags;
	}

	/* (&& binds tighter than || -- the pairing below is intentional) */
	if (isread == 0 && stp->sd_struiowrq == NULL ||
	    isread == 1 && stp->sd_struiordq == NULL) {
		/*
		 * Stream plumbing changed while waiting for inner perimeter
		 * so just return EINVAL and let the caller handle it.
		 */
		mutex_exit(SQLOCK(sq));
		rwnext_exit(qp);
		return (EINVAL);
	}
	if (!(flags & SQ_CIPUT))
		sq->sq_flags = flags | SQ_EXCL;
	sq->sq_count = count + 1;
	ASSERT(sq->sq_count != 0);		/* Wraparound */
	/*
	 * Note: The only message ordering guarantee that rwnext() makes is
	 *	 for the write queue flow-control case. All others (r/w queue
	 *	 with q_count > 0 (or q_first != 0)) are the responsibility of
	 *	 the queue's rw procedure. This could be generalized here by
	 *	 running the queue's service procedure, but that wouldn't be
	 *	 the most efficient for all cases.
	 */
	mutex_exit(SQLOCK(sq));
	if (! isread && (qp->q_flag & QFULL)) {
		/*
		 * Write queue may be flow controlled. If so,
		 * mark the queue for wakeup when it's not.
		 * (Re-check under QLOCK: QFULL may have cleared.)
		 */
		mutex_enter(QLOCK(qp));
		if (qp->q_flag & QFULL) {
			qp->q_flag |= QWANTWSYNC;
			mutex_exit(QLOCK(qp));
			rval = EWOULDBLOCK;
			goto out;
		}
		mutex_exit(QLOCK(qp));
	}

	/* Flow-trace the message before (write) / after (read) the rw call. */
	if (! isread && dp->d_mp)
		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
		    dp->d_mp->b_datap->db_base);

	rval = (*proc)(qp, dp);

	if (isread && dp->d_mp)
		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
out:
	/*
	 * The queue is protected from being freed by sq_count, so it is
	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
	 */
	rwnext_exit(qp);

	mutex_enter(SQLOCK(sq));
	flags = sq->sq_flags;
	ASSERT(sq->sq_count != 0);
	sq->sq_count--;
	if (flags & SQ_TAIL) {
		putnext_tail(sq, qp, flags);
		/*
		 * The only purpose of this ASSERT is to preserve calling stack
		 * in DEBUG kernel.
		 */
		ASSERT(flags & SQ_TAIL);
		return (rval);
	}
	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
	/*
	 * Safe to always drop SQ_EXCL:
	 *	Not SQ_CIPUT means we set SQ_EXCL above
	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
	 *	did a qwriter(INNER) in which case nobody else
	 *	is in the inner perimeter and we are exiting.
	 *
	 * I would like to make the following assertion:
	 *
	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
	 * 	sq->sq_count == 0);
	 *
	 * which indicates that if we are both putshared and exclusive,
	 * we became exclusive while executing the putproc, and the only
	 * claim on the syncq was the one we dropped a few lines above.
	 * But other threads that enter putnext while the syncq is exclusive
	 * need to make a claim as they may need to drop SQLOCK in the
	 * has_writers case to avoid deadlocks.  If these threads are
	 * delayed or preempted, it is possible that the writer thread can
	 * find out that there are other claims making the (sq_count == 0)
	 * test invalid.
	 */

	sq->sq_flags = flags & ~SQ_EXCL;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		sq->sq_flags &= ~SQ_WANTWAKEUP;
		cv_broadcast(&sq->sq_wait);
	}
	mutex_exit(SQLOCK(sq));
	return (rval);
}
3610*0Sstevel@tonic-gate 
/*
 * The purpose of infonext() is to call the info procedure of the next
 * (downstream) module's queue.
 *
 * Treated as a put entrypoint for perimeter synchronization.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT
 * sync queues). If it is a CIPUT sync queue, the regular sq_count is
 * incremented and it does not matter if any regular put entrypoints have
 * already been entered.
 */
int
infonext(queue_t *qp, infod_t *idp)
{
	queue_t		*nqp;
	syncq_t		*sq;
	uint16_t	count;
	uint16_t 	flags;
	struct qinit	*qi;
	int		(*proc)();
	struct stdata	*stp;
	int		rval;

	stp = STREAM(qp);
	/*
	 * Prevent q_next from changing by holding sd_lock until
	 * acquiring SQLOCK.
	 */
	mutex_enter(&stp->sd_lock);
	/* Step to the next (downstream) queue on the same side of the pair. */
	if ((nqp = _WR(qp)) == qp) {
		qp = nqp->q_next;
	} else {
		qp = _RD(nqp->q_next);
	}
	qi = qp->q_qinfo;
	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
		/* Not a synchronous module, or no info procedure. */
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}
	sq = qp->q_syncq;
	mutex_enter(SQLOCK(sq));
	mutex_exit(&stp->sd_lock);
	count = sq->sq_count;
	flags = sq->sq_flags;
	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));

	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
		/*
		 * Wait until we can enter the inner perimeter.
		 */
		sq->sq_flags = flags | SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
		count = sq->sq_count;
		flags = sq->sq_flags;
	}

	if (! (flags & SQ_CIPUT))
		sq->sq_flags = flags | SQ_EXCL;
	sq->sq_count = count + 1;
	ASSERT(sq->sq_count != 0);		/* Wraparound */
	mutex_exit(SQLOCK(sq));

	/* Call the module's info procedure inside the perimeter claim. */
	rval = (*proc)(qp, idp);

	mutex_enter(SQLOCK(sq));
	flags = sq->sq_flags;
	ASSERT(sq->sq_count != 0);
	sq->sq_count--;
	if (flags & SQ_TAIL) {
		putnext_tail(sq, qp, flags);
		/*
		 * The only purpose of this ASSERT is to preserve calling stack
		 * in DEBUG kernel.
		 */
		ASSERT(flags & SQ_TAIL);
		return (rval);
	}
	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
/*
 * XXXX
 * I am not certain the next comment is correct here.  I need to consider
 * why infonext is called, and whether dropping SQ_EXCL unless non-CIPUT
 * might cause other problems.  It just might be safer to drop it only if
 * !SQ_CIPUT because that is when we set it.
 */
	/*
	 * Safe to always drop SQ_EXCL:
	 *	Not SQ_CIPUT means we set SQ_EXCL above
	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
	 *	did a qwriter(INNER) in which case nobody else
	 *	is in the inner perimeter and we are exiting.
	 *
	 * I would like to make the following assertion:
	 *
	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
	 *	sq->sq_count == 0);
	 *
	 * which indicates that if we are both putshared and exclusive,
	 * we became exclusive while executing the putproc, and the only
	 * claim on the syncq was the one we dropped a few lines above.
	 * But other threads that enter putnext while the syncq is exclusive
	 * need to make a claim as they may need to drop SQLOCK in the
	 * has_writers case to avoid deadlocks.  If these threads are
	 * delayed or preempted, it is possible that the writer thread can
	 * find out that there are other claims making the (sq_count == 0)
	 * test invalid.
	 */

	sq->sq_flags = flags & ~SQ_EXCL;
	mutex_exit(SQLOCK(sq));
	return (rval);
}
3723*0Sstevel@tonic-gate 
3724*0Sstevel@tonic-gate /*
3725*0Sstevel@tonic-gate  * Return nonzero if the queue is responsible for struio(), else return 0.
3726*0Sstevel@tonic-gate  */
3727*0Sstevel@tonic-gate int
3728*0Sstevel@tonic-gate isuioq(queue_t *q)
3729*0Sstevel@tonic-gate {
3730*0Sstevel@tonic-gate 	if (q->q_flag & QREADR)
3731*0Sstevel@tonic-gate 		return (STREAM(q)->sd_struiordq == q);
3732*0Sstevel@tonic-gate 	else
3733*0Sstevel@tonic-gate 		return (STREAM(q)->sd_struiowrq == q);
3734*0Sstevel@tonic-gate }
3735*0Sstevel@tonic-gate 
/*
 * Tunable: when nonzero, create_putlocks() is a no-op and no per-CPU
 * sq_putlocks/sq_putcounts or stream putlocks are ever created.
 * Enabled by default only on sparc (NOTE(review): presumably because
 * large multi-CPU machines were the target there -- confirm rationale).
 */
#if defined(__sparc)
int disable_putlocks = 0;
#else
int disable_putlocks = 1;
#endif
3741*0Sstevel@tonic-gate 
/*
 * Called by create_putlocks().
 */
/*
 * Allocate per-CPU ciputctrl state for the syncq of q and, when the other
 * half of the queue pair has its own syncq (QPERQ), for that syncq too.
 * Only CIPUT syncqs get ciputctrl.  Allocation races are resolved with a
 * check-alloc-recheck pattern under SQLOCK.
 */
static void
create_syncq_putlocks(queue_t *q)
{
	syncq_t	*sq = q->q_syncq;
	ciputctrl_t *cip;
	int i;

	ASSERT(sq != NULL);

	ASSERT(disable_putlocks == 0);
	ASSERT(n_ciputctrl >= min_n_ciputctrl);
	ASSERT(ciputctrl_cache != NULL);

	if (!(sq->sq_type & SQ_CIPUT))
		return;

	/* Two passes: this queue's syncq, then (possibly) the other half's. */
	for (i = 0; i <= 1; i++) {
		if (sq->sq_ciputctrl == NULL) {
			/* Allocate outside SQLOCK, then recheck under it. */
			cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
			SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
			mutex_enter(SQLOCK(sq));
			if (sq->sq_ciputctrl != NULL) {
				/* Lost the race; discard our allocation. */
				mutex_exit(SQLOCK(sq));
				kmem_cache_free(ciputctrl_cache, cip);
			} else {
				ASSERT(sq->sq_nciputctrl == 0);
				sq->sq_nciputctrl = n_ciputctrl - 1;
				/*
				 * putnext checks sq_ciputctrl without holding
				 * SQLOCK. if it is not NULL putnext assumes
				 * sq_nciputctrl is initialized. membar below
				 * insures that.
				 */
				membar_producer();
				sq->sq_ciputctrl = cip;
				mutex_exit(SQLOCK(sq));
			}
		}
		ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
		if (i == 1)
			break;
		q = _OTHERQ(q);
		if (!(q->q_flag & QPERQ)) {
			/* Both halves share one syncq; nothing more to do. */
			ASSERT(sq == q->q_syncq);
			break;
		}
		ASSERT(q->q_syncq != NULL);
		ASSERT(sq != q->q_syncq);
		sq = q->q_syncq;
		ASSERT(sq->sq_type & SQ_CIPUT);
	}
}
3797*0Sstevel@tonic-gate 
3798*0Sstevel@tonic-gate /*
3799*0Sstevel@tonic-gate  * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for
3800*0Sstevel@tonic-gate  * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for
3801*0Sstevel@tonic-gate  * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
3802*0Sstevel@tonic-gate  * starting from q and down to the driver.
3803*0Sstevel@tonic-gate  *
3804*0Sstevel@tonic-gate  * This should be called after the affected queues are part of stream
3805*0Sstevel@tonic-gate  * geometry. It should be called from driver/module open routine after
3806*0Sstevel@tonic-gate  * qprocson() call. It is also called from nfs syscall where it is known that
3807*0Sstevel@tonic-gate  * stream is configured and won't change its geometry during create_putlock
3808*0Sstevel@tonic-gate  * call.
3809*0Sstevel@tonic-gate  *
3810*0Sstevel@tonic-gate  * caller normally uses 0 value for the stream argument to speed up MT putnext
3811*0Sstevel@tonic-gate  * into the perimeter of q for example because its perimeter is per module
3812*0Sstevel@tonic-gate  * (e.g. IP).
3813*0Sstevel@tonic-gate  *
3814*0Sstevel@tonic-gate  * caller normally uses non 0 value for the stream argument to hint the system
3815*0Sstevel@tonic-gate  * that the stream of q is a very contended global system stream
3816*0Sstevel@tonic-gate  * (e.g. NFS/UDP) and the part of the stream from q to the driver is
3817*0Sstevel@tonic-gate  * particularly MT hot.
3818*0Sstevel@tonic-gate  *
3819*0Sstevel@tonic-gate  * Caller insures stream plumbing won't happen while we are here and therefore
3820*0Sstevel@tonic-gate  * q_next can be safely used.
3821*0Sstevel@tonic-gate  */
3822*0Sstevel@tonic-gate 
void
create_putlocks(queue_t *q, int stream)
{
	ciputctrl_t	*cip;
	struct stdata	*stp = STREAM(q);

	q = _WR(q);
	ASSERT(stp != NULL);

	/* Putlocks globally disabled via tunable. */
	if (disable_putlocks != 0)
		return;

	/* Not enough CPUs for per-CPU putlocks to pay off. */
	if (n_ciputctrl < min_n_ciputctrl)
		return;

	ASSERT(ciputctrl_cache != NULL);

	/*
	 * Non-zero stream argument: also create the stream-head ciputctrl.
	 * Same check-alloc-recheck race resolution as in
	 * create_syncq_putlocks(), but under sd_lock.
	 */
	if (stream != 0 && stp->sd_ciputctrl == NULL) {
		cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
		SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
		mutex_enter(&stp->sd_lock);
		if (stp->sd_ciputctrl != NULL) {
			/* Lost the race; discard our allocation. */
			mutex_exit(&stp->sd_lock);
			kmem_cache_free(ciputctrl_cache, cip);
		} else {
			ASSERT(stp->sd_nciputctrl == 0);
			stp->sd_nciputctrl = n_ciputctrl - 1;
			/*
			 * putnext checks sd_ciputctrl without holding
			 * sd_lock. if it is not NULL putnext assumes
			 * sd_nciputctrl is initialized. membar below
			 * insures that.
			 */
			membar_producer();
			stp->sd_ciputctrl = cip;
			mutex_exit(&stp->sd_lock);
		}
	}

	ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);

	/*
	 * stream == 0: only this queue's syncq.  stream != 0: walk every
	 * queue from q down to the driver.
	 */
	while (_SAMESTR(q)) {
		create_syncq_putlocks(q);
		if (stream == 0)
			return;
		q = q->q_next;
	}
	ASSERT(q != NULL);
	create_syncq_putlocks(q);
}
3873*0Sstevel@tonic-gate 
3874*0Sstevel@tonic-gate /*
3875*0Sstevel@tonic-gate  * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
3876*0Sstevel@tonic-gate  * through a stream.
3877*0Sstevel@tonic-gate  *
 * Data currently recorded per event is an hrtime stamp, queue address, event
3879*0Sstevel@tonic-gate  * type, and a per type datum.  Much of the STREAMS framework is instrumented
3880*0Sstevel@tonic-gate  * for automatic flow tracing (when enabled).  Events can be defined and used
3881*0Sstevel@tonic-gate  * by STREAMS modules and drivers.
3882*0Sstevel@tonic-gate  *
3883*0Sstevel@tonic-gate  * Global objects:
3884*0Sstevel@tonic-gate  *
3885*0Sstevel@tonic-gate  *	str_ftevent() - Add a flow-trace event to a dblk.
3886*0Sstevel@tonic-gate  *	str_ftfree() - Free flow-trace data
3887*0Sstevel@tonic-gate  *
3888*0Sstevel@tonic-gate  * Local objects:
3889*0Sstevel@tonic-gate  *
3890*0Sstevel@tonic-gate  *	fthdr_cache - pointer to the kmem cache for trace header.
3891*0Sstevel@tonic-gate  *	ftblk_cache - pointer to the kmem cache for trace data blocks.
3892*0Sstevel@tonic-gate  */
3893*0Sstevel@tonic-gate 
/*
 * Master switch: nonzero disables STREAMS flow tracing.  Note that
 * str_ftevent() itself increments this when an ftblk allocation fails,
 * turning tracing off for good on memory exhaustion.
 */
int str_ftnever = 1;	/* Don't do STREAMS flow tracing */
3895*0Sstevel@tonic-gate 
3896*0Sstevel@tonic-gate void
3897*0Sstevel@tonic-gate str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
3898*0Sstevel@tonic-gate {
3899*0Sstevel@tonic-gate 	ftblk_t *bp = hp->tail;
3900*0Sstevel@tonic-gate 	ftblk_t *nbp;
3901*0Sstevel@tonic-gate 	ftevnt_t *ep;
3902*0Sstevel@tonic-gate 	int ix, nix;
3903*0Sstevel@tonic-gate 
3904*0Sstevel@tonic-gate 	ASSERT(hp != NULL);
3905*0Sstevel@tonic-gate 
3906*0Sstevel@tonic-gate 	for (;;) {
3907*0Sstevel@tonic-gate 		if ((ix = bp->ix) == FTBLK_EVNTS) {
3908*0Sstevel@tonic-gate 			/*
3909*0Sstevel@tonic-gate 			 * Tail doesn't have room, so need a new tail.
3910*0Sstevel@tonic-gate 			 *
3911*0Sstevel@tonic-gate 			 * To make this MT safe, first, allocate a new
3912*0Sstevel@tonic-gate 			 * ftblk, and initialize it.  To make life a
3913*0Sstevel@tonic-gate 			 * little easier, reserve the first slot (mostly
3914*0Sstevel@tonic-gate 			 * by making ix = 1).  When we are finished with
3915*0Sstevel@tonic-gate 			 * the initialization, CAS this pointer to the
3916*0Sstevel@tonic-gate 			 * tail.  If this succeeds, this is the new
3917*0Sstevel@tonic-gate 			 * "next" block.  Otherwise, another thread
3918*0Sstevel@tonic-gate 			 * got here first, so free the block and start
3919*0Sstevel@tonic-gate 			 * again.
3920*0Sstevel@tonic-gate 			 */
3921*0Sstevel@tonic-gate 			if (!(nbp = kmem_cache_alloc(ftblk_cache,
3922*0Sstevel@tonic-gate 			    KM_NOSLEEP))) {
3923*0Sstevel@tonic-gate 				/* no mem, so punt */
3924*0Sstevel@tonic-gate 				str_ftnever++;
3925*0Sstevel@tonic-gate 				/* free up all flow data? */
3926*0Sstevel@tonic-gate 				return;
3927*0Sstevel@tonic-gate 			}
3928*0Sstevel@tonic-gate 			nbp->nxt = NULL;
3929*0Sstevel@tonic-gate 			nbp->ix = 1;
3930*0Sstevel@tonic-gate 			/*
3931*0Sstevel@tonic-gate 			 * Just in case there is another thread about
3932*0Sstevel@tonic-gate 			 * to get the next index, we need to make sure
3933*0Sstevel@tonic-gate 			 * the value is there for it.
3934*0Sstevel@tonic-gate 			 */
3935*0Sstevel@tonic-gate 			membar_producer();
3936*0Sstevel@tonic-gate 			if (casptr(&hp->tail, bp, nbp) == bp) {
3937*0Sstevel@tonic-gate 				/* CAS was successful */
3938*0Sstevel@tonic-gate 				bp->nxt = nbp;
3939*0Sstevel@tonic-gate 				membar_producer();
3940*0Sstevel@tonic-gate 				bp = nbp;
3941*0Sstevel@tonic-gate 				ix = 0;
3942*0Sstevel@tonic-gate 				goto cas_good;
3943*0Sstevel@tonic-gate 			} else {
3944*0Sstevel@tonic-gate 				kmem_cache_free(ftblk_cache, nbp);
3945*0Sstevel@tonic-gate 				bp = hp->tail;
3946*0Sstevel@tonic-gate 				continue;
3947*0Sstevel@tonic-gate 			}
3948*0Sstevel@tonic-gate 		}
3949*0Sstevel@tonic-gate 		nix = ix + 1;
3950*0Sstevel@tonic-gate 		if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) {
3951*0Sstevel@tonic-gate 		cas_good:
3952*0Sstevel@tonic-gate 			if (curthread != hp->thread) {
3953*0Sstevel@tonic-gate 				hp->thread = curthread;
3954*0Sstevel@tonic-gate 				evnt |= FTEV_CS;
3955*0Sstevel@tonic-gate 			}
3956*0Sstevel@tonic-gate 			if (CPU->cpu_seqid != hp->cpu_seqid) {
3957*0Sstevel@tonic-gate 				hp->cpu_seqid = CPU->cpu_seqid;
3958*0Sstevel@tonic-gate 				evnt |= FTEV_PS;
3959*0Sstevel@tonic-gate 			}
3960*0Sstevel@tonic-gate 			ep = &bp->ev[ix];
3961*0Sstevel@tonic-gate 			break;
3962*0Sstevel@tonic-gate 		}
3963*0Sstevel@tonic-gate 	}
3964*0Sstevel@tonic-gate 
3965*0Sstevel@tonic-gate 	if (evnt & FTEV_QMASK) {
3966*0Sstevel@tonic-gate 		queue_t *qp = p;
3967*0Sstevel@tonic-gate 
3968*0Sstevel@tonic-gate 		/*
3969*0Sstevel@tonic-gate 		 * It is possible that the module info is broke
3970*0Sstevel@tonic-gate 		 * (as is logsubr.c at this comment writing).
3971*0Sstevel@tonic-gate 		 * Instead of panicing or doing other unmentionables,
3972*0Sstevel@tonic-gate 		 * we shall put a dummy name as the mid, and continue.
3973*0Sstevel@tonic-gate 		 */
3974*0Sstevel@tonic-gate 		if (qp->q_qinfo == NULL)
3975*0Sstevel@tonic-gate 			ep->mid = "NONAME";
3976*0Sstevel@tonic-gate 		else
3977*0Sstevel@tonic-gate 			ep->mid = qp->q_qinfo->qi_minfo->mi_idname;
3978*0Sstevel@tonic-gate 
3979*0Sstevel@tonic-gate 		if (!(qp->q_flag & QREADR))
3980*0Sstevel@tonic-gate 			evnt |= FTEV_ISWR;
3981*0Sstevel@tonic-gate 	} else {
3982*0Sstevel@tonic-gate 		ep->mid = (char *)p;
3983*0Sstevel@tonic-gate 	}
3984*0Sstevel@tonic-gate 
3985*0Sstevel@tonic-gate 	ep->ts = gethrtime();
3986*0Sstevel@tonic-gate 	ep->evnt = evnt;
3987*0Sstevel@tonic-gate 	ep->data = data;
3988*0Sstevel@tonic-gate 	hp->hash = (hp->hash << 9) + hp->hash;
3989*0Sstevel@tonic-gate 	hp->hash += (evnt << 16) | data;
3990*0Sstevel@tonic-gate 	hp->hash += (uintptr_t)ep->mid;
3991*0Sstevel@tonic-gate }
3992*0Sstevel@tonic-gate 
3993*0Sstevel@tonic-gate /*
3994*0Sstevel@tonic-gate  * Free flow-trace data.
3995*0Sstevel@tonic-gate  */
3996*0Sstevel@tonic-gate void
3997*0Sstevel@tonic-gate str_ftfree(dblk_t *dbp)
3998*0Sstevel@tonic-gate {
3999*0Sstevel@tonic-gate 	fthdr_t *hp = dbp->db_fthdr;
4000*0Sstevel@tonic-gate 	ftblk_t *bp = &hp->first;
4001*0Sstevel@tonic-gate 	ftblk_t *nbp;
4002*0Sstevel@tonic-gate 
4003*0Sstevel@tonic-gate 	if (bp != hp->tail || bp->ix != 0) {
4004*0Sstevel@tonic-gate 		/*
4005*0Sstevel@tonic-gate 		 * Clear out the hash, have the tail point to itself, and free
4006*0Sstevel@tonic-gate 		 * any continuation blocks.
4007*0Sstevel@tonic-gate 		 */
4008*0Sstevel@tonic-gate 		bp = hp->first.nxt;
4009*0Sstevel@tonic-gate 		hp->tail = &hp->first;
4010*0Sstevel@tonic-gate 		hp->hash = 0;
4011*0Sstevel@tonic-gate 		hp->first.nxt = NULL;
4012*0Sstevel@tonic-gate 		hp->first.ix = 0;
4013*0Sstevel@tonic-gate 		while (bp != NULL) {
4014*0Sstevel@tonic-gate 			nbp = bp->nxt;
4015*0Sstevel@tonic-gate 			kmem_cache_free(ftblk_cache, bp);
4016*0Sstevel@tonic-gate 			bp = nbp;
4017*0Sstevel@tonic-gate 		}
4018*0Sstevel@tonic-gate 	}
4019*0Sstevel@tonic-gate 	kmem_cache_free(fthdr_cache, hp);
4020*0Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
4021*0Sstevel@tonic-gate }
4022