10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
52958Sdr146992 * Common Development and Distribution License (the "License").
62958Sdr146992 * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
220Sstevel@tonic-gate /* All Rights Reserved */
230Sstevel@tonic-gate
240Sstevel@tonic-gate /*
258752SPeter.Memishian@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
260Sstevel@tonic-gate * Use is subject to license terms.
270Sstevel@tonic-gate */
280Sstevel@tonic-gate
290Sstevel@tonic-gate #include <sys/types.h>
300Sstevel@tonic-gate #include <sys/param.h>
310Sstevel@tonic-gate #include <sys/thread.h>
320Sstevel@tonic-gate #include <sys/sysmacros.h>
330Sstevel@tonic-gate #include <sys/stropts.h>
340Sstevel@tonic-gate #include <sys/stream.h>
350Sstevel@tonic-gate #include <sys/strsubr.h>
360Sstevel@tonic-gate #include <sys/strsun.h>
370Sstevel@tonic-gate #include <sys/conf.h>
380Sstevel@tonic-gate #include <sys/debug.h>
390Sstevel@tonic-gate #include <sys/cmn_err.h>
400Sstevel@tonic-gate #include <sys/kmem.h>
410Sstevel@tonic-gate #include <sys/atomic.h>
420Sstevel@tonic-gate #include <sys/errno.h>
430Sstevel@tonic-gate #include <sys/vtrace.h>
440Sstevel@tonic-gate #include <sys/ftrace.h>
450Sstevel@tonic-gate #include <sys/ontrap.h>
460Sstevel@tonic-gate #include <sys/multidata.h>
470Sstevel@tonic-gate #include <sys/multidata_impl.h>
480Sstevel@tonic-gate #include <sys/sdt.h>
491110Smeem #include <sys/strft.h>
500Sstevel@tonic-gate
510Sstevel@tonic-gate #ifdef DEBUG
520Sstevel@tonic-gate #include <sys/kmem_impl.h>
530Sstevel@tonic-gate #endif
540Sstevel@tonic-gate
550Sstevel@tonic-gate /*
560Sstevel@tonic-gate * This file contains all the STREAMS utility routines that may
570Sstevel@tonic-gate * be used by modules and drivers.
580Sstevel@tonic-gate */
590Sstevel@tonic-gate
600Sstevel@tonic-gate /*
610Sstevel@tonic-gate * STREAMS message allocator: principles of operation
620Sstevel@tonic-gate *
630Sstevel@tonic-gate * The streams message allocator consists of all the routines that
640Sstevel@tonic-gate * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
650Sstevel@tonic-gate * dupb(), freeb() and freemsg(). What follows is a high-level view
660Sstevel@tonic-gate * of how the allocator works.
670Sstevel@tonic-gate *
680Sstevel@tonic-gate * Every streams message consists of one or more mblks, a dblk, and data.
690Sstevel@tonic-gate * All mblks for all types of messages come from a common mblk_cache.
700Sstevel@tonic-gate * The dblk and data come in several flavors, depending on how the
710Sstevel@tonic-gate * message is allocated:
720Sstevel@tonic-gate *
730Sstevel@tonic-gate * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
740Sstevel@tonic-gate * fixed-size dblk/data caches. For message sizes that are multiples of
750Sstevel@tonic-gate * PAGESIZE, dblks are allocated separately from the buffer.
760Sstevel@tonic-gate * The associated buffer is allocated by the constructor using kmem_alloc().
770Sstevel@tonic-gate * For all other message sizes, dblk and its associated data is allocated
780Sstevel@tonic-gate * as a single contiguous chunk of memory.
790Sstevel@tonic-gate * Objects in these caches consist of a dblk plus its associated data.
800Sstevel@tonic-gate * allocb() determines the nearest-size cache by table lookup:
810Sstevel@tonic-gate * the dblk_cache[] array provides the mapping from size to dblk cache.
820Sstevel@tonic-gate *
830Sstevel@tonic-gate * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
840Sstevel@tonic-gate * kmem_alloc()'ing a buffer for the data and supplying that
850Sstevel@tonic-gate * buffer to gesballoc(), described below.
860Sstevel@tonic-gate *
870Sstevel@tonic-gate * (3) The four flavors of [d]esballoc[a] are all implemented by a
880Sstevel@tonic-gate * common routine, gesballoc() ("generic esballoc"). gesballoc()
890Sstevel@tonic-gate * allocates a dblk from the global dblk_esb_cache and sets db_base,
900Sstevel@tonic-gate * db_lim and db_frtnp to describe the caller-supplied buffer.
910Sstevel@tonic-gate *
920Sstevel@tonic-gate * While there are several routines to allocate messages, there is only
930Sstevel@tonic-gate * one routine to free messages: freeb(). freeb() simply invokes the
940Sstevel@tonic-gate * dblk's free method, dbp->db_free(), which is set at allocation time.
950Sstevel@tonic-gate *
960Sstevel@tonic-gate * dupb() creates a new reference to a message by allocating a new mblk,
970Sstevel@tonic-gate * incrementing the dblk reference count and setting the dblk's free
980Sstevel@tonic-gate * method to dblk_decref(). The dblk's original free method is retained
990Sstevel@tonic-gate * in db_lastfree. dblk_decref() decrements the reference count on each
1000Sstevel@tonic-gate * freeb(). If this is not the last reference it just frees the mblk;
1010Sstevel@tonic-gate * if this *is* the last reference, it restores db_free to db_lastfree,
1020Sstevel@tonic-gate * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
1030Sstevel@tonic-gate *
1040Sstevel@tonic-gate * The implementation makes aggressive use of kmem object caching for
1050Sstevel@tonic-gate * maximum performance. This makes the code simple and compact, but
1060Sstevel@tonic-gate * also a bit abstruse in some places. The invariants that constitute a
1070Sstevel@tonic-gate * message's constructed state, described below, are more subtle than usual.
1080Sstevel@tonic-gate *
1090Sstevel@tonic-gate * Every dblk has an "attached mblk" as part of its constructed state.
1100Sstevel@tonic-gate * The mblk is allocated by the dblk's constructor and remains attached
1110Sstevel@tonic-gate * until the message is either dup'ed or pulled up. In the dupb() case
1120Sstevel@tonic-gate * the mblk association doesn't matter until the last free, at which time
1130Sstevel@tonic-gate * dblk_decref() attaches the last mblk to the dblk. pullupmsg() affects
1140Sstevel@tonic-gate * the mblk association because it swaps the leading mblks of two messages,
1150Sstevel@tonic-gate * so it is responsible for swapping their db_mblk pointers accordingly.
1160Sstevel@tonic-gate * From a constructed-state viewpoint it doesn't matter that a dblk's
1170Sstevel@tonic-gate * attached mblk can change while the message is allocated; all that
1180Sstevel@tonic-gate * matters is that the dblk has *some* attached mblk when it's freed.
1190Sstevel@tonic-gate *
1200Sstevel@tonic-gate * The sizes of the allocb() small-message caches are not magical.
1210Sstevel@tonic-gate * They represent a good trade-off between internal and external
1220Sstevel@tonic-gate * fragmentation for current workloads. They should be reevaluated
1230Sstevel@tonic-gate * periodically, especially if allocations larger than DBLK_MAX_CACHE
1240Sstevel@tonic-gate * become common. We use 64-byte alignment so that dblks don't
1250Sstevel@tonic-gate * straddle cache lines unnecessarily.
1260Sstevel@tonic-gate */
/*
 * Cache geometry.  DBLK_SIZE_SHIFT is log2(DBLK_MIN_SIZE); the two are used
 * together to map a message size to a slot in the size-indexed cache table.
 */
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64
#define	DBLK_MIN_SIZE		8
#define	DBLK_SIZE_SHIFT		3

/*
 * db_ref, db_type, db_flags and db_struioflag are initialized together with
 * a single 32-bit store through DBLK_RTFU_WORD().  DBLK_RTFU_SHIFT(field)
 * is the bit position of `field' inside that word, derived from the byte
 * distance between the members and adjusted for the host byte order.
 * NOTE(review): this relies on the four members being consecutive one-byte
 * fields starting at db_ref and ending at db_struioflag -- confirm against
 * the dblk_t definition.
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Build the combined ref/type/flags/uioflag word.  Note that (ref - 1) is
 * OR'ed into the flags byte.  Every use of a macro argument is fully
 * parenthesized so that arbitrary expressions (e.g. a conditional
 * expression) may be passed for any of them.
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | ((ref) - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
1480Sstevel@tonic-gate
1490Sstevel@tonic-gate static size_t dblk_sizes[] = {
1500Sstevel@tonic-gate #ifdef _LP64
1516712Stomee 16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856,
1526712Stomee 8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624,
1536712Stomee 40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392,
1540Sstevel@tonic-gate #else
1556712Stomee 64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904,
1566712Stomee 8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672,
1576712Stomee 40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440,
1580Sstevel@tonic-gate #endif
1590Sstevel@tonic-gate DBLK_MAX_CACHE, 0
1600Sstevel@tonic-gate };
1610Sstevel@tonic-gate
1620Sstevel@tonic-gate static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
1630Sstevel@tonic-gate static struct kmem_cache *mblk_cache;
1640Sstevel@tonic-gate static struct kmem_cache *dblk_esb_cache;
1650Sstevel@tonic-gate static struct kmem_cache *fthdr_cache;
1660Sstevel@tonic-gate static struct kmem_cache *ftblk_cache;
1670Sstevel@tonic-gate
1680Sstevel@tonic-gate static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1690Sstevel@tonic-gate static mblk_t *allocb_oversize(size_t size, int flags);
1700Sstevel@tonic-gate static int allocb_tryhard_fails;
1710Sstevel@tonic-gate static void frnop_func(void *arg);
1720Sstevel@tonic-gate frtn_t frnop = { frnop_func };
1730Sstevel@tonic-gate static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1740Sstevel@tonic-gate
1750Sstevel@tonic-gate static boolean_t rwnext_enter(queue_t *qp);
1760Sstevel@tonic-gate static void rwnext_exit(queue_t *qp);
1770Sstevel@tonic-gate
1780Sstevel@tonic-gate /*
1790Sstevel@tonic-gate * Patchable mblk/dblk kmem_cache flags.
1800Sstevel@tonic-gate */
1810Sstevel@tonic-gate int dblk_kmem_flags = 0;
1820Sstevel@tonic-gate int mblk_kmem_flags = 0;
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate static int
dblk_constructor(void * buf,void * cdrarg,int kmflags)1850Sstevel@tonic-gate dblk_constructor(void *buf, void *cdrarg, int kmflags)
1860Sstevel@tonic-gate {
1870Sstevel@tonic-gate dblk_t *dbp = buf;
1880Sstevel@tonic-gate ssize_t msg_size = (ssize_t)cdrarg;
1890Sstevel@tonic-gate size_t index;
1900Sstevel@tonic-gate
1910Sstevel@tonic-gate ASSERT(msg_size != 0);
1920Sstevel@tonic-gate
1930Sstevel@tonic-gate index = (msg_size - 1) >> DBLK_SIZE_SHIFT;
1940Sstevel@tonic-gate
195577Smeem ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));
1960Sstevel@tonic-gate
1970Sstevel@tonic-gate if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
1980Sstevel@tonic-gate return (-1);
1990Sstevel@tonic-gate if ((msg_size & PAGEOFFSET) == 0) {
2000Sstevel@tonic-gate dbp->db_base = kmem_alloc(msg_size, kmflags);
2010Sstevel@tonic-gate if (dbp->db_base == NULL) {
2020Sstevel@tonic-gate kmem_cache_free(mblk_cache, dbp->db_mblk);
2030Sstevel@tonic-gate return (-1);
2040Sstevel@tonic-gate }
2050Sstevel@tonic-gate } else {
2060Sstevel@tonic-gate dbp->db_base = (unsigned char *)&dbp[1];
2070Sstevel@tonic-gate }
2080Sstevel@tonic-gate
2090Sstevel@tonic-gate dbp->db_mblk->b_datap = dbp;
2100Sstevel@tonic-gate dbp->db_cache = dblk_cache[index];
2110Sstevel@tonic-gate dbp->db_lim = dbp->db_base + msg_size;
2120Sstevel@tonic-gate dbp->db_free = dbp->db_lastfree = dblk_lastfree;
2130Sstevel@tonic-gate dbp->db_frtnp = NULL;
2140Sstevel@tonic-gate dbp->db_fthdr = NULL;
2150Sstevel@tonic-gate dbp->db_credp = NULL;
2160Sstevel@tonic-gate dbp->db_cpid = -1;
2170Sstevel@tonic-gate dbp->db_struioflag = 0;
2180Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0;
2190Sstevel@tonic-gate return (0);
2200Sstevel@tonic-gate }
2210Sstevel@tonic-gate
2220Sstevel@tonic-gate /*ARGSUSED*/
2230Sstevel@tonic-gate static int
dblk_esb_constructor(void * buf,void * cdrarg,int kmflags)2240Sstevel@tonic-gate dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
2250Sstevel@tonic-gate {
2260Sstevel@tonic-gate dblk_t *dbp = buf;
2270Sstevel@tonic-gate
2280Sstevel@tonic-gate if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2290Sstevel@tonic-gate return (-1);
2300Sstevel@tonic-gate dbp->db_mblk->b_datap = dbp;
2310Sstevel@tonic-gate dbp->db_cache = dblk_esb_cache;
2320Sstevel@tonic-gate dbp->db_fthdr = NULL;
2330Sstevel@tonic-gate dbp->db_credp = NULL;
2340Sstevel@tonic-gate dbp->db_cpid = -1;
2350Sstevel@tonic-gate dbp->db_struioflag = 0;
2360Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0;
2370Sstevel@tonic-gate return (0);
2380Sstevel@tonic-gate }
2390Sstevel@tonic-gate
2400Sstevel@tonic-gate static int
bcache_dblk_constructor(void * buf,void * cdrarg,int kmflags)2410Sstevel@tonic-gate bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
2420Sstevel@tonic-gate {
2430Sstevel@tonic-gate dblk_t *dbp = buf;
2448752SPeter.Memishian@Sun.COM bcache_t *bcp = cdrarg;
2450Sstevel@tonic-gate
2460Sstevel@tonic-gate if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2470Sstevel@tonic-gate return (-1);
2480Sstevel@tonic-gate
2498752SPeter.Memishian@Sun.COM dbp->db_base = kmem_cache_alloc(bcp->buffer_cache, kmflags);
2508752SPeter.Memishian@Sun.COM if (dbp->db_base == NULL) {
2510Sstevel@tonic-gate kmem_cache_free(mblk_cache, dbp->db_mblk);
2520Sstevel@tonic-gate return (-1);
2530Sstevel@tonic-gate }
2540Sstevel@tonic-gate
2550Sstevel@tonic-gate dbp->db_mblk->b_datap = dbp;
2560Sstevel@tonic-gate dbp->db_cache = (void *)bcp;
2570Sstevel@tonic-gate dbp->db_lim = dbp->db_base + bcp->size;
2580Sstevel@tonic-gate dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
2590Sstevel@tonic-gate dbp->db_frtnp = NULL;
2600Sstevel@tonic-gate dbp->db_fthdr = NULL;
2610Sstevel@tonic-gate dbp->db_credp = NULL;
2620Sstevel@tonic-gate dbp->db_cpid = -1;
2630Sstevel@tonic-gate dbp->db_struioflag = 0;
2640Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0;
2650Sstevel@tonic-gate return (0);
2660Sstevel@tonic-gate }
2670Sstevel@tonic-gate
2680Sstevel@tonic-gate /*ARGSUSED*/
2690Sstevel@tonic-gate static void
dblk_destructor(void * buf,void * cdrarg)2700Sstevel@tonic-gate dblk_destructor(void *buf, void *cdrarg)
2710Sstevel@tonic-gate {
2720Sstevel@tonic-gate dblk_t *dbp = buf;
2730Sstevel@tonic-gate ssize_t msg_size = (ssize_t)cdrarg;
2740Sstevel@tonic-gate
2750Sstevel@tonic-gate ASSERT(dbp->db_mblk->b_datap == dbp);
2760Sstevel@tonic-gate ASSERT(msg_size != 0);
2770Sstevel@tonic-gate ASSERT(dbp->db_struioflag == 0);
2780Sstevel@tonic-gate ASSERT(dbp->db_struioun.cksum.flags == 0);
2790Sstevel@tonic-gate
2800Sstevel@tonic-gate if ((msg_size & PAGEOFFSET) == 0) {
2810Sstevel@tonic-gate kmem_free(dbp->db_base, msg_size);
2820Sstevel@tonic-gate }
2830Sstevel@tonic-gate
2840Sstevel@tonic-gate kmem_cache_free(mblk_cache, dbp->db_mblk);
2850Sstevel@tonic-gate }
2860Sstevel@tonic-gate
2870Sstevel@tonic-gate static void
bcache_dblk_destructor(void * buf,void * cdrarg)2880Sstevel@tonic-gate bcache_dblk_destructor(void *buf, void *cdrarg)
2890Sstevel@tonic-gate {
2900Sstevel@tonic-gate dblk_t *dbp = buf;
2918752SPeter.Memishian@Sun.COM bcache_t *bcp = cdrarg;
2920Sstevel@tonic-gate
2930Sstevel@tonic-gate kmem_cache_free(bcp->buffer_cache, dbp->db_base);
2940Sstevel@tonic-gate
2950Sstevel@tonic-gate ASSERT(dbp->db_mblk->b_datap == dbp);
2960Sstevel@tonic-gate ASSERT(dbp->db_struioflag == 0);
2970Sstevel@tonic-gate ASSERT(dbp->db_struioun.cksum.flags == 0);
2980Sstevel@tonic-gate
2990Sstevel@tonic-gate kmem_cache_free(mblk_cache, dbp->db_mblk);
3000Sstevel@tonic-gate }
3010Sstevel@tonic-gate
3028752SPeter.Memishian@Sun.COM /* ARGSUSED */
3038752SPeter.Memishian@Sun.COM static int
ftblk_constructor(void * buf,void * cdrarg,int kmflags)3048752SPeter.Memishian@Sun.COM ftblk_constructor(void *buf, void *cdrarg, int kmflags)
3058752SPeter.Memishian@Sun.COM {
3068752SPeter.Memishian@Sun.COM ftblk_t *fbp = buf;
3078752SPeter.Memishian@Sun.COM int i;
3088752SPeter.Memishian@Sun.COM
3098752SPeter.Memishian@Sun.COM bzero(fbp, sizeof (ftblk_t));
3108752SPeter.Memishian@Sun.COM if (str_ftstack != 0) {
3118752SPeter.Memishian@Sun.COM for (i = 0; i < FTBLK_EVNTS; i++)
3128752SPeter.Memishian@Sun.COM fbp->ev[i].stk = kmem_alloc(sizeof (ftstk_t), kmflags);
3138752SPeter.Memishian@Sun.COM }
3148752SPeter.Memishian@Sun.COM
3158752SPeter.Memishian@Sun.COM return (0);
3168752SPeter.Memishian@Sun.COM }
3178752SPeter.Memishian@Sun.COM
3188752SPeter.Memishian@Sun.COM /* ARGSUSED */
3198752SPeter.Memishian@Sun.COM static void
ftblk_destructor(void * buf,void * cdrarg)3208752SPeter.Memishian@Sun.COM ftblk_destructor(void *buf, void *cdrarg)
3218752SPeter.Memishian@Sun.COM {
3228752SPeter.Memishian@Sun.COM ftblk_t *fbp = buf;
3238752SPeter.Memishian@Sun.COM int i;
3248752SPeter.Memishian@Sun.COM
3258752SPeter.Memishian@Sun.COM if (str_ftstack != 0) {
3268752SPeter.Memishian@Sun.COM for (i = 0; i < FTBLK_EVNTS; i++) {
3278752SPeter.Memishian@Sun.COM if (fbp->ev[i].stk != NULL) {
3288752SPeter.Memishian@Sun.COM kmem_free(fbp->ev[i].stk, sizeof (ftstk_t));
3298752SPeter.Memishian@Sun.COM fbp->ev[i].stk = NULL;
3308752SPeter.Memishian@Sun.COM }
3318752SPeter.Memishian@Sun.COM }
3328752SPeter.Memishian@Sun.COM }
3338752SPeter.Memishian@Sun.COM }
3348752SPeter.Memishian@Sun.COM
3358752SPeter.Memishian@Sun.COM static int
fthdr_constructor(void * buf,void * cdrarg,int kmflags)3368752SPeter.Memishian@Sun.COM fthdr_constructor(void *buf, void *cdrarg, int kmflags)
3378752SPeter.Memishian@Sun.COM {
3388752SPeter.Memishian@Sun.COM fthdr_t *fhp = buf;
3398752SPeter.Memishian@Sun.COM
3408752SPeter.Memishian@Sun.COM return (ftblk_constructor(&fhp->first, cdrarg, kmflags));
3418752SPeter.Memishian@Sun.COM }
3428752SPeter.Memishian@Sun.COM
3438752SPeter.Memishian@Sun.COM static void
fthdr_destructor(void * buf,void * cdrarg)3448752SPeter.Memishian@Sun.COM fthdr_destructor(void *buf, void *cdrarg)
3458752SPeter.Memishian@Sun.COM {
3468752SPeter.Memishian@Sun.COM fthdr_t *fhp = buf;
3478752SPeter.Memishian@Sun.COM
3488752SPeter.Memishian@Sun.COM ftblk_destructor(&fhp->first, cdrarg);
3498752SPeter.Memishian@Sun.COM }
3508752SPeter.Memishian@Sun.COM
3510Sstevel@tonic-gate void
streams_msg_init(void)3520Sstevel@tonic-gate streams_msg_init(void)
3530Sstevel@tonic-gate {
3540Sstevel@tonic-gate char name[40];
3550Sstevel@tonic-gate size_t size;
3560Sstevel@tonic-gate size_t lastsize = DBLK_MIN_SIZE;
3570Sstevel@tonic-gate size_t *sizep;
3580Sstevel@tonic-gate struct kmem_cache *cp;
3590Sstevel@tonic-gate size_t tot_size;
3600Sstevel@tonic-gate int offset;
3610Sstevel@tonic-gate
3628752SPeter.Memishian@Sun.COM mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32,
3638752SPeter.Memishian@Sun.COM NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags);
3640Sstevel@tonic-gate
3650Sstevel@tonic-gate for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
3660Sstevel@tonic-gate
3670Sstevel@tonic-gate if ((offset = (size & PAGEOFFSET)) != 0) {
3680Sstevel@tonic-gate /*
3690Sstevel@tonic-gate * We are in the middle of a page, dblk should
3700Sstevel@tonic-gate * be allocated on the same page
3710Sstevel@tonic-gate */
3720Sstevel@tonic-gate tot_size = size + sizeof (dblk_t);
3730Sstevel@tonic-gate ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
3746707Sbrutus < PAGESIZE);
3750Sstevel@tonic-gate ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
3760Sstevel@tonic-gate
3770Sstevel@tonic-gate } else {
3780Sstevel@tonic-gate
3790Sstevel@tonic-gate /*
3800Sstevel@tonic-gate * buf size is multiple of page size, dblk and
3810Sstevel@tonic-gate * buffer are allocated separately.
3820Sstevel@tonic-gate */
3830Sstevel@tonic-gate
3840Sstevel@tonic-gate ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
3850Sstevel@tonic-gate tot_size = sizeof (dblk_t);
3860Sstevel@tonic-gate }
3870Sstevel@tonic-gate
3880Sstevel@tonic-gate (void) sprintf(name, "streams_dblk_%ld", size);
3898752SPeter.Memishian@Sun.COM cp = kmem_cache_create(name, tot_size, DBLK_CACHE_ALIGN,
3908752SPeter.Memishian@Sun.COM dblk_constructor, dblk_destructor, NULL, (void *)(size),
3918752SPeter.Memishian@Sun.COM NULL, dblk_kmem_flags);
3920Sstevel@tonic-gate
3930Sstevel@tonic-gate while (lastsize <= size) {
3940Sstevel@tonic-gate dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
3950Sstevel@tonic-gate lastsize += DBLK_MIN_SIZE;
3960Sstevel@tonic-gate }
3970Sstevel@tonic-gate }
3980Sstevel@tonic-gate
3998752SPeter.Memishian@Sun.COM dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t),
4008752SPeter.Memishian@Sun.COM DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL,
4018752SPeter.Memishian@Sun.COM (void *)sizeof (dblk_t), NULL, dblk_kmem_flags);
4028752SPeter.Memishian@Sun.COM fthdr_cache = kmem_cache_create("streams_fthdr", sizeof (fthdr_t), 32,
4038752SPeter.Memishian@Sun.COM fthdr_constructor, fthdr_destructor, NULL, NULL, NULL, 0);
4048752SPeter.Memishian@Sun.COM ftblk_cache = kmem_cache_create("streams_ftblk", sizeof (ftblk_t), 32,
4058752SPeter.Memishian@Sun.COM ftblk_constructor, ftblk_destructor, NULL, NULL, NULL, 0);
4060Sstevel@tonic-gate
4070Sstevel@tonic-gate /* Initialize Multidata caches */
4080Sstevel@tonic-gate mmd_init();
4093932Sss146032
4103932Sss146032 /* initialize throttling queue for esballoc */
4113932Sss146032 esballoc_queue_init();
4120Sstevel@tonic-gate }
4130Sstevel@tonic-gate
4140Sstevel@tonic-gate /*ARGSUSED*/
4150Sstevel@tonic-gate mblk_t *
allocb(size_t size,uint_t pri)4160Sstevel@tonic-gate allocb(size_t size, uint_t pri)
4170Sstevel@tonic-gate {
4180Sstevel@tonic-gate dblk_t *dbp;
4190Sstevel@tonic-gate mblk_t *mp;
4200Sstevel@tonic-gate size_t index;
4210Sstevel@tonic-gate
4220Sstevel@tonic-gate index = (size - 1) >> DBLK_SIZE_SHIFT;
4230Sstevel@tonic-gate
4240Sstevel@tonic-gate if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
4250Sstevel@tonic-gate if (size != 0) {
4260Sstevel@tonic-gate mp = allocb_oversize(size, KM_NOSLEEP);
4270Sstevel@tonic-gate goto out;
4280Sstevel@tonic-gate }
4290Sstevel@tonic-gate index = 0;
4300Sstevel@tonic-gate }
4310Sstevel@tonic-gate
4320Sstevel@tonic-gate if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
4330Sstevel@tonic-gate mp = NULL;
4340Sstevel@tonic-gate goto out;
4350Sstevel@tonic-gate }
4360Sstevel@tonic-gate
4370Sstevel@tonic-gate mp = dbp->db_mblk;
4380Sstevel@tonic-gate DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
4390Sstevel@tonic-gate mp->b_next = mp->b_prev = mp->b_cont = NULL;
4400Sstevel@tonic-gate mp->b_rptr = mp->b_wptr = dbp->db_base;
4410Sstevel@tonic-gate mp->b_queue = NULL;
4420Sstevel@tonic-gate MBLK_BAND_FLAG_WORD(mp) = 0;
4430Sstevel@tonic-gate STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
4440Sstevel@tonic-gate out:
4450Sstevel@tonic-gate FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);
4460Sstevel@tonic-gate
4470Sstevel@tonic-gate return (mp);
4480Sstevel@tonic-gate }
4490Sstevel@tonic-gate
4508778SErik.Nordmark@Sun.COM /*
4518778SErik.Nordmark@Sun.COM * Allocate an mblk taking db_credp and db_cpid from the template.
4528778SErik.Nordmark@Sun.COM * Allow the cred to be NULL.
4538778SErik.Nordmark@Sun.COM */
4540Sstevel@tonic-gate mblk_t *
allocb_tmpl(size_t size,const mblk_t * tmpl)4550Sstevel@tonic-gate allocb_tmpl(size_t size, const mblk_t *tmpl)
4560Sstevel@tonic-gate {
4570Sstevel@tonic-gate mblk_t *mp = allocb(size, 0);
4580Sstevel@tonic-gate
4590Sstevel@tonic-gate if (mp != NULL) {
4608778SErik.Nordmark@Sun.COM dblk_t *src = tmpl->b_datap;
4618778SErik.Nordmark@Sun.COM dblk_t *dst = mp->b_datap;
46210163SKen.Powell@Sun.COM cred_t *cr;
46310163SKen.Powell@Sun.COM pid_t cpid;
46410163SKen.Powell@Sun.COM
46510163SKen.Powell@Sun.COM cr = msg_getcred(tmpl, &cpid);
4660Sstevel@tonic-gate if (cr != NULL)
4678778SErik.Nordmark@Sun.COM crhold(dst->db_credp = cr);
46810163SKen.Powell@Sun.COM dst->db_cpid = cpid;
4698778SErik.Nordmark@Sun.COM dst->db_type = src->db_type;
4708778SErik.Nordmark@Sun.COM }
4718778SErik.Nordmark@Sun.COM return (mp);
4728778SErik.Nordmark@Sun.COM }
4738778SErik.Nordmark@Sun.COM
4748778SErik.Nordmark@Sun.COM mblk_t *
allocb_cred(size_t size,cred_t * cr,pid_t cpid)4758778SErik.Nordmark@Sun.COM allocb_cred(size_t size, cred_t *cr, pid_t cpid)
4768778SErik.Nordmark@Sun.COM {
4778778SErik.Nordmark@Sun.COM mblk_t *mp = allocb(size, 0);
4788778SErik.Nordmark@Sun.COM
4798778SErik.Nordmark@Sun.COM ASSERT(cr != NULL);
4808778SErik.Nordmark@Sun.COM if (mp != NULL) {
4818778SErik.Nordmark@Sun.COM dblk_t *dbp = mp->b_datap;
4828778SErik.Nordmark@Sun.COM
4838778SErik.Nordmark@Sun.COM crhold(dbp->db_credp = cr);
4848778SErik.Nordmark@Sun.COM dbp->db_cpid = cpid;
4850Sstevel@tonic-gate }
4860Sstevel@tonic-gate return (mp);
4870Sstevel@tonic-gate }
4880Sstevel@tonic-gate
4890Sstevel@tonic-gate mblk_t *
allocb_cred_wait(size_t size,uint_t flags,int * error,cred_t * cr,pid_t cpid)4908778SErik.Nordmark@Sun.COM allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr, pid_t cpid)
4910Sstevel@tonic-gate {
4928778SErik.Nordmark@Sun.COM mblk_t *mp = allocb_wait(size, 0, flags, error);
4938778SErik.Nordmark@Sun.COM
4948778SErik.Nordmark@Sun.COM ASSERT(cr != NULL);
4958778SErik.Nordmark@Sun.COM if (mp != NULL) {
4968778SErik.Nordmark@Sun.COM dblk_t *dbp = mp->b_datap;
4978778SErik.Nordmark@Sun.COM
4988778SErik.Nordmark@Sun.COM crhold(dbp->db_credp = cr);
4998778SErik.Nordmark@Sun.COM dbp->db_cpid = cpid;
5008778SErik.Nordmark@Sun.COM }
5010Sstevel@tonic-gate
5020Sstevel@tonic-gate return (mp);
5030Sstevel@tonic-gate }
5040Sstevel@tonic-gate
5058778SErik.Nordmark@Sun.COM /*
5068778SErik.Nordmark@Sun.COM * Extract the db_cred (and optionally db_cpid) from a message.
5078778SErik.Nordmark@Sun.COM * We find the first mblk which has a non-NULL db_cred and use that.
5088778SErik.Nordmark@Sun.COM * If none found we return NULL.
5098778SErik.Nordmark@Sun.COM * Does NOT get a hold on the cred.
5108778SErik.Nordmark@Sun.COM */
5118778SErik.Nordmark@Sun.COM cred_t *
msg_getcred(const mblk_t * mp,pid_t * cpidp)5128778SErik.Nordmark@Sun.COM msg_getcred(const mblk_t *mp, pid_t *cpidp)
5130Sstevel@tonic-gate {
5148778SErik.Nordmark@Sun.COM cred_t *cr = NULL;
5158778SErik.Nordmark@Sun.COM cred_t *cr2;
51610163SKen.Powell@Sun.COM mblk_t *mp2;
5178778SErik.Nordmark@Sun.COM
5188778SErik.Nordmark@Sun.COM while (mp != NULL) {
5198778SErik.Nordmark@Sun.COM dblk_t *dbp = mp->b_datap;
5208778SErik.Nordmark@Sun.COM
5218778SErik.Nordmark@Sun.COM cr = dbp->db_credp;
5228778SErik.Nordmark@Sun.COM if (cr == NULL) {
5238778SErik.Nordmark@Sun.COM mp = mp->b_cont;
5248778SErik.Nordmark@Sun.COM continue;
5258778SErik.Nordmark@Sun.COM }
5268778SErik.Nordmark@Sun.COM if (cpidp != NULL)
5278778SErik.Nordmark@Sun.COM *cpidp = dbp->db_cpid;
5288778SErik.Nordmark@Sun.COM
5298778SErik.Nordmark@Sun.COM #ifdef DEBUG
5308778SErik.Nordmark@Sun.COM /*
5318778SErik.Nordmark@Sun.COM * Normally there should at most one db_credp in a message.
5328778SErik.Nordmark@Sun.COM * But if there are multiple (as in the case of some M_IOC*
5338778SErik.Nordmark@Sun.COM * and some internal messages in TCP/IP bind logic) then
5348778SErik.Nordmark@Sun.COM * they must be identical in the normal case.
5358778SErik.Nordmark@Sun.COM * However, a socket can be shared between different uids
5368778SErik.Nordmark@Sun.COM * in which case data queued in TCP would be from different
5378778SErik.Nordmark@Sun.COM * creds. Thus we can only assert for the zoneid being the
5388778SErik.Nordmark@Sun.COM * same. Due to Multi-level Level Ports for TX, some
5398778SErik.Nordmark@Sun.COM * cred_t can have a NULL cr_zone, and we skip the comparison
5408778SErik.Nordmark@Sun.COM * in that case.
5418778SErik.Nordmark@Sun.COM */
54210163SKen.Powell@Sun.COM mp2 = mp->b_cont;
54310163SKen.Powell@Sun.COM while (mp2 != NULL) {
54410163SKen.Powell@Sun.COM cr2 = DB_CRED(mp2);
54510163SKen.Powell@Sun.COM if (cr2 != NULL) {
54610163SKen.Powell@Sun.COM DTRACE_PROBE2(msg__getcred,
54710163SKen.Powell@Sun.COM cred_t *, cr, cred_t *, cr2);
54810163SKen.Powell@Sun.COM ASSERT(crgetzoneid(cr) == crgetzoneid(cr2) ||
54910163SKen.Powell@Sun.COM crgetzone(cr) == NULL ||
55010163SKen.Powell@Sun.COM crgetzone(cr2) == NULL);
55110163SKen.Powell@Sun.COM }
55210163SKen.Powell@Sun.COM mp2 = mp2->b_cont;
5538778SErik.Nordmark@Sun.COM }
5548778SErik.Nordmark@Sun.COM #endif
5558778SErik.Nordmark@Sun.COM return (cr);
5568778SErik.Nordmark@Sun.COM }
5578778SErik.Nordmark@Sun.COM if (cpidp != NULL)
5588778SErik.Nordmark@Sun.COM *cpidp = NOPID;
5598778SErik.Nordmark@Sun.COM return (NULL);
5608778SErik.Nordmark@Sun.COM }
5618778SErik.Nordmark@Sun.COM
5628778SErik.Nordmark@Sun.COM /*
5638778SErik.Nordmark@Sun.COM * Variant of msg_getcred which, when a cred is found
5648778SErik.Nordmark@Sun.COM * 1. Returns with a hold on the cred
5658778SErik.Nordmark@Sun.COM * 2. Clears the first cred in the mblk.
5668778SErik.Nordmark@Sun.COM * This is more efficient to use than a msg_getcred() + crhold() when
5678778SErik.Nordmark@Sun.COM * the message is freed after the cred has been extracted.
5688778SErik.Nordmark@Sun.COM *
5698778SErik.Nordmark@Sun.COM * The caller is responsible for ensuring that there is no other reference
5708778SErik.Nordmark@Sun.COM * on the message since db_credp can not be cleared when there are other
5718778SErik.Nordmark@Sun.COM * references.
5728778SErik.Nordmark@Sun.COM */
5738778SErik.Nordmark@Sun.COM cred_t *
msg_extractcred(mblk_t * mp,pid_t * cpidp)5748778SErik.Nordmark@Sun.COM msg_extractcred(mblk_t *mp, pid_t *cpidp)
5758778SErik.Nordmark@Sun.COM {
5768778SErik.Nordmark@Sun.COM cred_t *cr = NULL;
5778778SErik.Nordmark@Sun.COM cred_t *cr2;
57810163SKen.Powell@Sun.COM mblk_t *mp2;
5798778SErik.Nordmark@Sun.COM
5808778SErik.Nordmark@Sun.COM while (mp != NULL) {
5818778SErik.Nordmark@Sun.COM dblk_t *dbp = mp->b_datap;
5828778SErik.Nordmark@Sun.COM
5838778SErik.Nordmark@Sun.COM cr = dbp->db_credp;
5848778SErik.Nordmark@Sun.COM if (cr == NULL) {
5858778SErik.Nordmark@Sun.COM mp = mp->b_cont;
5868778SErik.Nordmark@Sun.COM continue;
5878778SErik.Nordmark@Sun.COM }
5888778SErik.Nordmark@Sun.COM ASSERT(dbp->db_ref == 1);
5898778SErik.Nordmark@Sun.COM dbp->db_credp = NULL;
5908778SErik.Nordmark@Sun.COM if (cpidp != NULL)
5918778SErik.Nordmark@Sun.COM *cpidp = dbp->db_cpid;
5928778SErik.Nordmark@Sun.COM #ifdef DEBUG
5938778SErik.Nordmark@Sun.COM /*
5948778SErik.Nordmark@Sun.COM * Normally there should at most one db_credp in a message.
5958778SErik.Nordmark@Sun.COM * But if there are multiple (as in the case of some M_IOC*
5968778SErik.Nordmark@Sun.COM * and some internal messages in TCP/IP bind logic) then
5978778SErik.Nordmark@Sun.COM * they must be identical in the normal case.
5988778SErik.Nordmark@Sun.COM * However, a socket can be shared between different uids
5998778SErik.Nordmark@Sun.COM * in which case data queued in TCP would be from different
6008778SErik.Nordmark@Sun.COM * creds. Thus we can only assert for the zoneid being the
6018778SErik.Nordmark@Sun.COM * same. Due to Multi-level Level Ports for TX, some
6028778SErik.Nordmark@Sun.COM * cred_t can have a NULL cr_zone, and we skip the comparison
6038778SErik.Nordmark@Sun.COM * in that case.
6048778SErik.Nordmark@Sun.COM */
60510163SKen.Powell@Sun.COM mp2 = mp->b_cont;
60610163SKen.Powell@Sun.COM while (mp2 != NULL) {
60710163SKen.Powell@Sun.COM cr2 = DB_CRED(mp2);
60810163SKen.Powell@Sun.COM if (cr2 != NULL) {
60910163SKen.Powell@Sun.COM DTRACE_PROBE2(msg__extractcred,
61010163SKen.Powell@Sun.COM cred_t *, cr, cred_t *, cr2);
61110163SKen.Powell@Sun.COM ASSERT(crgetzoneid(cr) == crgetzoneid(cr2) ||
61210163SKen.Powell@Sun.COM crgetzone(cr) == NULL ||
61310163SKen.Powell@Sun.COM crgetzone(cr2) == NULL);
61410163SKen.Powell@Sun.COM }
61510163SKen.Powell@Sun.COM mp2 = mp2->b_cont;
6168778SErik.Nordmark@Sun.COM }
6178778SErik.Nordmark@Sun.COM #endif
6188778SErik.Nordmark@Sun.COM return (cr);
6198778SErik.Nordmark@Sun.COM }
6208778SErik.Nordmark@Sun.COM return (NULL);
6218778SErik.Nordmark@Sun.COM }
6228778SErik.Nordmark@Sun.COM /*
6238778SErik.Nordmark@Sun.COM * Get the label for a message. Uses the first mblk in the message
6248778SErik.Nordmark@Sun.COM * which has a non-NULL db_credp.
6258778SErik.Nordmark@Sun.COM * Returns NULL if there is no credp.
6268778SErik.Nordmark@Sun.COM */
6278778SErik.Nordmark@Sun.COM extern struct ts_label_s *
msg_getlabel(const mblk_t * mp)6288778SErik.Nordmark@Sun.COM msg_getlabel(const mblk_t *mp)
6298778SErik.Nordmark@Sun.COM {
6308778SErik.Nordmark@Sun.COM cred_t *cr = msg_getcred(mp, NULL);
6318778SErik.Nordmark@Sun.COM
6328778SErik.Nordmark@Sun.COM if (cr == NULL)
6338778SErik.Nordmark@Sun.COM return (NULL);
6348778SErik.Nordmark@Sun.COM
6358778SErik.Nordmark@Sun.COM return (crgetlabel(cr));
6360Sstevel@tonic-gate }
6370Sstevel@tonic-gate
6380Sstevel@tonic-gate void
freeb(mblk_t * mp)6390Sstevel@tonic-gate freeb(mblk_t *mp)
6400Sstevel@tonic-gate {
6410Sstevel@tonic-gate dblk_t *dbp = mp->b_datap;
6420Sstevel@tonic-gate
6430Sstevel@tonic-gate ASSERT(dbp->db_ref > 0);
6440Sstevel@tonic-gate ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
6450Sstevel@tonic-gate FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);
6460Sstevel@tonic-gate
6470Sstevel@tonic-gate STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
6480Sstevel@tonic-gate
6490Sstevel@tonic-gate dbp->db_free(mp, dbp);
6500Sstevel@tonic-gate }
6510Sstevel@tonic-gate
6520Sstevel@tonic-gate void
freemsg(mblk_t * mp)6530Sstevel@tonic-gate freemsg(mblk_t *mp)
6540Sstevel@tonic-gate {
6550Sstevel@tonic-gate FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
6560Sstevel@tonic-gate while (mp) {
6570Sstevel@tonic-gate dblk_t *dbp = mp->b_datap;
6580Sstevel@tonic-gate mblk_t *mp_cont = mp->b_cont;
6590Sstevel@tonic-gate
6600Sstevel@tonic-gate ASSERT(dbp->db_ref > 0);
6610Sstevel@tonic-gate ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
6620Sstevel@tonic-gate
6630Sstevel@tonic-gate STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
6640Sstevel@tonic-gate
6650Sstevel@tonic-gate dbp->db_free(mp, dbp);
6660Sstevel@tonic-gate mp = mp_cont;
6670Sstevel@tonic-gate }
6680Sstevel@tonic-gate }
6690Sstevel@tonic-gate
6700Sstevel@tonic-gate /*
6710Sstevel@tonic-gate * Reallocate a block for another use. Try hard to use the old block.
6720Sstevel@tonic-gate * If the old data is wanted (copy), leave b_wptr at the end of the data,
6730Sstevel@tonic-gate * otherwise return b_wptr = b_rptr.
6740Sstevel@tonic-gate *
6750Sstevel@tonic-gate * This routine is private and unstable.
6760Sstevel@tonic-gate */
6770Sstevel@tonic-gate mblk_t *
reallocb(mblk_t * mp,size_t size,uint_t copy)6780Sstevel@tonic-gate reallocb(mblk_t *mp, size_t size, uint_t copy)
6790Sstevel@tonic-gate {
6800Sstevel@tonic-gate mblk_t *mp1;
6810Sstevel@tonic-gate unsigned char *old_rptr;
6820Sstevel@tonic-gate ptrdiff_t cur_size;
6830Sstevel@tonic-gate
6840Sstevel@tonic-gate if (mp == NULL)
6850Sstevel@tonic-gate return (allocb(size, BPRI_HI));
6860Sstevel@tonic-gate
6870Sstevel@tonic-gate cur_size = mp->b_wptr - mp->b_rptr;
6880Sstevel@tonic-gate old_rptr = mp->b_rptr;
6890Sstevel@tonic-gate
6900Sstevel@tonic-gate ASSERT(mp->b_datap->db_ref != 0);
6910Sstevel@tonic-gate
6920Sstevel@tonic-gate if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
6930Sstevel@tonic-gate /*
6940Sstevel@tonic-gate * If the data is wanted and it will fit where it is, no
6950Sstevel@tonic-gate * work is required.
6960Sstevel@tonic-gate */
6970Sstevel@tonic-gate if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
6980Sstevel@tonic-gate return (mp);
6990Sstevel@tonic-gate
7000Sstevel@tonic-gate mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
7010Sstevel@tonic-gate mp1 = mp;
7020Sstevel@tonic-gate } else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
7030Sstevel@tonic-gate /* XXX other mp state could be copied too, db_flags ... ? */
7040Sstevel@tonic-gate mp1->b_cont = mp->b_cont;
7050Sstevel@tonic-gate } else {
7060Sstevel@tonic-gate return (NULL);
7070Sstevel@tonic-gate }
7080Sstevel@tonic-gate
7090Sstevel@tonic-gate if (copy) {
7100Sstevel@tonic-gate bcopy(old_rptr, mp1->b_rptr, cur_size);
7110Sstevel@tonic-gate mp1->b_wptr = mp1->b_rptr + cur_size;
7120Sstevel@tonic-gate }
7130Sstevel@tonic-gate
7140Sstevel@tonic-gate if (mp != mp1)
7150Sstevel@tonic-gate freeb(mp);
7160Sstevel@tonic-gate
7170Sstevel@tonic-gate return (mp1);
7180Sstevel@tonic-gate }
7190Sstevel@tonic-gate
7200Sstevel@tonic-gate static void
dblk_lastfree(mblk_t * mp,dblk_t * dbp)7210Sstevel@tonic-gate dblk_lastfree(mblk_t *mp, dblk_t *dbp)
7220Sstevel@tonic-gate {
7230Sstevel@tonic-gate ASSERT(dbp->db_mblk == mp);
7240Sstevel@tonic-gate if (dbp->db_fthdr != NULL)
7250Sstevel@tonic-gate str_ftfree(dbp);
7260Sstevel@tonic-gate
7270Sstevel@tonic-gate /* set credp and projid to be 'unspecified' before returning to cache */
7280Sstevel@tonic-gate if (dbp->db_credp != NULL) {
7290Sstevel@tonic-gate crfree(dbp->db_credp);
7300Sstevel@tonic-gate dbp->db_credp = NULL;
7310Sstevel@tonic-gate }
7320Sstevel@tonic-gate dbp->db_cpid = -1;
7330Sstevel@tonic-gate
7340Sstevel@tonic-gate /* Reset the struioflag and the checksum flag fields */
7350Sstevel@tonic-gate dbp->db_struioflag = 0;
7360Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0;
7370Sstevel@tonic-gate
7386707Sbrutus /* and the COOKED and/or UIOA flag(s) */
7396707Sbrutus dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);
740898Skais
7410Sstevel@tonic-gate kmem_cache_free(dbp->db_cache, dbp);
7420Sstevel@tonic-gate }
7430Sstevel@tonic-gate
7440Sstevel@tonic-gate static void
dblk_decref(mblk_t * mp,dblk_t * dbp)7450Sstevel@tonic-gate dblk_decref(mblk_t *mp, dblk_t *dbp)
7460Sstevel@tonic-gate {
7470Sstevel@tonic-gate if (dbp->db_ref != 1) {
7480Sstevel@tonic-gate uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
7490Sstevel@tonic-gate -(1 << DBLK_RTFU_SHIFT(db_ref)));
7500Sstevel@tonic-gate /*
7510Sstevel@tonic-gate * atomic_add_32_nv() just decremented db_ref, so we no longer
7520Sstevel@tonic-gate * have a reference to the dblk, which means another thread
7530Sstevel@tonic-gate * could free it. Therefore we cannot examine the dblk to
7540Sstevel@tonic-gate * determine whether ours was the last reference. Instead,
7550Sstevel@tonic-gate * we extract the new and minimum reference counts from rtfu.
7560Sstevel@tonic-gate * Note that all we're really saying is "if (ref != refmin)".
7570Sstevel@tonic-gate */
7580Sstevel@tonic-gate if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
7590Sstevel@tonic-gate ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
7600Sstevel@tonic-gate kmem_cache_free(mblk_cache, mp);
7610Sstevel@tonic-gate return;
7620Sstevel@tonic-gate }
7630Sstevel@tonic-gate }
7640Sstevel@tonic-gate dbp->db_mblk = mp;
7650Sstevel@tonic-gate dbp->db_free = dbp->db_lastfree;
7660Sstevel@tonic-gate dbp->db_lastfree(mp, dbp);
7670Sstevel@tonic-gate }
7680Sstevel@tonic-gate
7690Sstevel@tonic-gate mblk_t *
dupb(mblk_t * mp)7700Sstevel@tonic-gate dupb(mblk_t *mp)
7710Sstevel@tonic-gate {
7720Sstevel@tonic-gate dblk_t *dbp = mp->b_datap;
7730Sstevel@tonic-gate mblk_t *new_mp;
7740Sstevel@tonic-gate uint32_t oldrtfu, newrtfu;
7750Sstevel@tonic-gate
7760Sstevel@tonic-gate if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
7770Sstevel@tonic-gate goto out;
7780Sstevel@tonic-gate
7790Sstevel@tonic-gate new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
7800Sstevel@tonic-gate new_mp->b_rptr = mp->b_rptr;
7810Sstevel@tonic-gate new_mp->b_wptr = mp->b_wptr;
7820Sstevel@tonic-gate new_mp->b_datap = dbp;
7830Sstevel@tonic-gate new_mp->b_queue = NULL;
7840Sstevel@tonic-gate MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);
7850Sstevel@tonic-gate
7860Sstevel@tonic-gate STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);
7870Sstevel@tonic-gate
7883163Sgeorges dbp->db_free = dblk_decref;
7890Sstevel@tonic-gate do {
7900Sstevel@tonic-gate ASSERT(dbp->db_ref > 0);
7910Sstevel@tonic-gate oldrtfu = DBLK_RTFU_WORD(dbp);
7920Sstevel@tonic-gate newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
7930Sstevel@tonic-gate /*
7940Sstevel@tonic-gate * If db_ref is maxed out we can't dup this message anymore.
7950Sstevel@tonic-gate */
7960Sstevel@tonic-gate if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
7970Sstevel@tonic-gate kmem_cache_free(mblk_cache, new_mp);
7980Sstevel@tonic-gate new_mp = NULL;
7990Sstevel@tonic-gate goto out;
8000Sstevel@tonic-gate }
8010Sstevel@tonic-gate } while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);
8020Sstevel@tonic-gate
8030Sstevel@tonic-gate out:
8040Sstevel@tonic-gate FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
8050Sstevel@tonic-gate return (new_mp);
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate
8080Sstevel@tonic-gate static void
dblk_lastfree_desb(mblk_t * mp,dblk_t * dbp)8090Sstevel@tonic-gate dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
8100Sstevel@tonic-gate {
8110Sstevel@tonic-gate frtn_t *frp = dbp->db_frtnp;
8120Sstevel@tonic-gate
8130Sstevel@tonic-gate ASSERT(dbp->db_mblk == mp);
8140Sstevel@tonic-gate frp->free_func(frp->free_arg);
8150Sstevel@tonic-gate if (dbp->db_fthdr != NULL)
8160Sstevel@tonic-gate str_ftfree(dbp);
8170Sstevel@tonic-gate
8180Sstevel@tonic-gate /* set credp and projid to be 'unspecified' before returning to cache */
8190Sstevel@tonic-gate if (dbp->db_credp != NULL) {
8200Sstevel@tonic-gate crfree(dbp->db_credp);
8210Sstevel@tonic-gate dbp->db_credp = NULL;
8220Sstevel@tonic-gate }
8230Sstevel@tonic-gate dbp->db_cpid = -1;
8240Sstevel@tonic-gate dbp->db_struioflag = 0;
8250Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0;
8260Sstevel@tonic-gate
8270Sstevel@tonic-gate kmem_cache_free(dbp->db_cache, dbp);
8280Sstevel@tonic-gate }
8290Sstevel@tonic-gate
/*
 * Free routine that intentionally does nothing; its argument is ignored.
 */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
	/* nothing to do */
}
8350Sstevel@tonic-gate
8360Sstevel@tonic-gate /*
8370Sstevel@tonic-gate * Generic esballoc used to implement the four flavors: [d]esballoc[a].
8380Sstevel@tonic-gate */
8390Sstevel@tonic-gate static mblk_t *
gesballoc(unsigned char * base,size_t size,uint32_t db_rtfu,frtn_t * frp,void (* lastfree)(mblk_t *,dblk_t *),int kmflags)8400Sstevel@tonic-gate gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
8410Sstevel@tonic-gate void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
8420Sstevel@tonic-gate {
8430Sstevel@tonic-gate dblk_t *dbp;
8440Sstevel@tonic-gate mblk_t *mp;
8450Sstevel@tonic-gate
8460Sstevel@tonic-gate ASSERT(base != NULL && frp != NULL);
8470Sstevel@tonic-gate
8480Sstevel@tonic-gate if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
8490Sstevel@tonic-gate mp = NULL;
8500Sstevel@tonic-gate goto out;
8510Sstevel@tonic-gate }
8520Sstevel@tonic-gate
8530Sstevel@tonic-gate mp = dbp->db_mblk;
8540Sstevel@tonic-gate dbp->db_base = base;
8550Sstevel@tonic-gate dbp->db_lim = base + size;
8560Sstevel@tonic-gate dbp->db_free = dbp->db_lastfree = lastfree;
8570Sstevel@tonic-gate dbp->db_frtnp = frp;
8580Sstevel@tonic-gate DBLK_RTFU_WORD(dbp) = db_rtfu;
8590Sstevel@tonic-gate mp->b_next = mp->b_prev = mp->b_cont = NULL;
8600Sstevel@tonic-gate mp->b_rptr = mp->b_wptr = base;
8610Sstevel@tonic-gate mp->b_queue = NULL;
8620Sstevel@tonic-gate MBLK_BAND_FLAG_WORD(mp) = 0;
8630Sstevel@tonic-gate
8640Sstevel@tonic-gate out:
8650Sstevel@tonic-gate FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
8660Sstevel@tonic-gate return (mp);
8670Sstevel@tonic-gate }
8680Sstevel@tonic-gate
8690Sstevel@tonic-gate /*ARGSUSED*/
8700Sstevel@tonic-gate mblk_t *
esballoc(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)8710Sstevel@tonic-gate esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
8720Sstevel@tonic-gate {
8730Sstevel@tonic-gate mblk_t *mp;
8740Sstevel@tonic-gate
8750Sstevel@tonic-gate /*
8760Sstevel@tonic-gate * Note that this is structured to allow the common case (i.e.
8770Sstevel@tonic-gate * STREAMS flowtracing disabled) to call gesballoc() with tail
8780Sstevel@tonic-gate * call optimization.
8790Sstevel@tonic-gate */
8800Sstevel@tonic-gate if (!str_ftnever) {
8810Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
8820Sstevel@tonic-gate frp, freebs_enqueue, KM_NOSLEEP);
8830Sstevel@tonic-gate
8840Sstevel@tonic-gate if (mp != NULL)
8850Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
8860Sstevel@tonic-gate return (mp);
8870Sstevel@tonic-gate }
8880Sstevel@tonic-gate
8890Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
8900Sstevel@tonic-gate frp, freebs_enqueue, KM_NOSLEEP));
8910Sstevel@tonic-gate }
8920Sstevel@tonic-gate
8930Sstevel@tonic-gate /*
8940Sstevel@tonic-gate * Same as esballoc() but sleeps waiting for memory.
8950Sstevel@tonic-gate */
8960Sstevel@tonic-gate /*ARGSUSED*/
8970Sstevel@tonic-gate mblk_t *
esballoc_wait(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)8980Sstevel@tonic-gate esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
8990Sstevel@tonic-gate {
9000Sstevel@tonic-gate mblk_t *mp;
9010Sstevel@tonic-gate
9020Sstevel@tonic-gate /*
9030Sstevel@tonic-gate * Note that this is structured to allow the common case (i.e.
9040Sstevel@tonic-gate * STREAMS flowtracing disabled) to call gesballoc() with tail
9050Sstevel@tonic-gate * call optimization.
9060Sstevel@tonic-gate */
9070Sstevel@tonic-gate if (!str_ftnever) {
9080Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9090Sstevel@tonic-gate frp, freebs_enqueue, KM_SLEEP);
9100Sstevel@tonic-gate
9110Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
9120Sstevel@tonic-gate return (mp);
9130Sstevel@tonic-gate }
9140Sstevel@tonic-gate
9150Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9160Sstevel@tonic-gate frp, freebs_enqueue, KM_SLEEP));
9170Sstevel@tonic-gate }
9180Sstevel@tonic-gate
9190Sstevel@tonic-gate /*ARGSUSED*/
9200Sstevel@tonic-gate mblk_t *
desballoc(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)9210Sstevel@tonic-gate desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
9220Sstevel@tonic-gate {
9230Sstevel@tonic-gate mblk_t *mp;
9240Sstevel@tonic-gate
9250Sstevel@tonic-gate /*
9260Sstevel@tonic-gate * Note that this is structured to allow the common case (i.e.
9270Sstevel@tonic-gate * STREAMS flowtracing disabled) to call gesballoc() with tail
9280Sstevel@tonic-gate * call optimization.
9290Sstevel@tonic-gate */
9300Sstevel@tonic-gate if (!str_ftnever) {
9310Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9326707Sbrutus frp, dblk_lastfree_desb, KM_NOSLEEP);
9330Sstevel@tonic-gate
9340Sstevel@tonic-gate if (mp != NULL)
9350Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
9360Sstevel@tonic-gate return (mp);
9370Sstevel@tonic-gate }
9380Sstevel@tonic-gate
9390Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
9400Sstevel@tonic-gate frp, dblk_lastfree_desb, KM_NOSLEEP));
9410Sstevel@tonic-gate }
9420Sstevel@tonic-gate
9430Sstevel@tonic-gate /*ARGSUSED*/
9440Sstevel@tonic-gate mblk_t *
esballoca(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)9450Sstevel@tonic-gate esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
9460Sstevel@tonic-gate {
9470Sstevel@tonic-gate mblk_t *mp;
9480Sstevel@tonic-gate
9490Sstevel@tonic-gate /*
9500Sstevel@tonic-gate * Note that this is structured to allow the common case (i.e.
9510Sstevel@tonic-gate * STREAMS flowtracing disabled) to call gesballoc() with tail
9520Sstevel@tonic-gate * call optimization.
9530Sstevel@tonic-gate */
9540Sstevel@tonic-gate if (!str_ftnever) {
9550Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
9560Sstevel@tonic-gate frp, freebs_enqueue, KM_NOSLEEP);
9570Sstevel@tonic-gate
9580Sstevel@tonic-gate if (mp != NULL)
9590Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
9600Sstevel@tonic-gate return (mp);
9610Sstevel@tonic-gate }
9620Sstevel@tonic-gate
9630Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
9640Sstevel@tonic-gate frp, freebs_enqueue, KM_NOSLEEP));
9650Sstevel@tonic-gate }
9660Sstevel@tonic-gate
9670Sstevel@tonic-gate /*ARGSUSED*/
9680Sstevel@tonic-gate mblk_t *
desballoca(unsigned char * base,size_t size,uint_t pri,frtn_t * frp)9690Sstevel@tonic-gate desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
9700Sstevel@tonic-gate {
9710Sstevel@tonic-gate mblk_t *mp;
9720Sstevel@tonic-gate
9730Sstevel@tonic-gate /*
9740Sstevel@tonic-gate * Note that this is structured to allow the common case (i.e.
9750Sstevel@tonic-gate * STREAMS flowtracing disabled) to call gesballoc() with tail
9760Sstevel@tonic-gate * call optimization.
9770Sstevel@tonic-gate */
9780Sstevel@tonic-gate if (!str_ftnever) {
9790Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
9800Sstevel@tonic-gate frp, dblk_lastfree_desb, KM_NOSLEEP);
9810Sstevel@tonic-gate
9820Sstevel@tonic-gate if (mp != NULL)
9830Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
9840Sstevel@tonic-gate return (mp);
9850Sstevel@tonic-gate }
9860Sstevel@tonic-gate
9870Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
9880Sstevel@tonic-gate frp, dblk_lastfree_desb, KM_NOSLEEP));
9890Sstevel@tonic-gate }
9900Sstevel@tonic-gate
9910Sstevel@tonic-gate static void
bcache_dblk_lastfree(mblk_t * mp,dblk_t * dbp)9920Sstevel@tonic-gate bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
9930Sstevel@tonic-gate {
9940Sstevel@tonic-gate bcache_t *bcp = dbp->db_cache;
9950Sstevel@tonic-gate
9960Sstevel@tonic-gate ASSERT(dbp->db_mblk == mp);
9970Sstevel@tonic-gate if (dbp->db_fthdr != NULL)
9980Sstevel@tonic-gate str_ftfree(dbp);
9990Sstevel@tonic-gate
10000Sstevel@tonic-gate /* set credp and projid to be 'unspecified' before returning to cache */
10010Sstevel@tonic-gate if (dbp->db_credp != NULL) {
10020Sstevel@tonic-gate crfree(dbp->db_credp);
10030Sstevel@tonic-gate dbp->db_credp = NULL;
10040Sstevel@tonic-gate }
10050Sstevel@tonic-gate dbp->db_cpid = -1;
10060Sstevel@tonic-gate dbp->db_struioflag = 0;
10070Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0;
10080Sstevel@tonic-gate
10090Sstevel@tonic-gate mutex_enter(&bcp->mutex);
10100Sstevel@tonic-gate kmem_cache_free(bcp->dblk_cache, dbp);
10110Sstevel@tonic-gate bcp->alloc--;
10120Sstevel@tonic-gate
10130Sstevel@tonic-gate if (bcp->alloc == 0 && bcp->destroy != 0) {
10140Sstevel@tonic-gate kmem_cache_destroy(bcp->dblk_cache);
10150Sstevel@tonic-gate kmem_cache_destroy(bcp->buffer_cache);
10160Sstevel@tonic-gate mutex_exit(&bcp->mutex);
10170Sstevel@tonic-gate mutex_destroy(&bcp->mutex);
10180Sstevel@tonic-gate kmem_free(bcp, sizeof (bcache_t));
10190Sstevel@tonic-gate } else {
10200Sstevel@tonic-gate mutex_exit(&bcp->mutex);
10210Sstevel@tonic-gate }
10220Sstevel@tonic-gate }
10230Sstevel@tonic-gate
10240Sstevel@tonic-gate bcache_t *
bcache_create(char * name,size_t size,uint_t align)10250Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align)
10260Sstevel@tonic-gate {
10270Sstevel@tonic-gate bcache_t *bcp;
10280Sstevel@tonic-gate char buffer[255];
10290Sstevel@tonic-gate
10300Sstevel@tonic-gate ASSERT((align & (align - 1)) == 0);
10310Sstevel@tonic-gate
10328752SPeter.Memishian@Sun.COM if ((bcp = kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == NULL)
10330Sstevel@tonic-gate return (NULL);
10340Sstevel@tonic-gate
10350Sstevel@tonic-gate bcp->size = size;
10360Sstevel@tonic-gate bcp->align = align;
10370Sstevel@tonic-gate bcp->alloc = 0;
10380Sstevel@tonic-gate bcp->destroy = 0;
10390Sstevel@tonic-gate
10400Sstevel@tonic-gate mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
10410Sstevel@tonic-gate
10420Sstevel@tonic-gate (void) sprintf(buffer, "%s_buffer_cache", name);
10430Sstevel@tonic-gate bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
10440Sstevel@tonic-gate NULL, NULL, NULL, 0);
10450Sstevel@tonic-gate (void) sprintf(buffer, "%s_dblk_cache", name);
10460Sstevel@tonic-gate bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
10470Sstevel@tonic-gate DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
10486707Sbrutus NULL, (void *)bcp, NULL, 0);
10490Sstevel@tonic-gate
10500Sstevel@tonic-gate return (bcp);
10510Sstevel@tonic-gate }
10520Sstevel@tonic-gate
10530Sstevel@tonic-gate void
bcache_destroy(bcache_t * bcp)10540Sstevel@tonic-gate bcache_destroy(bcache_t *bcp)
10550Sstevel@tonic-gate {
10560Sstevel@tonic-gate ASSERT(bcp != NULL);
10570Sstevel@tonic-gate
10580Sstevel@tonic-gate mutex_enter(&bcp->mutex);
10590Sstevel@tonic-gate if (bcp->alloc == 0) {
10600Sstevel@tonic-gate kmem_cache_destroy(bcp->dblk_cache);
10610Sstevel@tonic-gate kmem_cache_destroy(bcp->buffer_cache);
10620Sstevel@tonic-gate mutex_exit(&bcp->mutex);
10630Sstevel@tonic-gate mutex_destroy(&bcp->mutex);
10640Sstevel@tonic-gate kmem_free(bcp, sizeof (bcache_t));
10650Sstevel@tonic-gate } else {
10660Sstevel@tonic-gate bcp->destroy++;
10670Sstevel@tonic-gate mutex_exit(&bcp->mutex);
10680Sstevel@tonic-gate }
10690Sstevel@tonic-gate }
10700Sstevel@tonic-gate
10710Sstevel@tonic-gate /*ARGSUSED*/
10720Sstevel@tonic-gate mblk_t *
bcache_allocb(bcache_t * bcp,uint_t pri)10730Sstevel@tonic-gate bcache_allocb(bcache_t *bcp, uint_t pri)
10740Sstevel@tonic-gate {
10750Sstevel@tonic-gate dblk_t *dbp;
10760Sstevel@tonic-gate mblk_t *mp = NULL;
10770Sstevel@tonic-gate
10780Sstevel@tonic-gate ASSERT(bcp != NULL);
10790Sstevel@tonic-gate
10800Sstevel@tonic-gate mutex_enter(&bcp->mutex);
10810Sstevel@tonic-gate if (bcp->destroy != 0) {
10820Sstevel@tonic-gate mutex_exit(&bcp->mutex);
10830Sstevel@tonic-gate goto out;
10840Sstevel@tonic-gate }
10850Sstevel@tonic-gate
10860Sstevel@tonic-gate if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
10870Sstevel@tonic-gate mutex_exit(&bcp->mutex);
10880Sstevel@tonic-gate goto out;
10890Sstevel@tonic-gate }
10900Sstevel@tonic-gate bcp->alloc++;
10910Sstevel@tonic-gate mutex_exit(&bcp->mutex);
10920Sstevel@tonic-gate
10930Sstevel@tonic-gate ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
10940Sstevel@tonic-gate
10950Sstevel@tonic-gate mp = dbp->db_mblk;
10960Sstevel@tonic-gate DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
10970Sstevel@tonic-gate mp->b_next = mp->b_prev = mp->b_cont = NULL;
10980Sstevel@tonic-gate mp->b_rptr = mp->b_wptr = dbp->db_base;
10990Sstevel@tonic-gate mp->b_queue = NULL;
11000Sstevel@tonic-gate MBLK_BAND_FLAG_WORD(mp) = 0;
11010Sstevel@tonic-gate STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
11020Sstevel@tonic-gate out:
11030Sstevel@tonic-gate FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
11040Sstevel@tonic-gate
11050Sstevel@tonic-gate return (mp);
11060Sstevel@tonic-gate }
11070Sstevel@tonic-gate
11080Sstevel@tonic-gate static void
dblk_lastfree_oversize(mblk_t * mp,dblk_t * dbp)11090Sstevel@tonic-gate dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
11100Sstevel@tonic-gate {
11110Sstevel@tonic-gate ASSERT(dbp->db_mblk == mp);
11120Sstevel@tonic-gate if (dbp->db_fthdr != NULL)
11130Sstevel@tonic-gate str_ftfree(dbp);
11140Sstevel@tonic-gate
11150Sstevel@tonic-gate /* set credp and projid to be 'unspecified' before returning to cache */
11160Sstevel@tonic-gate if (dbp->db_credp != NULL) {
11170Sstevel@tonic-gate crfree(dbp->db_credp);
11180Sstevel@tonic-gate dbp->db_credp = NULL;
11190Sstevel@tonic-gate }
11200Sstevel@tonic-gate dbp->db_cpid = -1;
11210Sstevel@tonic-gate dbp->db_struioflag = 0;
11220Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0;
11230Sstevel@tonic-gate
11240Sstevel@tonic-gate kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
11250Sstevel@tonic-gate kmem_cache_free(dbp->db_cache, dbp);
11260Sstevel@tonic-gate }
11270Sstevel@tonic-gate
11280Sstevel@tonic-gate static mblk_t *
allocb_oversize(size_t size,int kmflags)11290Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags)
11300Sstevel@tonic-gate {
11310Sstevel@tonic-gate mblk_t *mp;
11320Sstevel@tonic-gate void *buf;
11330Sstevel@tonic-gate
11340Sstevel@tonic-gate size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
11350Sstevel@tonic-gate if ((buf = kmem_alloc(size, kmflags)) == NULL)
11360Sstevel@tonic-gate return (NULL);
11370Sstevel@tonic-gate if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
11380Sstevel@tonic-gate &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
11390Sstevel@tonic-gate kmem_free(buf, size);
11400Sstevel@tonic-gate
11410Sstevel@tonic-gate if (mp != NULL)
11420Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
11430Sstevel@tonic-gate
11440Sstevel@tonic-gate return (mp);
11450Sstevel@tonic-gate }
11460Sstevel@tonic-gate
11470Sstevel@tonic-gate mblk_t *
allocb_tryhard(size_t target_size)11480Sstevel@tonic-gate allocb_tryhard(size_t target_size)
11490Sstevel@tonic-gate {
11500Sstevel@tonic-gate size_t size;
11510Sstevel@tonic-gate mblk_t *bp;
11520Sstevel@tonic-gate
11530Sstevel@tonic-gate for (size = target_size; size < target_size + 512;
11540Sstevel@tonic-gate size += DBLK_CACHE_ALIGN)
11550Sstevel@tonic-gate if ((bp = allocb(size, BPRI_HI)) != NULL)
11560Sstevel@tonic-gate return (bp);
11570Sstevel@tonic-gate allocb_tryhard_fails++;
11580Sstevel@tonic-gate return (NULL);
11590Sstevel@tonic-gate }
11600Sstevel@tonic-gate
11610Sstevel@tonic-gate /*
11620Sstevel@tonic-gate * This routine is consolidation private for STREAMS internal use
11630Sstevel@tonic-gate * This routine may only be called from sync routines (i.e., not
11640Sstevel@tonic-gate * from put or service procedures). It is located here (rather
11650Sstevel@tonic-gate * than strsubr.c) so that we don't have to expose all of the
11660Sstevel@tonic-gate * allocb() implementation details in header files.
11670Sstevel@tonic-gate */
11680Sstevel@tonic-gate mblk_t *
allocb_wait(size_t size,uint_t pri,uint_t flags,int * error)11690Sstevel@tonic-gate allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
11700Sstevel@tonic-gate {
11710Sstevel@tonic-gate dblk_t *dbp;
11720Sstevel@tonic-gate mblk_t *mp;
11730Sstevel@tonic-gate size_t index;
11740Sstevel@tonic-gate
11750Sstevel@tonic-gate index = (size -1) >> DBLK_SIZE_SHIFT;
11760Sstevel@tonic-gate
11770Sstevel@tonic-gate if (flags & STR_NOSIG) {
11780Sstevel@tonic-gate if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
11790Sstevel@tonic-gate if (size != 0) {
11800Sstevel@tonic-gate mp = allocb_oversize(size, KM_SLEEP);
11810Sstevel@tonic-gate FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
11820Sstevel@tonic-gate (uintptr_t)mp);
11830Sstevel@tonic-gate return (mp);
11840Sstevel@tonic-gate }
11850Sstevel@tonic-gate index = 0;
11860Sstevel@tonic-gate }
11870Sstevel@tonic-gate
11880Sstevel@tonic-gate dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
11890Sstevel@tonic-gate mp = dbp->db_mblk;
11900Sstevel@tonic-gate DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
11910Sstevel@tonic-gate mp->b_next = mp->b_prev = mp->b_cont = NULL;
11920Sstevel@tonic-gate mp->b_rptr = mp->b_wptr = dbp->db_base;
11930Sstevel@tonic-gate mp->b_queue = NULL;
11940Sstevel@tonic-gate MBLK_BAND_FLAG_WORD(mp) = 0;
11950Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
11960Sstevel@tonic-gate
11970Sstevel@tonic-gate FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
11980Sstevel@tonic-gate
11990Sstevel@tonic-gate } else {
12000Sstevel@tonic-gate while ((mp = allocb(size, pri)) == NULL) {
12010Sstevel@tonic-gate if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
12020Sstevel@tonic-gate return (NULL);
12030Sstevel@tonic-gate }
12040Sstevel@tonic-gate }
12050Sstevel@tonic-gate
12060Sstevel@tonic-gate return (mp);
12070Sstevel@tonic-gate }
12080Sstevel@tonic-gate
12090Sstevel@tonic-gate /*
12100Sstevel@tonic-gate * Call function 'func' with 'arg' when a class zero block can
12110Sstevel@tonic-gate * be allocated with priority 'pri'.
12120Sstevel@tonic-gate */
12130Sstevel@tonic-gate bufcall_id_t
esbbcall(uint_t pri,void (* func)(void *),void * arg)12140Sstevel@tonic-gate esbbcall(uint_t pri, void (*func)(void *), void *arg)
12150Sstevel@tonic-gate {
12160Sstevel@tonic-gate return (bufcall(1, pri, func, arg));
12170Sstevel@tonic-gate }
12180Sstevel@tonic-gate
12190Sstevel@tonic-gate /*
12200Sstevel@tonic-gate * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
12210Sstevel@tonic-gate * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
12220Sstevel@tonic-gate * This provides consistency for all internal allocators of ioctl.
12230Sstevel@tonic-gate */
12240Sstevel@tonic-gate mblk_t *
mkiocb(uint_t cmd)12250Sstevel@tonic-gate mkiocb(uint_t cmd)
12260Sstevel@tonic-gate {
12270Sstevel@tonic-gate struct iocblk *ioc;
12280Sstevel@tonic-gate mblk_t *mp;
12290Sstevel@tonic-gate
12300Sstevel@tonic-gate /*
12310Sstevel@tonic-gate * Allocate enough space for any of the ioctl related messages.
12320Sstevel@tonic-gate */
12330Sstevel@tonic-gate if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
12340Sstevel@tonic-gate return (NULL);
12350Sstevel@tonic-gate
12360Sstevel@tonic-gate bzero(mp->b_rptr, sizeof (union ioctypes));
12370Sstevel@tonic-gate
12380Sstevel@tonic-gate /*
12390Sstevel@tonic-gate * Set the mblk_t information and ptrs correctly.
12400Sstevel@tonic-gate */
12410Sstevel@tonic-gate mp->b_wptr += sizeof (struct iocblk);
12420Sstevel@tonic-gate mp->b_datap->db_type = M_IOCTL;
12430Sstevel@tonic-gate
12440Sstevel@tonic-gate /*
12450Sstevel@tonic-gate * Fill in the fields.
12460Sstevel@tonic-gate */
12470Sstevel@tonic-gate ioc = (struct iocblk *)mp->b_rptr;
12480Sstevel@tonic-gate ioc->ioc_cmd = cmd;
12490Sstevel@tonic-gate ioc->ioc_cr = kcred;
12500Sstevel@tonic-gate ioc->ioc_id = getiocseqno();
12510Sstevel@tonic-gate ioc->ioc_flag = IOC_NATIVE;
12520Sstevel@tonic-gate return (mp);
12530Sstevel@tonic-gate }
12540Sstevel@tonic-gate
12550Sstevel@tonic-gate /*
12560Sstevel@tonic-gate * test if block of given size can be allocated with a request of
12570Sstevel@tonic-gate * the given priority.
12580Sstevel@tonic-gate * 'pri' is no longer used, but is retained for compatibility.
12590Sstevel@tonic-gate */
12600Sstevel@tonic-gate /* ARGSUSED */
12610Sstevel@tonic-gate int
testb(size_t size,uint_t pri)12620Sstevel@tonic-gate testb(size_t size, uint_t pri)
12630Sstevel@tonic-gate {
12640Sstevel@tonic-gate return ((size + sizeof (dblk_t)) <= kmem_avail());
12650Sstevel@tonic-gate }
12660Sstevel@tonic-gate
12670Sstevel@tonic-gate /*
12680Sstevel@tonic-gate * Call function 'func' with argument 'arg' when there is a reasonably
12690Sstevel@tonic-gate * good chance that a block of size 'size' can be allocated.
12700Sstevel@tonic-gate * 'pri' is no longer used, but is retained for compatibility.
12710Sstevel@tonic-gate */
12720Sstevel@tonic-gate /* ARGSUSED */
12730Sstevel@tonic-gate bufcall_id_t
bufcall(size_t size,uint_t pri,void (* func)(void *),void * arg)12740Sstevel@tonic-gate bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
12750Sstevel@tonic-gate {
12760Sstevel@tonic-gate static long bid = 1; /* always odd to save checking for zero */
12770Sstevel@tonic-gate bufcall_id_t bc_id;
12780Sstevel@tonic-gate struct strbufcall *bcp;
12790Sstevel@tonic-gate
12800Sstevel@tonic-gate if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
12810Sstevel@tonic-gate return (0);
12820Sstevel@tonic-gate
12830Sstevel@tonic-gate bcp->bc_func = func;
12840Sstevel@tonic-gate bcp->bc_arg = arg;
12850Sstevel@tonic-gate bcp->bc_size = size;
12860Sstevel@tonic-gate bcp->bc_next = NULL;
12870Sstevel@tonic-gate bcp->bc_executor = NULL;
12880Sstevel@tonic-gate
12890Sstevel@tonic-gate mutex_enter(&strbcall_lock);
12900Sstevel@tonic-gate /*
12910Sstevel@tonic-gate * After bcp is linked into strbcalls and strbcall_lock is dropped there
12920Sstevel@tonic-gate * should be no references to bcp since it may be freed by
12930Sstevel@tonic-gate * runbufcalls(). Since bcp_id field is returned, we save its value in
12940Sstevel@tonic-gate * the local var.
12950Sstevel@tonic-gate */
12960Sstevel@tonic-gate bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2); /* keep it odd */
12970Sstevel@tonic-gate
12980Sstevel@tonic-gate /*
12990Sstevel@tonic-gate * add newly allocated stream event to existing
13000Sstevel@tonic-gate * linked list of events.
13010Sstevel@tonic-gate */
13020Sstevel@tonic-gate if (strbcalls.bc_head == NULL) {
13030Sstevel@tonic-gate strbcalls.bc_head = strbcalls.bc_tail = bcp;
13040Sstevel@tonic-gate } else {
13050Sstevel@tonic-gate strbcalls.bc_tail->bc_next = bcp;
13060Sstevel@tonic-gate strbcalls.bc_tail = bcp;
13070Sstevel@tonic-gate }
13080Sstevel@tonic-gate
13090Sstevel@tonic-gate cv_signal(&strbcall_cv);
13100Sstevel@tonic-gate mutex_exit(&strbcall_lock);
13110Sstevel@tonic-gate return (bc_id);
13120Sstevel@tonic-gate }
13130Sstevel@tonic-gate
13140Sstevel@tonic-gate /*
13150Sstevel@tonic-gate * Cancel a bufcall request.
13160Sstevel@tonic-gate */
13170Sstevel@tonic-gate void
unbufcall(bufcall_id_t id)13180Sstevel@tonic-gate unbufcall(bufcall_id_t id)
13190Sstevel@tonic-gate {
13200Sstevel@tonic-gate strbufcall_t *bcp, *pbcp;
13210Sstevel@tonic-gate
13220Sstevel@tonic-gate mutex_enter(&strbcall_lock);
13230Sstevel@tonic-gate again:
13240Sstevel@tonic-gate pbcp = NULL;
13250Sstevel@tonic-gate for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
13260Sstevel@tonic-gate if (id == bcp->bc_id)
13270Sstevel@tonic-gate break;
13280Sstevel@tonic-gate pbcp = bcp;
13290Sstevel@tonic-gate }
13300Sstevel@tonic-gate if (bcp) {
13310Sstevel@tonic-gate if (bcp->bc_executor != NULL) {
13320Sstevel@tonic-gate if (bcp->bc_executor != curthread) {
13330Sstevel@tonic-gate cv_wait(&bcall_cv, &strbcall_lock);
13340Sstevel@tonic-gate goto again;
13350Sstevel@tonic-gate }
13360Sstevel@tonic-gate } else {
13370Sstevel@tonic-gate if (pbcp)
13380Sstevel@tonic-gate pbcp->bc_next = bcp->bc_next;
13390Sstevel@tonic-gate else
13400Sstevel@tonic-gate strbcalls.bc_head = bcp->bc_next;
13410Sstevel@tonic-gate if (bcp == strbcalls.bc_tail)
13420Sstevel@tonic-gate strbcalls.bc_tail = pbcp;
13430Sstevel@tonic-gate kmem_free(bcp, sizeof (strbufcall_t));
13440Sstevel@tonic-gate }
13450Sstevel@tonic-gate }
13460Sstevel@tonic-gate mutex_exit(&strbcall_lock);
13470Sstevel@tonic-gate }
13480Sstevel@tonic-gate
13490Sstevel@tonic-gate /*
13500Sstevel@tonic-gate * Duplicate a message block by block (uses dupb), returning
13510Sstevel@tonic-gate * a pointer to the duplicate message.
13520Sstevel@tonic-gate * Returns a non-NULL value only if the entire message
13530Sstevel@tonic-gate * was dup'd.
13540Sstevel@tonic-gate */
13550Sstevel@tonic-gate mblk_t *
dupmsg(mblk_t * bp)13560Sstevel@tonic-gate dupmsg(mblk_t *bp)
13570Sstevel@tonic-gate {
13580Sstevel@tonic-gate mblk_t *head, *nbp;
13590Sstevel@tonic-gate
13600Sstevel@tonic-gate if (!bp || !(nbp = head = dupb(bp)))
13610Sstevel@tonic-gate return (NULL);
13620Sstevel@tonic-gate
13630Sstevel@tonic-gate while (bp->b_cont) {
13640Sstevel@tonic-gate if (!(nbp->b_cont = dupb(bp->b_cont))) {
13650Sstevel@tonic-gate freemsg(head);
13660Sstevel@tonic-gate return (NULL);
13670Sstevel@tonic-gate }
13680Sstevel@tonic-gate nbp = nbp->b_cont;
13690Sstevel@tonic-gate bp = bp->b_cont;
13700Sstevel@tonic-gate }
13710Sstevel@tonic-gate return (head);
13720Sstevel@tonic-gate }
13730Sstevel@tonic-gate
13740Sstevel@tonic-gate #define DUPB_NOLOAN(bp) \
13750Sstevel@tonic-gate ((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
13760Sstevel@tonic-gate copyb((bp)) : dupb((bp)))
13770Sstevel@tonic-gate
13780Sstevel@tonic-gate mblk_t *
dupmsg_noloan(mblk_t * bp)13790Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp)
13800Sstevel@tonic-gate {
13810Sstevel@tonic-gate mblk_t *head, *nbp;
13820Sstevel@tonic-gate
13830Sstevel@tonic-gate if (bp == NULL || DB_TYPE(bp) != M_DATA ||
13840Sstevel@tonic-gate ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
13850Sstevel@tonic-gate return (NULL);
13860Sstevel@tonic-gate
13870Sstevel@tonic-gate while (bp->b_cont) {
13880Sstevel@tonic-gate if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
13890Sstevel@tonic-gate freemsg(head);
13900Sstevel@tonic-gate return (NULL);
13910Sstevel@tonic-gate }
13920Sstevel@tonic-gate nbp = nbp->b_cont;
13930Sstevel@tonic-gate bp = bp->b_cont;
13940Sstevel@tonic-gate }
13950Sstevel@tonic-gate return (head);
13960Sstevel@tonic-gate }
13970Sstevel@tonic-gate
13980Sstevel@tonic-gate /*
13990Sstevel@tonic-gate * Copy data from message and data block to newly allocated message and
14000Sstevel@tonic-gate * data block. Returns new message block pointer, or NULL if error.
14010Sstevel@tonic-gate * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
14020Sstevel@tonic-gate * as in the original even when db_base is not word aligned. (bug 1052877)
14030Sstevel@tonic-gate */
14040Sstevel@tonic-gate mblk_t *
copyb(mblk_t * bp)14050Sstevel@tonic-gate copyb(mblk_t *bp)
14060Sstevel@tonic-gate {
14070Sstevel@tonic-gate mblk_t *nbp;
14080Sstevel@tonic-gate dblk_t *dp, *ndp;
14090Sstevel@tonic-gate uchar_t *base;
14100Sstevel@tonic-gate size_t size;
14110Sstevel@tonic-gate size_t unaligned;
14120Sstevel@tonic-gate
14130Sstevel@tonic-gate ASSERT(bp->b_wptr >= bp->b_rptr);
14140Sstevel@tonic-gate
14150Sstevel@tonic-gate dp = bp->b_datap;
14160Sstevel@tonic-gate if (dp->db_fthdr != NULL)
14170Sstevel@tonic-gate STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
14180Sstevel@tonic-gate
14190Sstevel@tonic-gate /*
14200Sstevel@tonic-gate * Special handling for Multidata message; this should be
14210Sstevel@tonic-gate * removed once a copy-callback routine is made available.
14220Sstevel@tonic-gate */
14230Sstevel@tonic-gate if (dp->db_type == M_MULTIDATA) {
14240Sstevel@tonic-gate cred_t *cr;
14250Sstevel@tonic-gate
14260Sstevel@tonic-gate if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
14270Sstevel@tonic-gate return (NULL);
14280Sstevel@tonic-gate
14290Sstevel@tonic-gate nbp->b_flag = bp->b_flag;
14300Sstevel@tonic-gate nbp->b_band = bp->b_band;
14310Sstevel@tonic-gate ndp = nbp->b_datap;
14320Sstevel@tonic-gate
14330Sstevel@tonic-gate /* See comments below on potential issues. */
14340Sstevel@tonic-gate STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
14350Sstevel@tonic-gate
14360Sstevel@tonic-gate ASSERT(ndp->db_type == dp->db_type);
14370Sstevel@tonic-gate cr = dp->db_credp;
14380Sstevel@tonic-gate if (cr != NULL)
14390Sstevel@tonic-gate crhold(ndp->db_credp = cr);
14400Sstevel@tonic-gate ndp->db_cpid = dp->db_cpid;
14410Sstevel@tonic-gate return (nbp);
14420Sstevel@tonic-gate }
14430Sstevel@tonic-gate
14440Sstevel@tonic-gate size = dp->db_lim - dp->db_base;
14450Sstevel@tonic-gate unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
14460Sstevel@tonic-gate if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
14470Sstevel@tonic-gate return (NULL);
14480Sstevel@tonic-gate nbp->b_flag = bp->b_flag;
14490Sstevel@tonic-gate nbp->b_band = bp->b_band;
14500Sstevel@tonic-gate ndp = nbp->b_datap;
14510Sstevel@tonic-gate
14520Sstevel@tonic-gate /*
14530Sstevel@tonic-gate * Well, here is a potential issue. If we are trying to
14540Sstevel@tonic-gate * trace a flow, and we copy the message, we might lose
14550Sstevel@tonic-gate * information about where this message might have been.
14560Sstevel@tonic-gate * So we should inherit the FT data. On the other hand,
14570Sstevel@tonic-gate * a user might be interested only in alloc to free data.
14580Sstevel@tonic-gate * So I guess the real answer is to provide a tunable.
14590Sstevel@tonic-gate */
14600Sstevel@tonic-gate STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
14610Sstevel@tonic-gate
14620Sstevel@tonic-gate base = ndp->db_base + unaligned;
14630Sstevel@tonic-gate bcopy(dp->db_base, ndp->db_base + unaligned, size);
14640Sstevel@tonic-gate
14650Sstevel@tonic-gate nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
14660Sstevel@tonic-gate nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
14670Sstevel@tonic-gate
14680Sstevel@tonic-gate return (nbp);
14690Sstevel@tonic-gate }
14700Sstevel@tonic-gate
14710Sstevel@tonic-gate /*
14720Sstevel@tonic-gate * Copy data from message to newly allocated message using new
14730Sstevel@tonic-gate * data blocks. Returns a pointer to the new message, or NULL if error.
14740Sstevel@tonic-gate */
14750Sstevel@tonic-gate mblk_t *
copymsg(mblk_t * bp)14760Sstevel@tonic-gate copymsg(mblk_t *bp)
14770Sstevel@tonic-gate {
14780Sstevel@tonic-gate mblk_t *head, *nbp;
14790Sstevel@tonic-gate
14800Sstevel@tonic-gate if (!bp || !(nbp = head = copyb(bp)))
14810Sstevel@tonic-gate return (NULL);
14820Sstevel@tonic-gate
14830Sstevel@tonic-gate while (bp->b_cont) {
14840Sstevel@tonic-gate if (!(nbp->b_cont = copyb(bp->b_cont))) {
14850Sstevel@tonic-gate freemsg(head);
14860Sstevel@tonic-gate return (NULL);
14870Sstevel@tonic-gate }
14880Sstevel@tonic-gate nbp = nbp->b_cont;
14890Sstevel@tonic-gate bp = bp->b_cont;
14900Sstevel@tonic-gate }
14910Sstevel@tonic-gate return (head);
14920Sstevel@tonic-gate }
14930Sstevel@tonic-gate
14940Sstevel@tonic-gate /*
14950Sstevel@tonic-gate * link a message block to tail of message
14960Sstevel@tonic-gate */
14970Sstevel@tonic-gate void
linkb(mblk_t * mp,mblk_t * bp)14980Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp)
14990Sstevel@tonic-gate {
15000Sstevel@tonic-gate ASSERT(mp && bp);
15010Sstevel@tonic-gate
15020Sstevel@tonic-gate for (; mp->b_cont; mp = mp->b_cont)
15030Sstevel@tonic-gate ;
15040Sstevel@tonic-gate mp->b_cont = bp;
15050Sstevel@tonic-gate }
15060Sstevel@tonic-gate
15070Sstevel@tonic-gate /*
15080Sstevel@tonic-gate * unlink a message block from head of message
15090Sstevel@tonic-gate * return pointer to new message.
15100Sstevel@tonic-gate * NULL if message becomes empty.
15110Sstevel@tonic-gate */
15120Sstevel@tonic-gate mblk_t *
unlinkb(mblk_t * bp)15130Sstevel@tonic-gate unlinkb(mblk_t *bp)
15140Sstevel@tonic-gate {
15150Sstevel@tonic-gate mblk_t *bp1;
15160Sstevel@tonic-gate
15170Sstevel@tonic-gate bp1 = bp->b_cont;
15180Sstevel@tonic-gate bp->b_cont = NULL;
15190Sstevel@tonic-gate return (bp1);
15200Sstevel@tonic-gate }
15210Sstevel@tonic-gate
15220Sstevel@tonic-gate /*
15230Sstevel@tonic-gate * remove a message block "bp" from message "mp"
15240Sstevel@tonic-gate *
15250Sstevel@tonic-gate * Return pointer to new message or NULL if no message remains.
15260Sstevel@tonic-gate * Return -1 if bp is not found in message.
15270Sstevel@tonic-gate */
15280Sstevel@tonic-gate mblk_t *
rmvb(mblk_t * mp,mblk_t * bp)15290Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp)
15300Sstevel@tonic-gate {
15310Sstevel@tonic-gate mblk_t *tmp;
15320Sstevel@tonic-gate mblk_t *lastp = NULL;
15330Sstevel@tonic-gate
15340Sstevel@tonic-gate ASSERT(mp && bp);
15350Sstevel@tonic-gate for (tmp = mp; tmp; tmp = tmp->b_cont) {
15360Sstevel@tonic-gate if (tmp == bp) {
15370Sstevel@tonic-gate if (lastp)
15380Sstevel@tonic-gate lastp->b_cont = tmp->b_cont;
15390Sstevel@tonic-gate else
15400Sstevel@tonic-gate mp = tmp->b_cont;
15410Sstevel@tonic-gate tmp->b_cont = NULL;
15420Sstevel@tonic-gate return (mp);
15430Sstevel@tonic-gate }
15440Sstevel@tonic-gate lastp = tmp;
15450Sstevel@tonic-gate }
15460Sstevel@tonic-gate return ((mblk_t *)-1);
15470Sstevel@tonic-gate }
15480Sstevel@tonic-gate
15490Sstevel@tonic-gate /*
15500Sstevel@tonic-gate * Concatenate and align first len bytes of common
15510Sstevel@tonic-gate * message type. Len == -1, means concat everything.
15520Sstevel@tonic-gate * Returns 1 on success, 0 on failure
15530Sstevel@tonic-gate * After the pullup, mp points to the pulled up data.
15540Sstevel@tonic-gate */
15550Sstevel@tonic-gate int
pullupmsg(mblk_t * mp,ssize_t len)15560Sstevel@tonic-gate pullupmsg(mblk_t *mp, ssize_t len)
15570Sstevel@tonic-gate {
15580Sstevel@tonic-gate mblk_t *bp, *b_cont;
15590Sstevel@tonic-gate dblk_t *dbp;
15600Sstevel@tonic-gate ssize_t n;
15610Sstevel@tonic-gate
15620Sstevel@tonic-gate ASSERT(mp->b_datap->db_ref > 0);
15630Sstevel@tonic-gate ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
15640Sstevel@tonic-gate
15650Sstevel@tonic-gate /*
15660Sstevel@tonic-gate * We won't handle Multidata message, since it contains
15670Sstevel@tonic-gate * metadata which this function has no knowledge of; we
15680Sstevel@tonic-gate * assert on DEBUG, and return failure otherwise.
15690Sstevel@tonic-gate */
15700Sstevel@tonic-gate ASSERT(mp->b_datap->db_type != M_MULTIDATA);
15710Sstevel@tonic-gate if (mp->b_datap->db_type == M_MULTIDATA)
15720Sstevel@tonic-gate return (0);
15730Sstevel@tonic-gate
15740Sstevel@tonic-gate if (len == -1) {
15750Sstevel@tonic-gate if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
15760Sstevel@tonic-gate return (1);
15770Sstevel@tonic-gate len = xmsgsize(mp);
15780Sstevel@tonic-gate } else {
15790Sstevel@tonic-gate ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
15800Sstevel@tonic-gate ASSERT(first_mblk_len >= 0);
15810Sstevel@tonic-gate /*
15820Sstevel@tonic-gate * If the length is less than that of the first mblk,
15830Sstevel@tonic-gate * we want to pull up the message into an aligned mblk.
15840Sstevel@tonic-gate * Though not part of the spec, some callers assume it.
15850Sstevel@tonic-gate */
15860Sstevel@tonic-gate if (len <= first_mblk_len) {
15870Sstevel@tonic-gate if (str_aligned(mp->b_rptr))
15880Sstevel@tonic-gate return (1);
15890Sstevel@tonic-gate len = first_mblk_len;
15900Sstevel@tonic-gate } else if (xmsgsize(mp) < len)
15910Sstevel@tonic-gate return (0);
15920Sstevel@tonic-gate }
15930Sstevel@tonic-gate
15940Sstevel@tonic-gate if ((bp = allocb_tmpl(len, mp)) == NULL)
15950Sstevel@tonic-gate return (0);
15960Sstevel@tonic-gate
15970Sstevel@tonic-gate dbp = bp->b_datap;
15980Sstevel@tonic-gate *bp = *mp; /* swap mblks so bp heads the old msg... */
15990Sstevel@tonic-gate mp->b_datap = dbp; /* ... and mp heads the new message */
16000Sstevel@tonic-gate mp->b_datap->db_mblk = mp;
16010Sstevel@tonic-gate bp->b_datap->db_mblk = bp;
16020Sstevel@tonic-gate mp->b_rptr = mp->b_wptr = dbp->db_base;
16030Sstevel@tonic-gate
16040Sstevel@tonic-gate do {
16050Sstevel@tonic-gate ASSERT(bp->b_datap->db_ref > 0);
16060Sstevel@tonic-gate ASSERT(bp->b_wptr >= bp->b_rptr);
16070Sstevel@tonic-gate n = MIN(bp->b_wptr - bp->b_rptr, len);
1608*11042SErik.Nordmark@Sun.COM ASSERT(n >= 0); /* allow zero-length mblk_t's */
1609*11042SErik.Nordmark@Sun.COM if (n > 0)
1610*11042SErik.Nordmark@Sun.COM bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
16110Sstevel@tonic-gate mp->b_wptr += n;
16120Sstevel@tonic-gate bp->b_rptr += n;
16130Sstevel@tonic-gate len -= n;
16140Sstevel@tonic-gate if (bp->b_rptr != bp->b_wptr)
16150Sstevel@tonic-gate break;
16160Sstevel@tonic-gate b_cont = bp->b_cont;
16170Sstevel@tonic-gate freeb(bp);
16180Sstevel@tonic-gate bp = b_cont;
16190Sstevel@tonic-gate } while (len && bp);
16200Sstevel@tonic-gate
16210Sstevel@tonic-gate mp->b_cont = bp; /* tack on whatever wasn't pulled up */
16220Sstevel@tonic-gate
16230Sstevel@tonic-gate return (1);
16240Sstevel@tonic-gate }
16250Sstevel@tonic-gate
16260Sstevel@tonic-gate /*
16270Sstevel@tonic-gate * Concatenate and align at least the first len bytes of common message
16280Sstevel@tonic-gate * type. Len == -1 means concatenate everything. The original message is
16290Sstevel@tonic-gate * unaltered. Returns a pointer to a new message on success, otherwise
16300Sstevel@tonic-gate * returns NULL.
16310Sstevel@tonic-gate */
16320Sstevel@tonic-gate mblk_t *
msgpullup(mblk_t * mp,ssize_t len)16330Sstevel@tonic-gate msgpullup(mblk_t *mp, ssize_t len)
16340Sstevel@tonic-gate {
16350Sstevel@tonic-gate mblk_t *newmp;
16360Sstevel@tonic-gate ssize_t totlen;
16370Sstevel@tonic-gate ssize_t n;
16380Sstevel@tonic-gate
16390Sstevel@tonic-gate /*
16400Sstevel@tonic-gate * We won't handle Multidata message, since it contains
16410Sstevel@tonic-gate * metadata which this function has no knowledge of; we
16420Sstevel@tonic-gate * assert on DEBUG, and return failure otherwise.
16430Sstevel@tonic-gate */
16440Sstevel@tonic-gate ASSERT(mp->b_datap->db_type != M_MULTIDATA);
16450Sstevel@tonic-gate if (mp->b_datap->db_type == M_MULTIDATA)
16460Sstevel@tonic-gate return (NULL);
16470Sstevel@tonic-gate
16480Sstevel@tonic-gate totlen = xmsgsize(mp);
16490Sstevel@tonic-gate
16500Sstevel@tonic-gate if ((len > 0) && (len > totlen))
16510Sstevel@tonic-gate return (NULL);
16520Sstevel@tonic-gate
16530Sstevel@tonic-gate /*
16540Sstevel@tonic-gate * Copy all of the first msg type into one new mblk, then dupmsg
16550Sstevel@tonic-gate * and link the rest onto this.
16560Sstevel@tonic-gate */
16570Sstevel@tonic-gate
16580Sstevel@tonic-gate len = totlen;
16590Sstevel@tonic-gate
16600Sstevel@tonic-gate if ((newmp = allocb_tmpl(len, mp)) == NULL)
16610Sstevel@tonic-gate return (NULL);
16620Sstevel@tonic-gate
16630Sstevel@tonic-gate newmp->b_flag = mp->b_flag;
16640Sstevel@tonic-gate newmp->b_band = mp->b_band;
16650Sstevel@tonic-gate
16660Sstevel@tonic-gate while (len > 0) {
16670Sstevel@tonic-gate n = mp->b_wptr - mp->b_rptr;
16680Sstevel@tonic-gate ASSERT(n >= 0); /* allow zero-length mblk_t's */
16690Sstevel@tonic-gate if (n > 0)
16700Sstevel@tonic-gate bcopy(mp->b_rptr, newmp->b_wptr, n);
16710Sstevel@tonic-gate newmp->b_wptr += n;
16720Sstevel@tonic-gate len -= n;
16730Sstevel@tonic-gate mp = mp->b_cont;
16740Sstevel@tonic-gate }
16750Sstevel@tonic-gate
16760Sstevel@tonic-gate if (mp != NULL) {
16770Sstevel@tonic-gate newmp->b_cont = dupmsg(mp);
16780Sstevel@tonic-gate if (newmp->b_cont == NULL) {
16790Sstevel@tonic-gate freemsg(newmp);
16800Sstevel@tonic-gate return (NULL);
16810Sstevel@tonic-gate }
16820Sstevel@tonic-gate }
16830Sstevel@tonic-gate
16840Sstevel@tonic-gate return (newmp);
16850Sstevel@tonic-gate }
16860Sstevel@tonic-gate
16870Sstevel@tonic-gate /*
16880Sstevel@tonic-gate * Trim bytes from message
16890Sstevel@tonic-gate * len > 0, trim from head
16900Sstevel@tonic-gate * len < 0, trim from tail
16910Sstevel@tonic-gate * Returns 1 on success, 0 on failure.
16920Sstevel@tonic-gate */
16930Sstevel@tonic-gate int
adjmsg(mblk_t * mp,ssize_t len)16940Sstevel@tonic-gate adjmsg(mblk_t *mp, ssize_t len)
16950Sstevel@tonic-gate {
16960Sstevel@tonic-gate mblk_t *bp;
16970Sstevel@tonic-gate mblk_t *save_bp = NULL;
16980Sstevel@tonic-gate mblk_t *prev_bp;
16990Sstevel@tonic-gate mblk_t *bcont;
17000Sstevel@tonic-gate unsigned char type;
17010Sstevel@tonic-gate ssize_t n;
17020Sstevel@tonic-gate int fromhead;
17030Sstevel@tonic-gate int first;
17040Sstevel@tonic-gate
17050Sstevel@tonic-gate ASSERT(mp != NULL);
17060Sstevel@tonic-gate /*
17070Sstevel@tonic-gate * We won't handle Multidata message, since it contains
17080Sstevel@tonic-gate * metadata which this function has no knowledge of; we
17090Sstevel@tonic-gate * assert on DEBUG, and return failure otherwise.
17100Sstevel@tonic-gate */
17110Sstevel@tonic-gate ASSERT(mp->b_datap->db_type != M_MULTIDATA);
17120Sstevel@tonic-gate if (mp->b_datap->db_type == M_MULTIDATA)
17130Sstevel@tonic-gate return (0);
17140Sstevel@tonic-gate
17150Sstevel@tonic-gate if (len < 0) {
17160Sstevel@tonic-gate fromhead = 0;
17170Sstevel@tonic-gate len = -len;
17180Sstevel@tonic-gate } else {
17190Sstevel@tonic-gate fromhead = 1;
17200Sstevel@tonic-gate }
17210Sstevel@tonic-gate
17220Sstevel@tonic-gate if (xmsgsize(mp) < len)
17230Sstevel@tonic-gate return (0);
17240Sstevel@tonic-gate
17250Sstevel@tonic-gate if (fromhead) {
17260Sstevel@tonic-gate first = 1;
17270Sstevel@tonic-gate while (len) {
17280Sstevel@tonic-gate ASSERT(mp->b_wptr >= mp->b_rptr);
17290Sstevel@tonic-gate n = MIN(mp->b_wptr - mp->b_rptr, len);
17300Sstevel@tonic-gate mp->b_rptr += n;
17310Sstevel@tonic-gate len -= n;
17320Sstevel@tonic-gate
17330Sstevel@tonic-gate /*
17340Sstevel@tonic-gate * If this is not the first zero length
17350Sstevel@tonic-gate * message remove it
17360Sstevel@tonic-gate */
17370Sstevel@tonic-gate if (!first && (mp->b_wptr == mp->b_rptr)) {
17380Sstevel@tonic-gate bcont = mp->b_cont;
17390Sstevel@tonic-gate freeb(mp);
17400Sstevel@tonic-gate mp = save_bp->b_cont = bcont;
17410Sstevel@tonic-gate } else {
17420Sstevel@tonic-gate save_bp = mp;
17430Sstevel@tonic-gate mp = mp->b_cont;
17440Sstevel@tonic-gate }
17450Sstevel@tonic-gate first = 0;
17460Sstevel@tonic-gate }
17470Sstevel@tonic-gate } else {
17480Sstevel@tonic-gate type = mp->b_datap->db_type;
17490Sstevel@tonic-gate while (len) {
17500Sstevel@tonic-gate bp = mp;
17510Sstevel@tonic-gate save_bp = NULL;
17520Sstevel@tonic-gate
17530Sstevel@tonic-gate /*
17540Sstevel@tonic-gate * Find the last message of same type
17550Sstevel@tonic-gate */
17560Sstevel@tonic-gate while (bp && bp->b_datap->db_type == type) {
17570Sstevel@tonic-gate ASSERT(bp->b_wptr >= bp->b_rptr);
17580Sstevel@tonic-gate prev_bp = save_bp;
17590Sstevel@tonic-gate save_bp = bp;
17600Sstevel@tonic-gate bp = bp->b_cont;
17610Sstevel@tonic-gate }
17620Sstevel@tonic-gate if (save_bp == NULL)
17630Sstevel@tonic-gate break;
17640Sstevel@tonic-gate n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
17650Sstevel@tonic-gate save_bp->b_wptr -= n;
17660Sstevel@tonic-gate len -= n;
17670Sstevel@tonic-gate
17680Sstevel@tonic-gate /*
17690Sstevel@tonic-gate * If this is not the first message
17700Sstevel@tonic-gate * and we have taken away everything
17710Sstevel@tonic-gate * from this message, remove it
17720Sstevel@tonic-gate */
17730Sstevel@tonic-gate
17740Sstevel@tonic-gate if ((save_bp != mp) &&
17756707Sbrutus (save_bp->b_wptr == save_bp->b_rptr)) {
17760Sstevel@tonic-gate bcont = save_bp->b_cont;
17770Sstevel@tonic-gate freeb(save_bp);
17780Sstevel@tonic-gate prev_bp->b_cont = bcont;
17790Sstevel@tonic-gate }
17800Sstevel@tonic-gate }
17810Sstevel@tonic-gate }
17820Sstevel@tonic-gate return (1);
17830Sstevel@tonic-gate }
17840Sstevel@tonic-gate
17850Sstevel@tonic-gate /*
17860Sstevel@tonic-gate * get number of data bytes in message
17870Sstevel@tonic-gate */
17880Sstevel@tonic-gate size_t
msgdsize(mblk_t * bp)17890Sstevel@tonic-gate msgdsize(mblk_t *bp)
17900Sstevel@tonic-gate {
17910Sstevel@tonic-gate size_t count = 0;
17920Sstevel@tonic-gate
17930Sstevel@tonic-gate for (; bp; bp = bp->b_cont)
17940Sstevel@tonic-gate if (bp->b_datap->db_type == M_DATA) {
17950Sstevel@tonic-gate ASSERT(bp->b_wptr >= bp->b_rptr);
17960Sstevel@tonic-gate count += bp->b_wptr - bp->b_rptr;
17970Sstevel@tonic-gate }
17980Sstevel@tonic-gate return (count);
17990Sstevel@tonic-gate }
18000Sstevel@tonic-gate
18010Sstevel@tonic-gate /*
18020Sstevel@tonic-gate * Get a message off head of queue
18030Sstevel@tonic-gate *
18040Sstevel@tonic-gate * If queue has no buffers then mark queue
18050Sstevel@tonic-gate * with QWANTR. (queue wants to be read by
18060Sstevel@tonic-gate * someone when data becomes available)
18070Sstevel@tonic-gate *
18080Sstevel@tonic-gate * If there is something to take off then do so.
18090Sstevel@tonic-gate * If queue falls below hi water mark turn off QFULL
18100Sstevel@tonic-gate * flag. Decrement weighted count of queue.
18110Sstevel@tonic-gate * Also turn off QWANTR because queue is being read.
18120Sstevel@tonic-gate *
18130Sstevel@tonic-gate * The queue count is maintained on a per-band basis.
18140Sstevel@tonic-gate * Priority band 0 (normal messages) uses q_count,
18150Sstevel@tonic-gate * q_lowat, etc. Non-zero priority bands use the
18160Sstevel@tonic-gate * fields in their respective qband structures
18170Sstevel@tonic-gate * (qb_count, qb_lowat, etc.) All messages appear
18180Sstevel@tonic-gate * on the same list, linked via their b_next pointers.
18190Sstevel@tonic-gate * q_first is the head of the list. q_count does
18200Sstevel@tonic-gate * not reflect the size of all the messages on the
18210Sstevel@tonic-gate * queue. It only reflects those messages in the
18220Sstevel@tonic-gate * normal band of flow. The one exception to this
18230Sstevel@tonic-gate * deals with high priority messages. They are in
18240Sstevel@tonic-gate * their own conceptual "band", but are accounted
18250Sstevel@tonic-gate * against q_count.
18260Sstevel@tonic-gate *
18270Sstevel@tonic-gate * If queue count is below the lo water mark and QWANTW
18280Sstevel@tonic-gate * is set, enable the closest backq which has a service
18290Sstevel@tonic-gate * procedure and turn off the QWANTW flag.
18300Sstevel@tonic-gate *
18310Sstevel@tonic-gate * getq could be built on top of rmvq, but isn't because
18320Sstevel@tonic-gate * of performance considerations.
18330Sstevel@tonic-gate *
18340Sstevel@tonic-gate * A note on the use of q_count and q_mblkcnt:
18350Sstevel@tonic-gate * q_count is the traditional byte count for messages that
18360Sstevel@tonic-gate * have been put on a queue. Documentation tells us that
18370Sstevel@tonic-gate * we shouldn't rely on that count, but some drivers/modules
18380Sstevel@tonic-gate * do. What was needed, however, is a mechanism to prevent
18390Sstevel@tonic-gate * runaway streams from consuming all of the resources,
18400Sstevel@tonic-gate * and particularly be able to flow control zero-length
18410Sstevel@tonic-gate * messages. q_mblkcnt is used for this purpose. It
18420Sstevel@tonic-gate * counts the number of mblk's that are being put on
18430Sstevel@tonic-gate * the queue. The intention here, is that each mblk should
18440Sstevel@tonic-gate * contain one byte of data and, for the purpose of
18450Sstevel@tonic-gate * flow-control, logically does. A queue will become
18460Sstevel@tonic-gate * full when EITHER of these values (q_count and q_mblkcnt)
18470Sstevel@tonic-gate * reach the highwater mark. It will clear when BOTH
18480Sstevel@tonic-gate * of them drop below the highwater mark. And it will
18490Sstevel@tonic-gate * backenable when BOTH of them drop below the lowwater
18500Sstevel@tonic-gate * mark.
18510Sstevel@tonic-gate * With this algorithm, a driver/module might be able
18520Sstevel@tonic-gate * to find a reasonably accurate q_count, and the
18530Sstevel@tonic-gate * framework can still try and limit resource usage.
18540Sstevel@tonic-gate */
18550Sstevel@tonic-gate mblk_t *
getq(queue_t * q)18560Sstevel@tonic-gate getq(queue_t *q)
18570Sstevel@tonic-gate {
18580Sstevel@tonic-gate mblk_t *bp;
1859235Smicheng uchar_t band = 0;
18600Sstevel@tonic-gate
18616769Sja97890 bp = getq_noenab(q, 0);
18620Sstevel@tonic-gate if (bp != NULL)
18630Sstevel@tonic-gate band = bp->b_band;
18640Sstevel@tonic-gate
18650Sstevel@tonic-gate /*
18660Sstevel@tonic-gate * Inlined from qbackenable().
18670Sstevel@tonic-gate * Quick check without holding the lock.
18680Sstevel@tonic-gate */
18690Sstevel@tonic-gate if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
18700Sstevel@tonic-gate return (bp);
18710Sstevel@tonic-gate
18720Sstevel@tonic-gate qbackenable(q, band);
18730Sstevel@tonic-gate return (bp);
18740Sstevel@tonic-gate }
18750Sstevel@tonic-gate
/*
 * Calculate number of data bytes in a single data message block taking
 * multidata messages into account, and add it to 'size'.
 *
 *	mp	message block to measure.
 *	size	lvalue accumulator; incremented by MBLKL(mp) for ordinary
 *		blocks, or by the 'pinuse' value reported by mmd_getsize()
 *		for M_MULTIDATA blocks.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can be used safely as the body of an if/else
 * without braces.  Invoke it with a trailing semicolon.
 */

#define	ADD_MBLK_SIZE(mp, size)						\
	do {								\
		if (DB_TYPE(mp) != M_MULTIDATA) {			\
			(size) += MBLKL(mp);				\
		} else {						\
			uint_t	pinuse;					\
									\
			mmd_getsize(mmd_getmultidata(mp), NULL, &pinuse); \
			(size) += pinuse;				\
		}							\
	} while (0)
1890741Smasputra
1891741Smasputra /*
18926769Sja97890 * Returns the number of bytes in a message (a message is defined as a
18936769Sja97890 * chain of mblks linked by b_cont). If a non-NULL mblkcnt is supplied we
18946769Sja97890 * also return the number of distinct mblks in the message.
18956769Sja97890 */
18966769Sja97890 int
mp_cont_len(mblk_t * bp,int * mblkcnt)18976769Sja97890 mp_cont_len(mblk_t *bp, int *mblkcnt)
18986769Sja97890 {
18996769Sja97890 mblk_t *mp;
19006769Sja97890 int mblks = 0;
19016769Sja97890 int bytes = 0;
19026769Sja97890
19036769Sja97890 for (mp = bp; mp != NULL; mp = mp->b_cont) {
19046769Sja97890 ADD_MBLK_SIZE(mp, bytes);
19056769Sja97890 mblks++;
19066769Sja97890 }
19076769Sja97890
19086769Sja97890 if (mblkcnt != NULL)
19096769Sja97890 *mblkcnt = mblks;
19106769Sja97890
19116769Sja97890 return (bytes);
19126769Sja97890 }
19136769Sja97890
/*
 * Like getq() but does not backenable.  This is used by the stream
 * head when a putback() is likely.  The caller must call qbackenable()
 * after it is done with accessing the queue.
 * The rbytes argument to getq_noenab() allows callers to specify the
 * maximum number of bytes to return.  If the current amount on the
 * queue is less than this then the entire message will be returned.
 * A value of 0 returns the entire message and is equivalent to the old
 * default behaviour prior to the addition of the rbytes argument.
 */
19240Sstevel@tonic-gate mblk_t *
getq_noenab(queue_t * q,ssize_t rbytes)19256769Sja97890 getq_noenab(queue_t *q, ssize_t rbytes)
19260Sstevel@tonic-gate {
19276769Sja97890 mblk_t *bp, *mp1;
19286769Sja97890 mblk_t *mp2 = NULL;
19290Sstevel@tonic-gate qband_t *qbp;
19300Sstevel@tonic-gate kthread_id_t freezer;
19310Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0;
19320Sstevel@tonic-gate
19330Sstevel@tonic-gate /* freezestr should allow its caller to call getq/putq */
19340Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer;
19350Sstevel@tonic-gate if (freezer == curthread) {
19360Sstevel@tonic-gate ASSERT(frozenstr(q));
19370Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
19380Sstevel@tonic-gate } else
19390Sstevel@tonic-gate mutex_enter(QLOCK(q));
19400Sstevel@tonic-gate
19410Sstevel@tonic-gate if ((bp = q->q_first) == 0) {
19420Sstevel@tonic-gate q->q_flag |= QWANTR;
19430Sstevel@tonic-gate } else {
19446769Sja97890 /*
19456769Sja97890 * If the caller supplied a byte threshold and there is
19466769Sja97890 * more than this amount on the queue then break up the
19476769Sja97890 * the message appropriately. We can only safely do
19486769Sja97890 * this for M_DATA messages.
19496769Sja97890 */
19506769Sja97890 if ((DB_TYPE(bp) == M_DATA) && (rbytes > 0) &&
19516769Sja97890 (q->q_count > rbytes)) {
19526769Sja97890 /*
19536769Sja97890 * Inline version of mp_cont_len() which terminates
19546769Sja97890 * when we meet or exceed rbytes.
19556769Sja97890 */
19566769Sja97890 for (mp1 = bp; mp1 != NULL; mp1 = mp1->b_cont) {
19576769Sja97890 mblkcnt++;
19586769Sja97890 ADD_MBLK_SIZE(mp1, bytecnt);
19596769Sja97890 if (bytecnt >= rbytes)
19606769Sja97890 break;
19616769Sja97890 }
19626769Sja97890 /*
19636769Sja97890 * We need to account for the following scenarios:
19646769Sja97890 *
19656769Sja97890 * 1) Too much data in the first message:
19666769Sja97890 * mp1 will be the mblk which puts us over our
19676769Sja97890 * byte limit.
19686769Sja97890 * 2) Not enough data in the first message:
19696769Sja97890 * mp1 will be NULL.
19706769Sja97890 * 3) Exactly the right amount of data contained within
19716769Sja97890 * whole mblks:
19726769Sja97890 * mp1->b_cont will be where we break the message.
19736769Sja97890 */
19746769Sja97890 if (bytecnt > rbytes) {
19756769Sja97890 /*
19766769Sja97890 * Dup/copy mp1 and put what we don't need
19776769Sja97890 * back onto the queue. Adjust the read/write
19786769Sja97890 * and continuation pointers appropriately
19796769Sja97890 * and decrement the current mblk count to
19806769Sja97890 * reflect we are putting an mblk back onto
19816769Sja97890 * the queue.
19826769Sja97890 * When adjusting the message pointers, it's
19836769Sja97890 * OK to use the existing bytecnt and the
				 * requested amount (rbytes) to calculate
				 * the new write offset (b_wptr) of what we
				 * are taking. However, we cannot use these
				 * values when calculating the read offset of
				 * the mblk we are putting back on the queue.
				 * This is because the beginning (b_rptr) of the
19906769Sja97890 * mblk represents some arbitrary point within
19916769Sja97890 * the message.
19926769Sja97890 * It's simplest to do this by advancing b_rptr
19936769Sja97890 * by the new length of mp1 as we don't have to
19946769Sja97890 * remember any intermediate state.
19956769Sja97890 */
19966769Sja97890 ASSERT(mp1 != NULL);
19976769Sja97890 mblkcnt--;
19986769Sja97890 if ((mp2 = dupb(mp1)) == NULL &&
19996769Sja97890 (mp2 = copyb(mp1)) == NULL) {
20006769Sja97890 bytecnt = mblkcnt = 0;
20016769Sja97890 goto dup_failed;
20026769Sja97890 }
20036769Sja97890 mp2->b_cont = mp1->b_cont;
20046769Sja97890 mp1->b_wptr -= bytecnt - rbytes;
20056769Sja97890 mp2->b_rptr += mp1->b_wptr - mp1->b_rptr;
20066769Sja97890 mp1->b_cont = NULL;
20076769Sja97890 bytecnt = rbytes;
20086769Sja97890 } else {
20096769Sja97890 /*
20106769Sja97890 * Either there is not enough data in the first
20116769Sja97890 * message or there is no excess data to deal
20126769Sja97890 * with. If mp1 is NULL, we are taking the
20136769Sja97890 * whole message. No need to do anything.
20146769Sja97890 * Otherwise we assign mp1->b_cont to mp2 as
20156769Sja97890 * we will be putting this back onto the head of
20166769Sja97890 * the queue.
20176769Sja97890 */
20186769Sja97890 if (mp1 != NULL) {
20196769Sja97890 mp2 = mp1->b_cont;
20206769Sja97890 mp1->b_cont = NULL;
20216769Sja97890 }
20226769Sja97890 }
20236769Sja97890 /*
20246769Sja97890 * If mp2 is not NULL then we have part of the message
20256769Sja97890 * to put back onto the queue.
20266769Sja97890 */
20276769Sja97890 if (mp2 != NULL) {
20286769Sja97890 if ((mp2->b_next = bp->b_next) == NULL)
20296769Sja97890 q->q_last = mp2;
20306769Sja97890 else
20316769Sja97890 bp->b_next->b_prev = mp2;
20326769Sja97890 q->q_first = mp2;
20336769Sja97890 } else {
20346769Sja97890 if ((q->q_first = bp->b_next) == NULL)
20356769Sja97890 q->q_last = NULL;
20366769Sja97890 else
20376769Sja97890 q->q_first->b_prev = NULL;
20386769Sja97890 }
20396769Sja97890 } else {
20406769Sja97890 /*
20416769Sja97890 * Either no byte threshold was supplied, there is
20426769Sja97890 * not enough on the queue or we failed to
20436769Sja97890 * duplicate/copy a data block. In these cases we
20446769Sja97890 * just take the entire first message.
20456769Sja97890 */
20466769Sja97890 dup_failed:
20476769Sja97890 bytecnt = mp_cont_len(bp, &mblkcnt);
20486769Sja97890 if ((q->q_first = bp->b_next) == NULL)
20496769Sja97890 q->q_last = NULL;
20506769Sja97890 else
20516769Sja97890 q->q_first->b_prev = NULL;
20520Sstevel@tonic-gate }
20530Sstevel@tonic-gate if (bp->b_band == 0) {
20540Sstevel@tonic-gate q->q_count -= bytecnt;
20550Sstevel@tonic-gate q->q_mblkcnt -= mblkcnt;
20565360Srk129064 if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
20575360Srk129064 (q->q_mblkcnt < q->q_hiwat))) {
20580Sstevel@tonic-gate q->q_flag &= ~QFULL;
20590Sstevel@tonic-gate }
20600Sstevel@tonic-gate } else {
20610Sstevel@tonic-gate int i;
20620Sstevel@tonic-gate
20630Sstevel@tonic-gate ASSERT(bp->b_band <= q->q_nband);
20640Sstevel@tonic-gate ASSERT(q->q_bandp != NULL);
20650Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
20660Sstevel@tonic-gate qbp = q->q_bandp;
20670Sstevel@tonic-gate i = bp->b_band;
20680Sstevel@tonic-gate while (--i > 0)
20690Sstevel@tonic-gate qbp = qbp->qb_next;
20700Sstevel@tonic-gate if (qbp->qb_first == qbp->qb_last) {
20710Sstevel@tonic-gate qbp->qb_first = NULL;
20720Sstevel@tonic-gate qbp->qb_last = NULL;
20730Sstevel@tonic-gate } else {
20740Sstevel@tonic-gate qbp->qb_first = bp->b_next;
20750Sstevel@tonic-gate }
20760Sstevel@tonic-gate qbp->qb_count -= bytecnt;
20770Sstevel@tonic-gate qbp->qb_mblkcnt -= mblkcnt;
20785360Srk129064 if (qbp->qb_mblkcnt == 0 ||
20795360Srk129064 ((qbp->qb_count < qbp->qb_hiwat) &&
20805360Srk129064 (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
20810Sstevel@tonic-gate qbp->qb_flag &= ~QB_FULL;
20820Sstevel@tonic-gate }
20830Sstevel@tonic-gate }
20840Sstevel@tonic-gate q->q_flag &= ~QWANTR;
20850Sstevel@tonic-gate bp->b_next = NULL;
20860Sstevel@tonic-gate bp->b_prev = NULL;
20870Sstevel@tonic-gate }
20880Sstevel@tonic-gate if (freezer != curthread)
20890Sstevel@tonic-gate mutex_exit(QLOCK(q));
20900Sstevel@tonic-gate
20910Sstevel@tonic-gate STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);
20920Sstevel@tonic-gate
20930Sstevel@tonic-gate return (bp);
20940Sstevel@tonic-gate }
20950Sstevel@tonic-gate
20960Sstevel@tonic-gate /*
20970Sstevel@tonic-gate * Determine if a backenable is needed after removing a message in the
20980Sstevel@tonic-gate * specified band.
20990Sstevel@tonic-gate * NOTE: This routine assumes that something like getq_noenab() has been
21000Sstevel@tonic-gate * already called.
21010Sstevel@tonic-gate *
21020Sstevel@tonic-gate * For the read side it is ok to hold sd_lock across calling this (and the
21030Sstevel@tonic-gate * stream head often does).
21040Sstevel@tonic-gate * But for the write side strwakeq might be invoked and it acquires sd_lock.
21050Sstevel@tonic-gate */
21060Sstevel@tonic-gate void
qbackenable(queue_t * q,uchar_t band)2107235Smicheng qbackenable(queue_t *q, uchar_t band)
21080Sstevel@tonic-gate {
21090Sstevel@tonic-gate int backenab = 0;
21100Sstevel@tonic-gate qband_t *qbp;
21110Sstevel@tonic-gate kthread_id_t freezer;
21120Sstevel@tonic-gate
21130Sstevel@tonic-gate ASSERT(q);
21140Sstevel@tonic-gate ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));
21150Sstevel@tonic-gate
21160Sstevel@tonic-gate /*
21170Sstevel@tonic-gate * Quick check without holding the lock.
21180Sstevel@tonic-gate * OK since after getq() has lowered the q_count these flags
21190Sstevel@tonic-gate * would not change unless either the qbackenable() is done by
21200Sstevel@tonic-gate * another thread (which is ok) or the queue has gotten QFULL
21210Sstevel@tonic-gate * in which case another backenable will take place when the queue
21220Sstevel@tonic-gate * drops below q_lowat.
21230Sstevel@tonic-gate */
21240Sstevel@tonic-gate if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
21250Sstevel@tonic-gate return;
21260Sstevel@tonic-gate
21270Sstevel@tonic-gate /* freezestr should allow its caller to call getq/putq */
21280Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer;
21290Sstevel@tonic-gate if (freezer == curthread) {
21300Sstevel@tonic-gate ASSERT(frozenstr(q));
21310Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
21320Sstevel@tonic-gate } else
21330Sstevel@tonic-gate mutex_enter(QLOCK(q));
21340Sstevel@tonic-gate
21350Sstevel@tonic-gate if (band == 0) {
21360Sstevel@tonic-gate if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
21370Sstevel@tonic-gate q->q_mblkcnt < q->q_lowat)) {
21380Sstevel@tonic-gate backenab = q->q_flag & (QWANTW|QWANTWSYNC);
21390Sstevel@tonic-gate }
21400Sstevel@tonic-gate } else {
21410Sstevel@tonic-gate int i;
21420Sstevel@tonic-gate
21430Sstevel@tonic-gate ASSERT((unsigned)band <= q->q_nband);
21440Sstevel@tonic-gate ASSERT(q->q_bandp != NULL);
21450Sstevel@tonic-gate
21460Sstevel@tonic-gate qbp = q->q_bandp;
21470Sstevel@tonic-gate i = band;
21480Sstevel@tonic-gate while (--i > 0)
21490Sstevel@tonic-gate qbp = qbp->qb_next;
21500Sstevel@tonic-gate
21510Sstevel@tonic-gate if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
21520Sstevel@tonic-gate qbp->qb_mblkcnt < qbp->qb_lowat)) {
21530Sstevel@tonic-gate backenab = qbp->qb_flag & QB_WANTW;
21540Sstevel@tonic-gate }
21550Sstevel@tonic-gate }
21560Sstevel@tonic-gate
21570Sstevel@tonic-gate if (backenab == 0) {
21580Sstevel@tonic-gate if (freezer != curthread)
21590Sstevel@tonic-gate mutex_exit(QLOCK(q));
21600Sstevel@tonic-gate return;
21610Sstevel@tonic-gate }
21620Sstevel@tonic-gate
21630Sstevel@tonic-gate /* Have to drop the lock across strwakeq and backenable */
21640Sstevel@tonic-gate if (backenab & QWANTWSYNC)
21650Sstevel@tonic-gate q->q_flag &= ~QWANTWSYNC;
21660Sstevel@tonic-gate if (backenab & (QWANTW|QB_WANTW)) {
21670Sstevel@tonic-gate if (band != 0)
21680Sstevel@tonic-gate qbp->qb_flag &= ~QB_WANTW;
21690Sstevel@tonic-gate else {
21700Sstevel@tonic-gate q->q_flag &= ~QWANTW;
21710Sstevel@tonic-gate }
21720Sstevel@tonic-gate }
21730Sstevel@tonic-gate
21740Sstevel@tonic-gate if (freezer != curthread)
21750Sstevel@tonic-gate mutex_exit(QLOCK(q));
21760Sstevel@tonic-gate
21770Sstevel@tonic-gate if (backenab & QWANTWSYNC)
21780Sstevel@tonic-gate strwakeq(q, QWANTWSYNC);
21790Sstevel@tonic-gate if (backenab & (QWANTW|QB_WANTW))
21800Sstevel@tonic-gate backenable(q, band);
21810Sstevel@tonic-gate }
21820Sstevel@tonic-gate
21830Sstevel@tonic-gate /*
21840Sstevel@tonic-gate * Remove a message from a queue. The queue count and other
21850Sstevel@tonic-gate * flow control parameters are adjusted and the back queue
21860Sstevel@tonic-gate * enabled if necessary.
21870Sstevel@tonic-gate *
21880Sstevel@tonic-gate * rmvq can be called with the stream frozen, but other utility functions
21890Sstevel@tonic-gate * holding QLOCK, and by streams modules without any locks/frozen.
21900Sstevel@tonic-gate */
21910Sstevel@tonic-gate void
rmvq(queue_t * q,mblk_t * mp)21920Sstevel@tonic-gate rmvq(queue_t *q, mblk_t *mp)
21930Sstevel@tonic-gate {
21940Sstevel@tonic-gate ASSERT(mp != NULL);
21950Sstevel@tonic-gate
21960Sstevel@tonic-gate rmvq_noenab(q, mp);
21970Sstevel@tonic-gate if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
21980Sstevel@tonic-gate /*
21990Sstevel@tonic-gate * qbackenable can handle a frozen stream but not a "random"
22000Sstevel@tonic-gate * qlock being held. Drop lock across qbackenable.
22010Sstevel@tonic-gate */
22020Sstevel@tonic-gate mutex_exit(QLOCK(q));
22030Sstevel@tonic-gate qbackenable(q, mp->b_band);
22040Sstevel@tonic-gate mutex_enter(QLOCK(q));
22050Sstevel@tonic-gate } else {
22060Sstevel@tonic-gate qbackenable(q, mp->b_band);
22070Sstevel@tonic-gate }
22080Sstevel@tonic-gate }
22090Sstevel@tonic-gate
22100Sstevel@tonic-gate /*
22110Sstevel@tonic-gate * Like rmvq() but without any backenabling.
22120Sstevel@tonic-gate * This exists to handle SR_CONSOL_DATA in strrput().
22130Sstevel@tonic-gate */
22140Sstevel@tonic-gate void
rmvq_noenab(queue_t * q,mblk_t * mp)22150Sstevel@tonic-gate rmvq_noenab(queue_t *q, mblk_t *mp)
22160Sstevel@tonic-gate {
22170Sstevel@tonic-gate int i;
22180Sstevel@tonic-gate qband_t *qbp = NULL;
22190Sstevel@tonic-gate kthread_id_t freezer;
22200Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0;
22210Sstevel@tonic-gate
22220Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer;
22230Sstevel@tonic-gate if (freezer == curthread) {
22240Sstevel@tonic-gate ASSERT(frozenstr(q));
22250Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
22260Sstevel@tonic-gate } else if (MUTEX_HELD(QLOCK(q))) {
22270Sstevel@tonic-gate /* Don't drop lock on exit */
22280Sstevel@tonic-gate freezer = curthread;
22290Sstevel@tonic-gate } else
22300Sstevel@tonic-gate mutex_enter(QLOCK(q));
22310Sstevel@tonic-gate
22320Sstevel@tonic-gate ASSERT(mp->b_band <= q->q_nband);
22330Sstevel@tonic-gate if (mp->b_band != 0) { /* Adjust band pointers */
22340Sstevel@tonic-gate ASSERT(q->q_bandp != NULL);
22350Sstevel@tonic-gate qbp = q->q_bandp;
22360Sstevel@tonic-gate i = mp->b_band;
22370Sstevel@tonic-gate while (--i > 0)
22380Sstevel@tonic-gate qbp = qbp->qb_next;
22390Sstevel@tonic-gate if (mp == qbp->qb_first) {
22400Sstevel@tonic-gate if (mp->b_next && mp->b_band == mp->b_next->b_band)
22410Sstevel@tonic-gate qbp->qb_first = mp->b_next;
22420Sstevel@tonic-gate else
22430Sstevel@tonic-gate qbp->qb_first = NULL;
22440Sstevel@tonic-gate }
22450Sstevel@tonic-gate if (mp == qbp->qb_last) {
22460Sstevel@tonic-gate if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
22470Sstevel@tonic-gate qbp->qb_last = mp->b_prev;
22480Sstevel@tonic-gate else
22490Sstevel@tonic-gate qbp->qb_last = NULL;
22500Sstevel@tonic-gate }
22510Sstevel@tonic-gate }
22520Sstevel@tonic-gate
22530Sstevel@tonic-gate /*
22540Sstevel@tonic-gate * Remove the message from the list.
22550Sstevel@tonic-gate */
22560Sstevel@tonic-gate if (mp->b_prev)
22570Sstevel@tonic-gate mp->b_prev->b_next = mp->b_next;
22580Sstevel@tonic-gate else
22590Sstevel@tonic-gate q->q_first = mp->b_next;
22600Sstevel@tonic-gate if (mp->b_next)
22610Sstevel@tonic-gate mp->b_next->b_prev = mp->b_prev;
22620Sstevel@tonic-gate else
22630Sstevel@tonic-gate q->q_last = mp->b_prev;
22640Sstevel@tonic-gate mp->b_next = NULL;
22650Sstevel@tonic-gate mp->b_prev = NULL;
22660Sstevel@tonic-gate
22670Sstevel@tonic-gate /* Get the size of the message for q_count accounting */
22686769Sja97890 bytecnt = mp_cont_len(mp, &mblkcnt);
22690Sstevel@tonic-gate
22700Sstevel@tonic-gate if (mp->b_band == 0) { /* Perform q_count accounting */
22710Sstevel@tonic-gate q->q_count -= bytecnt;
22720Sstevel@tonic-gate q->q_mblkcnt -= mblkcnt;
22735360Srk129064 if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
22745360Srk129064 (q->q_mblkcnt < q->q_hiwat))) {
22750Sstevel@tonic-gate q->q_flag &= ~QFULL;
22760Sstevel@tonic-gate }
22770Sstevel@tonic-gate } else { /* Perform qb_count accounting */
22780Sstevel@tonic-gate qbp->qb_count -= bytecnt;
22790Sstevel@tonic-gate qbp->qb_mblkcnt -= mblkcnt;
22805360Srk129064 if (qbp->qb_mblkcnt == 0 || ((qbp->qb_count < qbp->qb_hiwat) &&
22815360Srk129064 (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
22820Sstevel@tonic-gate qbp->qb_flag &= ~QB_FULL;
22830Sstevel@tonic-gate }
22840Sstevel@tonic-gate }
22850Sstevel@tonic-gate if (freezer != curthread)
22860Sstevel@tonic-gate mutex_exit(QLOCK(q));
22870Sstevel@tonic-gate
22880Sstevel@tonic-gate STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
22890Sstevel@tonic-gate }
22900Sstevel@tonic-gate
22910Sstevel@tonic-gate /*
22920Sstevel@tonic-gate * Empty a queue.
22930Sstevel@tonic-gate * If flag is set, remove all messages. Otherwise, remove
22940Sstevel@tonic-gate * only non-control messages. If queue falls below its low
22950Sstevel@tonic-gate * water mark, and QWANTW is set, enable the nearest upstream
22960Sstevel@tonic-gate * service procedure.
22970Sstevel@tonic-gate *
22980Sstevel@tonic-gate * Historical note: when merging the M_FLUSH code in strrput with this
22990Sstevel@tonic-gate * code one difference was discovered. flushq did not have a check
23000Sstevel@tonic-gate * for q_lowat == 0 in the backenabling test.
23010Sstevel@tonic-gate *
23020Sstevel@tonic-gate * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
23030Sstevel@tonic-gate * if one exists on the queue.
23040Sstevel@tonic-gate */
23050Sstevel@tonic-gate void
flushq_common(queue_t * q,int flag,int pcproto_flag)23060Sstevel@tonic-gate flushq_common(queue_t *q, int flag, int pcproto_flag)
23070Sstevel@tonic-gate {
23080Sstevel@tonic-gate mblk_t *mp, *nmp;
23090Sstevel@tonic-gate qband_t *qbp;
23100Sstevel@tonic-gate int backenab = 0;
23110Sstevel@tonic-gate unsigned char bpri;
23120Sstevel@tonic-gate unsigned char qbf[NBAND]; /* band flushing backenable flags */
23130Sstevel@tonic-gate
23140Sstevel@tonic-gate if (q->q_first == NULL)
23150Sstevel@tonic-gate return;
23160Sstevel@tonic-gate
23170Sstevel@tonic-gate mutex_enter(QLOCK(q));
23180Sstevel@tonic-gate mp = q->q_first;
23190Sstevel@tonic-gate q->q_first = NULL;
23200Sstevel@tonic-gate q->q_last = NULL;
23210Sstevel@tonic-gate q->q_count = 0;
23220Sstevel@tonic-gate q->q_mblkcnt = 0;
23230Sstevel@tonic-gate for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
23240Sstevel@tonic-gate qbp->qb_first = NULL;
23250Sstevel@tonic-gate qbp->qb_last = NULL;
23260Sstevel@tonic-gate qbp->qb_count = 0;
23270Sstevel@tonic-gate qbp->qb_mblkcnt = 0;
23280Sstevel@tonic-gate qbp->qb_flag &= ~QB_FULL;
23290Sstevel@tonic-gate }
23300Sstevel@tonic-gate q->q_flag &= ~QFULL;
23310Sstevel@tonic-gate mutex_exit(QLOCK(q));
23320Sstevel@tonic-gate while (mp) {
23330Sstevel@tonic-gate nmp = mp->b_next;
23340Sstevel@tonic-gate mp->b_next = mp->b_prev = NULL;
23350Sstevel@tonic-gate
23360Sstevel@tonic-gate STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);
23370Sstevel@tonic-gate
23380Sstevel@tonic-gate if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
23390Sstevel@tonic-gate (void) putq(q, mp);
23400Sstevel@tonic-gate else if (flag || datamsg(mp->b_datap->db_type))
23410Sstevel@tonic-gate freemsg(mp);
23420Sstevel@tonic-gate else
23430Sstevel@tonic-gate (void) putq(q, mp);
23440Sstevel@tonic-gate mp = nmp;
23450Sstevel@tonic-gate }
23460Sstevel@tonic-gate bpri = 1;
23470Sstevel@tonic-gate mutex_enter(QLOCK(q));
23480Sstevel@tonic-gate for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
23490Sstevel@tonic-gate if ((qbp->qb_flag & QB_WANTW) &&
23500Sstevel@tonic-gate (((qbp->qb_count < qbp->qb_lowat) &&
23510Sstevel@tonic-gate (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
23520Sstevel@tonic-gate qbp->qb_lowat == 0)) {
23530Sstevel@tonic-gate qbp->qb_flag &= ~QB_WANTW;
23540Sstevel@tonic-gate backenab = 1;
23550Sstevel@tonic-gate qbf[bpri] = 1;
23560Sstevel@tonic-gate } else
23570Sstevel@tonic-gate qbf[bpri] = 0;
23580Sstevel@tonic-gate bpri++;
23590Sstevel@tonic-gate }
23600Sstevel@tonic-gate ASSERT(bpri == (unsigned char)(q->q_nband + 1));
23610Sstevel@tonic-gate if ((q->q_flag & QWANTW) &&
23620Sstevel@tonic-gate (((q->q_count < q->q_lowat) &&
23630Sstevel@tonic-gate (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
23640Sstevel@tonic-gate q->q_flag &= ~QWANTW;
23650Sstevel@tonic-gate backenab = 1;
23660Sstevel@tonic-gate qbf[0] = 1;
23670Sstevel@tonic-gate } else
23680Sstevel@tonic-gate qbf[0] = 0;
23690Sstevel@tonic-gate
23700Sstevel@tonic-gate /*
23710Sstevel@tonic-gate * If any band can now be written to, and there is a writer
23720Sstevel@tonic-gate * for that band, then backenable the closest service procedure.
23730Sstevel@tonic-gate */
23740Sstevel@tonic-gate if (backenab) {
23750Sstevel@tonic-gate mutex_exit(QLOCK(q));
23760Sstevel@tonic-gate for (bpri = q->q_nband; bpri != 0; bpri--)
23770Sstevel@tonic-gate if (qbf[bpri])
2378235Smicheng backenable(q, bpri);
23790Sstevel@tonic-gate if (qbf[0])
23800Sstevel@tonic-gate backenable(q, 0);
23810Sstevel@tonic-gate } else
23820Sstevel@tonic-gate mutex_exit(QLOCK(q));
23830Sstevel@tonic-gate }
23840Sstevel@tonic-gate
23850Sstevel@tonic-gate /*
23860Sstevel@tonic-gate * The real flushing takes place in flushq_common. This is done so that
23870Sstevel@tonic-gate * a flag which specifies whether or not M_PCPROTO messages should be flushed
23880Sstevel@tonic-gate * or not. Currently the only place that uses this flag is the stream head.
23890Sstevel@tonic-gate */
23900Sstevel@tonic-gate void
flushq(queue_t * q,int flag)23910Sstevel@tonic-gate flushq(queue_t *q, int flag)
23920Sstevel@tonic-gate {
23930Sstevel@tonic-gate flushq_common(q, flag, 0);
23940Sstevel@tonic-gate }
23950Sstevel@tonic-gate
23960Sstevel@tonic-gate /*
23970Sstevel@tonic-gate * Flush the queue of messages of the given priority band.
23980Sstevel@tonic-gate * There is some duplication of code between flushq and flushband.
23990Sstevel@tonic-gate * This is because we want to optimize the code as much as possible.
24000Sstevel@tonic-gate * The assumption is that there will be more messages in the normal
24010Sstevel@tonic-gate * (priority 0) band than in any other.
24020Sstevel@tonic-gate *
24030Sstevel@tonic-gate * Historical note: when merging the M_FLUSH code in strrput with this
24040Sstevel@tonic-gate * code one difference was discovered. flushband had an extra check for
24050Sstevel@tonic-gate * did not have a check for (mp->b_datap->db_type < QPCTL) in the band 0
24060Sstevel@tonic-gate * case. That check does not match the man page for flushband and was not
24070Sstevel@tonic-gate * in the strrput flush code hence it was removed.
24080Sstevel@tonic-gate */
24090Sstevel@tonic-gate void
flushband(queue_t * q,unsigned char pri,int flag)24100Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag)
24110Sstevel@tonic-gate {
24120Sstevel@tonic-gate mblk_t *mp;
24130Sstevel@tonic-gate mblk_t *nmp;
24140Sstevel@tonic-gate mblk_t *last;
24150Sstevel@tonic-gate qband_t *qbp;
24160Sstevel@tonic-gate int band;
24170Sstevel@tonic-gate
24180Sstevel@tonic-gate ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
24190Sstevel@tonic-gate if (pri > q->q_nband) {
24200Sstevel@tonic-gate return;
24210Sstevel@tonic-gate }
24220Sstevel@tonic-gate mutex_enter(QLOCK(q));
24230Sstevel@tonic-gate if (pri == 0) {
24240Sstevel@tonic-gate mp = q->q_first;
24250Sstevel@tonic-gate q->q_first = NULL;
24260Sstevel@tonic-gate q->q_last = NULL;
24270Sstevel@tonic-gate q->q_count = 0;
24280Sstevel@tonic-gate q->q_mblkcnt = 0;
24290Sstevel@tonic-gate for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
24300Sstevel@tonic-gate qbp->qb_first = NULL;
24310Sstevel@tonic-gate qbp->qb_last = NULL;
24320Sstevel@tonic-gate qbp->qb_count = 0;
24330Sstevel@tonic-gate qbp->qb_mblkcnt = 0;
24340Sstevel@tonic-gate qbp->qb_flag &= ~QB_FULL;
24350Sstevel@tonic-gate }
24360Sstevel@tonic-gate q->q_flag &= ~QFULL;
24370Sstevel@tonic-gate mutex_exit(QLOCK(q));
24380Sstevel@tonic-gate while (mp) {
24390Sstevel@tonic-gate nmp = mp->b_next;
24400Sstevel@tonic-gate mp->b_next = mp->b_prev = NULL;
24410Sstevel@tonic-gate if ((mp->b_band == 0) &&
24426707Sbrutus ((flag == FLUSHALL) ||
24436707Sbrutus datamsg(mp->b_datap->db_type)))
24440Sstevel@tonic-gate freemsg(mp);
24450Sstevel@tonic-gate else
24460Sstevel@tonic-gate (void) putq(q, mp);
24470Sstevel@tonic-gate mp = nmp;
24480Sstevel@tonic-gate }
24490Sstevel@tonic-gate mutex_enter(QLOCK(q));
24500Sstevel@tonic-gate if ((q->q_flag & QWANTW) &&
24510Sstevel@tonic-gate (((q->q_count < q->q_lowat) &&
24520Sstevel@tonic-gate (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
24530Sstevel@tonic-gate q->q_flag &= ~QWANTW;
24540Sstevel@tonic-gate mutex_exit(QLOCK(q));
24550Sstevel@tonic-gate
2456235Smicheng backenable(q, pri);
24570Sstevel@tonic-gate } else
24580Sstevel@tonic-gate mutex_exit(QLOCK(q));
24590Sstevel@tonic-gate } else { /* pri != 0 */
24600Sstevel@tonic-gate boolean_t flushed = B_FALSE;
24610Sstevel@tonic-gate band = pri;
24620Sstevel@tonic-gate
24630Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
24640Sstevel@tonic-gate qbp = q->q_bandp;
24650Sstevel@tonic-gate while (--band > 0)
24660Sstevel@tonic-gate qbp = qbp->qb_next;
24670Sstevel@tonic-gate mp = qbp->qb_first;
24680Sstevel@tonic-gate if (mp == NULL) {
24690Sstevel@tonic-gate mutex_exit(QLOCK(q));
24700Sstevel@tonic-gate return;
24710Sstevel@tonic-gate }
24720Sstevel@tonic-gate last = qbp->qb_last->b_next;
24730Sstevel@tonic-gate /*
24740Sstevel@tonic-gate * rmvq_noenab() and freemsg() are called for each mblk that
24750Sstevel@tonic-gate * meets the criteria. The loop is executed until the last
24760Sstevel@tonic-gate * mblk has been processed.
24770Sstevel@tonic-gate */
24780Sstevel@tonic-gate while (mp != last) {
24790Sstevel@tonic-gate ASSERT(mp->b_band == pri);
24800Sstevel@tonic-gate nmp = mp->b_next;
24810Sstevel@tonic-gate if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
24820Sstevel@tonic-gate rmvq_noenab(q, mp);
24830Sstevel@tonic-gate freemsg(mp);
24840Sstevel@tonic-gate flushed = B_TRUE;
24850Sstevel@tonic-gate }
24860Sstevel@tonic-gate mp = nmp;
24870Sstevel@tonic-gate }
24880Sstevel@tonic-gate mutex_exit(QLOCK(q));
24890Sstevel@tonic-gate
24900Sstevel@tonic-gate /*
24910Sstevel@tonic-gate * If any mblk(s) has been freed, we know that qbackenable()
24920Sstevel@tonic-gate * will need to be called.
24930Sstevel@tonic-gate */
24940Sstevel@tonic-gate if (flushed)
2495235Smicheng qbackenable(q, pri);
24960Sstevel@tonic-gate }
24970Sstevel@tonic-gate }
24980Sstevel@tonic-gate
24990Sstevel@tonic-gate /*
25000Sstevel@tonic-gate * Return 1 if the queue is not full. If the queue is full, return
25010Sstevel@tonic-gate * 0 (may not put message) and set QWANTW flag (caller wants to write
25020Sstevel@tonic-gate * to the queue).
25030Sstevel@tonic-gate */
25040Sstevel@tonic-gate int
canput(queue_t * q)25050Sstevel@tonic-gate canput(queue_t *q)
25060Sstevel@tonic-gate {
25070Sstevel@tonic-gate TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
25080Sstevel@tonic-gate
25090Sstevel@tonic-gate /* this is for loopback transports, they should not do a canput */
25100Sstevel@tonic-gate ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
25110Sstevel@tonic-gate
25120Sstevel@tonic-gate /* Find next forward module that has a service procedure */
25130Sstevel@tonic-gate q = q->q_nfsrv;
25140Sstevel@tonic-gate
25150Sstevel@tonic-gate if (!(q->q_flag & QFULL)) {
25160Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
25170Sstevel@tonic-gate return (1);
25180Sstevel@tonic-gate }
25190Sstevel@tonic-gate mutex_enter(QLOCK(q));
25200Sstevel@tonic-gate if (q->q_flag & QFULL) {
25210Sstevel@tonic-gate q->q_flag |= QWANTW;
25220Sstevel@tonic-gate mutex_exit(QLOCK(q));
25230Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
25240Sstevel@tonic-gate return (0);
25250Sstevel@tonic-gate }
25260Sstevel@tonic-gate mutex_exit(QLOCK(q));
25270Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
25280Sstevel@tonic-gate return (1);
25290Sstevel@tonic-gate }
25300Sstevel@tonic-gate
25310Sstevel@tonic-gate /*
25320Sstevel@tonic-gate * This is the new canput for use with priority bands. Return 1 if the
25330Sstevel@tonic-gate * band is not full. If the band is full, return 0 (may not put message)
25340Sstevel@tonic-gate * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to
25350Sstevel@tonic-gate * write to the queue).
25360Sstevel@tonic-gate */
25370Sstevel@tonic-gate int
bcanput(queue_t * q,unsigned char pri)25380Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri)
25390Sstevel@tonic-gate {
25400Sstevel@tonic-gate qband_t *qbp;
25410Sstevel@tonic-gate
25420Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
25430Sstevel@tonic-gate if (!q)
25440Sstevel@tonic-gate return (0);
25450Sstevel@tonic-gate
25460Sstevel@tonic-gate /* Find next forward module that has a service procedure */
25470Sstevel@tonic-gate q = q->q_nfsrv;
25480Sstevel@tonic-gate
25490Sstevel@tonic-gate mutex_enter(QLOCK(q));
25500Sstevel@tonic-gate if (pri == 0) {
25510Sstevel@tonic-gate if (q->q_flag & QFULL) {
25520Sstevel@tonic-gate q->q_flag |= QWANTW;
25530Sstevel@tonic-gate mutex_exit(QLOCK(q));
25540Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
25556707Sbrutus "bcanput:%p %X %d", q, pri, 0);
25560Sstevel@tonic-gate return (0);
25570Sstevel@tonic-gate }
25580Sstevel@tonic-gate } else { /* pri != 0 */
25590Sstevel@tonic-gate if (pri > q->q_nband) {
25600Sstevel@tonic-gate /*
25610Sstevel@tonic-gate * No band exists yet, so return success.
25620Sstevel@tonic-gate */
25630Sstevel@tonic-gate mutex_exit(QLOCK(q));
25640Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
25656707Sbrutus "bcanput:%p %X %d", q, pri, 1);
25660Sstevel@tonic-gate return (1);
25670Sstevel@tonic-gate }
25680Sstevel@tonic-gate qbp = q->q_bandp;
25690Sstevel@tonic-gate while (--pri)
25700Sstevel@tonic-gate qbp = qbp->qb_next;
25710Sstevel@tonic-gate if (qbp->qb_flag & QB_FULL) {
25720Sstevel@tonic-gate qbp->qb_flag |= QB_WANTW;
25730Sstevel@tonic-gate mutex_exit(QLOCK(q));
25740Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
25756707Sbrutus "bcanput:%p %X %d", q, pri, 0);
25760Sstevel@tonic-gate return (0);
25770Sstevel@tonic-gate }
25780Sstevel@tonic-gate }
25790Sstevel@tonic-gate mutex_exit(QLOCK(q));
25800Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
25816707Sbrutus "bcanput:%p %X %d", q, pri, 1);
25820Sstevel@tonic-gate return (1);
25830Sstevel@tonic-gate }
25840Sstevel@tonic-gate
25850Sstevel@tonic-gate /*
25860Sstevel@tonic-gate * Put a message on a queue.
25870Sstevel@tonic-gate *
25880Sstevel@tonic-gate * Messages are enqueued on a priority basis. The priority classes
25890Sstevel@tonic-gate * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
25900Sstevel@tonic-gate * and B_NORMAL (type < QPCTL && band == 0).
25910Sstevel@tonic-gate *
25920Sstevel@tonic-gate * Add appropriate weighted data block sizes to queue count.
25930Sstevel@tonic-gate * If queue hits high water mark then set QFULL flag.
25940Sstevel@tonic-gate *
25950Sstevel@tonic-gate * If QNOENAB is not set (putq is allowed to enable the queue),
25960Sstevel@tonic-gate * enable the queue only if the message is PRIORITY,
 * or the QWANTR flag is set (indicating that the service procedure
 * is ready to read the queue). This implies that a service
25990Sstevel@tonic-gate * procedure must NEVER put a high priority message back on its own
26000Sstevel@tonic-gate * queue, as this would result in an infinite loop (!).
26010Sstevel@tonic-gate */
26020Sstevel@tonic-gate int
putq(queue_t * q,mblk_t * bp)26030Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp)
26040Sstevel@tonic-gate {
26050Sstevel@tonic-gate mblk_t *tmp;
26060Sstevel@tonic-gate qband_t *qbp = NULL;
26070Sstevel@tonic-gate int mcls = (int)queclass(bp);
26080Sstevel@tonic-gate kthread_id_t freezer;
26090Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0;
26100Sstevel@tonic-gate
26110Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer;
26120Sstevel@tonic-gate if (freezer == curthread) {
26130Sstevel@tonic-gate ASSERT(frozenstr(q));
26140Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
26150Sstevel@tonic-gate } else
26160Sstevel@tonic-gate mutex_enter(QLOCK(q));
26170Sstevel@tonic-gate
26180Sstevel@tonic-gate /*
26190Sstevel@tonic-gate * Make sanity checks and if qband structure is not yet
26200Sstevel@tonic-gate * allocated, do so.
26210Sstevel@tonic-gate */
26220Sstevel@tonic-gate if (mcls == QPCTL) {
26230Sstevel@tonic-gate if (bp->b_band != 0)
26240Sstevel@tonic-gate bp->b_band = 0; /* force to be correct */
26250Sstevel@tonic-gate } else if (bp->b_band != 0) {
26260Sstevel@tonic-gate int i;
26270Sstevel@tonic-gate qband_t **qbpp;
26280Sstevel@tonic-gate
26290Sstevel@tonic-gate if (bp->b_band > q->q_nband) {
26300Sstevel@tonic-gate
26310Sstevel@tonic-gate /*
26320Sstevel@tonic-gate * The qband structure for this priority band is
26330Sstevel@tonic-gate * not on the queue yet, so we have to allocate
26340Sstevel@tonic-gate * one on the fly. It would be wasteful to
26350Sstevel@tonic-gate * associate the qband structures with every
26360Sstevel@tonic-gate * queue when the queues are allocated. This is
26370Sstevel@tonic-gate * because most queues will only need the normal
26380Sstevel@tonic-gate * band of flow which can be described entirely
26390Sstevel@tonic-gate * by the queue itself.
26400Sstevel@tonic-gate */
26410Sstevel@tonic-gate qbpp = &q->q_bandp;
26420Sstevel@tonic-gate while (*qbpp)
26430Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
26440Sstevel@tonic-gate while (bp->b_band > q->q_nband) {
26450Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) {
26460Sstevel@tonic-gate if (freezer != curthread)
26470Sstevel@tonic-gate mutex_exit(QLOCK(q));
26480Sstevel@tonic-gate return (0);
26490Sstevel@tonic-gate }
26500Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat;
26510Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat;
26520Sstevel@tonic-gate q->q_nband++;
26530Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
26540Sstevel@tonic-gate }
26550Sstevel@tonic-gate }
26560Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
26570Sstevel@tonic-gate qbp = q->q_bandp;
26580Sstevel@tonic-gate i = bp->b_band;
26590Sstevel@tonic-gate while (--i)
26600Sstevel@tonic-gate qbp = qbp->qb_next;
26610Sstevel@tonic-gate }
26620Sstevel@tonic-gate
26630Sstevel@tonic-gate /*
26640Sstevel@tonic-gate * If queue is empty, add the message and initialize the pointers.
26650Sstevel@tonic-gate * Otherwise, adjust message pointers and queue pointers based on
26660Sstevel@tonic-gate * the type of the message and where it belongs on the queue. Some
26670Sstevel@tonic-gate * code is duplicated to minimize the number of conditionals and
26680Sstevel@tonic-gate * hopefully minimize the amount of time this routine takes.
26690Sstevel@tonic-gate */
26700Sstevel@tonic-gate if (!q->q_first) {
26710Sstevel@tonic-gate bp->b_next = NULL;
26720Sstevel@tonic-gate bp->b_prev = NULL;
26730Sstevel@tonic-gate q->q_first = bp;
26740Sstevel@tonic-gate q->q_last = bp;
26750Sstevel@tonic-gate if (qbp) {
26760Sstevel@tonic-gate qbp->qb_first = bp;
26770Sstevel@tonic-gate qbp->qb_last = bp;
26780Sstevel@tonic-gate }
26790Sstevel@tonic-gate } else if (!qbp) { /* bp->b_band == 0 */
26800Sstevel@tonic-gate
26810Sstevel@tonic-gate /*
26820Sstevel@tonic-gate * If queue class of message is less than or equal to
26830Sstevel@tonic-gate * that of the last one on the queue, tack on to the end.
26840Sstevel@tonic-gate */
26850Sstevel@tonic-gate tmp = q->q_last;
26860Sstevel@tonic-gate if (mcls <= (int)queclass(tmp)) {
26870Sstevel@tonic-gate bp->b_next = NULL;
26880Sstevel@tonic-gate bp->b_prev = tmp;
26890Sstevel@tonic-gate tmp->b_next = bp;
26900Sstevel@tonic-gate q->q_last = bp;
26910Sstevel@tonic-gate } else {
26920Sstevel@tonic-gate tmp = q->q_first;
26930Sstevel@tonic-gate while ((int)queclass(tmp) >= mcls)
26940Sstevel@tonic-gate tmp = tmp->b_next;
26950Sstevel@tonic-gate
26960Sstevel@tonic-gate /*
26970Sstevel@tonic-gate * Insert bp before tmp.
26980Sstevel@tonic-gate */
26990Sstevel@tonic-gate bp->b_next = tmp;
27000Sstevel@tonic-gate bp->b_prev = tmp->b_prev;
27010Sstevel@tonic-gate if (tmp->b_prev)
27020Sstevel@tonic-gate tmp->b_prev->b_next = bp;
27030Sstevel@tonic-gate else
27040Sstevel@tonic-gate q->q_first = bp;
27050Sstevel@tonic-gate tmp->b_prev = bp;
27060Sstevel@tonic-gate }
27070Sstevel@tonic-gate } else { /* bp->b_band != 0 */
27080Sstevel@tonic-gate if (qbp->qb_first) {
27090Sstevel@tonic-gate tmp = qbp->qb_last;
27100Sstevel@tonic-gate
27110Sstevel@tonic-gate /*
27120Sstevel@tonic-gate * Insert bp after the last message in this band.
27130Sstevel@tonic-gate */
27140Sstevel@tonic-gate bp->b_next = tmp->b_next;
27150Sstevel@tonic-gate if (tmp->b_next)
27160Sstevel@tonic-gate tmp->b_next->b_prev = bp;
27170Sstevel@tonic-gate else
27180Sstevel@tonic-gate q->q_last = bp;
27190Sstevel@tonic-gate bp->b_prev = tmp;
27200Sstevel@tonic-gate tmp->b_next = bp;
27210Sstevel@tonic-gate } else {
27220Sstevel@tonic-gate tmp = q->q_last;
27230Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) ||
27240Sstevel@tonic-gate (bp->b_band <= tmp->b_band)) {
27250Sstevel@tonic-gate
27260Sstevel@tonic-gate /*
27270Sstevel@tonic-gate * Tack bp on end of queue.
27280Sstevel@tonic-gate */
27290Sstevel@tonic-gate bp->b_next = NULL;
27300Sstevel@tonic-gate bp->b_prev = tmp;
27310Sstevel@tonic-gate tmp->b_next = bp;
27320Sstevel@tonic-gate q->q_last = bp;
27330Sstevel@tonic-gate } else {
27340Sstevel@tonic-gate tmp = q->q_first;
27350Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL)
27360Sstevel@tonic-gate tmp = tmp->b_next;
27370Sstevel@tonic-gate while (tmp->b_band >= bp->b_band)
27380Sstevel@tonic-gate tmp = tmp->b_next;
27390Sstevel@tonic-gate
27400Sstevel@tonic-gate /*
27410Sstevel@tonic-gate * Insert bp before tmp.
27420Sstevel@tonic-gate */
27430Sstevel@tonic-gate bp->b_next = tmp;
27440Sstevel@tonic-gate bp->b_prev = tmp->b_prev;
27450Sstevel@tonic-gate if (tmp->b_prev)
27460Sstevel@tonic-gate tmp->b_prev->b_next = bp;
27470Sstevel@tonic-gate else
27480Sstevel@tonic-gate q->q_first = bp;
27490Sstevel@tonic-gate tmp->b_prev = bp;
27500Sstevel@tonic-gate }
27510Sstevel@tonic-gate qbp->qb_first = bp;
27520Sstevel@tonic-gate }
27530Sstevel@tonic-gate qbp->qb_last = bp;
27540Sstevel@tonic-gate }
27550Sstevel@tonic-gate
27560Sstevel@tonic-gate /* Get message byte count for q_count accounting */
27576769Sja97890 bytecnt = mp_cont_len(bp, &mblkcnt);
2758741Smasputra
27590Sstevel@tonic-gate if (qbp) {
27600Sstevel@tonic-gate qbp->qb_count += bytecnt;
27610Sstevel@tonic-gate qbp->qb_mblkcnt += mblkcnt;
27620Sstevel@tonic-gate if ((qbp->qb_count >= qbp->qb_hiwat) ||
27630Sstevel@tonic-gate (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
27640Sstevel@tonic-gate qbp->qb_flag |= QB_FULL;
27650Sstevel@tonic-gate }
27660Sstevel@tonic-gate } else {
27670Sstevel@tonic-gate q->q_count += bytecnt;
27680Sstevel@tonic-gate q->q_mblkcnt += mblkcnt;
27690Sstevel@tonic-gate if ((q->q_count >= q->q_hiwat) ||
27700Sstevel@tonic-gate (q->q_mblkcnt >= q->q_hiwat)) {
27710Sstevel@tonic-gate q->q_flag |= QFULL;
27720Sstevel@tonic-gate }
27730Sstevel@tonic-gate }
27740Sstevel@tonic-gate
27750Sstevel@tonic-gate STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);
27760Sstevel@tonic-gate
27770Sstevel@tonic-gate if ((mcls > QNORM) ||
27780Sstevel@tonic-gate (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
27790Sstevel@tonic-gate qenable_locked(q);
27800Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
27810Sstevel@tonic-gate if (freezer != curthread)
27820Sstevel@tonic-gate mutex_exit(QLOCK(q));
27830Sstevel@tonic-gate
27840Sstevel@tonic-gate return (1);
27850Sstevel@tonic-gate }
27860Sstevel@tonic-gate
27870Sstevel@tonic-gate /*
27880Sstevel@tonic-gate * Put stuff back at beginning of Q according to priority order.
27890Sstevel@tonic-gate * See comment on putq above for details.
27900Sstevel@tonic-gate */
27910Sstevel@tonic-gate int
putbq(queue_t * q,mblk_t * bp)27920Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp)
27930Sstevel@tonic-gate {
27940Sstevel@tonic-gate mblk_t *tmp;
27950Sstevel@tonic-gate qband_t *qbp = NULL;
27960Sstevel@tonic-gate int mcls = (int)queclass(bp);
27970Sstevel@tonic-gate kthread_id_t freezer;
27980Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0;
27990Sstevel@tonic-gate
28000Sstevel@tonic-gate ASSERT(q && bp);
28010Sstevel@tonic-gate ASSERT(bp->b_next == NULL);
28020Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer;
28030Sstevel@tonic-gate if (freezer == curthread) {
28040Sstevel@tonic-gate ASSERT(frozenstr(q));
28050Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
28060Sstevel@tonic-gate } else
28070Sstevel@tonic-gate mutex_enter(QLOCK(q));
28080Sstevel@tonic-gate
28090Sstevel@tonic-gate /*
28100Sstevel@tonic-gate * Make sanity checks and if qband structure is not yet
28110Sstevel@tonic-gate * allocated, do so.
28120Sstevel@tonic-gate */
28130Sstevel@tonic-gate if (mcls == QPCTL) {
28140Sstevel@tonic-gate if (bp->b_band != 0)
28150Sstevel@tonic-gate bp->b_band = 0; /* force to be correct */
28160Sstevel@tonic-gate } else if (bp->b_band != 0) {
28170Sstevel@tonic-gate int i;
28180Sstevel@tonic-gate qband_t **qbpp;
28190Sstevel@tonic-gate
28200Sstevel@tonic-gate if (bp->b_band > q->q_nband) {
28210Sstevel@tonic-gate qbpp = &q->q_bandp;
28220Sstevel@tonic-gate while (*qbpp)
28230Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
28240Sstevel@tonic-gate while (bp->b_band > q->q_nband) {
28250Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) {
28260Sstevel@tonic-gate if (freezer != curthread)
28270Sstevel@tonic-gate mutex_exit(QLOCK(q));
28280Sstevel@tonic-gate return (0);
28290Sstevel@tonic-gate }
28300Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat;
28310Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat;
28320Sstevel@tonic-gate q->q_nband++;
28330Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
28340Sstevel@tonic-gate }
28350Sstevel@tonic-gate }
28360Sstevel@tonic-gate qbp = q->q_bandp;
28370Sstevel@tonic-gate i = bp->b_band;
28380Sstevel@tonic-gate while (--i)
28390Sstevel@tonic-gate qbp = qbp->qb_next;
28400Sstevel@tonic-gate }
28410Sstevel@tonic-gate
28420Sstevel@tonic-gate /*
28430Sstevel@tonic-gate * If queue is empty or if message is high priority,
28440Sstevel@tonic-gate * place on the front of the queue.
28450Sstevel@tonic-gate */
28460Sstevel@tonic-gate tmp = q->q_first;
28470Sstevel@tonic-gate if ((!tmp) || (mcls == QPCTL)) {
28480Sstevel@tonic-gate bp->b_next = tmp;
28490Sstevel@tonic-gate if (tmp)
28500Sstevel@tonic-gate tmp->b_prev = bp;
28510Sstevel@tonic-gate else
28520Sstevel@tonic-gate q->q_last = bp;
28530Sstevel@tonic-gate q->q_first = bp;
28540Sstevel@tonic-gate bp->b_prev = NULL;
28550Sstevel@tonic-gate if (qbp) {
28560Sstevel@tonic-gate qbp->qb_first = bp;
28570Sstevel@tonic-gate qbp->qb_last = bp;
28580Sstevel@tonic-gate }
28590Sstevel@tonic-gate } else if (qbp) { /* bp->b_band != 0 */
28600Sstevel@tonic-gate tmp = qbp->qb_first;
28610Sstevel@tonic-gate if (tmp) {
28620Sstevel@tonic-gate
28630Sstevel@tonic-gate /*
28640Sstevel@tonic-gate * Insert bp before the first message in this band.
28650Sstevel@tonic-gate */
28660Sstevel@tonic-gate bp->b_next = tmp;
28670Sstevel@tonic-gate bp->b_prev = tmp->b_prev;
28680Sstevel@tonic-gate if (tmp->b_prev)
28690Sstevel@tonic-gate tmp->b_prev->b_next = bp;
28700Sstevel@tonic-gate else
28710Sstevel@tonic-gate q->q_first = bp;
28720Sstevel@tonic-gate tmp->b_prev = bp;
28730Sstevel@tonic-gate } else {
28740Sstevel@tonic-gate tmp = q->q_last;
28750Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) ||
28760Sstevel@tonic-gate (bp->b_band < tmp->b_band)) {
28770Sstevel@tonic-gate
28780Sstevel@tonic-gate /*
28790Sstevel@tonic-gate * Tack bp on end of queue.
28800Sstevel@tonic-gate */
28810Sstevel@tonic-gate bp->b_next = NULL;
28820Sstevel@tonic-gate bp->b_prev = tmp;
28830Sstevel@tonic-gate tmp->b_next = bp;
28840Sstevel@tonic-gate q->q_last = bp;
28850Sstevel@tonic-gate } else {
28860Sstevel@tonic-gate tmp = q->q_first;
28870Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL)
28880Sstevel@tonic-gate tmp = tmp->b_next;
28890Sstevel@tonic-gate while (tmp->b_band > bp->b_band)
28900Sstevel@tonic-gate tmp = tmp->b_next;
28910Sstevel@tonic-gate
28920Sstevel@tonic-gate /*
28930Sstevel@tonic-gate * Insert bp before tmp.
28940Sstevel@tonic-gate */
28950Sstevel@tonic-gate bp->b_next = tmp;
28960Sstevel@tonic-gate bp->b_prev = tmp->b_prev;
28970Sstevel@tonic-gate if (tmp->b_prev)
28980Sstevel@tonic-gate tmp->b_prev->b_next = bp;
28990Sstevel@tonic-gate else
29000Sstevel@tonic-gate q->q_first = bp;
29010Sstevel@tonic-gate tmp->b_prev = bp;
29020Sstevel@tonic-gate }
29030Sstevel@tonic-gate qbp->qb_last = bp;
29040Sstevel@tonic-gate }
29050Sstevel@tonic-gate qbp->qb_first = bp;
29060Sstevel@tonic-gate } else { /* bp->b_band == 0 && !QPCTL */
29070Sstevel@tonic-gate
29080Sstevel@tonic-gate /*
29090Sstevel@tonic-gate * If the queue class or band is less than that of the last
29100Sstevel@tonic-gate * message on the queue, tack bp on the end of the queue.
29110Sstevel@tonic-gate */
29120Sstevel@tonic-gate tmp = q->q_last;
29130Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
29140Sstevel@tonic-gate bp->b_next = NULL;
29150Sstevel@tonic-gate bp->b_prev = tmp;
29160Sstevel@tonic-gate tmp->b_next = bp;
29170Sstevel@tonic-gate q->q_last = bp;
29180Sstevel@tonic-gate } else {
29190Sstevel@tonic-gate tmp = q->q_first;
29200Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL)
29210Sstevel@tonic-gate tmp = tmp->b_next;
29220Sstevel@tonic-gate while (tmp->b_band > bp->b_band)
29230Sstevel@tonic-gate tmp = tmp->b_next;
29240Sstevel@tonic-gate
29250Sstevel@tonic-gate /*
29260Sstevel@tonic-gate * Insert bp before tmp.
29270Sstevel@tonic-gate */
29280Sstevel@tonic-gate bp->b_next = tmp;
29290Sstevel@tonic-gate bp->b_prev = tmp->b_prev;
29300Sstevel@tonic-gate if (tmp->b_prev)
29310Sstevel@tonic-gate tmp->b_prev->b_next = bp;
29320Sstevel@tonic-gate else
29330Sstevel@tonic-gate q->q_first = bp;
29340Sstevel@tonic-gate tmp->b_prev = bp;
29350Sstevel@tonic-gate }
29360Sstevel@tonic-gate }
29370Sstevel@tonic-gate
29380Sstevel@tonic-gate /* Get message byte count for q_count accounting */
29396769Sja97890 bytecnt = mp_cont_len(bp, &mblkcnt);
29406769Sja97890
29410Sstevel@tonic-gate if (qbp) {
29420Sstevel@tonic-gate qbp->qb_count += bytecnt;
29430Sstevel@tonic-gate qbp->qb_mblkcnt += mblkcnt;
29440Sstevel@tonic-gate if ((qbp->qb_count >= qbp->qb_hiwat) ||
29450Sstevel@tonic-gate (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
29460Sstevel@tonic-gate qbp->qb_flag |= QB_FULL;
29470Sstevel@tonic-gate }
29480Sstevel@tonic-gate } else {
29490Sstevel@tonic-gate q->q_count += bytecnt;
29500Sstevel@tonic-gate q->q_mblkcnt += mblkcnt;
29510Sstevel@tonic-gate if ((q->q_count >= q->q_hiwat) ||
29520Sstevel@tonic-gate (q->q_mblkcnt >= q->q_hiwat)) {
29530Sstevel@tonic-gate q->q_flag |= QFULL;
29540Sstevel@tonic-gate }
29550Sstevel@tonic-gate }
29560Sstevel@tonic-gate
29570Sstevel@tonic-gate STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);
29580Sstevel@tonic-gate
29590Sstevel@tonic-gate if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
29600Sstevel@tonic-gate qenable_locked(q);
29610Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
29620Sstevel@tonic-gate if (freezer != curthread)
29630Sstevel@tonic-gate mutex_exit(QLOCK(q));
29640Sstevel@tonic-gate
29650Sstevel@tonic-gate return (1);
29660Sstevel@tonic-gate }
29670Sstevel@tonic-gate
29680Sstevel@tonic-gate /*
29690Sstevel@tonic-gate * Insert a message before an existing message on the queue. If the
29700Sstevel@tonic-gate * existing message is NULL, the new messages is placed on the end of
29710Sstevel@tonic-gate * the queue. The queue class of the new message is ignored. However,
29720Sstevel@tonic-gate * the priority band of the new message must adhere to the following
29730Sstevel@tonic-gate * ordering:
29740Sstevel@tonic-gate *
29750Sstevel@tonic-gate * emp->b_prev->b_band >= mp->b_band >= emp->b_band.
29760Sstevel@tonic-gate *
29770Sstevel@tonic-gate * All flow control parameters are updated.
29780Sstevel@tonic-gate *
29790Sstevel@tonic-gate * insq can be called with the stream frozen, but other utility functions
29800Sstevel@tonic-gate * holding QLOCK, and by streams modules without any locks/frozen.
29810Sstevel@tonic-gate */
29820Sstevel@tonic-gate int
insq(queue_t * q,mblk_t * emp,mblk_t * mp)29830Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp)
29840Sstevel@tonic-gate {
29850Sstevel@tonic-gate mblk_t *tmp;
29860Sstevel@tonic-gate qband_t *qbp = NULL;
29870Sstevel@tonic-gate int mcls = (int)queclass(mp);
29880Sstevel@tonic-gate kthread_id_t freezer;
29890Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0;
29900Sstevel@tonic-gate
29910Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer;
29920Sstevel@tonic-gate if (freezer == curthread) {
29930Sstevel@tonic-gate ASSERT(frozenstr(q));
29940Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
29950Sstevel@tonic-gate } else if (MUTEX_HELD(QLOCK(q))) {
29960Sstevel@tonic-gate /* Don't drop lock on exit */
29970Sstevel@tonic-gate freezer = curthread;
29980Sstevel@tonic-gate } else
29990Sstevel@tonic-gate mutex_enter(QLOCK(q));
30000Sstevel@tonic-gate
30010Sstevel@tonic-gate if (mcls == QPCTL) {
30020Sstevel@tonic-gate if (mp->b_band != 0)
30030Sstevel@tonic-gate mp->b_band = 0; /* force to be correct */
30040Sstevel@tonic-gate if (emp && emp->b_prev &&
30050Sstevel@tonic-gate (emp->b_prev->b_datap->db_type < QPCTL))
30060Sstevel@tonic-gate goto badord;
30070Sstevel@tonic-gate }
30080Sstevel@tonic-gate if (emp) {
30090Sstevel@tonic-gate if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
30100Sstevel@tonic-gate (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
30110Sstevel@tonic-gate (emp->b_prev->b_band < mp->b_band))) {
30120Sstevel@tonic-gate goto badord;
30130Sstevel@tonic-gate }
30140Sstevel@tonic-gate } else {
30150Sstevel@tonic-gate tmp = q->q_last;
30160Sstevel@tonic-gate if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
30170Sstevel@tonic-gate badord:
30180Sstevel@tonic-gate cmn_err(CE_WARN,
30190Sstevel@tonic-gate "insq: attempt to insert message out of order "
30200Sstevel@tonic-gate "on q %p", (void *)q);
30210Sstevel@tonic-gate if (freezer != curthread)
30220Sstevel@tonic-gate mutex_exit(QLOCK(q));
30230Sstevel@tonic-gate return (0);
30240Sstevel@tonic-gate }
30250Sstevel@tonic-gate }
30260Sstevel@tonic-gate
30270Sstevel@tonic-gate if (mp->b_band != 0) {
30280Sstevel@tonic-gate int i;
30290Sstevel@tonic-gate qband_t **qbpp;
30300Sstevel@tonic-gate
30310Sstevel@tonic-gate if (mp->b_band > q->q_nband) {
30320Sstevel@tonic-gate qbpp = &q->q_bandp;
30330Sstevel@tonic-gate while (*qbpp)
30340Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
30350Sstevel@tonic-gate while (mp->b_band > q->q_nband) {
30360Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) {
30370Sstevel@tonic-gate if (freezer != curthread)
30380Sstevel@tonic-gate mutex_exit(QLOCK(q));
30390Sstevel@tonic-gate return (0);
30400Sstevel@tonic-gate }
30410Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat;
30420Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat;
30430Sstevel@tonic-gate q->q_nband++;
30440Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
30450Sstevel@tonic-gate }
30460Sstevel@tonic-gate }
30470Sstevel@tonic-gate qbp = q->q_bandp;
30480Sstevel@tonic-gate i = mp->b_band;
30490Sstevel@tonic-gate while (--i)
30500Sstevel@tonic-gate qbp = qbp->qb_next;
30510Sstevel@tonic-gate }
30520Sstevel@tonic-gate
30530Sstevel@tonic-gate if ((mp->b_next = emp) != NULL) {
30540Sstevel@tonic-gate if ((mp->b_prev = emp->b_prev) != NULL)
30550Sstevel@tonic-gate emp->b_prev->b_next = mp;
30560Sstevel@tonic-gate else
30570Sstevel@tonic-gate q->q_first = mp;
30580Sstevel@tonic-gate emp->b_prev = mp;
30590Sstevel@tonic-gate } else {
30600Sstevel@tonic-gate if ((mp->b_prev = q->q_last) != NULL)
30610Sstevel@tonic-gate q->q_last->b_next = mp;
30620Sstevel@tonic-gate else
30630Sstevel@tonic-gate q->q_first = mp;
30640Sstevel@tonic-gate q->q_last = mp;
30650Sstevel@tonic-gate }
30660Sstevel@tonic-gate
30670Sstevel@tonic-gate /* Get mblk and byte count for q_count accounting */
30686769Sja97890 bytecnt = mp_cont_len(mp, &mblkcnt);
30690Sstevel@tonic-gate
30700Sstevel@tonic-gate if (qbp) { /* adjust qband pointers and count */
30710Sstevel@tonic-gate if (!qbp->qb_first) {
30720Sstevel@tonic-gate qbp->qb_first = mp;
30730Sstevel@tonic-gate qbp->qb_last = mp;
30740Sstevel@tonic-gate } else {
30750Sstevel@tonic-gate if (mp->b_prev == NULL || (mp->b_prev != NULL &&
30760Sstevel@tonic-gate (mp->b_prev->b_band != mp->b_band)))
30770Sstevel@tonic-gate qbp->qb_first = mp;
30780Sstevel@tonic-gate else if (mp->b_next == NULL || (mp->b_next != NULL &&
30790Sstevel@tonic-gate (mp->b_next->b_band != mp->b_band)))
30800Sstevel@tonic-gate qbp->qb_last = mp;
30810Sstevel@tonic-gate }
30820Sstevel@tonic-gate qbp->qb_count += bytecnt;
30830Sstevel@tonic-gate qbp->qb_mblkcnt += mblkcnt;
30840Sstevel@tonic-gate if ((qbp->qb_count >= qbp->qb_hiwat) ||
30850Sstevel@tonic-gate (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
30860Sstevel@tonic-gate qbp->qb_flag |= QB_FULL;
30870Sstevel@tonic-gate }
30880Sstevel@tonic-gate } else {
30890Sstevel@tonic-gate q->q_count += bytecnt;
30900Sstevel@tonic-gate q->q_mblkcnt += mblkcnt;
30910Sstevel@tonic-gate if ((q->q_count >= q->q_hiwat) ||
30920Sstevel@tonic-gate (q->q_mblkcnt >= q->q_hiwat)) {
30930Sstevel@tonic-gate q->q_flag |= QFULL;
30940Sstevel@tonic-gate }
30950Sstevel@tonic-gate }
30960Sstevel@tonic-gate
30970Sstevel@tonic-gate STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL);
30980Sstevel@tonic-gate
30990Sstevel@tonic-gate if (canenable(q) && (q->q_flag & QWANTR))
31000Sstevel@tonic-gate qenable_locked(q);
31010Sstevel@tonic-gate
31020Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
31030Sstevel@tonic-gate if (freezer != curthread)
31040Sstevel@tonic-gate mutex_exit(QLOCK(q));
31050Sstevel@tonic-gate
31060Sstevel@tonic-gate return (1);
31070Sstevel@tonic-gate }
31080Sstevel@tonic-gate
31090Sstevel@tonic-gate /*
31100Sstevel@tonic-gate * Create and put a control message on queue.
31110Sstevel@tonic-gate */
31120Sstevel@tonic-gate int
putctl(queue_t * q,int type)31130Sstevel@tonic-gate putctl(queue_t *q, int type)
31140Sstevel@tonic-gate {
31150Sstevel@tonic-gate mblk_t *bp;
31160Sstevel@tonic-gate
31170Sstevel@tonic-gate if ((datamsg(type) && (type != M_DELAY)) ||
31180Sstevel@tonic-gate (bp = allocb_tryhard(0)) == NULL)
31190Sstevel@tonic-gate return (0);
31200Sstevel@tonic-gate bp->b_datap->db_type = (unsigned char) type;
31210Sstevel@tonic-gate
31220Sstevel@tonic-gate put(q, bp);
31230Sstevel@tonic-gate
31240Sstevel@tonic-gate return (1);
31250Sstevel@tonic-gate }
31260Sstevel@tonic-gate
31270Sstevel@tonic-gate /*
31280Sstevel@tonic-gate * Control message with a single-byte parameter
31290Sstevel@tonic-gate */
31300Sstevel@tonic-gate int
putctl1(queue_t * q,int type,int param)31310Sstevel@tonic-gate putctl1(queue_t *q, int type, int param)
31320Sstevel@tonic-gate {
31330Sstevel@tonic-gate mblk_t *bp;
31340Sstevel@tonic-gate
31350Sstevel@tonic-gate if ((datamsg(type) && (type != M_DELAY)) ||
31360Sstevel@tonic-gate (bp = allocb_tryhard(1)) == NULL)
31370Sstevel@tonic-gate return (0);
31380Sstevel@tonic-gate bp->b_datap->db_type = (unsigned char)type;
31390Sstevel@tonic-gate *bp->b_wptr++ = (unsigned char)param;
31400Sstevel@tonic-gate
31410Sstevel@tonic-gate put(q, bp);
31420Sstevel@tonic-gate
31430Sstevel@tonic-gate return (1);
31440Sstevel@tonic-gate }
31450Sstevel@tonic-gate
31460Sstevel@tonic-gate int
putnextctl1(queue_t * q,int type,int param)31470Sstevel@tonic-gate putnextctl1(queue_t *q, int type, int param)
31480Sstevel@tonic-gate {
31490Sstevel@tonic-gate mblk_t *bp;
31500Sstevel@tonic-gate
31510Sstevel@tonic-gate if ((datamsg(type) && (type != M_DELAY)) ||
31526707Sbrutus ((bp = allocb_tryhard(1)) == NULL))
31530Sstevel@tonic-gate return (0);
31540Sstevel@tonic-gate
31550Sstevel@tonic-gate bp->b_datap->db_type = (unsigned char)type;
31560Sstevel@tonic-gate *bp->b_wptr++ = (unsigned char)param;
31570Sstevel@tonic-gate
31580Sstevel@tonic-gate putnext(q, bp);
31590Sstevel@tonic-gate
31600Sstevel@tonic-gate return (1);
31610Sstevel@tonic-gate }
31620Sstevel@tonic-gate
31630Sstevel@tonic-gate int
putnextctl(queue_t * q,int type)31640Sstevel@tonic-gate putnextctl(queue_t *q, int type)
31650Sstevel@tonic-gate {
31660Sstevel@tonic-gate mblk_t *bp;
31670Sstevel@tonic-gate
31680Sstevel@tonic-gate if ((datamsg(type) && (type != M_DELAY)) ||
31696707Sbrutus ((bp = allocb_tryhard(0)) == NULL))
31700Sstevel@tonic-gate return (0);
31710Sstevel@tonic-gate bp->b_datap->db_type = (unsigned char)type;
31720Sstevel@tonic-gate
31730Sstevel@tonic-gate putnext(q, bp);
31740Sstevel@tonic-gate
31750Sstevel@tonic-gate return (1);
31760Sstevel@tonic-gate }
31770Sstevel@tonic-gate
31780Sstevel@tonic-gate /*
31790Sstevel@tonic-gate * Return the queue upstream from this one
31800Sstevel@tonic-gate */
31810Sstevel@tonic-gate queue_t *
backq(queue_t * q)31820Sstevel@tonic-gate backq(queue_t *q)
31830Sstevel@tonic-gate {
31840Sstevel@tonic-gate q = _OTHERQ(q);
31850Sstevel@tonic-gate if (q->q_next) {
31860Sstevel@tonic-gate q = q->q_next;
31870Sstevel@tonic-gate return (_OTHERQ(q));
31880Sstevel@tonic-gate }
31890Sstevel@tonic-gate return (NULL);
31900Sstevel@tonic-gate }
31910Sstevel@tonic-gate
31920Sstevel@tonic-gate /*
31930Sstevel@tonic-gate * Send a block back up the queue in reverse from this
31940Sstevel@tonic-gate * one (e.g. to respond to ioctls)
31950Sstevel@tonic-gate */
31960Sstevel@tonic-gate void
qreply(queue_t * q,mblk_t * bp)31970Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp)
31980Sstevel@tonic-gate {
31990Sstevel@tonic-gate ASSERT(q && bp);
32000Sstevel@tonic-gate
32010Sstevel@tonic-gate putnext(_OTHERQ(q), bp);
32020Sstevel@tonic-gate }
32030Sstevel@tonic-gate
32040Sstevel@tonic-gate /*
32050Sstevel@tonic-gate * Streams Queue Scheduling
32060Sstevel@tonic-gate *
32070Sstevel@tonic-gate * Queues are enabled through qenable() when they have messages to
32080Sstevel@tonic-gate * process. They are serviced by queuerun(), which runs each enabled
32090Sstevel@tonic-gate * queue's service procedure. The call to queuerun() is processor
32100Sstevel@tonic-gate * dependent - the general principle is that it be run whenever a queue
32110Sstevel@tonic-gate * is enabled but before returning to user level. For system calls,
32120Sstevel@tonic-gate * the function runqueues() is called if their action causes a queue
32130Sstevel@tonic-gate * to be enabled. For device interrupts, queuerun() should be
32140Sstevel@tonic-gate * called before returning from the last level of interrupt. Beyond
32150Sstevel@tonic-gate * this, no timing assumptions should be made about queue scheduling.
32160Sstevel@tonic-gate */
32170Sstevel@tonic-gate
32180Sstevel@tonic-gate /*
32190Sstevel@tonic-gate * Enable a queue: put it on list of those whose service procedures are
32200Sstevel@tonic-gate * ready to run and set up the scheduling mechanism.
32210Sstevel@tonic-gate * The broadcast is done outside the mutex -> to avoid the woken thread
32220Sstevel@tonic-gate * from contending with the mutex. This is OK 'cos the queue has been
32230Sstevel@tonic-gate * enqueued on the runlist and flagged safely at this point.
32240Sstevel@tonic-gate */
32250Sstevel@tonic-gate void
qenable(queue_t * q)32260Sstevel@tonic-gate qenable(queue_t *q)
32270Sstevel@tonic-gate {
32280Sstevel@tonic-gate mutex_enter(QLOCK(q));
32290Sstevel@tonic-gate qenable_locked(q);
32300Sstevel@tonic-gate mutex_exit(QLOCK(q));
32310Sstevel@tonic-gate }
32320Sstevel@tonic-gate /*
32330Sstevel@tonic-gate * Return number of messages on queue
32340Sstevel@tonic-gate */
32350Sstevel@tonic-gate int
qsize(queue_t * qp)32360Sstevel@tonic-gate qsize(queue_t *qp)
32370Sstevel@tonic-gate {
32380Sstevel@tonic-gate int count = 0;
32390Sstevel@tonic-gate mblk_t *mp;
32400Sstevel@tonic-gate
32410Sstevel@tonic-gate mutex_enter(QLOCK(qp));
32420Sstevel@tonic-gate for (mp = qp->q_first; mp; mp = mp->b_next)
32430Sstevel@tonic-gate count++;
32440Sstevel@tonic-gate mutex_exit(QLOCK(qp));
32450Sstevel@tonic-gate return (count);
32460Sstevel@tonic-gate }
32470Sstevel@tonic-gate
32480Sstevel@tonic-gate /*
32490Sstevel@tonic-gate * noenable - set queue so that putq() will not enable it.
32500Sstevel@tonic-gate * enableok - set queue so that putq() can enable it.
32510Sstevel@tonic-gate */
32520Sstevel@tonic-gate void
noenable(queue_t * q)32530Sstevel@tonic-gate noenable(queue_t *q)
32540Sstevel@tonic-gate {
32550Sstevel@tonic-gate mutex_enter(QLOCK(q));
32560Sstevel@tonic-gate q->q_flag |= QNOENB;
32570Sstevel@tonic-gate mutex_exit(QLOCK(q));
32580Sstevel@tonic-gate }
32590Sstevel@tonic-gate
32600Sstevel@tonic-gate void
enableok(queue_t * q)32610Sstevel@tonic-gate enableok(queue_t *q)
32620Sstevel@tonic-gate {
32630Sstevel@tonic-gate mutex_enter(QLOCK(q));
32640Sstevel@tonic-gate q->q_flag &= ~QNOENB;
32650Sstevel@tonic-gate mutex_exit(QLOCK(q));
32660Sstevel@tonic-gate }
32670Sstevel@tonic-gate
32680Sstevel@tonic-gate /*
32690Sstevel@tonic-gate * Set queue fields.
32700Sstevel@tonic-gate */
32710Sstevel@tonic-gate int
strqset(queue_t * q,qfields_t what,unsigned char pri,intptr_t val)32720Sstevel@tonic-gate strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
32730Sstevel@tonic-gate {
32740Sstevel@tonic-gate qband_t *qbp = NULL;
32750Sstevel@tonic-gate queue_t *wrq;
32760Sstevel@tonic-gate int error = 0;
32770Sstevel@tonic-gate kthread_id_t freezer;
32780Sstevel@tonic-gate
32790Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer;
32800Sstevel@tonic-gate if (freezer == curthread) {
32810Sstevel@tonic-gate ASSERT(frozenstr(q));
32820Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
32830Sstevel@tonic-gate } else
32840Sstevel@tonic-gate mutex_enter(QLOCK(q));
32850Sstevel@tonic-gate
32860Sstevel@tonic-gate if (what >= QBAD) {
32870Sstevel@tonic-gate error = EINVAL;
32880Sstevel@tonic-gate goto done;
32890Sstevel@tonic-gate }
32900Sstevel@tonic-gate if (pri != 0) {
32910Sstevel@tonic-gate int i;
32920Sstevel@tonic-gate qband_t **qbpp;
32930Sstevel@tonic-gate
32940Sstevel@tonic-gate if (pri > q->q_nband) {
32950Sstevel@tonic-gate qbpp = &q->q_bandp;
32960Sstevel@tonic-gate while (*qbpp)
32970Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
32980Sstevel@tonic-gate while (pri > q->q_nband) {
32990Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) {
33000Sstevel@tonic-gate error = EAGAIN;
33010Sstevel@tonic-gate goto done;
33020Sstevel@tonic-gate }
33030Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat;
33040Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat;
33050Sstevel@tonic-gate q->q_nband++;
33060Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
33070Sstevel@tonic-gate }
33080Sstevel@tonic-gate }
33090Sstevel@tonic-gate qbp = q->q_bandp;
33100Sstevel@tonic-gate i = pri;
33110Sstevel@tonic-gate while (--i)
33120Sstevel@tonic-gate qbp = qbp->qb_next;
33130Sstevel@tonic-gate }
33140Sstevel@tonic-gate switch (what) {
33150Sstevel@tonic-gate
33160Sstevel@tonic-gate case QHIWAT:
33170Sstevel@tonic-gate if (qbp)
33180Sstevel@tonic-gate qbp->qb_hiwat = (size_t)val;
33190Sstevel@tonic-gate else
33200Sstevel@tonic-gate q->q_hiwat = (size_t)val;
33210Sstevel@tonic-gate break;
33220Sstevel@tonic-gate
33230Sstevel@tonic-gate case QLOWAT:
33240Sstevel@tonic-gate if (qbp)
33250Sstevel@tonic-gate qbp->qb_lowat = (size_t)val;
33260Sstevel@tonic-gate else
33270Sstevel@tonic-gate q->q_lowat = (size_t)val;
33280Sstevel@tonic-gate break;
33290Sstevel@tonic-gate
33300Sstevel@tonic-gate case QMAXPSZ:
33310Sstevel@tonic-gate if (qbp)
33320Sstevel@tonic-gate error = EINVAL;
33330Sstevel@tonic-gate else
33340Sstevel@tonic-gate q->q_maxpsz = (ssize_t)val;
33350Sstevel@tonic-gate
33360Sstevel@tonic-gate /*
33370Sstevel@tonic-gate * Performance concern, strwrite looks at the module below
33380Sstevel@tonic-gate * the stream head for the maxpsz each time it does a write
33390Sstevel@tonic-gate * we now cache it at the stream head. Check to see if this
33400Sstevel@tonic-gate * queue is sitting directly below the stream head.
33410Sstevel@tonic-gate */
33420Sstevel@tonic-gate wrq = STREAM(q)->sd_wrq;
33430Sstevel@tonic-gate if (q != wrq->q_next)
33440Sstevel@tonic-gate break;
33450Sstevel@tonic-gate
33460Sstevel@tonic-gate /*
33470Sstevel@tonic-gate * If the stream is not frozen drop the current QLOCK and
33480Sstevel@tonic-gate * acquire the sd_wrq QLOCK which protects sd_qn_*
33490Sstevel@tonic-gate */
33500Sstevel@tonic-gate if (freezer != curthread) {
33510Sstevel@tonic-gate mutex_exit(QLOCK(q));
33520Sstevel@tonic-gate mutex_enter(QLOCK(wrq));
33530Sstevel@tonic-gate }
33540Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(wrq)));
33550Sstevel@tonic-gate
33560Sstevel@tonic-gate if (strmsgsz != 0) {
33570Sstevel@tonic-gate if (val == INFPSZ)
33580Sstevel@tonic-gate val = strmsgsz;
33590Sstevel@tonic-gate else {
33600Sstevel@tonic-gate if (STREAM(q)->sd_vnode->v_type == VFIFO)
33610Sstevel@tonic-gate val = MIN(PIPE_BUF, val);
33620Sstevel@tonic-gate else
33630Sstevel@tonic-gate val = MIN(strmsgsz, val);
33640Sstevel@tonic-gate }
33650Sstevel@tonic-gate }
33660Sstevel@tonic-gate STREAM(q)->sd_qn_maxpsz = val;
33670Sstevel@tonic-gate if (freezer != curthread) {
33680Sstevel@tonic-gate mutex_exit(QLOCK(wrq));
33690Sstevel@tonic-gate mutex_enter(QLOCK(q));
33700Sstevel@tonic-gate }
33710Sstevel@tonic-gate break;
33720Sstevel@tonic-gate
33730Sstevel@tonic-gate case QMINPSZ:
33740Sstevel@tonic-gate if (qbp)
33750Sstevel@tonic-gate error = EINVAL;
33760Sstevel@tonic-gate else
33770Sstevel@tonic-gate q->q_minpsz = (ssize_t)val;
33780Sstevel@tonic-gate
33790Sstevel@tonic-gate /*
33800Sstevel@tonic-gate * Performance concern, strwrite looks at the module below
33810Sstevel@tonic-gate * the stream head for the maxpsz each time it does a write
33820Sstevel@tonic-gate * we now cache it at the stream head. Check to see if this
33830Sstevel@tonic-gate * queue is sitting directly below the stream head.
33840Sstevel@tonic-gate */
33850Sstevel@tonic-gate wrq = STREAM(q)->sd_wrq;
33860Sstevel@tonic-gate if (q != wrq->q_next)
33870Sstevel@tonic-gate break;
33880Sstevel@tonic-gate
33890Sstevel@tonic-gate /*
33900Sstevel@tonic-gate * If the stream is not frozen drop the current QLOCK and
33910Sstevel@tonic-gate * acquire the sd_wrq QLOCK which protects sd_qn_*
33920Sstevel@tonic-gate */
33930Sstevel@tonic-gate if (freezer != curthread) {
33940Sstevel@tonic-gate mutex_exit(QLOCK(q));
33950Sstevel@tonic-gate mutex_enter(QLOCK(wrq));
33960Sstevel@tonic-gate }
33970Sstevel@tonic-gate STREAM(q)->sd_qn_minpsz = (ssize_t)val;
33980Sstevel@tonic-gate
33990Sstevel@tonic-gate if (freezer != curthread) {
34000Sstevel@tonic-gate mutex_exit(QLOCK(wrq));
34010Sstevel@tonic-gate mutex_enter(QLOCK(q));
34020Sstevel@tonic-gate }
34030Sstevel@tonic-gate break;
34040Sstevel@tonic-gate
34050Sstevel@tonic-gate case QSTRUIOT:
34060Sstevel@tonic-gate if (qbp)
34070Sstevel@tonic-gate error = EINVAL;
34080Sstevel@tonic-gate else
34090Sstevel@tonic-gate q->q_struiot = (ushort_t)val;
34100Sstevel@tonic-gate break;
34110Sstevel@tonic-gate
34120Sstevel@tonic-gate case QCOUNT:
34130Sstevel@tonic-gate case QFIRST:
34140Sstevel@tonic-gate case QLAST:
34150Sstevel@tonic-gate case QFLAG:
34160Sstevel@tonic-gate error = EPERM;
34170Sstevel@tonic-gate break;
34180Sstevel@tonic-gate
34190Sstevel@tonic-gate default:
34200Sstevel@tonic-gate error = EINVAL;
34210Sstevel@tonic-gate break;
34220Sstevel@tonic-gate }
34230Sstevel@tonic-gate done:
34240Sstevel@tonic-gate if (freezer != curthread)
34250Sstevel@tonic-gate mutex_exit(QLOCK(q));
34260Sstevel@tonic-gate return (error);
34270Sstevel@tonic-gate }
34280Sstevel@tonic-gate
34290Sstevel@tonic-gate /*
34300Sstevel@tonic-gate * Get queue fields.
34310Sstevel@tonic-gate */
34320Sstevel@tonic-gate int
strqget(queue_t * q,qfields_t what,unsigned char pri,void * valp)34330Sstevel@tonic-gate strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
34340Sstevel@tonic-gate {
34350Sstevel@tonic-gate qband_t *qbp = NULL;
34360Sstevel@tonic-gate int error = 0;
34370Sstevel@tonic-gate kthread_id_t freezer;
34380Sstevel@tonic-gate
34390Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer;
34400Sstevel@tonic-gate if (freezer == curthread) {
34410Sstevel@tonic-gate ASSERT(frozenstr(q));
34420Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q)));
34430Sstevel@tonic-gate } else
34440Sstevel@tonic-gate mutex_enter(QLOCK(q));
34450Sstevel@tonic-gate if (what >= QBAD) {
34460Sstevel@tonic-gate error = EINVAL;
34470Sstevel@tonic-gate goto done;
34480Sstevel@tonic-gate }
34490Sstevel@tonic-gate if (pri != 0) {
34500Sstevel@tonic-gate int i;
34510Sstevel@tonic-gate qband_t **qbpp;
34520Sstevel@tonic-gate
34530Sstevel@tonic-gate if (pri > q->q_nband) {
34540Sstevel@tonic-gate qbpp = &q->q_bandp;
34550Sstevel@tonic-gate while (*qbpp)
34560Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
34570Sstevel@tonic-gate while (pri > q->q_nband) {
34580Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) {
34590Sstevel@tonic-gate error = EAGAIN;
34600Sstevel@tonic-gate goto done;
34610Sstevel@tonic-gate }
34620Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat;
34630Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat;
34640Sstevel@tonic-gate q->q_nband++;
34650Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next;
34660Sstevel@tonic-gate }
34670Sstevel@tonic-gate }
34680Sstevel@tonic-gate qbp = q->q_bandp;
34690Sstevel@tonic-gate i = pri;
34700Sstevel@tonic-gate while (--i)
34710Sstevel@tonic-gate qbp = qbp->qb_next;
34720Sstevel@tonic-gate }
34730Sstevel@tonic-gate switch (what) {
34740Sstevel@tonic-gate case QHIWAT:
34750Sstevel@tonic-gate if (qbp)
34760Sstevel@tonic-gate *(size_t *)valp = qbp->qb_hiwat;
34770Sstevel@tonic-gate else
34780Sstevel@tonic-gate *(size_t *)valp = q->q_hiwat;
34790Sstevel@tonic-gate break;
34800Sstevel@tonic-gate
34810Sstevel@tonic-gate case QLOWAT:
34820Sstevel@tonic-gate if (qbp)
34830Sstevel@tonic-gate *(size_t *)valp = qbp->qb_lowat;
34840Sstevel@tonic-gate else
34850Sstevel@tonic-gate *(size_t *)valp = q->q_lowat;
34860Sstevel@tonic-gate break;
34870Sstevel@tonic-gate
34880Sstevel@tonic-gate case QMAXPSZ:
34890Sstevel@tonic-gate if (qbp)
34900Sstevel@tonic-gate error = EINVAL;
34910Sstevel@tonic-gate else
34920Sstevel@tonic-gate *(ssize_t *)valp = q->q_maxpsz;
34930Sstevel@tonic-gate break;
34940Sstevel@tonic-gate
34950Sstevel@tonic-gate case QMINPSZ:
34960Sstevel@tonic-gate if (qbp)
34970Sstevel@tonic-gate error = EINVAL;
34980Sstevel@tonic-gate else
34990Sstevel@tonic-gate *(ssize_t *)valp = q->q_minpsz;
35000Sstevel@tonic-gate break;
35010Sstevel@tonic-gate
35020Sstevel@tonic-gate case QCOUNT:
35030Sstevel@tonic-gate if (qbp)
35040Sstevel@tonic-gate *(size_t *)valp = qbp->qb_count;
35050Sstevel@tonic-gate else
35060Sstevel@tonic-gate *(size_t *)valp = q->q_count;
35070Sstevel@tonic-gate break;
35080Sstevel@tonic-gate
35090Sstevel@tonic-gate case QFIRST:
35100Sstevel@tonic-gate if (qbp)
35110Sstevel@tonic-gate *(mblk_t **)valp = qbp->qb_first;
35120Sstevel@tonic-gate else
35130Sstevel@tonic-gate *(mblk_t **)valp = q->q_first;
35140Sstevel@tonic-gate break;
35150Sstevel@tonic-gate
35160Sstevel@tonic-gate case QLAST:
35170Sstevel@tonic-gate if (qbp)
35180Sstevel@tonic-gate *(mblk_t **)valp = qbp->qb_last;
35190Sstevel@tonic-gate else
35200Sstevel@tonic-gate *(mblk_t **)valp = q->q_last;
35210Sstevel@tonic-gate break;
35220Sstevel@tonic-gate
35230Sstevel@tonic-gate case QFLAG:
35240Sstevel@tonic-gate if (qbp)
35250Sstevel@tonic-gate *(uint_t *)valp = qbp->qb_flag;
35260Sstevel@tonic-gate else
35270Sstevel@tonic-gate *(uint_t *)valp = q->q_flag;
35280Sstevel@tonic-gate break;
35290Sstevel@tonic-gate
35300Sstevel@tonic-gate case QSTRUIOT:
35310Sstevel@tonic-gate if (qbp)
35320Sstevel@tonic-gate error = EINVAL;
35330Sstevel@tonic-gate else
35340Sstevel@tonic-gate *(short *)valp = q->q_struiot;
35350Sstevel@tonic-gate break;
35360Sstevel@tonic-gate
35370Sstevel@tonic-gate default:
35380Sstevel@tonic-gate error = EINVAL;
35390Sstevel@tonic-gate break;
35400Sstevel@tonic-gate }
35410Sstevel@tonic-gate done:
35420Sstevel@tonic-gate if (freezer != curthread)
35430Sstevel@tonic-gate mutex_exit(QLOCK(q));
35440Sstevel@tonic-gate return (error);
35450Sstevel@tonic-gate }
35460Sstevel@tonic-gate
35470Sstevel@tonic-gate /*
35480Sstevel@tonic-gate * Function awakes all in cvwait/sigwait/pollwait, on one of:
35490Sstevel@tonic-gate * QWANTWSYNC or QWANTR or QWANTW,
35500Sstevel@tonic-gate *
35510Sstevel@tonic-gate * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
35520Sstevel@tonic-gate * deferred wakeup will be done. Also if strpoll() in progress then a
35530Sstevel@tonic-gate * deferred pollwakeup will be done.
35540Sstevel@tonic-gate */
35550Sstevel@tonic-gate void
strwakeq(queue_t * q,int flag)35560Sstevel@tonic-gate strwakeq(queue_t *q, int flag)
35570Sstevel@tonic-gate {
35580Sstevel@tonic-gate stdata_t *stp = STREAM(q);
35590Sstevel@tonic-gate pollhead_t *pl;
35600Sstevel@tonic-gate
35610Sstevel@tonic-gate mutex_enter(&stp->sd_lock);
35620Sstevel@tonic-gate pl = &stp->sd_pollist;
35630Sstevel@tonic-gate if (flag & QWANTWSYNC) {
35640Sstevel@tonic-gate ASSERT(!(q->q_flag & QREADR));
35650Sstevel@tonic-gate if (stp->sd_flag & WSLEEP) {
35660Sstevel@tonic-gate stp->sd_flag &= ~WSLEEP;
35670Sstevel@tonic-gate cv_broadcast(&stp->sd_wrq->q_wait);
35680Sstevel@tonic-gate } else {
35690Sstevel@tonic-gate stp->sd_wakeq |= WSLEEP;
35700Sstevel@tonic-gate }
35710Sstevel@tonic-gate
35720Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
35730Sstevel@tonic-gate pollwakeup(pl, POLLWRNORM);
35740Sstevel@tonic-gate mutex_enter(&stp->sd_lock);
35750Sstevel@tonic-gate
35760Sstevel@tonic-gate if (stp->sd_sigflags & S_WRNORM)
35770Sstevel@tonic-gate strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
35780Sstevel@tonic-gate } else if (flag & QWANTR) {
35790Sstevel@tonic-gate if (stp->sd_flag & RSLEEP) {
35800Sstevel@tonic-gate stp->sd_flag &= ~RSLEEP;
35810Sstevel@tonic-gate cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
35820Sstevel@tonic-gate } else {
35830Sstevel@tonic-gate stp->sd_wakeq |= RSLEEP;
35840Sstevel@tonic-gate }
35850Sstevel@tonic-gate
35860Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
35870Sstevel@tonic-gate pollwakeup(pl, POLLIN | POLLRDNORM);
35880Sstevel@tonic-gate mutex_enter(&stp->sd_lock);
35890Sstevel@tonic-gate
35900Sstevel@tonic-gate {
35910Sstevel@tonic-gate int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);
35920Sstevel@tonic-gate
35930Sstevel@tonic-gate if (events)
35940Sstevel@tonic-gate strsendsig(stp->sd_siglist, events, 0, 0);
35950Sstevel@tonic-gate }
35960Sstevel@tonic-gate } else {
35970Sstevel@tonic-gate if (stp->sd_flag & WSLEEP) {
35980Sstevel@tonic-gate stp->sd_flag &= ~WSLEEP;
35990Sstevel@tonic-gate cv_broadcast(&stp->sd_wrq->q_wait);
36000Sstevel@tonic-gate }
36010Sstevel@tonic-gate
36020Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
36030Sstevel@tonic-gate pollwakeup(pl, POLLWRNORM);
36040Sstevel@tonic-gate mutex_enter(&stp->sd_lock);
36050Sstevel@tonic-gate
36060Sstevel@tonic-gate if (stp->sd_sigflags & S_WRNORM)
36070Sstevel@tonic-gate strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
36080Sstevel@tonic-gate }
36090Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
36100Sstevel@tonic-gate }
36110Sstevel@tonic-gate
36120Sstevel@tonic-gate int
struioget(queue_t * q,mblk_t * mp,struiod_t * dp,int noblock)36130Sstevel@tonic-gate struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
36140Sstevel@tonic-gate {
36150Sstevel@tonic-gate stdata_t *stp = STREAM(q);
36160Sstevel@tonic-gate int typ = STRUIOT_STANDARD;
36170Sstevel@tonic-gate uio_t *uiop = &dp->d_uio;
36180Sstevel@tonic-gate dblk_t *dbp;
36190Sstevel@tonic-gate ssize_t uiocnt;
36200Sstevel@tonic-gate ssize_t cnt;
36210Sstevel@tonic-gate unsigned char *ptr;
36220Sstevel@tonic-gate ssize_t resid;
36230Sstevel@tonic-gate int error = 0;
36240Sstevel@tonic-gate on_trap_data_t otd;
36250Sstevel@tonic-gate queue_t *stwrq;
36260Sstevel@tonic-gate
36270Sstevel@tonic-gate /*
36280Sstevel@tonic-gate * Plumbing may change while taking the type so store the
36290Sstevel@tonic-gate * queue in a temporary variable. It doesn't matter even
36300Sstevel@tonic-gate * if the we take the type from the previous plumbing,
36310Sstevel@tonic-gate * that's because if the plumbing has changed when we were
36320Sstevel@tonic-gate * holding the queue in a temporary variable, we can continue
36330Sstevel@tonic-gate * processing the message the way it would have been processed
36340Sstevel@tonic-gate * in the old plumbing, without any side effects but a bit
36350Sstevel@tonic-gate * extra processing for partial ip header checksum.
36360Sstevel@tonic-gate *
36370Sstevel@tonic-gate * This has been done to avoid holding the sd_lock which is
36380Sstevel@tonic-gate * very hot.
36390Sstevel@tonic-gate */
36400Sstevel@tonic-gate
36410Sstevel@tonic-gate stwrq = stp->sd_struiowrq;
36420Sstevel@tonic-gate if (stwrq)
36430Sstevel@tonic-gate typ = stwrq->q_struiot;
36440Sstevel@tonic-gate
36450Sstevel@tonic-gate for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
36460Sstevel@tonic-gate dbp = mp->b_datap;
36470Sstevel@tonic-gate ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
36480Sstevel@tonic-gate uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
36490Sstevel@tonic-gate cnt = MIN(uiocnt, uiop->uio_resid);
36500Sstevel@tonic-gate if (!(dbp->db_struioflag & STRUIO_SPEC) ||
36510Sstevel@tonic-gate (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
36520Sstevel@tonic-gate /*
36530Sstevel@tonic-gate * Either this mblk has already been processed
36540Sstevel@tonic-gate * or there is no more room in this mblk (?).
36550Sstevel@tonic-gate */
36560Sstevel@tonic-gate continue;
36570Sstevel@tonic-gate }
36580Sstevel@tonic-gate switch (typ) {
36590Sstevel@tonic-gate case STRUIOT_STANDARD:
36600Sstevel@tonic-gate if (noblock) {
36610Sstevel@tonic-gate if (on_trap(&otd, OT_DATA_ACCESS)) {
36620Sstevel@tonic-gate no_trap();
36630Sstevel@tonic-gate error = EWOULDBLOCK;
36640Sstevel@tonic-gate goto out;
36650Sstevel@tonic-gate }
36660Sstevel@tonic-gate }
36670Sstevel@tonic-gate if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
36680Sstevel@tonic-gate if (noblock)
36690Sstevel@tonic-gate no_trap();
36700Sstevel@tonic-gate goto out;
36710Sstevel@tonic-gate }
36720Sstevel@tonic-gate if (noblock)
36730Sstevel@tonic-gate no_trap();
36740Sstevel@tonic-gate break;
36750Sstevel@tonic-gate
36760Sstevel@tonic-gate default:
36770Sstevel@tonic-gate error = EIO;
36780Sstevel@tonic-gate goto out;
36790Sstevel@tonic-gate }
36800Sstevel@tonic-gate dbp->db_struioflag |= STRUIO_DONE;
36810Sstevel@tonic-gate dbp->db_cksumstuff += cnt;
36820Sstevel@tonic-gate }
36830Sstevel@tonic-gate out:
36840Sstevel@tonic-gate if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
36850Sstevel@tonic-gate /*
36860Sstevel@tonic-gate * A fault has occured and some bytes were moved to the
36870Sstevel@tonic-gate * current mblk, the uio_t has already been updated by
36880Sstevel@tonic-gate * the appropriate uio routine, so also update the mblk
36890Sstevel@tonic-gate * to reflect this in case this same mblk chain is used
36900Sstevel@tonic-gate * again (after the fault has been handled).
36910Sstevel@tonic-gate */
36920Sstevel@tonic-gate uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
36930Sstevel@tonic-gate if (uiocnt >= resid)
36940Sstevel@tonic-gate dbp->db_cksumstuff += resid;
36950Sstevel@tonic-gate }
36960Sstevel@tonic-gate return (error);
36970Sstevel@tonic-gate }
36980Sstevel@tonic-gate
36990Sstevel@tonic-gate /*
37000Sstevel@tonic-gate * Try to enter queue synchronously. Any attempt to enter a closing queue will
37010Sstevel@tonic-gate * fails. The qp->q_rwcnt keeps track of the number of successful entries so
37020Sstevel@tonic-gate * that removeq() will not try to close the queue while a thread is inside the
37030Sstevel@tonic-gate * queue.
37040Sstevel@tonic-gate */
37050Sstevel@tonic-gate static boolean_t
rwnext_enter(queue_t * qp)37060Sstevel@tonic-gate rwnext_enter(queue_t *qp)
37070Sstevel@tonic-gate {
37080Sstevel@tonic-gate mutex_enter(QLOCK(qp));
37090Sstevel@tonic-gate if (qp->q_flag & QWCLOSE) {
37100Sstevel@tonic-gate mutex_exit(QLOCK(qp));
37110Sstevel@tonic-gate return (B_FALSE);
37120Sstevel@tonic-gate }
37130Sstevel@tonic-gate qp->q_rwcnt++;
37140Sstevel@tonic-gate ASSERT(qp->q_rwcnt != 0);
37150Sstevel@tonic-gate mutex_exit(QLOCK(qp));
37160Sstevel@tonic-gate return (B_TRUE);
37170Sstevel@tonic-gate }
37180Sstevel@tonic-gate
37190Sstevel@tonic-gate /*
37200Sstevel@tonic-gate * Decrease the count of threads running in sync stream queue and wake up any
37210Sstevel@tonic-gate * threads blocked in removeq().
37220Sstevel@tonic-gate */
37230Sstevel@tonic-gate static void
rwnext_exit(queue_t * qp)37240Sstevel@tonic-gate rwnext_exit(queue_t *qp)
37250Sstevel@tonic-gate {
37260Sstevel@tonic-gate mutex_enter(QLOCK(qp));
37270Sstevel@tonic-gate qp->q_rwcnt--;
37280Sstevel@tonic-gate if (qp->q_flag & QWANTRMQSYNC) {
37290Sstevel@tonic-gate qp->q_flag &= ~QWANTRMQSYNC;
37300Sstevel@tonic-gate cv_broadcast(&qp->q_wait);
37310Sstevel@tonic-gate }
37320Sstevel@tonic-gate mutex_exit(QLOCK(qp));
37330Sstevel@tonic-gate }
37340Sstevel@tonic-gate
37350Sstevel@tonic-gate /*
37360Sstevel@tonic-gate * The purpose of rwnext() is to call the rw procedure of the next
37370Sstevel@tonic-gate * (downstream) modules queue.
37380Sstevel@tonic-gate *
37390Sstevel@tonic-gate * treated as put entrypoint for perimeter syncronization.
37400Sstevel@tonic-gate *
37410Sstevel@tonic-gate * There's no need to grab sq_putlocks here (which only exist for CIPUT
37420Sstevel@tonic-gate * sync queues). If it is CIPUT sync queue sq_count is incremented and it does
37430Sstevel@tonic-gate * not matter if any regular put entrypoints have been already entered. We
37440Sstevel@tonic-gate * can't increment one of the sq_putcounts (instead of sq_count) because
37450Sstevel@tonic-gate * qwait_rw won't know which counter to decrement.
37460Sstevel@tonic-gate *
37470Sstevel@tonic-gate * It would be reasonable to add the lockless FASTPUT logic.
37480Sstevel@tonic-gate */
37490Sstevel@tonic-gate int
rwnext(queue_t * qp,struiod_t * dp)37500Sstevel@tonic-gate rwnext(queue_t *qp, struiod_t *dp)
37510Sstevel@tonic-gate {
37520Sstevel@tonic-gate queue_t *nqp;
37530Sstevel@tonic-gate syncq_t *sq;
37540Sstevel@tonic-gate uint16_t count;
37550Sstevel@tonic-gate uint16_t flags;
37560Sstevel@tonic-gate struct qinit *qi;
37570Sstevel@tonic-gate int (*proc)();
37580Sstevel@tonic-gate struct stdata *stp;
37590Sstevel@tonic-gate int isread;
37600Sstevel@tonic-gate int rval;
37610Sstevel@tonic-gate
37620Sstevel@tonic-gate stp = STREAM(qp);
37630Sstevel@tonic-gate /*
37640Sstevel@tonic-gate * Prevent q_next from changing by holding sd_lock until acquiring
37650Sstevel@tonic-gate * SQLOCK. Note that a read-side rwnext from the streamhead will
37660Sstevel@tonic-gate * already have sd_lock acquired. In either case sd_lock is always
37670Sstevel@tonic-gate * released after acquiring SQLOCK.
37680Sstevel@tonic-gate *
37690Sstevel@tonic-gate * The streamhead read-side holding sd_lock when calling rwnext is
37700Sstevel@tonic-gate * required to prevent a race condition were M_DATA mblks flowing
37710Sstevel@tonic-gate * up the read-side of the stream could be bypassed by a rwnext()
37720Sstevel@tonic-gate * down-call. In this case sd_lock acts as the streamhead perimeter.
37730Sstevel@tonic-gate */
37740Sstevel@tonic-gate if ((nqp = _WR(qp)) == qp) {
37750Sstevel@tonic-gate isread = 0;
37760Sstevel@tonic-gate mutex_enter(&stp->sd_lock);
37770Sstevel@tonic-gate qp = nqp->q_next;
37780Sstevel@tonic-gate } else {
37790Sstevel@tonic-gate isread = 1;
37800Sstevel@tonic-gate if (nqp != stp->sd_wrq)
37810Sstevel@tonic-gate /* Not streamhead */
37820Sstevel@tonic-gate mutex_enter(&stp->sd_lock);
37830Sstevel@tonic-gate qp = _RD(nqp->q_next);
37840Sstevel@tonic-gate }
37850Sstevel@tonic-gate qi = qp->q_qinfo;
37860Sstevel@tonic-gate if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
37870Sstevel@tonic-gate /*
37880Sstevel@tonic-gate * Not a synchronous module or no r/w procedure for this
37890Sstevel@tonic-gate * queue, so just return EINVAL and let the caller handle it.
37900Sstevel@tonic-gate */
37910Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
37920Sstevel@tonic-gate return (EINVAL);
37930Sstevel@tonic-gate }
37940Sstevel@tonic-gate
37950Sstevel@tonic-gate if (rwnext_enter(qp) == B_FALSE) {
37960Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
37970Sstevel@tonic-gate return (EINVAL);
37980Sstevel@tonic-gate }
37990Sstevel@tonic-gate
38000Sstevel@tonic-gate sq = qp->q_syncq;
38010Sstevel@tonic-gate mutex_enter(SQLOCK(sq));
38020Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
38030Sstevel@tonic-gate count = sq->sq_count;
38040Sstevel@tonic-gate flags = sq->sq_flags;
38050Sstevel@tonic-gate ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
38060Sstevel@tonic-gate
38070Sstevel@tonic-gate while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
38080Sstevel@tonic-gate /*
38090Sstevel@tonic-gate * if this queue is being closed, return.
38100Sstevel@tonic-gate */
38110Sstevel@tonic-gate if (qp->q_flag & QWCLOSE) {
38120Sstevel@tonic-gate mutex_exit(SQLOCK(sq));
38130Sstevel@tonic-gate rwnext_exit(qp);
38140Sstevel@tonic-gate return (EINVAL);
38150Sstevel@tonic-gate }
38160Sstevel@tonic-gate
38170Sstevel@tonic-gate /*
38180Sstevel@tonic-gate * Wait until we can enter the inner perimeter.
38190Sstevel@tonic-gate */
38200Sstevel@tonic-gate sq->sq_flags = flags | SQ_WANTWAKEUP;
38210Sstevel@tonic-gate cv_wait(&sq->sq_wait, SQLOCK(sq));
38220Sstevel@tonic-gate count = sq->sq_count;
38230Sstevel@tonic-gate flags = sq->sq_flags;
38240Sstevel@tonic-gate }
38250Sstevel@tonic-gate
38260Sstevel@tonic-gate if (isread == 0 && stp->sd_struiowrq == NULL ||
38270Sstevel@tonic-gate isread == 1 && stp->sd_struiordq == NULL) {
38280Sstevel@tonic-gate /*
38290Sstevel@tonic-gate * Stream plumbing changed while waiting for inner perimeter
38300Sstevel@tonic-gate * so just return EINVAL and let the caller handle it.
38310Sstevel@tonic-gate */
38320Sstevel@tonic-gate mutex_exit(SQLOCK(sq));
38330Sstevel@tonic-gate rwnext_exit(qp);
38340Sstevel@tonic-gate return (EINVAL);
38350Sstevel@tonic-gate }
38360Sstevel@tonic-gate if (!(flags & SQ_CIPUT))
38370Sstevel@tonic-gate sq->sq_flags = flags | SQ_EXCL;
38380Sstevel@tonic-gate sq->sq_count = count + 1;
38390Sstevel@tonic-gate ASSERT(sq->sq_count != 0); /* Wraparound */
38400Sstevel@tonic-gate /*
38410Sstevel@tonic-gate * Note: The only message ordering guarantee that rwnext() makes is
38420Sstevel@tonic-gate * for the write queue flow-control case. All others (r/w queue
38430Sstevel@tonic-gate * with q_count > 0 (or q_first != 0)) are the resposibilty of
38440Sstevel@tonic-gate * the queue's rw procedure. This could be genralized here buy
38450Sstevel@tonic-gate * running the queue's service procedure, but that wouldn't be
38460Sstevel@tonic-gate * the most efficent for all cases.
38470Sstevel@tonic-gate */
38480Sstevel@tonic-gate mutex_exit(SQLOCK(sq));
38490Sstevel@tonic-gate if (! isread && (qp->q_flag & QFULL)) {
38500Sstevel@tonic-gate /*
38510Sstevel@tonic-gate * Write queue may be flow controlled. If so,
38520Sstevel@tonic-gate * mark the queue for wakeup when it's not.
38530Sstevel@tonic-gate */
38540Sstevel@tonic-gate mutex_enter(QLOCK(qp));
38550Sstevel@tonic-gate if (qp->q_flag & QFULL) {
38560Sstevel@tonic-gate qp->q_flag |= QWANTWSYNC;
38570Sstevel@tonic-gate mutex_exit(QLOCK(qp));
38580Sstevel@tonic-gate rval = EWOULDBLOCK;
38590Sstevel@tonic-gate goto out;
38600Sstevel@tonic-gate }
38610Sstevel@tonic-gate mutex_exit(QLOCK(qp));
38620Sstevel@tonic-gate }
38630Sstevel@tonic-gate
38640Sstevel@tonic-gate if (! isread && dp->d_mp)
38650Sstevel@tonic-gate STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
38660Sstevel@tonic-gate dp->d_mp->b_datap->db_base);
38670Sstevel@tonic-gate
38680Sstevel@tonic-gate rval = (*proc)(qp, dp);
38690Sstevel@tonic-gate
38700Sstevel@tonic-gate if (isread && dp->d_mp)
38710Sstevel@tonic-gate STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
38720Sstevel@tonic-gate dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
38730Sstevel@tonic-gate out:
38740Sstevel@tonic-gate /*
38750Sstevel@tonic-gate * The queue is protected from being freed by sq_count, so it is
38760Sstevel@tonic-gate * safe to call rwnext_exit and reacquire SQLOCK(sq).
38770Sstevel@tonic-gate */
38780Sstevel@tonic-gate rwnext_exit(qp);
38790Sstevel@tonic-gate
38800Sstevel@tonic-gate mutex_enter(SQLOCK(sq));
38810Sstevel@tonic-gate flags = sq->sq_flags;
38820Sstevel@tonic-gate ASSERT(sq->sq_count != 0);
38830Sstevel@tonic-gate sq->sq_count--;
38840Sstevel@tonic-gate if (flags & SQ_TAIL) {
38850Sstevel@tonic-gate putnext_tail(sq, qp, flags);
38860Sstevel@tonic-gate /*
38870Sstevel@tonic-gate * The only purpose of this ASSERT is to preserve calling stack
38880Sstevel@tonic-gate * in DEBUG kernel.
38890Sstevel@tonic-gate */
38900Sstevel@tonic-gate ASSERT(flags & SQ_TAIL);
38910Sstevel@tonic-gate return (rval);
38920Sstevel@tonic-gate }
38930Sstevel@tonic-gate ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
38940Sstevel@tonic-gate /*
38950Sstevel@tonic-gate * Safe to always drop SQ_EXCL:
38960Sstevel@tonic-gate * Not SQ_CIPUT means we set SQ_EXCL above
38970Sstevel@tonic-gate * For SQ_CIPUT SQ_EXCL will only be set if the put procedure
38980Sstevel@tonic-gate * did a qwriter(INNER) in which case nobody else
38990Sstevel@tonic-gate * is in the inner perimeter and we are exiting.
39000Sstevel@tonic-gate *
39010Sstevel@tonic-gate * I would like to make the following assertion:
39020Sstevel@tonic-gate *
39030Sstevel@tonic-gate * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
39040Sstevel@tonic-gate * sq->sq_count == 0);
39050Sstevel@tonic-gate *
39060Sstevel@tonic-gate * which indicates that if we are both putshared and exclusive,
39070Sstevel@tonic-gate * we became exclusive while executing the putproc, and the only
39080Sstevel@tonic-gate * claim on the syncq was the one we dropped a few lines above.
39090Sstevel@tonic-gate * But other threads that enter putnext while the syncq is exclusive
39100Sstevel@tonic-gate * need to make a claim as they may need to drop SQLOCK in the
39110Sstevel@tonic-gate * has_writers case to avoid deadlocks. If these threads are
39120Sstevel@tonic-gate * delayed or preempted, it is possible that the writer thread can
39130Sstevel@tonic-gate * find out that there are other claims making the (sq_count == 0)
39140Sstevel@tonic-gate * test invalid.
39150Sstevel@tonic-gate */
39160Sstevel@tonic-gate
39170Sstevel@tonic-gate sq->sq_flags = flags & ~SQ_EXCL;
39180Sstevel@tonic-gate if (sq->sq_flags & SQ_WANTWAKEUP) {
39190Sstevel@tonic-gate sq->sq_flags &= ~SQ_WANTWAKEUP;
39200Sstevel@tonic-gate cv_broadcast(&sq->sq_wait);
39210Sstevel@tonic-gate }
39220Sstevel@tonic-gate mutex_exit(SQLOCK(sq));
39230Sstevel@tonic-gate return (rval);
39240Sstevel@tonic-gate }
39250Sstevel@tonic-gate
39260Sstevel@tonic-gate /*
39270Sstevel@tonic-gate * The purpose of infonext() is to call the info procedure of the next
39280Sstevel@tonic-gate * (downstream) modules queue.
39290Sstevel@tonic-gate *
39300Sstevel@tonic-gate * treated as put entrypoint for perimeter syncronization.
39310Sstevel@tonic-gate *
39320Sstevel@tonic-gate * There's no need to grab sq_putlocks here (which only exist for CIPUT
39330Sstevel@tonic-gate * sync queues). If it is CIPUT sync queue regular sq_count is incremented and
39340Sstevel@tonic-gate * it does not matter if any regular put entrypoints have been already
39350Sstevel@tonic-gate * entered.
39360Sstevel@tonic-gate */
39370Sstevel@tonic-gate int
infonext(queue_t * qp,infod_t * idp)39380Sstevel@tonic-gate infonext(queue_t *qp, infod_t *idp)
39390Sstevel@tonic-gate {
39400Sstevel@tonic-gate queue_t *nqp;
39410Sstevel@tonic-gate syncq_t *sq;
39420Sstevel@tonic-gate uint16_t count;
39430Sstevel@tonic-gate uint16_t flags;
39440Sstevel@tonic-gate struct qinit *qi;
39450Sstevel@tonic-gate int (*proc)();
39460Sstevel@tonic-gate struct stdata *stp;
39470Sstevel@tonic-gate int rval;
39480Sstevel@tonic-gate
39490Sstevel@tonic-gate stp = STREAM(qp);
39500Sstevel@tonic-gate /*
39510Sstevel@tonic-gate * Prevent q_next from changing by holding sd_lock until
39520Sstevel@tonic-gate * acquiring SQLOCK.
39530Sstevel@tonic-gate */
39540Sstevel@tonic-gate mutex_enter(&stp->sd_lock);
39550Sstevel@tonic-gate if ((nqp = _WR(qp)) == qp) {
39560Sstevel@tonic-gate qp = nqp->q_next;
39570Sstevel@tonic-gate } else {
39580Sstevel@tonic-gate qp = _RD(nqp->q_next);
39590Sstevel@tonic-gate }
39600Sstevel@tonic-gate qi = qp->q_qinfo;
39610Sstevel@tonic-gate if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
39620Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
39630Sstevel@tonic-gate return (EINVAL);
39640Sstevel@tonic-gate }
39650Sstevel@tonic-gate sq = qp->q_syncq;
39660Sstevel@tonic-gate mutex_enter(SQLOCK(sq));
39670Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
39680Sstevel@tonic-gate count = sq->sq_count;
39690Sstevel@tonic-gate flags = sq->sq_flags;
39700Sstevel@tonic-gate ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
39710Sstevel@tonic-gate
39720Sstevel@tonic-gate while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
39730Sstevel@tonic-gate /*
39740Sstevel@tonic-gate * Wait until we can enter the inner perimeter.
39750Sstevel@tonic-gate */
39760Sstevel@tonic-gate sq->sq_flags = flags | SQ_WANTWAKEUP;
39770Sstevel@tonic-gate cv_wait(&sq->sq_wait, SQLOCK(sq));
39780Sstevel@tonic-gate count = sq->sq_count;
39790Sstevel@tonic-gate flags = sq->sq_flags;
39800Sstevel@tonic-gate }
39810Sstevel@tonic-gate
39820Sstevel@tonic-gate if (! (flags & SQ_CIPUT))
39830Sstevel@tonic-gate sq->sq_flags = flags | SQ_EXCL;
39840Sstevel@tonic-gate sq->sq_count = count + 1;
39850Sstevel@tonic-gate ASSERT(sq->sq_count != 0); /* Wraparound */
39860Sstevel@tonic-gate mutex_exit(SQLOCK(sq));
39870Sstevel@tonic-gate
39880Sstevel@tonic-gate rval = (*proc)(qp, idp);
39890Sstevel@tonic-gate
39900Sstevel@tonic-gate mutex_enter(SQLOCK(sq));
39910Sstevel@tonic-gate flags = sq->sq_flags;
39920Sstevel@tonic-gate ASSERT(sq->sq_count != 0);
39930Sstevel@tonic-gate sq->sq_count--;
39940Sstevel@tonic-gate if (flags & SQ_TAIL) {
39950Sstevel@tonic-gate putnext_tail(sq, qp, flags);
39960Sstevel@tonic-gate /*
39970Sstevel@tonic-gate * The only purpose of this ASSERT is to preserve calling stack
39980Sstevel@tonic-gate * in DEBUG kernel.
39990Sstevel@tonic-gate */
40000Sstevel@tonic-gate ASSERT(flags & SQ_TAIL);
40010Sstevel@tonic-gate return (rval);
40020Sstevel@tonic-gate }
40030Sstevel@tonic-gate ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
40040Sstevel@tonic-gate /*
40050Sstevel@tonic-gate * XXXX
40060Sstevel@tonic-gate * I am not certain the next comment is correct here. I need to consider
40070Sstevel@tonic-gate * why the infonext is called, and if dropping SQ_EXCL unless non-CIPUT
40080Sstevel@tonic-gate * might cause other problems. It just might be safer to drop it if
40090Sstevel@tonic-gate * !SQ_CIPUT because that is when we set it.
40100Sstevel@tonic-gate */
40110Sstevel@tonic-gate /*
40120Sstevel@tonic-gate * Safe to always drop SQ_EXCL:
40130Sstevel@tonic-gate * Not SQ_CIPUT means we set SQ_EXCL above
40140Sstevel@tonic-gate * For SQ_CIPUT SQ_EXCL will only be set if the put procedure
40150Sstevel@tonic-gate * did a qwriter(INNER) in which case nobody else
40160Sstevel@tonic-gate * is in the inner perimeter and we are exiting.
40170Sstevel@tonic-gate *
40180Sstevel@tonic-gate * I would like to make the following assertion:
40190Sstevel@tonic-gate *
40200Sstevel@tonic-gate * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
40210Sstevel@tonic-gate * sq->sq_count == 0);
40220Sstevel@tonic-gate *
40230Sstevel@tonic-gate * which indicates that if we are both putshared and exclusive,
40240Sstevel@tonic-gate * we became exclusive while executing the putproc, and the only
40250Sstevel@tonic-gate * claim on the syncq was the one we dropped a few lines above.
40260Sstevel@tonic-gate * But other threads that enter putnext while the syncq is exclusive
40270Sstevel@tonic-gate * need to make a claim as they may need to drop SQLOCK in the
40280Sstevel@tonic-gate * has_writers case to avoid deadlocks. If these threads are
40290Sstevel@tonic-gate * delayed or preempted, it is possible that the writer thread can
40300Sstevel@tonic-gate * find out that there are other claims making the (sq_count == 0)
40310Sstevel@tonic-gate * test invalid.
40320Sstevel@tonic-gate */
40330Sstevel@tonic-gate
40340Sstevel@tonic-gate sq->sq_flags = flags & ~SQ_EXCL;
40350Sstevel@tonic-gate mutex_exit(SQLOCK(sq));
40360Sstevel@tonic-gate return (rval);
40370Sstevel@tonic-gate }
40380Sstevel@tonic-gate
40390Sstevel@tonic-gate /*
40400Sstevel@tonic-gate * Return nonzero if the queue is responsible for struio(), else return 0.
40410Sstevel@tonic-gate */
40420Sstevel@tonic-gate int
isuioq(queue_t * q)40430Sstevel@tonic-gate isuioq(queue_t *q)
40440Sstevel@tonic-gate {
40450Sstevel@tonic-gate if (q->q_flag & QREADR)
40460Sstevel@tonic-gate return (STREAM(q)->sd_struiordq == q);
40470Sstevel@tonic-gate else
40480Sstevel@tonic-gate return (STREAM(q)->sd_struiowrq == q);
40490Sstevel@tonic-gate }
40500Sstevel@tonic-gate
/*
 * Putlock switch: defaults to 0 when built for __sparc and to 1 on every
 * other platform.  create_syncq_putlocks() asserts that it is 0.
 */
#if !defined(__sparc)
int disable_putlocks = 1;
#else
int disable_putlocks = 0;
#endif
40560Sstevel@tonic-gate
40570Sstevel@tonic-gate /*
40580Sstevel@tonic-gate * called by create_putlock.
40590Sstevel@tonic-gate */
40600Sstevel@tonic-gate static void
create_syncq_putlocks(queue_t * q)40610Sstevel@tonic-gate create_syncq_putlocks(queue_t *q)
40620Sstevel@tonic-gate {
40630Sstevel@tonic-gate syncq_t *sq = q->q_syncq;
40640Sstevel@tonic-gate ciputctrl_t *cip;
40650Sstevel@tonic-gate int i;
40660Sstevel@tonic-gate
40670Sstevel@tonic-gate ASSERT(sq != NULL);
40680Sstevel@tonic-gate
40690Sstevel@tonic-gate ASSERT(disable_putlocks == 0);
40700Sstevel@tonic-gate ASSERT(n_ciputctrl >= min_n_ciputctrl);
40710Sstevel@tonic-gate ASSERT(ciputctrl_cache != NULL);
40720Sstevel@tonic-gate
40730Sstevel@tonic-gate if (!(sq->sq_type & SQ_CIPUT))
40740Sstevel@tonic-gate return;
40750Sstevel@tonic-gate
40760Sstevel@tonic-gate for (i = 0; i <= 1; i++) {
40770Sstevel@tonic-gate if (sq->sq_ciputctrl == NULL) {
40780Sstevel@tonic-gate cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
40790Sstevel@tonic-gate SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
40800Sstevel@tonic-gate mutex_enter(SQLOCK(sq));
40810Sstevel@tonic-gate if (sq->sq_ciputctrl != NULL) {
40820Sstevel@tonic-gate mutex_exit(SQLOCK(sq));
40830Sstevel@tonic-gate kmem_cache_free(ciputctrl_cache, cip);
40840Sstevel@tonic-gate } else {
40850Sstevel@tonic-gate ASSERT(sq->sq_nciputctrl == 0);
40860Sstevel@tonic-gate sq->sq_nciputctrl = n_ciputctrl - 1;
40870Sstevel@tonic-gate /*
40880Sstevel@tonic-gate * putnext checks sq_ciputctrl without holding
40890Sstevel@tonic-gate * SQLOCK. if it is not NULL putnext assumes
40900Sstevel@tonic-gate * sq_nciputctrl is initialized. membar below
40910Sstevel@tonic-gate * insures that.
40920Sstevel@tonic-gate */
40930Sstevel@tonic-gate membar_producer();
40940Sstevel@tonic-gate sq->sq_ciputctrl = cip;
40950Sstevel@tonic-gate mutex_exit(SQLOCK(sq));
40960Sstevel@tonic-gate }
40970Sstevel@tonic-gate }
40980Sstevel@tonic-gate ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
40990Sstevel@tonic-gate if (i == 1)
41000Sstevel@tonic-gate break;
41010Sstevel@tonic-gate q = _OTHERQ(q);
41020Sstevel@tonic-gate if (!(q->q_flag & QPERQ)) {
41030Sstevel@tonic-gate ASSERT(sq == q->q_syncq);
41040Sstevel@tonic-gate break;
41050Sstevel@tonic-gate }
41060Sstevel@tonic-gate ASSERT(q->q_syncq != NULL);
41070Sstevel@tonic-gate ASSERT(sq != q->q_syncq);
41080Sstevel@tonic-gate sq = q->q_syncq;
41090Sstevel@tonic-gate ASSERT(sq->sq_type & SQ_CIPUT);
41100Sstevel@tonic-gate }
41110Sstevel@tonic-gate }
41120Sstevel@tonic-gate
41130Sstevel@tonic-gate /*
41140Sstevel@tonic-gate * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for
41150Sstevel@tonic-gate * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for
41160Sstevel@tonic-gate * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
41170Sstevel@tonic-gate * starting from q and down to the driver.
41180Sstevel@tonic-gate *
41190Sstevel@tonic-gate * This should be called after the affected queues are part of stream
41200Sstevel@tonic-gate * geometry. It should be called from driver/module open routine after
41210Sstevel@tonic-gate * qprocson() call. It is also called from nfs syscall where it is known that
41220Sstevel@tonic-gate * stream is configured and won't change its geometry during create_putlock
41230Sstevel@tonic-gate * call.
41240Sstevel@tonic-gate *
41250Sstevel@tonic-gate * caller normally uses 0 value for the stream argument to speed up MT putnext
41260Sstevel@tonic-gate * into the perimeter of q for example because its perimeter is per module
41270Sstevel@tonic-gate * (e.g. IP).
41280Sstevel@tonic-gate *
41290Sstevel@tonic-gate * caller normally uses non 0 value for the stream argument to hint the system
41300Sstevel@tonic-gate * that the stream of q is a very contended global system stream
41310Sstevel@tonic-gate * (e.g. NFS/UDP) and the part of the stream from q to the driver is
41320Sstevel@tonic-gate * particularly MT hot.
41330Sstevel@tonic-gate *
41340Sstevel@tonic-gate * Caller insures stream plumbing won't happen while we are here and therefore
41350Sstevel@tonic-gate * q_next can be safely used.
41360Sstevel@tonic-gate */
41370Sstevel@tonic-gate
41380Sstevel@tonic-gate void
create_putlocks(queue_t * q,int stream)41390Sstevel@tonic-gate create_putlocks(queue_t *q, int stream)
41400Sstevel@tonic-gate {
41410Sstevel@tonic-gate ciputctrl_t *cip;
41420Sstevel@tonic-gate struct stdata *stp = STREAM(q);
41430Sstevel@tonic-gate
41440Sstevel@tonic-gate q = _WR(q);
41450Sstevel@tonic-gate ASSERT(stp != NULL);
41460Sstevel@tonic-gate
41470Sstevel@tonic-gate if (disable_putlocks != 0)
41480Sstevel@tonic-gate return;
41490Sstevel@tonic-gate
41500Sstevel@tonic-gate if (n_ciputctrl < min_n_ciputctrl)
41510Sstevel@tonic-gate return;
41520Sstevel@tonic-gate
41530Sstevel@tonic-gate ASSERT(ciputctrl_cache != NULL);
41540Sstevel@tonic-gate
41550Sstevel@tonic-gate if (stream != 0 && stp->sd_ciputctrl == NULL) {
41560Sstevel@tonic-gate cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
41570Sstevel@tonic-gate SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
41580Sstevel@tonic-gate mutex_enter(&stp->sd_lock);
41590Sstevel@tonic-gate if (stp->sd_ciputctrl != NULL) {
41600Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
41610Sstevel@tonic-gate kmem_cache_free(ciputctrl_cache, cip);
41620Sstevel@tonic-gate } else {
41630Sstevel@tonic-gate ASSERT(stp->sd_nciputctrl == 0);
41640Sstevel@tonic-gate stp->sd_nciputctrl = n_ciputctrl - 1;
41650Sstevel@tonic-gate /*
41660Sstevel@tonic-gate * putnext checks sd_ciputctrl without holding
41670Sstevel@tonic-gate * sd_lock. if it is not NULL putnext assumes
41680Sstevel@tonic-gate * sd_nciputctrl is initialized. membar below
41690Sstevel@tonic-gate * insures that.
41700Sstevel@tonic-gate */
41710Sstevel@tonic-gate membar_producer();
41720Sstevel@tonic-gate stp->sd_ciputctrl = cip;
41730Sstevel@tonic-gate mutex_exit(&stp->sd_lock);
41740Sstevel@tonic-gate }
41750Sstevel@tonic-gate }
41760Sstevel@tonic-gate
41770Sstevel@tonic-gate ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);
41780Sstevel@tonic-gate
41790Sstevel@tonic-gate while (_SAMESTR(q)) {
41800Sstevel@tonic-gate create_syncq_putlocks(q);
41810Sstevel@tonic-gate if (stream == 0)
41820Sstevel@tonic-gate return;
41830Sstevel@tonic-gate q = q->q_next;
41840Sstevel@tonic-gate }
41850Sstevel@tonic-gate ASSERT(q != NULL);
41860Sstevel@tonic-gate create_syncq_putlocks(q);
41870Sstevel@tonic-gate }
41880Sstevel@tonic-gate
/*
 * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
 * through a stream.
 *
 * Data currently recorded per event is a timestamp, module/driver name,
 * downstream module/driver name, optional callstack, event type and a per
 * type datum.  Much of the STREAMS framework is instrumented for automatic
 * flow tracing (when enabled).  Events can be defined and used by STREAMS
 * modules and drivers.
 *
 * Global objects:
 *
 *	str_ftevent() - Add a flow-trace event to a dblk.
 *	str_ftfree() - Free flow-trace data
 *
 * Local objects:
 *
 *	fthdr_cache - pointer to the kmem cache for trace header.
 *	ftblk_cache - pointer to the kmem cache for trace data blocks.
 */
42090Sstevel@tonic-gate
/* Initial value 1: don't do STREAMS flow tracing. */
int str_ftnever = 1;
/* Initial value 0: don't record event call stacks. */
int str_ftstack = 0;
42120Sstevel@tonic-gate
42130Sstevel@tonic-gate void
str_ftevent(fthdr_t * hp,void * p,ushort_t evnt,ushort_t data)42140Sstevel@tonic-gate str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
42150Sstevel@tonic-gate {
42160Sstevel@tonic-gate ftblk_t *bp = hp->tail;
42170Sstevel@tonic-gate ftblk_t *nbp;
42180Sstevel@tonic-gate ftevnt_t *ep;
42190Sstevel@tonic-gate int ix, nix;
42200Sstevel@tonic-gate
42210Sstevel@tonic-gate ASSERT(hp != NULL);
42220Sstevel@tonic-gate
42230Sstevel@tonic-gate for (;;) {
42240Sstevel@tonic-gate if ((ix = bp->ix) == FTBLK_EVNTS) {
42250Sstevel@tonic-gate /*
42260Sstevel@tonic-gate * Tail doesn't have room, so need a new tail.
42270Sstevel@tonic-gate *
42280Sstevel@tonic-gate * To make this MT safe, first, allocate a new
42290Sstevel@tonic-gate * ftblk, and initialize it. To make life a
42300Sstevel@tonic-gate * little easier, reserve the first slot (mostly
42310Sstevel@tonic-gate * by making ix = 1). When we are finished with
42320Sstevel@tonic-gate * the initialization, CAS this pointer to the
42330Sstevel@tonic-gate * tail. If this succeeds, this is the new
42340Sstevel@tonic-gate * "next" block. Otherwise, another thread
42350Sstevel@tonic-gate * got here first, so free the block and start
42360Sstevel@tonic-gate * again.
42370Sstevel@tonic-gate */
42388752SPeter.Memishian@Sun.COM nbp = kmem_cache_alloc(ftblk_cache, KM_NOSLEEP);
42398752SPeter.Memishian@Sun.COM if (nbp == NULL) {
42400Sstevel@tonic-gate /* no mem, so punt */
42410Sstevel@tonic-gate str_ftnever++;
42420Sstevel@tonic-gate /* free up all flow data? */
42430Sstevel@tonic-gate return;
42440Sstevel@tonic-gate }
42450Sstevel@tonic-gate nbp->nxt = NULL;
42460Sstevel@tonic-gate nbp->ix = 1;
42470Sstevel@tonic-gate /*
42480Sstevel@tonic-gate * Just in case there is another thread about
42490Sstevel@tonic-gate * to get the next index, we need to make sure
42500Sstevel@tonic-gate * the value is there for it.
42510Sstevel@tonic-gate */
42520Sstevel@tonic-gate membar_producer();
42530Sstevel@tonic-gate if (casptr(&hp->tail, bp, nbp) == bp) {
42540Sstevel@tonic-gate /* CAS was successful */
42550Sstevel@tonic-gate bp->nxt = nbp;
42560Sstevel@tonic-gate membar_producer();
42570Sstevel@tonic-gate bp = nbp;
42580Sstevel@tonic-gate ix = 0;
42590Sstevel@tonic-gate goto cas_good;
42600Sstevel@tonic-gate } else {
42610Sstevel@tonic-gate kmem_cache_free(ftblk_cache, nbp);
42620Sstevel@tonic-gate bp = hp->tail;
42630Sstevel@tonic-gate continue;
42640Sstevel@tonic-gate }
42650Sstevel@tonic-gate }
42660Sstevel@tonic-gate nix = ix + 1;
42670Sstevel@tonic-gate if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) {
42680Sstevel@tonic-gate cas_good:
42690Sstevel@tonic-gate if (curthread != hp->thread) {
42700Sstevel@tonic-gate hp->thread = curthread;
42710Sstevel@tonic-gate evnt |= FTEV_CS;
42720Sstevel@tonic-gate }
42730Sstevel@tonic-gate if (CPU->cpu_seqid != hp->cpu_seqid) {
42740Sstevel@tonic-gate hp->cpu_seqid = CPU->cpu_seqid;
42750Sstevel@tonic-gate evnt |= FTEV_PS;
42760Sstevel@tonic-gate }
42770Sstevel@tonic-gate ep = &bp->ev[ix];
42780Sstevel@tonic-gate break;
42790Sstevel@tonic-gate }
42800Sstevel@tonic-gate }
42810Sstevel@tonic-gate
42820Sstevel@tonic-gate if (evnt & FTEV_QMASK) {
42830Sstevel@tonic-gate queue_t *qp = p;
42840Sstevel@tonic-gate
42850Sstevel@tonic-gate if (!(qp->q_flag & QREADR))
42860Sstevel@tonic-gate evnt |= FTEV_ISWR;
42878752SPeter.Memishian@Sun.COM
42888752SPeter.Memishian@Sun.COM ep->mid = Q2NAME(qp);
42898752SPeter.Memishian@Sun.COM
42908752SPeter.Memishian@Sun.COM /*
42918752SPeter.Memishian@Sun.COM * We only record the next queue name for FTEV_PUTNEXT since
42928752SPeter.Memishian@Sun.COM * that's the only time we *really* need it, and the putnext()
42938752SPeter.Memishian@Sun.COM * code ensures that qp->q_next won't vanish. (We could use
42948752SPeter.Memishian@Sun.COM * claimstr()/releasestr() but at a performance cost.)
42958752SPeter.Memishian@Sun.COM */
42968752SPeter.Memishian@Sun.COM if ((evnt & FTEV_MASK) == FTEV_PUTNEXT && qp->q_next != NULL)
42978752SPeter.Memishian@Sun.COM ep->midnext = Q2NAME(qp->q_next);
42988752SPeter.Memishian@Sun.COM else
42998752SPeter.Memishian@Sun.COM ep->midnext = NULL;
43000Sstevel@tonic-gate } else {
43018752SPeter.Memishian@Sun.COM ep->mid = p;
43028752SPeter.Memishian@Sun.COM ep->midnext = NULL;
43030Sstevel@tonic-gate }
43040Sstevel@tonic-gate
43058752SPeter.Memishian@Sun.COM if (ep->stk != NULL)
43068752SPeter.Memishian@Sun.COM ep->stk->fs_depth = getpcstack(ep->stk->fs_stk, FTSTK_DEPTH);
43078752SPeter.Memishian@Sun.COM
43080Sstevel@tonic-gate ep->ts = gethrtime();
43090Sstevel@tonic-gate ep->evnt = evnt;
43100Sstevel@tonic-gate ep->data = data;
43110Sstevel@tonic-gate hp->hash = (hp->hash << 9) + hp->hash;
43120Sstevel@tonic-gate hp->hash += (evnt << 16) | data;
43130Sstevel@tonic-gate hp->hash += (uintptr_t)ep->mid;
43140Sstevel@tonic-gate }
43150Sstevel@tonic-gate
43160Sstevel@tonic-gate /*
43170Sstevel@tonic-gate * Free flow-trace data.
43180Sstevel@tonic-gate */
43190Sstevel@tonic-gate void
str_ftfree(dblk_t * dbp)43200Sstevel@tonic-gate str_ftfree(dblk_t *dbp)
43210Sstevel@tonic-gate {
43220Sstevel@tonic-gate fthdr_t *hp = dbp->db_fthdr;
43230Sstevel@tonic-gate ftblk_t *bp = &hp->first;
43240Sstevel@tonic-gate ftblk_t *nbp;
43250Sstevel@tonic-gate
43260Sstevel@tonic-gate if (bp != hp->tail || bp->ix != 0) {
43270Sstevel@tonic-gate /*
43280Sstevel@tonic-gate * Clear out the hash, have the tail point to itself, and free
43290Sstevel@tonic-gate * any continuation blocks.
43300Sstevel@tonic-gate */
43310Sstevel@tonic-gate bp = hp->first.nxt;
43320Sstevel@tonic-gate hp->tail = &hp->first;
43330Sstevel@tonic-gate hp->hash = 0;
43340Sstevel@tonic-gate hp->first.nxt = NULL;
43350Sstevel@tonic-gate hp->first.ix = 0;
43360Sstevel@tonic-gate while (bp != NULL) {
43370Sstevel@tonic-gate nbp = bp->nxt;
43380Sstevel@tonic-gate kmem_cache_free(ftblk_cache, bp);
43390Sstevel@tonic-gate bp = nbp;
43400Sstevel@tonic-gate }
43410Sstevel@tonic-gate }
43420Sstevel@tonic-gate kmem_cache_free(fthdr_cache, hp);
43430Sstevel@tonic-gate dbp->db_fthdr = NULL;
43440Sstevel@tonic-gate }
4345