10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52958Sdr146992 * Common Development and Distribution License (the "License"). 62958Sdr146992 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 220Sstevel@tonic-gate /* All Rights Reserved */ 230Sstevel@tonic-gate 240Sstevel@tonic-gate 250Sstevel@tonic-gate /* 266707Sbrutus * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 270Sstevel@tonic-gate * Use is subject to license terms. 
280Sstevel@tonic-gate */ 290Sstevel@tonic-gate 300Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 310Sstevel@tonic-gate 320Sstevel@tonic-gate #include <sys/types.h> 330Sstevel@tonic-gate #include <sys/param.h> 340Sstevel@tonic-gate #include <sys/thread.h> 350Sstevel@tonic-gate #include <sys/sysmacros.h> 360Sstevel@tonic-gate #include <sys/stropts.h> 370Sstevel@tonic-gate #include <sys/stream.h> 380Sstevel@tonic-gate #include <sys/strsubr.h> 390Sstevel@tonic-gate #include <sys/strsun.h> 400Sstevel@tonic-gate #include <sys/conf.h> 410Sstevel@tonic-gate #include <sys/debug.h> 420Sstevel@tonic-gate #include <sys/cmn_err.h> 430Sstevel@tonic-gate #include <sys/kmem.h> 440Sstevel@tonic-gate #include <sys/atomic.h> 450Sstevel@tonic-gate #include <sys/errno.h> 460Sstevel@tonic-gate #include <sys/vtrace.h> 470Sstevel@tonic-gate #include <sys/ftrace.h> 480Sstevel@tonic-gate #include <sys/ontrap.h> 490Sstevel@tonic-gate #include <sys/multidata.h> 500Sstevel@tonic-gate #include <sys/multidata_impl.h> 510Sstevel@tonic-gate #include <sys/sdt.h> 521110Smeem #include <sys/strft.h> 530Sstevel@tonic-gate 540Sstevel@tonic-gate #ifdef DEBUG 550Sstevel@tonic-gate #include <sys/kmem_impl.h> 560Sstevel@tonic-gate #endif 570Sstevel@tonic-gate 580Sstevel@tonic-gate /* 590Sstevel@tonic-gate * This file contains all the STREAMS utility routines that may 600Sstevel@tonic-gate * be used by modules and drivers. 610Sstevel@tonic-gate */ 620Sstevel@tonic-gate 630Sstevel@tonic-gate /* 640Sstevel@tonic-gate * STREAMS message allocator: principles of operation 650Sstevel@tonic-gate * 660Sstevel@tonic-gate * The streams message allocator consists of all the routines that 670Sstevel@tonic-gate * allocate, dup and free streams messages: allocb(), [d]esballoc[a], 680Sstevel@tonic-gate * dupb(), freeb() and freemsg(). What follows is a high-level view 690Sstevel@tonic-gate * of how the allocator works. 
700Sstevel@tonic-gate * 710Sstevel@tonic-gate * Every streams message consists of one or more mblks, a dblk, and data. 720Sstevel@tonic-gate * All mblks for all types of messages come from a common mblk_cache. 730Sstevel@tonic-gate * The dblk and data come in several flavors, depending on how the 740Sstevel@tonic-gate * message is allocated: 750Sstevel@tonic-gate * 760Sstevel@tonic-gate * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of 770Sstevel@tonic-gate * fixed-size dblk/data caches. For message sizes that are multiples of 780Sstevel@tonic-gate * PAGESIZE, dblks are allocated separately from the buffer. 790Sstevel@tonic-gate * The associated buffer is allocated by the constructor using kmem_alloc(). 800Sstevel@tonic-gate * For all other message sizes, dblk and its associated data is allocated 810Sstevel@tonic-gate * as a single contiguous chunk of memory. 820Sstevel@tonic-gate * Objects in these caches consist of a dblk plus its associated data. 830Sstevel@tonic-gate * allocb() determines the nearest-size cache by table lookup: 840Sstevel@tonic-gate * the dblk_cache[] array provides the mapping from size to dblk cache. 850Sstevel@tonic-gate * 860Sstevel@tonic-gate * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by 870Sstevel@tonic-gate * kmem_alloc()'ing a buffer for the data and supplying that 880Sstevel@tonic-gate * buffer to gesballoc(), described below. 890Sstevel@tonic-gate * 900Sstevel@tonic-gate * (3) The four flavors of [d]esballoc[a] are all implemented by a 910Sstevel@tonic-gate * common routine, gesballoc() ("generic esballoc"). gesballoc() 920Sstevel@tonic-gate * allocates a dblk from the global dblk_esb_cache and sets db_base, 930Sstevel@tonic-gate * db_lim and db_frtnp to describe the caller-supplied buffer. 940Sstevel@tonic-gate * 950Sstevel@tonic-gate * While there are several routines to allocate messages, there is only 960Sstevel@tonic-gate * one routine to free messages: freeb(). 
freeb() simply invokes the 970Sstevel@tonic-gate * dblk's free method, dbp->db_free(), which is set at allocation time. 980Sstevel@tonic-gate * 990Sstevel@tonic-gate * dupb() creates a new reference to a message by allocating a new mblk, 1000Sstevel@tonic-gate * incrementing the dblk reference count and setting the dblk's free 1010Sstevel@tonic-gate * method to dblk_decref(). The dblk's original free method is retained 1020Sstevel@tonic-gate * in db_lastfree. dblk_decref() decrements the reference count on each 1030Sstevel@tonic-gate * freeb(). If this is not the last reference it just frees the mblk; 1040Sstevel@tonic-gate * if this *is* the last reference, it restores db_free to db_lastfree, 1050Sstevel@tonic-gate * sets db_mblk to the current mblk (see below), and invokes db_lastfree. 1060Sstevel@tonic-gate * 1070Sstevel@tonic-gate * The implementation makes aggressive use of kmem object caching for 1080Sstevel@tonic-gate * maximum performance. This makes the code simple and compact, but 1090Sstevel@tonic-gate * also a bit abstruse in some places. The invariants that constitute a 1100Sstevel@tonic-gate * message's constructed state, described below, are more subtle than usual. 1110Sstevel@tonic-gate * 1120Sstevel@tonic-gate * Every dblk has an "attached mblk" as part of its constructed state. 1130Sstevel@tonic-gate * The mblk is allocated by the dblk's constructor and remains attached 1140Sstevel@tonic-gate * until the message is either dup'ed or pulled up. In the dupb() case 1150Sstevel@tonic-gate * the mblk association doesn't matter until the last free, at which time 1160Sstevel@tonic-gate * dblk_decref() attaches the last mblk to the dblk. pullupmsg() affects 1170Sstevel@tonic-gate * the mblk association because it swaps the leading mblks of two messages, 1180Sstevel@tonic-gate * so it is responsible for swapping their db_mblk pointers accordingly. 
 * From a constructed-state viewpoint it doesn't matter that a dblk's
 * attached mblk can change while the message is allocated; all that
 * matters is that the dblk has *some* attached mblk when it's freed.
 *
 * The sizes of the allocb() small-message caches are not magical.
 * They represent a good trade-off between internal and external
 * fragmentation for current workloads.  They should be reevaluated
 * periodically, especially if allocations larger than DBLK_MAX_CACHE
 * become common.  We use 64-byte alignment so that dblks don't
 * straddle cache lines unnecessarily.
 */
#define	DBLK_MAX_CACHE		73728	/* largest dblk/data cache size */
#define	DBLK_CACHE_ALIGN	64	/* cache-line alignment for dblks */
#define	DBLK_MIN_SIZE		8	/* granularity of dblk_cache[] table */
#define	DBLK_SIZE_SHIFT		3	/* log2(DBLK_MIN_SIZE) */

/*
 * db_ref, db_type, db_flags and db_struioflag are adjacent byte-sized
 * dblk fields that together occupy one 32-bit word (see DBLK_RTFU_WORD
 * below), so they can be initialized and updated with a single store or
 * atomic operation.  DBLK_RTFU_SHIFT() computes each field's bit offset
 * within that word from its byte offset, accounting for endianness.
 *
 * NOTE(review): the (&((dblk_t *)0)->field) idiom is the traditional
 * null-pointer offsetof trick; it is not strictly conforming ISO C but
 * is relied upon throughout this codebase.
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Build the packed ref/type/flags/struioflag word in one expression.
 * Note that db_flags also absorbs (ref - 1), which encodes the minimum
 * reference count (DBLK_REFMIN) consulted by dblk_decref().
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
/* Access the packed 32-bit word starting at db_ref. */
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
/* Likewise for the mblk's b_band/b_flag group, cleared in one store. */
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))

/*
 * Cache sizes for the fixed-size dblk/data caches, terminated by 0.
 * Page-multiple sizes get a separately allocated buffer; all others
 * share a single contiguous chunk with the dblk (see dblk_constructor()
 * and streams_msg_init()).
 */
static size_t dblk_sizes[] = {
#ifdef _LP64
	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856,
	8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624,
	40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392,
#else
	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904,
	8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672,
	40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440,
#endif
	DBLK_MAX_CACHE, 0
};

/* size-to-cache lookup table, filled in by streams_msg_init() */
static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
static struct kmem_cache *mblk_cache;
static struct kmem_cache *dblk_esb_cache;
static struct kmem_cache *fthdr_cache;	/* flowtrace headers */
static struct kmem_cache *ftblk_cache;	/* flowtrace event blocks */

static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
static mblk_t *allocb_oversize(size_t size, int flags);
static int allocb_tryhard_fails;
static void frnop_func(void *arg);
frtn_t frnop = { frnop_func };	/* no-op free routine for esballoc users */
static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);

static boolean_t rwnext_enter(queue_t *qp);
static void rwnext_exit(queue_t *qp);

/*
 * Patchable mblk/dblk kmem_cache flags.
 */
int dblk_kmem_flags = 0;
int mblk_kmem_flags = 0;

/*
 * kmem constructor for the fixed-size dblk/data caches.  cdrarg carries
 * the cache's message size.  Establishes the dblk's constructed state:
 * an attached mblk (allocated here, see the "attached mblk" discussion
 * in the block comment above), a data buffer, and default field values.
 * For page-multiple sizes the buffer is kmem_alloc()'d separately;
 * otherwise the data lives immediately after the dblk in the same
 * cache object (&dbp[1]).  Returns 0 on success, -1 on allocation
 * failure (with nothing leaked).
 */
static int
dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;
	size_t index;

	ASSERT(msg_size != 0);

	/* map size to dblk_cache[] slot; table granularity is 8 bytes */
	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;

	ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	if ((msg_size & PAGEOFFSET) == 0) {
		/* page-multiple size: buffer allocated separately */
		dbp->db_base = kmem_alloc(msg_size, kmflags);
		if (dbp->db_base == NULL) {
			kmem_cache_free(mblk_cache, dbp->db_mblk);
			return (-1);
		}
	} else {
		/* buffer follows the dblk within the same cache object */
		dbp->db_base = (unsigned char *)&dbp[1];
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_cache[index];
	dbp->db_lim = dbp->db_base + msg_size;
	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

/*
 * kmem constructor for dblk_esb_cache (esballoc-style dblks).  No data
 * buffer is set up here: db_base/db_lim/db_frtnp describe the caller-
 * supplied buffer and are filled in by gesballoc() at allocation time.
 */
/*ARGSUSED*/
static int
dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_esb_cache;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

/*
 * kmem constructor for driver-private buffer caches (bcache).  Like
 * dblk_constructor(), but the data buffer comes from the bcache's own
 * buffer_cache and the last-free method is bcache_dblk_lastfree().
 * cdrarg is the owning bcache_t.
 */
static int
bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	bcache_t *bcp = (bcache_t *)cdrarg;

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);

	if ((dbp->db_base = (unsigned char *)kmem_cache_alloc(bcp->buffer_cache,
	    kmflags)) == NULL) {
		kmem_cache_free(mblk_cache, dbp->db_mblk);
		return (-1);
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = (void *)bcp;
	dbp->db_lim = dbp->db_base + bcp->size;
	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

/*
 * kmem destructor for the fixed-size dblk caches: undo dblk_constructor().
 * Frees the separately-allocated buffer (page-multiple sizes only) and
 * the attached mblk.  The ASSERTs verify the constructed-state invariants
 * that every freed dblk must have restored (see dblk_lastfree()).
 */
/*ARGSUSED*/
static void
dblk_destructor(void *buf, void *cdrarg)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;

	ASSERT(dbp->db_mblk->b_datap == dbp);

	ASSERT(msg_size != 0);

	ASSERT(dbp->db_struioflag == 0);
	ASSERT(dbp->db_struioun.cksum.flags == 0);

	if ((msg_size & PAGEOFFSET) == 0) {
		kmem_free(dbp->db_base, msg_size);
	}

	kmem_cache_free(mblk_cache, dbp->db_mblk);
}

/*
 * kmem destructor for bcache dblks: return the data buffer to the
 * bcache's buffer_cache and the attached mblk to mblk_cache.
 */
static void
bcache_dblk_destructor(void *buf, void *cdrarg)
{
	dblk_t *dbp = buf;
	bcache_t *bcp = (bcache_t *)cdrarg;

	kmem_cache_free(bcp->buffer_cache, dbp->db_base);

	ASSERT(dbp->db_mblk->b_datap == dbp);

	ASSERT(dbp->db_struioflag == 0);
	ASSERT(dbp->db_struioun.cksum.flags == 0);

	kmem_cache_free(mblk_cache, dbp->db_mblk);
}

/*
 * One-time initialization of the STREAMS message allocator: create the
 * common mblk cache, the per-size dblk/data caches listed in
 * dblk_sizes[], the esballoc dblk cache, and the flowtrace caches;
 * then populate the dblk_cache[] size-to-cache lookup table used by
 * allocb().  Also initializes Multidata and the esballoc throttling
 * queue.
 */
void
streams_msg_init(void)
{
	char name[40];
	size_t size;
	size_t lastsize = DBLK_MIN_SIZE;
	size_t *sizep;
	struct kmem_cache *cp;
	size_t tot_size;
	int offset;

	mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32,
	    NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags);

	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {

		if ((offset = (size & PAGEOFFSET)) != 0) {
			/*
			 * We are in the middle of a page, dblk should
			 * be allocated on the same page
			 */
			tot_size = size + sizeof (dblk_t);
			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
			    < PAGESIZE);
			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);

		} else {

			/*
			 * buf size is multiple of page size, dblk and
			 * buffer are allocated separately.
			 */

			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
			tot_size = sizeof (dblk_t);
		}

		/*
		 * NOTE(review): size_t formatted with %ld; matches ulong_t
		 * on this platform's ABI -- confirm if ported.
		 */
		(void) sprintf(name, "streams_dblk_%ld", size);
		cp = kmem_cache_create(name, tot_size, DBLK_CACHE_ALIGN,
		    dblk_constructor, dblk_destructor, NULL, (void *)(size),
		    NULL, dblk_kmem_flags);

		/*
		 * Every 8-byte bucket up to this cache's size that is not
		 * already claimed by a smaller cache maps to this cache.
		 */
		while (lastsize <= size) {
			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
			lastsize += DBLK_MIN_SIZE;
		}
	}

	dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t),
	    DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL,
	    (void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
	fthdr_cache = kmem_cache_create("streams_fthdr", sizeof (fthdr_t), 32,
	    NULL, NULL, NULL, NULL, NULL, 0);
	ftblk_cache = kmem_cache_create("streams_ftblk", sizeof (ftblk_t), 32,
	    NULL, NULL, NULL, NULL, NULL, 0);

	/* Initialize Multidata caches */
	mmd_init();

	/* initialize throttling queue for esballoc */
	esballoc_queue_init();
}

/*
 * Allocate a message of at least 'size' bytes.  The priority argument
 * is unused.  Small messages come from the nearest-size dblk cache via
 * the dblk_cache[] lookup table; sizes beyond DBLK_MAX_CACHE go through
 * allocb_oversize().  Returns NULL on allocation failure (KM_NOSLEEP
 * semantics throughout).
 */
/*ARGSUSED*/
mblk_t *
allocb(size_t size, uint_t pri)
{
	dblk_t *dbp;
	mblk_t *mp;
	size_t index;

	index = (size - 1) >> DBLK_SIZE_SHIFT;

	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
		if (size != 0) {
			mp = allocb_oversize(size, KM_NOSLEEP);
			goto out;
		}
		/* size == 0 wrapped the index; use the smallest cache */
		index = 0;
	}

	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
		mp = NULL;
		goto out;
	}

	mp = dbp->db_mblk;
	/* one store initializes db_ref = 1, db_type = M_DATA, flags = 0 */
	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = dbp->db_base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;
	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
out:
	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);

	return (mp);
}

/*
 * allocb() variant that copies the credential, pid and message type
 * from a template message into the new one.
 */
mblk_t *
allocb_tmpl(size_t size, const mblk_t *tmpl)
{
	mblk_t *mp = allocb(size, 0);

	if (mp != NULL) {
		cred_t *cr = DB_CRED(tmpl);
		if (cr != NULL)
			crhold(mp->b_datap->db_credp = cr);
		DB_CPID(mp) = DB_CPID(tmpl);
		DB_TYPE(mp) = DB_TYPE(tmpl);
	}
	return (mp);
}

/*
 * allocb() variant that attaches (and holds) the given credential.
 */
mblk_t *
allocb_cred(size_t size, cred_t *cr)
{
	mblk_t *mp = allocb(size, 0);

	if (mp != NULL && cr != NULL)
		crhold(mp->b_datap->db_credp = cr);

	return (mp);
}

/*
 * Like allocb_cred(), but may block waiting for memory via allocb_wait();
 * *error receives the failure reason when NULL is returned.
 */
mblk_t *
allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr)
{
	mblk_t *mp = allocb_wait(size, 0, flags, error);

	if (mp != NULL && cr != NULL)
		crhold(mp->b_datap->db_credp = cr);

	return (mp);
}

/*
 * Free a single message block.  Simply dispatches to the dblk's current
 * free method (dblk_lastfree, dblk_decref, an esballoc lastfree, ...),
 * which was installed at allocation/dup time.  The mblk must be
 * unlinked (b_next/b_prev NULL).
 */
void
freeb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;

	ASSERT(dbp->db_ref > 0);
	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);

	dbp->db_free(mp, dbp);
}

/*
 * Free an entire message: walk the b_cont chain freeing each block.
 * b_cont is saved before db_free() since the block is gone afterwards.
 */
void
freemsg(mblk_t *mp)
{
	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
	while (mp) {
		dblk_t *dbp = mp->b_datap;
		mblk_t *mp_cont = mp->b_cont;

		ASSERT(dbp->db_ref > 0);
		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);

		dbp->db_free(mp, dbp);
		mp = mp_cont;
	}
}

/*
 * Reallocate a block for another use.  Try hard to use the old block.
 * If the old data is wanted (copy), leave b_wptr at the end of the data,
 * otherwise return b_wptr = b_rptr.
 *
 * This routine is private and unstable.
 */
mblk_t *
reallocb(mblk_t *mp, size_t size, uint_t copy)
{
	mblk_t *mp1;
	unsigned char *old_rptr;
	ptrdiff_t cur_size;

	if (mp == NULL)
		return (allocb(size, BPRI_HI));

	cur_size = mp->b_wptr - mp->b_rptr;
	old_rptr = mp->b_rptr;

	ASSERT(mp->b_datap->db_ref != 0);

	/* reuse in place only if we hold the sole reference and it fits */
	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
		/*
		 * If the data is wanted and it will fit where it is, no
		 * work is required.
		 */
		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
			return (mp);

		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
		mp1 = mp;
	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
		/* XXX other mp state could be copied too, db_flags ... ? */
		mp1->b_cont = mp->b_cont;
	} else {
		return (NULL);
	}

	if (copy) {
		bcopy(old_rptr, mp1->b_rptr, cur_size);
		mp1->b_wptr = mp1->b_rptr + cur_size;
	}

	if (mp != mp1)
		freeb(mp);

	return (mp1);
}

/*
 * Last-free method for cache-allocated dblks: restore the fields that
 * must be in their default ('unspecified') state for the constructed-
 * state invariants (see dblk_destructor()'s ASSERTs), then return the
 * dblk -- with its attached mblk -- to its kmem cache.
 */
static void
dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;

	/* Reset the struioflag and the checksum flag fields */
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	/* and the COOKED and/or UIOA flag(s) */
	dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);

	kmem_cache_free(dbp->db_cache, dbp);
}

/*
 * Free method installed by dupb(): drop one reference to the dblk.
 * If ours was not the last reference, only the mblk is freed.  On the
 * last reference, re-attach this mblk to the dblk, restore the original
 * free method from db_lastfree, and invoke it.
 */
static void
dblk_decref(mblk_t *mp, dblk_t *dbp)
{
	if (dbp->db_ref != 1) {
		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
		/*
		 * atomic_add_32_nv() just decremented db_ref, so we no longer
		 * have a reference to the dblk, which means another thread
		 * could free it.  Therefore we cannot examine the dblk to
		 * determine whether ours was the last reference.  Instead,
		 * we extract the new and minimum reference counts from rtfu.
		 * Note that all we're really saying is "if (ref != refmin)".
		 */
		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
			kmem_cache_free(mblk_cache, mp);
			return;
		}
	}
	dbp->db_mblk = mp;
	dbp->db_free = dbp->db_lastfree;
	dbp->db_lastfree(mp, dbp);
}

/*
 * Create a new reference to a message block: allocate a fresh mblk
 * sharing the same dblk, switch the dblk's free method to dblk_decref()
 * (the original remains in db_lastfree), and atomically increment
 * db_ref via a cas loop on the packed RTFU word.  Fails (NULL) if the
 * mblk cannot be allocated or db_ref is already at DBLK_REFMAX.
 */
mblk_t *
dupb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;
	mblk_t *new_mp;
	uint32_t oldrtfu, newrtfu;

	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
		goto out;

	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
	new_mp->b_rptr = mp->b_rptr;
	new_mp->b_wptr = mp->b_wptr;
	new_mp->b_datap = dbp;
	new_mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);

	dbp->db_free = dblk_decref;
	do {
		ASSERT(dbp->db_ref > 0);
		oldrtfu =
		    DBLK_RTFU_WORD(dbp);
		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
		/*
		 * If db_ref is maxed out we can't dup this message anymore.
		 */
		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
			kmem_cache_free(mblk_cache, new_mp);
			new_mp = NULL;
			goto out;
		}
	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);

out:
	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
	return (new_mp);
}

/*
 * Last-free method for desballoc'd messages: invoke the caller's free
 * routine for the external buffer, reset the dblk to its default state,
 * and return it to dblk_esb_cache.
 */
static void
dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
{
	frtn_t *frp = dbp->db_frtnp;

	ASSERT(dbp->db_mblk == mp);
	frp->free_func(frp->free_arg);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	kmem_cache_free(dbp->db_cache, dbp);
}

/* no-op free routine, for callers whose buffers need no cleanup */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
}

/*
 * Generic esballoc used to implement the four flavors: [d]esballoc[a].
 * Allocates a dblk from dblk_esb_cache, points db_base/db_lim at the
 * caller-supplied buffer, records the caller's free routine, and
 * installs 'lastfree' as the free method.  Returns NULL only on
 * allocation failure (never with KM_SLEEP).
 */
static mblk_t *
gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
{
	dblk_t *dbp;
	mblk_t *mp;

	ASSERT(base != NULL && frp != NULL);

	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
		mp = NULL;
		goto out;
	}

	mp = dbp->db_mblk;
	dbp->db_base = base;
	dbp->db_lim = base + size;
	dbp->db_free = dbp->db_lastfree = lastfree;
	dbp->db_frtnp = frp;
	DBLK_RTFU_WORD(dbp) = db_rtfu;
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;

out:
	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
	return (mp);
}

/*
 * Allocate a message around a caller-supplied buffer; frp->free_func is
 * called on last free.  The buffer is freed asynchronously via the
 * esballoc throttling queue (freebs_enqueue).
 */
/*ARGSUSED*/
mblk_t *
esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
		    frp, freebs_enqueue, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
	    frp, freebs_enqueue, KM_NOSLEEP));
}

/*
 * Same as esballoc() but sleeps waiting for memory.
 * (KM_SLEEP allocation cannot fail, so mp needs no NULL check in the
 * flowtracing branch.)
 */
/*ARGSUSED*/
mblk_t *
esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
7160Sstevel@tonic-gate */ 7170Sstevel@tonic-gate if (!str_ftnever) { 7180Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 7190Sstevel@tonic-gate frp, freebs_enqueue, KM_SLEEP); 7200Sstevel@tonic-gate 7210Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size); 7220Sstevel@tonic-gate return (mp); 7230Sstevel@tonic-gate } 7240Sstevel@tonic-gate 7250Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 7260Sstevel@tonic-gate frp, freebs_enqueue, KM_SLEEP)); 7270Sstevel@tonic-gate } 7280Sstevel@tonic-gate 7290Sstevel@tonic-gate /*ARGSUSED*/ 7300Sstevel@tonic-gate mblk_t * 7310Sstevel@tonic-gate desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp) 7320Sstevel@tonic-gate { 7330Sstevel@tonic-gate mblk_t *mp; 7340Sstevel@tonic-gate 7350Sstevel@tonic-gate /* 7360Sstevel@tonic-gate * Note that this is structured to allow the common case (i.e. 7370Sstevel@tonic-gate * STREAMS flowtracing disabled) to call gesballoc() with tail 7380Sstevel@tonic-gate * call optimization. 
7390Sstevel@tonic-gate */ 7400Sstevel@tonic-gate if (!str_ftnever) { 7410Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 7426707Sbrutus frp, dblk_lastfree_desb, KM_NOSLEEP); 7430Sstevel@tonic-gate 7440Sstevel@tonic-gate if (mp != NULL) 7450Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size); 7460Sstevel@tonic-gate return (mp); 7470Sstevel@tonic-gate } 7480Sstevel@tonic-gate 7490Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 7500Sstevel@tonic-gate frp, dblk_lastfree_desb, KM_NOSLEEP)); 7510Sstevel@tonic-gate } 7520Sstevel@tonic-gate 7530Sstevel@tonic-gate /*ARGSUSED*/ 7540Sstevel@tonic-gate mblk_t * 7550Sstevel@tonic-gate esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp) 7560Sstevel@tonic-gate { 7570Sstevel@tonic-gate mblk_t *mp; 7580Sstevel@tonic-gate 7590Sstevel@tonic-gate /* 7600Sstevel@tonic-gate * Note that this is structured to allow the common case (i.e. 7610Sstevel@tonic-gate * STREAMS flowtracing disabled) to call gesballoc() with tail 7620Sstevel@tonic-gate * call optimization. 
7630Sstevel@tonic-gate */ 7640Sstevel@tonic-gate if (!str_ftnever) { 7650Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0), 7660Sstevel@tonic-gate frp, freebs_enqueue, KM_NOSLEEP); 7670Sstevel@tonic-gate 7680Sstevel@tonic-gate if (mp != NULL) 7690Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size); 7700Sstevel@tonic-gate return (mp); 7710Sstevel@tonic-gate } 7720Sstevel@tonic-gate 7730Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0), 7740Sstevel@tonic-gate frp, freebs_enqueue, KM_NOSLEEP)); 7750Sstevel@tonic-gate } 7760Sstevel@tonic-gate 7770Sstevel@tonic-gate /*ARGSUSED*/ 7780Sstevel@tonic-gate mblk_t * 7790Sstevel@tonic-gate desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp) 7800Sstevel@tonic-gate { 7810Sstevel@tonic-gate mblk_t *mp; 7820Sstevel@tonic-gate 7830Sstevel@tonic-gate /* 7840Sstevel@tonic-gate * Note that this is structured to allow the common case (i.e. 7850Sstevel@tonic-gate * STREAMS flowtracing disabled) to call gesballoc() with tail 7860Sstevel@tonic-gate * call optimization. 
7870Sstevel@tonic-gate */ 7880Sstevel@tonic-gate if (!str_ftnever) { 7890Sstevel@tonic-gate mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0), 7900Sstevel@tonic-gate frp, dblk_lastfree_desb, KM_NOSLEEP); 7910Sstevel@tonic-gate 7920Sstevel@tonic-gate if (mp != NULL) 7930Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size); 7940Sstevel@tonic-gate return (mp); 7950Sstevel@tonic-gate } 7960Sstevel@tonic-gate 7970Sstevel@tonic-gate return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0), 7980Sstevel@tonic-gate frp, dblk_lastfree_desb, KM_NOSLEEP)); 7990Sstevel@tonic-gate } 8000Sstevel@tonic-gate 8010Sstevel@tonic-gate static void 8020Sstevel@tonic-gate bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp) 8030Sstevel@tonic-gate { 8040Sstevel@tonic-gate bcache_t *bcp = dbp->db_cache; 8050Sstevel@tonic-gate 8060Sstevel@tonic-gate ASSERT(dbp->db_mblk == mp); 8070Sstevel@tonic-gate if (dbp->db_fthdr != NULL) 8080Sstevel@tonic-gate str_ftfree(dbp); 8090Sstevel@tonic-gate 8100Sstevel@tonic-gate /* set credp and projid to be 'unspecified' before returning to cache */ 8110Sstevel@tonic-gate if (dbp->db_credp != NULL) { 8120Sstevel@tonic-gate crfree(dbp->db_credp); 8130Sstevel@tonic-gate dbp->db_credp = NULL; 8140Sstevel@tonic-gate } 8150Sstevel@tonic-gate dbp->db_cpid = -1; 8160Sstevel@tonic-gate dbp->db_struioflag = 0; 8170Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0; 8180Sstevel@tonic-gate 8190Sstevel@tonic-gate mutex_enter(&bcp->mutex); 8200Sstevel@tonic-gate kmem_cache_free(bcp->dblk_cache, dbp); 8210Sstevel@tonic-gate bcp->alloc--; 8220Sstevel@tonic-gate 8230Sstevel@tonic-gate if (bcp->alloc == 0 && bcp->destroy != 0) { 8240Sstevel@tonic-gate kmem_cache_destroy(bcp->dblk_cache); 8250Sstevel@tonic-gate kmem_cache_destroy(bcp->buffer_cache); 8260Sstevel@tonic-gate mutex_exit(&bcp->mutex); 8270Sstevel@tonic-gate mutex_destroy(&bcp->mutex); 8280Sstevel@tonic-gate kmem_free(bcp, sizeof (bcache_t)); 8290Sstevel@tonic-gate } else { 8300Sstevel@tonic-gate 
mutex_exit(&bcp->mutex); 8310Sstevel@tonic-gate } 8320Sstevel@tonic-gate } 8330Sstevel@tonic-gate 8340Sstevel@tonic-gate bcache_t * 8350Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align) 8360Sstevel@tonic-gate { 8370Sstevel@tonic-gate bcache_t *bcp; 8380Sstevel@tonic-gate char buffer[255]; 8390Sstevel@tonic-gate 8400Sstevel@tonic-gate ASSERT((align & (align - 1)) == 0); 8410Sstevel@tonic-gate 8420Sstevel@tonic-gate if ((bcp = (bcache_t *)kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == 8430Sstevel@tonic-gate NULL) { 8440Sstevel@tonic-gate return (NULL); 8450Sstevel@tonic-gate } 8460Sstevel@tonic-gate 8470Sstevel@tonic-gate bcp->size = size; 8480Sstevel@tonic-gate bcp->align = align; 8490Sstevel@tonic-gate bcp->alloc = 0; 8500Sstevel@tonic-gate bcp->destroy = 0; 8510Sstevel@tonic-gate 8520Sstevel@tonic-gate mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL); 8530Sstevel@tonic-gate 8540Sstevel@tonic-gate (void) sprintf(buffer, "%s_buffer_cache", name); 8550Sstevel@tonic-gate bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL, 8560Sstevel@tonic-gate NULL, NULL, NULL, 0); 8570Sstevel@tonic-gate (void) sprintf(buffer, "%s_dblk_cache", name); 8580Sstevel@tonic-gate bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t), 8590Sstevel@tonic-gate DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor, 8606707Sbrutus NULL, (void *)bcp, NULL, 0); 8610Sstevel@tonic-gate 8620Sstevel@tonic-gate return (bcp); 8630Sstevel@tonic-gate } 8640Sstevel@tonic-gate 8650Sstevel@tonic-gate void 8660Sstevel@tonic-gate bcache_destroy(bcache_t *bcp) 8670Sstevel@tonic-gate { 8680Sstevel@tonic-gate ASSERT(bcp != NULL); 8690Sstevel@tonic-gate 8700Sstevel@tonic-gate mutex_enter(&bcp->mutex); 8710Sstevel@tonic-gate if (bcp->alloc == 0) { 8720Sstevel@tonic-gate kmem_cache_destroy(bcp->dblk_cache); 8730Sstevel@tonic-gate kmem_cache_destroy(bcp->buffer_cache); 8740Sstevel@tonic-gate mutex_exit(&bcp->mutex); 8750Sstevel@tonic-gate 
mutex_destroy(&bcp->mutex); 8760Sstevel@tonic-gate kmem_free(bcp, sizeof (bcache_t)); 8770Sstevel@tonic-gate } else { 8780Sstevel@tonic-gate bcp->destroy++; 8790Sstevel@tonic-gate mutex_exit(&bcp->mutex); 8800Sstevel@tonic-gate } 8810Sstevel@tonic-gate } 8820Sstevel@tonic-gate 8830Sstevel@tonic-gate /*ARGSUSED*/ 8840Sstevel@tonic-gate mblk_t * 8850Sstevel@tonic-gate bcache_allocb(bcache_t *bcp, uint_t pri) 8860Sstevel@tonic-gate { 8870Sstevel@tonic-gate dblk_t *dbp; 8880Sstevel@tonic-gate mblk_t *mp = NULL; 8890Sstevel@tonic-gate 8900Sstevel@tonic-gate ASSERT(bcp != NULL); 8910Sstevel@tonic-gate 8920Sstevel@tonic-gate mutex_enter(&bcp->mutex); 8930Sstevel@tonic-gate if (bcp->destroy != 0) { 8940Sstevel@tonic-gate mutex_exit(&bcp->mutex); 8950Sstevel@tonic-gate goto out; 8960Sstevel@tonic-gate } 8970Sstevel@tonic-gate 8980Sstevel@tonic-gate if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) { 8990Sstevel@tonic-gate mutex_exit(&bcp->mutex); 9000Sstevel@tonic-gate goto out; 9010Sstevel@tonic-gate } 9020Sstevel@tonic-gate bcp->alloc++; 9030Sstevel@tonic-gate mutex_exit(&bcp->mutex); 9040Sstevel@tonic-gate 9050Sstevel@tonic-gate ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0); 9060Sstevel@tonic-gate 9070Sstevel@tonic-gate mp = dbp->db_mblk; 9080Sstevel@tonic-gate DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 9090Sstevel@tonic-gate mp->b_next = mp->b_prev = mp->b_cont = NULL; 9100Sstevel@tonic-gate mp->b_rptr = mp->b_wptr = dbp->db_base; 9110Sstevel@tonic-gate mp->b_queue = NULL; 9120Sstevel@tonic-gate MBLK_BAND_FLAG_WORD(mp) = 0; 9130Sstevel@tonic-gate STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size); 9140Sstevel@tonic-gate out: 9150Sstevel@tonic-gate FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp); 9160Sstevel@tonic-gate 9170Sstevel@tonic-gate return (mp); 9180Sstevel@tonic-gate } 9190Sstevel@tonic-gate 9200Sstevel@tonic-gate static void 9210Sstevel@tonic-gate dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp) 
9220Sstevel@tonic-gate { 9230Sstevel@tonic-gate ASSERT(dbp->db_mblk == mp); 9240Sstevel@tonic-gate if (dbp->db_fthdr != NULL) 9250Sstevel@tonic-gate str_ftfree(dbp); 9260Sstevel@tonic-gate 9270Sstevel@tonic-gate /* set credp and projid to be 'unspecified' before returning to cache */ 9280Sstevel@tonic-gate if (dbp->db_credp != NULL) { 9290Sstevel@tonic-gate crfree(dbp->db_credp); 9300Sstevel@tonic-gate dbp->db_credp = NULL; 9310Sstevel@tonic-gate } 9320Sstevel@tonic-gate dbp->db_cpid = -1; 9330Sstevel@tonic-gate dbp->db_struioflag = 0; 9340Sstevel@tonic-gate dbp->db_struioun.cksum.flags = 0; 9350Sstevel@tonic-gate 9360Sstevel@tonic-gate kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base); 9370Sstevel@tonic-gate kmem_cache_free(dbp->db_cache, dbp); 9380Sstevel@tonic-gate } 9390Sstevel@tonic-gate 9400Sstevel@tonic-gate static mblk_t * 9410Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags) 9420Sstevel@tonic-gate { 9430Sstevel@tonic-gate mblk_t *mp; 9440Sstevel@tonic-gate void *buf; 9450Sstevel@tonic-gate 9460Sstevel@tonic-gate size = P2ROUNDUP(size, DBLK_CACHE_ALIGN); 9470Sstevel@tonic-gate if ((buf = kmem_alloc(size, kmflags)) == NULL) 9480Sstevel@tonic-gate return (NULL); 9490Sstevel@tonic-gate if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0), 9500Sstevel@tonic-gate &frnop, dblk_lastfree_oversize, kmflags)) == NULL) 9510Sstevel@tonic-gate kmem_free(buf, size); 9520Sstevel@tonic-gate 9530Sstevel@tonic-gate if (mp != NULL) 9540Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size); 9550Sstevel@tonic-gate 9560Sstevel@tonic-gate return (mp); 9570Sstevel@tonic-gate } 9580Sstevel@tonic-gate 9590Sstevel@tonic-gate mblk_t * 9600Sstevel@tonic-gate allocb_tryhard(size_t target_size) 9610Sstevel@tonic-gate { 9620Sstevel@tonic-gate size_t size; 9630Sstevel@tonic-gate mblk_t *bp; 9640Sstevel@tonic-gate 9650Sstevel@tonic-gate for (size = target_size; size < target_size + 512; 9660Sstevel@tonic-gate size += DBLK_CACHE_ALIGN) 9670Sstevel@tonic-gate if 
((bp = allocb(size, BPRI_HI)) != NULL) 9680Sstevel@tonic-gate return (bp); 9690Sstevel@tonic-gate allocb_tryhard_fails++; 9700Sstevel@tonic-gate return (NULL); 9710Sstevel@tonic-gate } 9720Sstevel@tonic-gate 9730Sstevel@tonic-gate /* 9740Sstevel@tonic-gate * This routine is consolidation private for STREAMS internal use 9750Sstevel@tonic-gate * This routine may only be called from sync routines (i.e., not 9760Sstevel@tonic-gate * from put or service procedures). It is located here (rather 9770Sstevel@tonic-gate * than strsubr.c) so that we don't have to expose all of the 9780Sstevel@tonic-gate * allocb() implementation details in header files. 9790Sstevel@tonic-gate */ 9800Sstevel@tonic-gate mblk_t * 9810Sstevel@tonic-gate allocb_wait(size_t size, uint_t pri, uint_t flags, int *error) 9820Sstevel@tonic-gate { 9830Sstevel@tonic-gate dblk_t *dbp; 9840Sstevel@tonic-gate mblk_t *mp; 9850Sstevel@tonic-gate size_t index; 9860Sstevel@tonic-gate 9870Sstevel@tonic-gate index = (size -1) >> DBLK_SIZE_SHIFT; 9880Sstevel@tonic-gate 9890Sstevel@tonic-gate if (flags & STR_NOSIG) { 9900Sstevel@tonic-gate if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) { 9910Sstevel@tonic-gate if (size != 0) { 9920Sstevel@tonic-gate mp = allocb_oversize(size, KM_SLEEP); 9930Sstevel@tonic-gate FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", 9940Sstevel@tonic-gate (uintptr_t)mp); 9950Sstevel@tonic-gate return (mp); 9960Sstevel@tonic-gate } 9970Sstevel@tonic-gate index = 0; 9980Sstevel@tonic-gate } 9990Sstevel@tonic-gate 10000Sstevel@tonic-gate dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP); 10010Sstevel@tonic-gate mp = dbp->db_mblk; 10020Sstevel@tonic-gate DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 10030Sstevel@tonic-gate mp->b_next = mp->b_prev = mp->b_cont = NULL; 10040Sstevel@tonic-gate mp->b_rptr = mp->b_wptr = dbp->db_base; 10050Sstevel@tonic-gate mp->b_queue = NULL; 10060Sstevel@tonic-gate MBLK_BAND_FLAG_WORD(mp) = 0; 10070Sstevel@tonic-gate STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, 
size); 10080Sstevel@tonic-gate 10090Sstevel@tonic-gate FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp); 10100Sstevel@tonic-gate 10110Sstevel@tonic-gate } else { 10120Sstevel@tonic-gate while ((mp = allocb(size, pri)) == NULL) { 10130Sstevel@tonic-gate if ((*error = strwaitbuf(size, BPRI_HI)) != 0) 10140Sstevel@tonic-gate return (NULL); 10150Sstevel@tonic-gate } 10160Sstevel@tonic-gate } 10170Sstevel@tonic-gate 10180Sstevel@tonic-gate return (mp); 10190Sstevel@tonic-gate } 10200Sstevel@tonic-gate 10210Sstevel@tonic-gate /* 10220Sstevel@tonic-gate * Call function 'func' with 'arg' when a class zero block can 10230Sstevel@tonic-gate * be allocated with priority 'pri'. 10240Sstevel@tonic-gate */ 10250Sstevel@tonic-gate bufcall_id_t 10260Sstevel@tonic-gate esbbcall(uint_t pri, void (*func)(void *), void *arg) 10270Sstevel@tonic-gate { 10280Sstevel@tonic-gate return (bufcall(1, pri, func, arg)); 10290Sstevel@tonic-gate } 10300Sstevel@tonic-gate 10310Sstevel@tonic-gate /* 10320Sstevel@tonic-gate * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials 10330Sstevel@tonic-gate * ioc_id, rval and error of the struct ioctl to set up an ioctl call. 10340Sstevel@tonic-gate * This provides consistency for all internal allocators of ioctl. 10350Sstevel@tonic-gate */ 10360Sstevel@tonic-gate mblk_t * 10370Sstevel@tonic-gate mkiocb(uint_t cmd) 10380Sstevel@tonic-gate { 10390Sstevel@tonic-gate struct iocblk *ioc; 10400Sstevel@tonic-gate mblk_t *mp; 10410Sstevel@tonic-gate 10420Sstevel@tonic-gate /* 10430Sstevel@tonic-gate * Allocate enough space for any of the ioctl related messages. 10440Sstevel@tonic-gate */ 10450Sstevel@tonic-gate if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL) 10460Sstevel@tonic-gate return (NULL); 10470Sstevel@tonic-gate 10480Sstevel@tonic-gate bzero(mp->b_rptr, sizeof (union ioctypes)); 10490Sstevel@tonic-gate 10500Sstevel@tonic-gate /* 10510Sstevel@tonic-gate * Set the mblk_t information and ptrs correctly. 
10520Sstevel@tonic-gate */ 10530Sstevel@tonic-gate mp->b_wptr += sizeof (struct iocblk); 10540Sstevel@tonic-gate mp->b_datap->db_type = M_IOCTL; 10550Sstevel@tonic-gate 10560Sstevel@tonic-gate /* 10570Sstevel@tonic-gate * Fill in the fields. 10580Sstevel@tonic-gate */ 10590Sstevel@tonic-gate ioc = (struct iocblk *)mp->b_rptr; 10600Sstevel@tonic-gate ioc->ioc_cmd = cmd; 10610Sstevel@tonic-gate ioc->ioc_cr = kcred; 10620Sstevel@tonic-gate ioc->ioc_id = getiocseqno(); 10630Sstevel@tonic-gate ioc->ioc_flag = IOC_NATIVE; 10640Sstevel@tonic-gate return (mp); 10650Sstevel@tonic-gate } 10660Sstevel@tonic-gate 10670Sstevel@tonic-gate /* 10680Sstevel@tonic-gate * test if block of given size can be allocated with a request of 10690Sstevel@tonic-gate * the given priority. 10700Sstevel@tonic-gate * 'pri' is no longer used, but is retained for compatibility. 10710Sstevel@tonic-gate */ 10720Sstevel@tonic-gate /* ARGSUSED */ 10730Sstevel@tonic-gate int 10740Sstevel@tonic-gate testb(size_t size, uint_t pri) 10750Sstevel@tonic-gate { 10760Sstevel@tonic-gate return ((size + sizeof (dblk_t)) <= kmem_avail()); 10770Sstevel@tonic-gate } 10780Sstevel@tonic-gate 10790Sstevel@tonic-gate /* 10800Sstevel@tonic-gate * Call function 'func' with argument 'arg' when there is a reasonably 10810Sstevel@tonic-gate * good chance that a block of size 'size' can be allocated. 10820Sstevel@tonic-gate * 'pri' is no longer used, but is retained for compatibility. 
10830Sstevel@tonic-gate */ 10840Sstevel@tonic-gate /* ARGSUSED */ 10850Sstevel@tonic-gate bufcall_id_t 10860Sstevel@tonic-gate bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg) 10870Sstevel@tonic-gate { 10880Sstevel@tonic-gate static long bid = 1; /* always odd to save checking for zero */ 10890Sstevel@tonic-gate bufcall_id_t bc_id; 10900Sstevel@tonic-gate struct strbufcall *bcp; 10910Sstevel@tonic-gate 10920Sstevel@tonic-gate if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL) 10930Sstevel@tonic-gate return (0); 10940Sstevel@tonic-gate 10950Sstevel@tonic-gate bcp->bc_func = func; 10960Sstevel@tonic-gate bcp->bc_arg = arg; 10970Sstevel@tonic-gate bcp->bc_size = size; 10980Sstevel@tonic-gate bcp->bc_next = NULL; 10990Sstevel@tonic-gate bcp->bc_executor = NULL; 11000Sstevel@tonic-gate 11010Sstevel@tonic-gate mutex_enter(&strbcall_lock); 11020Sstevel@tonic-gate /* 11030Sstevel@tonic-gate * After bcp is linked into strbcalls and strbcall_lock is dropped there 11040Sstevel@tonic-gate * should be no references to bcp since it may be freed by 11050Sstevel@tonic-gate * runbufcalls(). Since bcp_id field is returned, we save its value in 11060Sstevel@tonic-gate * the local var. 11070Sstevel@tonic-gate */ 11080Sstevel@tonic-gate bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2); /* keep it odd */ 11090Sstevel@tonic-gate 11100Sstevel@tonic-gate /* 11110Sstevel@tonic-gate * add newly allocated stream event to existing 11120Sstevel@tonic-gate * linked list of events. 
11130Sstevel@tonic-gate */ 11140Sstevel@tonic-gate if (strbcalls.bc_head == NULL) { 11150Sstevel@tonic-gate strbcalls.bc_head = strbcalls.bc_tail = bcp; 11160Sstevel@tonic-gate } else { 11170Sstevel@tonic-gate strbcalls.bc_tail->bc_next = bcp; 11180Sstevel@tonic-gate strbcalls.bc_tail = bcp; 11190Sstevel@tonic-gate } 11200Sstevel@tonic-gate 11210Sstevel@tonic-gate cv_signal(&strbcall_cv); 11220Sstevel@tonic-gate mutex_exit(&strbcall_lock); 11230Sstevel@tonic-gate return (bc_id); 11240Sstevel@tonic-gate } 11250Sstevel@tonic-gate 11260Sstevel@tonic-gate /* 11270Sstevel@tonic-gate * Cancel a bufcall request. 11280Sstevel@tonic-gate */ 11290Sstevel@tonic-gate void 11300Sstevel@tonic-gate unbufcall(bufcall_id_t id) 11310Sstevel@tonic-gate { 11320Sstevel@tonic-gate strbufcall_t *bcp, *pbcp; 11330Sstevel@tonic-gate 11340Sstevel@tonic-gate mutex_enter(&strbcall_lock); 11350Sstevel@tonic-gate again: 11360Sstevel@tonic-gate pbcp = NULL; 11370Sstevel@tonic-gate for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) { 11380Sstevel@tonic-gate if (id == bcp->bc_id) 11390Sstevel@tonic-gate break; 11400Sstevel@tonic-gate pbcp = bcp; 11410Sstevel@tonic-gate } 11420Sstevel@tonic-gate if (bcp) { 11430Sstevel@tonic-gate if (bcp->bc_executor != NULL) { 11440Sstevel@tonic-gate if (bcp->bc_executor != curthread) { 11450Sstevel@tonic-gate cv_wait(&bcall_cv, &strbcall_lock); 11460Sstevel@tonic-gate goto again; 11470Sstevel@tonic-gate } 11480Sstevel@tonic-gate } else { 11490Sstevel@tonic-gate if (pbcp) 11500Sstevel@tonic-gate pbcp->bc_next = bcp->bc_next; 11510Sstevel@tonic-gate else 11520Sstevel@tonic-gate strbcalls.bc_head = bcp->bc_next; 11530Sstevel@tonic-gate if (bcp == strbcalls.bc_tail) 11540Sstevel@tonic-gate strbcalls.bc_tail = pbcp; 11550Sstevel@tonic-gate kmem_free(bcp, sizeof (strbufcall_t)); 11560Sstevel@tonic-gate } 11570Sstevel@tonic-gate } 11580Sstevel@tonic-gate mutex_exit(&strbcall_lock); 11590Sstevel@tonic-gate } 11600Sstevel@tonic-gate 11610Sstevel@tonic-gate /* 
11620Sstevel@tonic-gate * Duplicate a message block by block (uses dupb), returning 11630Sstevel@tonic-gate * a pointer to the duplicate message. 11640Sstevel@tonic-gate * Returns a non-NULL value only if the entire message 11650Sstevel@tonic-gate * was dup'd. 11660Sstevel@tonic-gate */ 11670Sstevel@tonic-gate mblk_t * 11680Sstevel@tonic-gate dupmsg(mblk_t *bp) 11690Sstevel@tonic-gate { 11700Sstevel@tonic-gate mblk_t *head, *nbp; 11710Sstevel@tonic-gate 11720Sstevel@tonic-gate if (!bp || !(nbp = head = dupb(bp))) 11730Sstevel@tonic-gate return (NULL); 11740Sstevel@tonic-gate 11750Sstevel@tonic-gate while (bp->b_cont) { 11760Sstevel@tonic-gate if (!(nbp->b_cont = dupb(bp->b_cont))) { 11770Sstevel@tonic-gate freemsg(head); 11780Sstevel@tonic-gate return (NULL); 11790Sstevel@tonic-gate } 11800Sstevel@tonic-gate nbp = nbp->b_cont; 11810Sstevel@tonic-gate bp = bp->b_cont; 11820Sstevel@tonic-gate } 11830Sstevel@tonic-gate return (head); 11840Sstevel@tonic-gate } 11850Sstevel@tonic-gate 11860Sstevel@tonic-gate #define DUPB_NOLOAN(bp) \ 11870Sstevel@tonic-gate ((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? 
\ 11880Sstevel@tonic-gate copyb((bp)) : dupb((bp))) 11890Sstevel@tonic-gate 11900Sstevel@tonic-gate mblk_t * 11910Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp) 11920Sstevel@tonic-gate { 11930Sstevel@tonic-gate mblk_t *head, *nbp; 11940Sstevel@tonic-gate 11950Sstevel@tonic-gate if (bp == NULL || DB_TYPE(bp) != M_DATA || 11960Sstevel@tonic-gate ((nbp = head = DUPB_NOLOAN(bp)) == NULL)) 11970Sstevel@tonic-gate return (NULL); 11980Sstevel@tonic-gate 11990Sstevel@tonic-gate while (bp->b_cont) { 12000Sstevel@tonic-gate if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) { 12010Sstevel@tonic-gate freemsg(head); 12020Sstevel@tonic-gate return (NULL); 12030Sstevel@tonic-gate } 12040Sstevel@tonic-gate nbp = nbp->b_cont; 12050Sstevel@tonic-gate bp = bp->b_cont; 12060Sstevel@tonic-gate } 12070Sstevel@tonic-gate return (head); 12080Sstevel@tonic-gate } 12090Sstevel@tonic-gate 12100Sstevel@tonic-gate /* 12110Sstevel@tonic-gate * Copy data from message and data block to newly allocated message and 12120Sstevel@tonic-gate * data block. Returns new message block pointer, or NULL if error. 12130Sstevel@tonic-gate * The alignment of rptr (w.r.t. word alignment) will be the same in the copy 12140Sstevel@tonic-gate * as in the original even when db_base is not word aligned. 
(bug 1052877) 12150Sstevel@tonic-gate */ 12160Sstevel@tonic-gate mblk_t * 12170Sstevel@tonic-gate copyb(mblk_t *bp) 12180Sstevel@tonic-gate { 12190Sstevel@tonic-gate mblk_t *nbp; 12200Sstevel@tonic-gate dblk_t *dp, *ndp; 12210Sstevel@tonic-gate uchar_t *base; 12220Sstevel@tonic-gate size_t size; 12230Sstevel@tonic-gate size_t unaligned; 12240Sstevel@tonic-gate 12250Sstevel@tonic-gate ASSERT(bp->b_wptr >= bp->b_rptr); 12260Sstevel@tonic-gate 12270Sstevel@tonic-gate dp = bp->b_datap; 12280Sstevel@tonic-gate if (dp->db_fthdr != NULL) 12290Sstevel@tonic-gate STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0); 12300Sstevel@tonic-gate 12310Sstevel@tonic-gate /* 12320Sstevel@tonic-gate * Special handling for Multidata message; this should be 12330Sstevel@tonic-gate * removed once a copy-callback routine is made available. 12340Sstevel@tonic-gate */ 12350Sstevel@tonic-gate if (dp->db_type == M_MULTIDATA) { 12360Sstevel@tonic-gate cred_t *cr; 12370Sstevel@tonic-gate 12380Sstevel@tonic-gate if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL) 12390Sstevel@tonic-gate return (NULL); 12400Sstevel@tonic-gate 12410Sstevel@tonic-gate nbp->b_flag = bp->b_flag; 12420Sstevel@tonic-gate nbp->b_band = bp->b_band; 12430Sstevel@tonic-gate ndp = nbp->b_datap; 12440Sstevel@tonic-gate 12450Sstevel@tonic-gate /* See comments below on potential issues. 
*/ 12460Sstevel@tonic-gate STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1); 12470Sstevel@tonic-gate 12480Sstevel@tonic-gate ASSERT(ndp->db_type == dp->db_type); 12490Sstevel@tonic-gate cr = dp->db_credp; 12500Sstevel@tonic-gate if (cr != NULL) 12510Sstevel@tonic-gate crhold(ndp->db_credp = cr); 12520Sstevel@tonic-gate ndp->db_cpid = dp->db_cpid; 12530Sstevel@tonic-gate return (nbp); 12540Sstevel@tonic-gate } 12550Sstevel@tonic-gate 12560Sstevel@tonic-gate size = dp->db_lim - dp->db_base; 12570Sstevel@tonic-gate unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t)); 12580Sstevel@tonic-gate if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL) 12590Sstevel@tonic-gate return (NULL); 12600Sstevel@tonic-gate nbp->b_flag = bp->b_flag; 12610Sstevel@tonic-gate nbp->b_band = bp->b_band; 12620Sstevel@tonic-gate ndp = nbp->b_datap; 12630Sstevel@tonic-gate 12640Sstevel@tonic-gate /* 12650Sstevel@tonic-gate * Well, here is a potential issue. If we are trying to 12660Sstevel@tonic-gate * trace a flow, and we copy the message, we might lose 12670Sstevel@tonic-gate * information about where this message might have been. 12680Sstevel@tonic-gate * So we should inherit the FT data. On the other hand, 12690Sstevel@tonic-gate * a user might be interested only in alloc to free data. 12700Sstevel@tonic-gate * So I guess the real answer is to provide a tunable. 
12710Sstevel@tonic-gate */ 12720Sstevel@tonic-gate STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1); 12730Sstevel@tonic-gate 12740Sstevel@tonic-gate base = ndp->db_base + unaligned; 12750Sstevel@tonic-gate bcopy(dp->db_base, ndp->db_base + unaligned, size); 12760Sstevel@tonic-gate 12770Sstevel@tonic-gate nbp->b_rptr = base + (bp->b_rptr - dp->db_base); 12780Sstevel@tonic-gate nbp->b_wptr = nbp->b_rptr + MBLKL(bp); 12790Sstevel@tonic-gate 12800Sstevel@tonic-gate return (nbp); 12810Sstevel@tonic-gate } 12820Sstevel@tonic-gate 12830Sstevel@tonic-gate /* 12840Sstevel@tonic-gate * Copy data from message to newly allocated message using new 12850Sstevel@tonic-gate * data blocks. Returns a pointer to the new message, or NULL if error. 12860Sstevel@tonic-gate */ 12870Sstevel@tonic-gate mblk_t * 12880Sstevel@tonic-gate copymsg(mblk_t *bp) 12890Sstevel@tonic-gate { 12900Sstevel@tonic-gate mblk_t *head, *nbp; 12910Sstevel@tonic-gate 12920Sstevel@tonic-gate if (!bp || !(nbp = head = copyb(bp))) 12930Sstevel@tonic-gate return (NULL); 12940Sstevel@tonic-gate 12950Sstevel@tonic-gate while (bp->b_cont) { 12960Sstevel@tonic-gate if (!(nbp->b_cont = copyb(bp->b_cont))) { 12970Sstevel@tonic-gate freemsg(head); 12980Sstevel@tonic-gate return (NULL); 12990Sstevel@tonic-gate } 13000Sstevel@tonic-gate nbp = nbp->b_cont; 13010Sstevel@tonic-gate bp = bp->b_cont; 13020Sstevel@tonic-gate } 13030Sstevel@tonic-gate return (head); 13040Sstevel@tonic-gate } 13050Sstevel@tonic-gate 13060Sstevel@tonic-gate /* 13070Sstevel@tonic-gate * link a message block to tail of message 13080Sstevel@tonic-gate */ 13090Sstevel@tonic-gate void 13100Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp) 13110Sstevel@tonic-gate { 13120Sstevel@tonic-gate ASSERT(mp && bp); 13130Sstevel@tonic-gate 13140Sstevel@tonic-gate for (; mp->b_cont; mp = mp->b_cont) 13150Sstevel@tonic-gate ; 13160Sstevel@tonic-gate mp->b_cont = bp; 13170Sstevel@tonic-gate } 13180Sstevel@tonic-gate 13190Sstevel@tonic-gate /* 13200Sstevel@tonic-gate * 
unlink a message block from head of message 13210Sstevel@tonic-gate * return pointer to new message. 13220Sstevel@tonic-gate * NULL if message becomes empty. 13230Sstevel@tonic-gate */ 13240Sstevel@tonic-gate mblk_t * 13250Sstevel@tonic-gate unlinkb(mblk_t *bp) 13260Sstevel@tonic-gate { 13270Sstevel@tonic-gate mblk_t *bp1; 13280Sstevel@tonic-gate 13290Sstevel@tonic-gate bp1 = bp->b_cont; 13300Sstevel@tonic-gate bp->b_cont = NULL; 13310Sstevel@tonic-gate return (bp1); 13320Sstevel@tonic-gate } 13330Sstevel@tonic-gate 13340Sstevel@tonic-gate /* 13350Sstevel@tonic-gate * remove a message block "bp" from message "mp" 13360Sstevel@tonic-gate * 13370Sstevel@tonic-gate * Return pointer to new message or NULL if no message remains. 13380Sstevel@tonic-gate * Return -1 if bp is not found in message. 13390Sstevel@tonic-gate */ 13400Sstevel@tonic-gate mblk_t * 13410Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp) 13420Sstevel@tonic-gate { 13430Sstevel@tonic-gate mblk_t *tmp; 13440Sstevel@tonic-gate mblk_t *lastp = NULL; 13450Sstevel@tonic-gate 13460Sstevel@tonic-gate ASSERT(mp && bp); 13470Sstevel@tonic-gate for (tmp = mp; tmp; tmp = tmp->b_cont) { 13480Sstevel@tonic-gate if (tmp == bp) { 13490Sstevel@tonic-gate if (lastp) 13500Sstevel@tonic-gate lastp->b_cont = tmp->b_cont; 13510Sstevel@tonic-gate else 13520Sstevel@tonic-gate mp = tmp->b_cont; 13530Sstevel@tonic-gate tmp->b_cont = NULL; 13540Sstevel@tonic-gate return (mp); 13550Sstevel@tonic-gate } 13560Sstevel@tonic-gate lastp = tmp; 13570Sstevel@tonic-gate } 13580Sstevel@tonic-gate return ((mblk_t *)-1); 13590Sstevel@tonic-gate } 13600Sstevel@tonic-gate 13610Sstevel@tonic-gate /* 13620Sstevel@tonic-gate * Concatenate and align first len bytes of common 13630Sstevel@tonic-gate * message type. Len == -1, means concat everything. 13640Sstevel@tonic-gate * Returns 1 on success, 0 on failure 13650Sstevel@tonic-gate * After the pullup, mp points to the pulled up data. 
13660Sstevel@tonic-gate */ 13670Sstevel@tonic-gate int 13680Sstevel@tonic-gate pullupmsg(mblk_t *mp, ssize_t len) 13690Sstevel@tonic-gate { 13700Sstevel@tonic-gate mblk_t *bp, *b_cont; 13710Sstevel@tonic-gate dblk_t *dbp; 13720Sstevel@tonic-gate ssize_t n; 13730Sstevel@tonic-gate 13740Sstevel@tonic-gate ASSERT(mp->b_datap->db_ref > 0); 13750Sstevel@tonic-gate ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 13760Sstevel@tonic-gate 13770Sstevel@tonic-gate /* 13780Sstevel@tonic-gate * We won't handle Multidata message, since it contains 13790Sstevel@tonic-gate * metadata which this function has no knowledge of; we 13800Sstevel@tonic-gate * assert on DEBUG, and return failure otherwise. 13810Sstevel@tonic-gate */ 13820Sstevel@tonic-gate ASSERT(mp->b_datap->db_type != M_MULTIDATA); 13830Sstevel@tonic-gate if (mp->b_datap->db_type == M_MULTIDATA) 13840Sstevel@tonic-gate return (0); 13850Sstevel@tonic-gate 13860Sstevel@tonic-gate if (len == -1) { 13870Sstevel@tonic-gate if (mp->b_cont == NULL && str_aligned(mp->b_rptr)) 13880Sstevel@tonic-gate return (1); 13890Sstevel@tonic-gate len = xmsgsize(mp); 13900Sstevel@tonic-gate } else { 13910Sstevel@tonic-gate ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr; 13920Sstevel@tonic-gate ASSERT(first_mblk_len >= 0); 13930Sstevel@tonic-gate /* 13940Sstevel@tonic-gate * If the length is less than that of the first mblk, 13950Sstevel@tonic-gate * we want to pull up the message into an aligned mblk. 13960Sstevel@tonic-gate * Though not part of the spec, some callers assume it. 
13970Sstevel@tonic-gate */ 13980Sstevel@tonic-gate if (len <= first_mblk_len) { 13990Sstevel@tonic-gate if (str_aligned(mp->b_rptr)) 14000Sstevel@tonic-gate return (1); 14010Sstevel@tonic-gate len = first_mblk_len; 14020Sstevel@tonic-gate } else if (xmsgsize(mp) < len) 14030Sstevel@tonic-gate return (0); 14040Sstevel@tonic-gate } 14050Sstevel@tonic-gate 14060Sstevel@tonic-gate if ((bp = allocb_tmpl(len, mp)) == NULL) 14070Sstevel@tonic-gate return (0); 14080Sstevel@tonic-gate 14090Sstevel@tonic-gate dbp = bp->b_datap; 14100Sstevel@tonic-gate *bp = *mp; /* swap mblks so bp heads the old msg... */ 14110Sstevel@tonic-gate mp->b_datap = dbp; /* ... and mp heads the new message */ 14120Sstevel@tonic-gate mp->b_datap->db_mblk = mp; 14130Sstevel@tonic-gate bp->b_datap->db_mblk = bp; 14140Sstevel@tonic-gate mp->b_rptr = mp->b_wptr = dbp->db_base; 14150Sstevel@tonic-gate 14160Sstevel@tonic-gate do { 14170Sstevel@tonic-gate ASSERT(bp->b_datap->db_ref > 0); 14180Sstevel@tonic-gate ASSERT(bp->b_wptr >= bp->b_rptr); 14190Sstevel@tonic-gate n = MIN(bp->b_wptr - bp->b_rptr, len); 14200Sstevel@tonic-gate bcopy(bp->b_rptr, mp->b_wptr, (size_t)n); 14210Sstevel@tonic-gate mp->b_wptr += n; 14220Sstevel@tonic-gate bp->b_rptr += n; 14230Sstevel@tonic-gate len -= n; 14240Sstevel@tonic-gate if (bp->b_rptr != bp->b_wptr) 14250Sstevel@tonic-gate break; 14260Sstevel@tonic-gate b_cont = bp->b_cont; 14270Sstevel@tonic-gate freeb(bp); 14280Sstevel@tonic-gate bp = b_cont; 14290Sstevel@tonic-gate } while (len && bp); 14300Sstevel@tonic-gate 14310Sstevel@tonic-gate mp->b_cont = bp; /* tack on whatever wasn't pulled up */ 14320Sstevel@tonic-gate 14330Sstevel@tonic-gate return (1); 14340Sstevel@tonic-gate } 14350Sstevel@tonic-gate 14360Sstevel@tonic-gate /* 14370Sstevel@tonic-gate * Concatenate and align at least the first len bytes of common message 14380Sstevel@tonic-gate * type. Len == -1 means concatenate everything. The original message is 14390Sstevel@tonic-gate * unaltered. 
Returns a pointer to a new message on success, otherwise 14400Sstevel@tonic-gate * returns NULL. 14410Sstevel@tonic-gate */ 14420Sstevel@tonic-gate mblk_t * 14430Sstevel@tonic-gate msgpullup(mblk_t *mp, ssize_t len) 14440Sstevel@tonic-gate { 14450Sstevel@tonic-gate mblk_t *newmp; 14460Sstevel@tonic-gate ssize_t totlen; 14470Sstevel@tonic-gate ssize_t n; 14480Sstevel@tonic-gate 14490Sstevel@tonic-gate /* 14500Sstevel@tonic-gate * We won't handle Multidata message, since it contains 14510Sstevel@tonic-gate * metadata which this function has no knowledge of; we 14520Sstevel@tonic-gate * assert on DEBUG, and return failure otherwise. 14530Sstevel@tonic-gate */ 14540Sstevel@tonic-gate ASSERT(mp->b_datap->db_type != M_MULTIDATA); 14550Sstevel@tonic-gate if (mp->b_datap->db_type == M_MULTIDATA) 14560Sstevel@tonic-gate return (NULL); 14570Sstevel@tonic-gate 14580Sstevel@tonic-gate totlen = xmsgsize(mp); 14590Sstevel@tonic-gate 14600Sstevel@tonic-gate if ((len > 0) && (len > totlen)) 14610Sstevel@tonic-gate return (NULL); 14620Sstevel@tonic-gate 14630Sstevel@tonic-gate /* 14640Sstevel@tonic-gate * Copy all of the first msg type into one new mblk, then dupmsg 14650Sstevel@tonic-gate * and link the rest onto this. 
14660Sstevel@tonic-gate */ 14670Sstevel@tonic-gate 14680Sstevel@tonic-gate len = totlen; 14690Sstevel@tonic-gate 14700Sstevel@tonic-gate if ((newmp = allocb_tmpl(len, mp)) == NULL) 14710Sstevel@tonic-gate return (NULL); 14720Sstevel@tonic-gate 14730Sstevel@tonic-gate newmp->b_flag = mp->b_flag; 14740Sstevel@tonic-gate newmp->b_band = mp->b_band; 14750Sstevel@tonic-gate 14760Sstevel@tonic-gate while (len > 0) { 14770Sstevel@tonic-gate n = mp->b_wptr - mp->b_rptr; 14780Sstevel@tonic-gate ASSERT(n >= 0); /* allow zero-length mblk_t's */ 14790Sstevel@tonic-gate if (n > 0) 14800Sstevel@tonic-gate bcopy(mp->b_rptr, newmp->b_wptr, n); 14810Sstevel@tonic-gate newmp->b_wptr += n; 14820Sstevel@tonic-gate len -= n; 14830Sstevel@tonic-gate mp = mp->b_cont; 14840Sstevel@tonic-gate } 14850Sstevel@tonic-gate 14860Sstevel@tonic-gate if (mp != NULL) { 14870Sstevel@tonic-gate newmp->b_cont = dupmsg(mp); 14880Sstevel@tonic-gate if (newmp->b_cont == NULL) { 14890Sstevel@tonic-gate freemsg(newmp); 14900Sstevel@tonic-gate return (NULL); 14910Sstevel@tonic-gate } 14920Sstevel@tonic-gate } 14930Sstevel@tonic-gate 14940Sstevel@tonic-gate return (newmp); 14950Sstevel@tonic-gate } 14960Sstevel@tonic-gate 14970Sstevel@tonic-gate /* 14980Sstevel@tonic-gate * Trim bytes from message 14990Sstevel@tonic-gate * len > 0, trim from head 15000Sstevel@tonic-gate * len < 0, trim from tail 15010Sstevel@tonic-gate * Returns 1 on success, 0 on failure. 
 */
int
adjmsg(mblk_t *mp, ssize_t len)
{
        mblk_t *bp;
        mblk_t *save_bp = NULL;
        mblk_t *prev_bp;
        mblk_t *bcont;
        unsigned char type;
        ssize_t n;
        int fromhead;
        int first;

        ASSERT(mp != NULL);
        /*
         * We won't handle Multidata message, since it contains
         * metadata which this function has no knowledge of; we
         * assert on DEBUG, and return failure otherwise.
         */
        ASSERT(mp->b_datap->db_type != M_MULTIDATA);
        if (mp->b_datap->db_type == M_MULTIDATA)
                return (0);

        if (len < 0) {
                fromhead = 0;           /* negative len: trim from tail */
                len = -len;
        } else {
                fromhead = 1;
        }

        /* Can't trim more than the whole message contains. */
        if (xmsgsize(mp) < len)
                return (0);


        if (fromhead) {
                first = 1;
                while (len) {
                        ASSERT(mp->b_wptr >= mp->b_rptr);
                        n = MIN(mp->b_wptr - mp->b_rptr, len);
                        mp->b_rptr += n;
                        len -= n;

                        /*
                         * If this is not the first zero length
                         * message remove it.  (The head mblk is kept
                         * even when emptied, since the caller still
                         * holds a pointer to it.)
                         */
                        if (!first && (mp->b_wptr == mp->b_rptr)) {
                                bcont = mp->b_cont;
                                freeb(mp);
                                mp = save_bp->b_cont = bcont;
                        } else {
                                save_bp = mp;
                                mp = mp->b_cont;
                        }
                        first = 0;
                }
        } else {
                /* Tail trim only applies to the leading run of this type. */
                type = mp->b_datap->db_type;
                while (len) {
                        bp = mp;
                        save_bp = NULL;

                        /*
                         * Find the last message of same type; the chain
                         * is rescanned from the head on every pass.
                         */
                        while (bp && bp->b_datap->db_type == type) {
                                ASSERT(bp->b_wptr >= bp->b_rptr);
                                prev_bp = save_bp;
                                save_bp = bp;
                                bp = bp->b_cont;
                        }
                        if (save_bp == NULL)
                                break;
                        n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
                        save_bp->b_wptr -= n;
                        len -= n;

                        /*
                         * If this is not the first message
                         * and we have taken away everything
                         * from this message, remove it.
                         * (save_bp != mp guarantees the scan above ran
                         * at least twice, so prev_bp is initialized.)
                         */
                        if ((save_bp != mp) &&
                            (save_bp->b_wptr == save_bp->b_rptr)) {
                                bcont = save_bp->b_cont;
                                freeb(save_bp);
                                prev_bp->b_cont = bcont;
                        }
                }
        }
        return (1);
}

/*
 * Get number of data bytes in message; only M_DATA mblks are counted.
 */
size_t
msgdsize(mblk_t *bp)
{
        size_t count = 0;

        for (; bp; bp = bp->b_cont)
                if (bp->b_datap->db_type == M_DATA) {
                        ASSERT(bp->b_wptr >= bp->b_rptr);
                        count += bp->b_wptr - bp->b_rptr;
                }
        return (count);
}

/*
 * Get a message off head of queue
 *
 * If queue has no buffers then mark queue
 * with QWANTR. (queue wants to be read by
 * someone when data becomes available)
 *
 * If there is something to take off then do so.
 * If queue falls below hi water mark turn off QFULL
 * flag.  Decrement weighted count of queue.
 * Also turn off QWANTR because queue is being read.
 *
 * The queue count is maintained on a per-band basis.
 * Priority band 0 (normal messages) uses q_count,
 * q_lowat, etc.  Non-zero priority bands use the
 * fields in their respective qband structures
 * (qb_count, qb_lowat, etc.)  All messages appear
 * on the same list, linked via their b_next pointers.
 * q_first is the head of the list.  q_count does
 * not reflect the size of all the messages on the
 * queue.  It only reflects those messages in the
 * normal band of flow.  The one exception to this
 * deals with high priority messages.
They are in 16360Sstevel@tonic-gate * their own conceptual "band", but are accounted 16370Sstevel@tonic-gate * against q_count. 16380Sstevel@tonic-gate * 16390Sstevel@tonic-gate * If queue count is below the lo water mark and QWANTW 16400Sstevel@tonic-gate * is set, enable the closest backq which has a service 16410Sstevel@tonic-gate * procedure and turn off the QWANTW flag. 16420Sstevel@tonic-gate * 16430Sstevel@tonic-gate * getq could be built on top of rmvq, but isn't because 16440Sstevel@tonic-gate * of performance considerations. 16450Sstevel@tonic-gate * 16460Sstevel@tonic-gate * A note on the use of q_count and q_mblkcnt: 16470Sstevel@tonic-gate * q_count is the traditional byte count for messages that 16480Sstevel@tonic-gate * have been put on a queue. Documentation tells us that 16490Sstevel@tonic-gate * we shouldn't rely on that count, but some drivers/modules 16500Sstevel@tonic-gate * do. What was needed, however, is a mechanism to prevent 16510Sstevel@tonic-gate * runaway streams from consuming all of the resources, 16520Sstevel@tonic-gate * and particularly be able to flow control zero-length 16530Sstevel@tonic-gate * messages. q_mblkcnt is used for this purpose. It 16540Sstevel@tonic-gate * counts the number of mblk's that are being put on 16550Sstevel@tonic-gate * the queue. The intention here, is that each mblk should 16560Sstevel@tonic-gate * contain one byte of data and, for the purpose of 16570Sstevel@tonic-gate * flow-control, logically does. A queue will become 16580Sstevel@tonic-gate * full when EITHER of these values (q_count and q_mblkcnt) 16590Sstevel@tonic-gate * reach the highwater mark. It will clear when BOTH 16600Sstevel@tonic-gate * of them drop below the highwater mark. And it will 16610Sstevel@tonic-gate * backenable when BOTH of them drop below the lowwater 16620Sstevel@tonic-gate * mark. 
16630Sstevel@tonic-gate * With this algorithm, a driver/module might be able 16640Sstevel@tonic-gate * to find a reasonably accurate q_count, and the 16650Sstevel@tonic-gate * framework can still try and limit resource usage. 16660Sstevel@tonic-gate */ 16670Sstevel@tonic-gate mblk_t * 16680Sstevel@tonic-gate getq(queue_t *q) 16690Sstevel@tonic-gate { 16700Sstevel@tonic-gate mblk_t *bp; 1671235Smicheng uchar_t band = 0; 16720Sstevel@tonic-gate 1673*6769Sja97890 bp = getq_noenab(q, 0); 16740Sstevel@tonic-gate if (bp != NULL) 16750Sstevel@tonic-gate band = bp->b_band; 16760Sstevel@tonic-gate 16770Sstevel@tonic-gate /* 16780Sstevel@tonic-gate * Inlined from qbackenable(). 16790Sstevel@tonic-gate * Quick check without holding the lock. 16800Sstevel@tonic-gate */ 16810Sstevel@tonic-gate if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0) 16820Sstevel@tonic-gate return (bp); 16830Sstevel@tonic-gate 16840Sstevel@tonic-gate qbackenable(q, band); 16850Sstevel@tonic-gate return (bp); 16860Sstevel@tonic-gate } 16870Sstevel@tonic-gate 16880Sstevel@tonic-gate /* 1689741Smasputra * Calculate number of data bytes in a single data message block taking 1690741Smasputra * multidata messages into account. 1691741Smasputra */ 1692741Smasputra 1693741Smasputra #define ADD_MBLK_SIZE(mp, size) \ 1694741Smasputra if (DB_TYPE(mp) != M_MULTIDATA) { \ 1695741Smasputra (size) += MBLKL(mp); \ 1696741Smasputra } else { \ 1697741Smasputra uint_t pinuse; \ 1698741Smasputra \ 1699741Smasputra mmd_getsize(mmd_getmultidata(mp), NULL, &pinuse); \ 1700741Smasputra (size) += pinuse; \ 1701741Smasputra } 1702741Smasputra 1703741Smasputra /* 1704*6769Sja97890 * Returns the number of bytes in a message (a message is defined as a 1705*6769Sja97890 * chain of mblks linked by b_cont). If a non-NULL mblkcnt is supplied we 1706*6769Sja97890 * also return the number of distinct mblks in the message. 
1707*6769Sja97890 */ 1708*6769Sja97890 int 1709*6769Sja97890 mp_cont_len(mblk_t *bp, int *mblkcnt) 1710*6769Sja97890 { 1711*6769Sja97890 mblk_t *mp; 1712*6769Sja97890 int mblks = 0; 1713*6769Sja97890 int bytes = 0; 1714*6769Sja97890 1715*6769Sja97890 for (mp = bp; mp != NULL; mp = mp->b_cont) { 1716*6769Sja97890 ADD_MBLK_SIZE(mp, bytes); 1717*6769Sja97890 mblks++; 1718*6769Sja97890 } 1719*6769Sja97890 1720*6769Sja97890 if (mblkcnt != NULL) 1721*6769Sja97890 *mblkcnt = mblks; 1722*6769Sja97890 1723*6769Sja97890 return (bytes); 1724*6769Sja97890 } 1725*6769Sja97890 1726*6769Sja97890 /* 17270Sstevel@tonic-gate * Like getq() but does not backenable. This is used by the stream 17280Sstevel@tonic-gate * head when a putback() is likely. The caller must call qbackenable() 17290Sstevel@tonic-gate * after it is done with accessing the queue. 1730*6769Sja97890 * The rbytes arguments to getq_noneab() allows callers to specify a 1731*6769Sja97890 * the maximum number of bytes to return. If the current amount on the 1732*6769Sja97890 * queue is less than this then the entire message will be returned. 1733*6769Sja97890 * A value of 0 returns the entire message and is equivalent to the old 1734*6769Sja97890 * default behaviour prior to the addition of the rbytes argument. 
 */
mblk_t *
getq_noenab(queue_t *q, ssize_t rbytes)
{
        mblk_t *bp, *mp1;
        mblk_t *mp2 = NULL;
        qband_t *qbp;
        kthread_id_t freezer;
        int     bytecnt = 0, mblkcnt = 0;

        /* freezestr should allow its caller to call getq/putq */
        freezer = STREAM(q)->sd_freezer;
        if (freezer == curthread) {
                ASSERT(frozenstr(q));
                ASSERT(MUTEX_HELD(QLOCK(q)));
        } else
                mutex_enter(QLOCK(q));

        if ((bp = q->q_first) == 0) {
                /* Queue empty: ask to be back-enabled when data arrives. */
                q->q_flag |= QWANTR;
        } else {
                /*
                 * If the caller supplied a byte threshold and there is
                 * more than this amount on the queue then break up the
                 * message appropriately.  We can only safely do
                 * this for M_DATA messages.
                 */
                if ((DB_TYPE(bp) == M_DATA) && (rbytes > 0) &&
                    (q->q_count > rbytes)) {
                        /*
                         * Inline version of mp_cont_len() which terminates
                         * when we meet or exceed rbytes.
                         */
                        for (mp1 = bp; mp1 != NULL; mp1 = mp1->b_cont) {
                                mblkcnt++;
                                ADD_MBLK_SIZE(mp1, bytecnt);
                                if (bytecnt >= rbytes)
                                        break;
                        }
                        /*
                         * We need to account for the following scenarios:
                         *
                         * 1) Too much data in the first message:
                         *      mp1 will be the mblk which puts us over our
                         *      byte limit.
                         * 2) Not enough data in the first message:
                         *      mp1 will be NULL.
                         * 3) Exactly the right amount of data contained within
                         *    whole mblks:
                         *      mp1->b_cont will be where we break the message.
                         */
                        if (bytecnt > rbytes) {
                                /*
                                 * Dup/copy mp1 and put what we don't need
                                 * back onto the queue. Adjust the read/write
                                 * and continuation pointers appropriately
                                 * and decrement the current mblk count to
                                 * reflect we are putting an mblk back onto
                                 * the queue.
                                 * When adjusting the message pointers, it's
                                 * OK to use the existing bytecnt and the
                                 * requested amount (rbytes) to calculate the
                                 * new write offset (b_wptr) of what we
                                 * are taking. However, we cannot use these
                                 * values when calculating the read offset of
                                 * the mblk we are putting back on the queue.
                                 * This is because the beginning (b_rptr) of the
                                 * mblk represents some arbitrary point within
                                 * the message.
                                 * It's simplest to do this by advancing b_rptr
                                 * by the new length of mp1 as we don't have to
                                 * remember any intermediate state.
                                 */
                                ASSERT(mp1 != NULL);
                                mblkcnt--;
                                if ((mp2 = dupb(mp1)) == NULL &&
                                    (mp2 = copyb(mp1)) == NULL) {
                                        bytecnt = mblkcnt = 0;
                                        goto dup_failed;
                                }
                                mp2->b_cont = mp1->b_cont;
                                mp1->b_wptr -= bytecnt - rbytes;
                                mp2->b_rptr += mp1->b_wptr - mp1->b_rptr;
                                mp1->b_cont = NULL;
                                bytecnt = rbytes;
                        } else {
                                /*
                                 * Either there is not enough data in the first
                                 * message or there is no excess data to deal
                                 * with. If mp1 is NULL, we are taking the
                                 * whole message. No need to do anything.
                                 * Otherwise we assign mp1->b_cont to mp2 as
                                 * we will be putting this back onto the head of
                                 * the queue.
                                 */
                                if (mp1 != NULL) {
                                        mp2 = mp1->b_cont;
                                        mp1->b_cont = NULL;
                                }
                        }
                        /*
                         * If mp2 is not NULL then we have part of the message
                         * to put back onto the queue.
                         */
                        if (mp2 != NULL) {
                                if ((mp2->b_next = bp->b_next) == NULL)
                                        q->q_last = mp2;
                                else
                                        bp->b_next->b_prev = mp2;
                                q->q_first = mp2;
                        } else {
                                if ((q->q_first = bp->b_next) == NULL)
                                        q->q_last = NULL;
                                else
                                        q->q_first->b_prev = NULL;
                        }
                } else {
                        /*
                         * Either no byte threshold was supplied, there is
                         * not enough on the queue or we failed to
                         * duplicate/copy a data block. In these cases we
                         * just take the entire first message.
                         */
dup_failed:
                        bytecnt = mp_cont_len(bp, &mblkcnt);
                        if ((q->q_first = bp->b_next) == NULL)
                                q->q_last = NULL;
                        else
                                q->q_first->b_prev = NULL;
                }
                if (bp->b_band == 0) {
                        /* Normal band: account against q_count/q_mblkcnt. */
                        q->q_count -= bytecnt;
                        q->q_mblkcnt -= mblkcnt;
                        if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
                            (q->q_mblkcnt < q->q_hiwat))) {
                                q->q_flag &= ~QFULL;
                        }
                } else {
                        /* Priority band: account against the qband counters. */
                        int i;

                        ASSERT(bp->b_band <= q->q_nband);
                        ASSERT(q->q_bandp != NULL);
                        ASSERT(MUTEX_HELD(QLOCK(q)));
                        qbp = q->q_bandp;
                        i = bp->b_band;
                        while (--i > 0)
                                qbp = qbp->qb_next;
                        if (qbp->qb_first == qbp->qb_last) {
                                qbp->qb_first = NULL;
                                qbp->qb_last = NULL;
                        } else {
                                qbp->qb_first = bp->b_next;
                        }
                        qbp->qb_count -= bytecnt;
                        qbp->qb_mblkcnt -= mblkcnt;
                        if (qbp->qb_mblkcnt == 0 ||
                            ((qbp->qb_count < qbp->qb_hiwat) &&
                            (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
                                qbp->qb_flag &= ~QB_FULL;
                        }
                }
                q->q_flag &= ~QWANTR;
                bp->b_next = NULL;
                bp->b_prev = NULL;
        }
        if (freezer != curthread)
                mutex_exit(QLOCK(q));

        STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);

        return (bp);
}

/*
 * Determine if a backenable is needed after removing a message in the
 * specified band.
 * NOTE: This routine assumes that something like getq_noenab() has been
 * already called.
 *
 * For the read side it is ok to hold sd_lock across calling this (and the
 * stream head often does).
 * But for the write side strwakeq might be invoked and it acquires sd_lock.
 */
void
qbackenable(queue_t *q, uchar_t band)
{
        int backenab = 0;
        qband_t *qbp;
        kthread_id_t freezer;

        ASSERT(q);
        ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));

        /*
         * Quick check without holding the lock.
         * OK since after getq() has lowered the q_count these flags
         * would not change unless either the qbackenable() is done by
         * another thread (which is ok) or the queue has gotten QFULL
         * in which case another backenable will take place when the queue
         * drops below q_lowat.
         */
        if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
                return;

        /* freezestr should allow its caller to call getq/putq */
        freezer = STREAM(q)->sd_freezer;
        if (freezer == curthread) {
                ASSERT(frozenstr(q));
                ASSERT(MUTEX_HELD(QLOCK(q)));
        } else
                mutex_enter(QLOCK(q));

        if (band == 0) {
                if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
                    q->q_mblkcnt < q->q_lowat)) {
                        backenab = q->q_flag & (QWANTW|QWANTWSYNC);
                }
        } else {
                int i;

                ASSERT((unsigned)band <= q->q_nband);
                ASSERT(q->q_bandp != NULL);

                qbp = q->q_bandp;
                i = band;
                while (--i > 0)
                        qbp = qbp->qb_next;

                if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
                    qbp->qb_mblkcnt < qbp->qb_lowat)) {
                        backenab = qbp->qb_flag & QB_WANTW;
                }
        }

        if (backenab == 0) {
                if (freezer != curthread)
                        mutex_exit(QLOCK(q));
                return;
        }

        /* Have to drop the lock across strwakeq and backenable */
        if (backenab & QWANTWSYNC)
                q->q_flag &= ~QWANTWSYNC;
        if (backenab & (QWANTW|QB_WANTW)) {
                if (band != 0)
                        qbp->qb_flag &= ~QB_WANTW;
                else {
                        q->q_flag &= ~QWANTW;
                }
        }

        if (freezer != curthread)
                mutex_exit(QLOCK(q));

        /* Wakeups are issued only after QLOCK has been released. */
        if (backenab & QWANTWSYNC)
                strwakeq(q, QWANTWSYNC);
        if (backenab & (QWANTW|QB_WANTW))
                backenable(q, band);
}

/*
 * Remove a message from a queue.  The queue count and other
 * flow control parameters are adjusted and the back queue
 * enabled if necessary.
 *
 * rmvq can be called with the stream frozen, but other utility functions
 * holding QLOCK, and by streams modules without any locks/frozen.
 */
void
rmvq(queue_t *q, mblk_t *mp)
{
        ASSERT(mp != NULL);

        rmvq_noenab(q, mp);
        if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
                /*
                 * qbackenable can handle a frozen stream but not a "random"
                 * qlock being held. Drop lock across qbackenable.
                 */
                mutex_exit(QLOCK(q));
                qbackenable(q, mp->b_band);
                mutex_enter(QLOCK(q));
        } else {
                qbackenable(q, mp->b_band);
        }
}

/*
 * Like rmvq() but without any backenabling.
 * This exists to handle SR_CONSOL_DATA in strrput().
 */
void
rmvq_noenab(queue_t *q, mblk_t *mp)
{
        int i;
        qband_t *qbp = NULL;
        kthread_id_t freezer;
        int     bytecnt = 0, mblkcnt = 0;

        freezer = STREAM(q)->sd_freezer;
        if (freezer == curthread) {
                ASSERT(frozenstr(q));
                ASSERT(MUTEX_HELD(QLOCK(q)));
        } else if (MUTEX_HELD(QLOCK(q))) {
                /* Don't drop lock on exit */
                freezer = curthread;
        } else
                mutex_enter(QLOCK(q));

        ASSERT(mp->b_band <= q->q_nband);
        if (mp->b_band != 0) {          /* Adjust band pointers */
                ASSERT(q->q_bandp != NULL);
                qbp = q->q_bandp;
                i = mp->b_band;
                while (--i > 0)
                        qbp = qbp->qb_next;
                if (mp == qbp->qb_first) {
                        if (mp->b_next && mp->b_band == mp->b_next->b_band)
                                qbp->qb_first = mp->b_next;
                        else
                                qbp->qb_first = NULL;
                }
                if (mp == qbp->qb_last) {
                        if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
                                qbp->qb_last = mp->b_prev;
                        else
                                qbp->qb_last = NULL;
                }
        }

        /*
         * Remove the message from the list.
         */
        if (mp->b_prev)
                mp->b_prev->b_next = mp->b_next;
        else
                q->q_first = mp->b_next;
        if (mp->b_next)
                mp->b_next->b_prev = mp->b_prev;
        else
                q->q_last = mp->b_prev;
        mp->b_next = NULL;
        mp->b_prev = NULL;

        /* Get the size of the message for q_count accounting */
        bytecnt = mp_cont_len(mp, &mblkcnt);

        if (mp->b_band == 0) {          /* Perform q_count accounting */
                q->q_count -= bytecnt;
                q->q_mblkcnt -= mblkcnt;
                if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
                    (q->q_mblkcnt < q->q_hiwat))) {
                        q->q_flag &= ~QFULL;
                }
        } else {                        /* Perform qb_count accounting */
                qbp->qb_count -= bytecnt;
                qbp->qb_mblkcnt -= mblkcnt;
                if (qbp->qb_mblkcnt == 0 || ((qbp->qb_count < qbp->qb_hiwat) &&
                    (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
                        qbp->qb_flag &= ~QB_FULL;
                }
        }
        if (freezer != curthread)
                mutex_exit(QLOCK(q));

        STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
}

/*
 * Empty a queue.
 * If flag is set, remove all messages.  Otherwise, remove
 * only non-control messages.  If queue falls below its low
 * water mark, and QWANTW is set, enable the nearest upstream
 * service procedure.
 *
 * Historical note: when merging the M_FLUSH code in strrput with this
 * code one difference was discovered. flushq did not have a check
 * for q_lowat == 0 in the backenabling test.
 *
 * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
 * if one exists on the queue.
 */
void
flushq_common(queue_t *q, int flag, int pcproto_flag)
{
        mblk_t *mp, *nmp;
        qband_t *qbp;
        int backenab = 0;
        unsigned char bpri;
        unsigned char qbf[NBAND];       /* band flushing backenable flags */

        if (q->q_first == NULL)
                return;

        /*
         * Detach the entire chain and zero all counts under QLOCK, then
         * drop the lock while the messages are freed or requeued (putq
         * reacquires it as needed).
         */
        mutex_enter(QLOCK(q));
        mp = q->q_first;
        q->q_first = NULL;
        q->q_last = NULL;
        q->q_count = 0;
        q->q_mblkcnt = 0;
        for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
                qbp->qb_first = NULL;
                qbp->qb_last = NULL;
                qbp->qb_count = 0;
                qbp->qb_mblkcnt = 0;
                qbp->qb_flag &= ~QB_FULL;
        }
        q->q_flag &= ~QFULL;
        mutex_exit(QLOCK(q));
        while (mp) {
                nmp = mp->b_next;
                mp->b_next = mp->b_prev = NULL;

                STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);

                /* Messages that survive the flush are put back via putq. */
                if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
                        (void) putq(q, mp);
                else if (flag || datamsg(mp->b_datap->db_type))
                        freemsg(mp);
                else
                        (void) putq(q, mp);
                mp = nmp;
        }
        /*
         * Decide, per band, whether a backenable is due.  qbf[0] covers
         * the normal band (q_* counters); qbf[1..q_nband] cover the
         * priority bands (qb_* counters).
         */
        bpri = 1;
        mutex_enter(QLOCK(q));
        for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
                if ((qbp->qb_flag & QB_WANTW) &&
                    (((qbp->qb_count < qbp->qb_lowat) &&
                    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
                    qbp->qb_lowat == 0)) {
                        qbp->qb_flag &= ~QB_WANTW;
                        backenab = 1;
                        qbf[bpri] = 1;
                } else
                        qbf[bpri] = 0;
                bpri++;
        }
        ASSERT(bpri == (unsigned char)(q->q_nband + 1));
        if ((q->q_flag & QWANTW) &&
            (((q->q_count < q->q_lowat) &&
            (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
                q->q_flag &= ~QWANTW;
                backenab = 1;
                qbf[0] = 1;
        } else
                qbf[0] = 0;

        /*
         * If any band can now be written to, and there is a writer
         * for that band, then backenable the closest service procedure.
         */
        if (backenab) {
                mutex_exit(QLOCK(q));
                for (bpri = q->q_nband; bpri != 0; bpri--)
                        if (qbf[bpri])
                                backenable(q, bpri);
                if (qbf[0])
                        backenable(q, 0);
        } else
                mutex_exit(QLOCK(q));
}

/*
 * The real flushing takes place in flushq_common.  This is done so that
 * a flag which specifies whether or not M_PCPROTO messages should be flushed
 * or not.  Currently the only place that uses this flag is the stream head.
 */
void
flushq(queue_t *q, int flag)
{
        flushq_common(q, flag, 0);
}

/*
 * Flush the queue of messages of the given priority band.
 * There is some duplication of code between flushq and flushband.
 * This is because we want to optimize the code as much as possible.
 * The assumption is that there will be more messages in the normal
 * (priority 0) band than in any other.
 *
 * Historical note: when merging the M_FLUSH code in strrput with this
 * code one difference was discovered. flushband had an extra check for
 * (mp->b_datap->db_type < QPCTL) in the band 0
 * case. That check does not match the man page for flushband and was not
 * in the strrput flush code hence it was removed.
22200Sstevel@tonic-gate */ 22210Sstevel@tonic-gate void 22220Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag) 22230Sstevel@tonic-gate { 22240Sstevel@tonic-gate mblk_t *mp; 22250Sstevel@tonic-gate mblk_t *nmp; 22260Sstevel@tonic-gate mblk_t *last; 22270Sstevel@tonic-gate qband_t *qbp; 22280Sstevel@tonic-gate int band; 22290Sstevel@tonic-gate 22300Sstevel@tonic-gate ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL)); 22310Sstevel@tonic-gate if (pri > q->q_nband) { 22320Sstevel@tonic-gate return; 22330Sstevel@tonic-gate } 22340Sstevel@tonic-gate mutex_enter(QLOCK(q)); 22350Sstevel@tonic-gate if (pri == 0) { 22360Sstevel@tonic-gate mp = q->q_first; 22370Sstevel@tonic-gate q->q_first = NULL; 22380Sstevel@tonic-gate q->q_last = NULL; 22390Sstevel@tonic-gate q->q_count = 0; 22400Sstevel@tonic-gate q->q_mblkcnt = 0; 22410Sstevel@tonic-gate for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) { 22420Sstevel@tonic-gate qbp->qb_first = NULL; 22430Sstevel@tonic-gate qbp->qb_last = NULL; 22440Sstevel@tonic-gate qbp->qb_count = 0; 22450Sstevel@tonic-gate qbp->qb_mblkcnt = 0; 22460Sstevel@tonic-gate qbp->qb_flag &= ~QB_FULL; 22470Sstevel@tonic-gate } 22480Sstevel@tonic-gate q->q_flag &= ~QFULL; 22490Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22500Sstevel@tonic-gate while (mp) { 22510Sstevel@tonic-gate nmp = mp->b_next; 22520Sstevel@tonic-gate mp->b_next = mp->b_prev = NULL; 22530Sstevel@tonic-gate if ((mp->b_band == 0) && 22546707Sbrutus ((flag == FLUSHALL) || 22556707Sbrutus datamsg(mp->b_datap->db_type))) 22560Sstevel@tonic-gate freemsg(mp); 22570Sstevel@tonic-gate else 22580Sstevel@tonic-gate (void) putq(q, mp); 22590Sstevel@tonic-gate mp = nmp; 22600Sstevel@tonic-gate } 22610Sstevel@tonic-gate mutex_enter(QLOCK(q)); 22620Sstevel@tonic-gate if ((q->q_flag & QWANTW) && 22630Sstevel@tonic-gate (((q->q_count < q->q_lowat) && 22640Sstevel@tonic-gate (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) { 22650Sstevel@tonic-gate q->q_flag &= ~QWANTW; 
22660Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22670Sstevel@tonic-gate 2268235Smicheng backenable(q, pri); 22690Sstevel@tonic-gate } else 22700Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22710Sstevel@tonic-gate } else { /* pri != 0 */ 22720Sstevel@tonic-gate boolean_t flushed = B_FALSE; 22730Sstevel@tonic-gate band = pri; 22740Sstevel@tonic-gate 22750Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 22760Sstevel@tonic-gate qbp = q->q_bandp; 22770Sstevel@tonic-gate while (--band > 0) 22780Sstevel@tonic-gate qbp = qbp->qb_next; 22790Sstevel@tonic-gate mp = qbp->qb_first; 22800Sstevel@tonic-gate if (mp == NULL) { 22810Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22820Sstevel@tonic-gate return; 22830Sstevel@tonic-gate } 22840Sstevel@tonic-gate last = qbp->qb_last->b_next; 22850Sstevel@tonic-gate /* 22860Sstevel@tonic-gate * rmvq_noenab() and freemsg() are called for each mblk that 22870Sstevel@tonic-gate * meets the criteria. The loop is executed until the last 22880Sstevel@tonic-gate * mblk has been processed. 22890Sstevel@tonic-gate */ 22900Sstevel@tonic-gate while (mp != last) { 22910Sstevel@tonic-gate ASSERT(mp->b_band == pri); 22920Sstevel@tonic-gate nmp = mp->b_next; 22930Sstevel@tonic-gate if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) { 22940Sstevel@tonic-gate rmvq_noenab(q, mp); 22950Sstevel@tonic-gate freemsg(mp); 22960Sstevel@tonic-gate flushed = B_TRUE; 22970Sstevel@tonic-gate } 22980Sstevel@tonic-gate mp = nmp; 22990Sstevel@tonic-gate } 23000Sstevel@tonic-gate mutex_exit(QLOCK(q)); 23010Sstevel@tonic-gate 23020Sstevel@tonic-gate /* 23030Sstevel@tonic-gate * If any mblk(s) has been freed, we know that qbackenable() 23040Sstevel@tonic-gate * will need to be called. 23050Sstevel@tonic-gate */ 23060Sstevel@tonic-gate if (flushed) 2307235Smicheng qbackenable(q, pri); 23080Sstevel@tonic-gate } 23090Sstevel@tonic-gate } 23100Sstevel@tonic-gate 23110Sstevel@tonic-gate /* 23120Sstevel@tonic-gate * Return 1 if the queue is not full. 
If the queue is full, return 23130Sstevel@tonic-gate * 0 (may not put message) and set QWANTW flag (caller wants to write 23140Sstevel@tonic-gate * to the queue). 23150Sstevel@tonic-gate */ 23160Sstevel@tonic-gate int 23170Sstevel@tonic-gate canput(queue_t *q) 23180Sstevel@tonic-gate { 23190Sstevel@tonic-gate TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q); 23200Sstevel@tonic-gate 23210Sstevel@tonic-gate /* this is for loopback transports, they should not do a canput */ 23220Sstevel@tonic-gate ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv)); 23230Sstevel@tonic-gate 23240Sstevel@tonic-gate /* Find next forward module that has a service procedure */ 23250Sstevel@tonic-gate q = q->q_nfsrv; 23260Sstevel@tonic-gate 23270Sstevel@tonic-gate if (!(q->q_flag & QFULL)) { 23280Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1); 23290Sstevel@tonic-gate return (1); 23300Sstevel@tonic-gate } 23310Sstevel@tonic-gate mutex_enter(QLOCK(q)); 23320Sstevel@tonic-gate if (q->q_flag & QFULL) { 23330Sstevel@tonic-gate q->q_flag |= QWANTW; 23340Sstevel@tonic-gate mutex_exit(QLOCK(q)); 23350Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0); 23360Sstevel@tonic-gate return (0); 23370Sstevel@tonic-gate } 23380Sstevel@tonic-gate mutex_exit(QLOCK(q)); 23390Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1); 23400Sstevel@tonic-gate return (1); 23410Sstevel@tonic-gate } 23420Sstevel@tonic-gate 23430Sstevel@tonic-gate /* 23440Sstevel@tonic-gate * This is the new canput for use with priority bands. Return 1 if the 23450Sstevel@tonic-gate * band is not full. If the band is full, return 0 (may not put message) 23460Sstevel@tonic-gate * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to 23470Sstevel@tonic-gate * write to the queue). 
23480Sstevel@tonic-gate */ 23490Sstevel@tonic-gate int 23500Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri) 23510Sstevel@tonic-gate { 23520Sstevel@tonic-gate qband_t *qbp; 23530Sstevel@tonic-gate 23540Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri); 23550Sstevel@tonic-gate if (!q) 23560Sstevel@tonic-gate return (0); 23570Sstevel@tonic-gate 23580Sstevel@tonic-gate /* Find next forward module that has a service procedure */ 23590Sstevel@tonic-gate q = q->q_nfsrv; 23600Sstevel@tonic-gate 23610Sstevel@tonic-gate mutex_enter(QLOCK(q)); 23620Sstevel@tonic-gate if (pri == 0) { 23630Sstevel@tonic-gate if (q->q_flag & QFULL) { 23640Sstevel@tonic-gate q->q_flag |= QWANTW; 23650Sstevel@tonic-gate mutex_exit(QLOCK(q)); 23660Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 23676707Sbrutus "bcanput:%p %X %d", q, pri, 0); 23680Sstevel@tonic-gate return (0); 23690Sstevel@tonic-gate } 23700Sstevel@tonic-gate } else { /* pri != 0 */ 23710Sstevel@tonic-gate if (pri > q->q_nband) { 23720Sstevel@tonic-gate /* 23730Sstevel@tonic-gate * No band exists yet, so return success. 
23740Sstevel@tonic-gate */ 23750Sstevel@tonic-gate mutex_exit(QLOCK(q)); 23760Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 23776707Sbrutus "bcanput:%p %X %d", q, pri, 1); 23780Sstevel@tonic-gate return (1); 23790Sstevel@tonic-gate } 23800Sstevel@tonic-gate qbp = q->q_bandp; 23810Sstevel@tonic-gate while (--pri) 23820Sstevel@tonic-gate qbp = qbp->qb_next; 23830Sstevel@tonic-gate if (qbp->qb_flag & QB_FULL) { 23840Sstevel@tonic-gate qbp->qb_flag |= QB_WANTW; 23850Sstevel@tonic-gate mutex_exit(QLOCK(q)); 23860Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 23876707Sbrutus "bcanput:%p %X %d", q, pri, 0); 23880Sstevel@tonic-gate return (0); 23890Sstevel@tonic-gate } 23900Sstevel@tonic-gate } 23910Sstevel@tonic-gate mutex_exit(QLOCK(q)); 23920Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 23936707Sbrutus "bcanput:%p %X %d", q, pri, 1); 23940Sstevel@tonic-gate return (1); 23950Sstevel@tonic-gate } 23960Sstevel@tonic-gate 23970Sstevel@tonic-gate /* 23980Sstevel@tonic-gate * Put a message on a queue. 23990Sstevel@tonic-gate * 24000Sstevel@tonic-gate * Messages are enqueued on a priority basis. The priority classes 24010Sstevel@tonic-gate * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0), 24020Sstevel@tonic-gate * and B_NORMAL (type < QPCTL && band == 0). 24030Sstevel@tonic-gate * 24040Sstevel@tonic-gate * Add appropriate weighted data block sizes to queue count. 24050Sstevel@tonic-gate * If queue hits high water mark then set QFULL flag. 24060Sstevel@tonic-gate * 24070Sstevel@tonic-gate * If QNOENAB is not set (putq is allowed to enable the queue), 24080Sstevel@tonic-gate * enable the queue only if the message is PRIORITY, 24090Sstevel@tonic-gate * or the QWANTR flag is set (indicating that the service procedure 24100Sstevel@tonic-gate * is ready to read the queue. 
This implies that a service 24110Sstevel@tonic-gate * procedure must NEVER put a high priority message back on its own 24120Sstevel@tonic-gate * queue, as this would result in an infinite loop (!). 24130Sstevel@tonic-gate */ 24140Sstevel@tonic-gate int 24150Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp) 24160Sstevel@tonic-gate { 24170Sstevel@tonic-gate mblk_t *tmp; 24180Sstevel@tonic-gate qband_t *qbp = NULL; 24190Sstevel@tonic-gate int mcls = (int)queclass(bp); 24200Sstevel@tonic-gate kthread_id_t freezer; 24210Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0; 24220Sstevel@tonic-gate 24230Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer; 24240Sstevel@tonic-gate if (freezer == curthread) { 24250Sstevel@tonic-gate ASSERT(frozenstr(q)); 24260Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 24270Sstevel@tonic-gate } else 24280Sstevel@tonic-gate mutex_enter(QLOCK(q)); 24290Sstevel@tonic-gate 24300Sstevel@tonic-gate /* 24310Sstevel@tonic-gate * Make sanity checks and if qband structure is not yet 24320Sstevel@tonic-gate * allocated, do so. 24330Sstevel@tonic-gate */ 24340Sstevel@tonic-gate if (mcls == QPCTL) { 24350Sstevel@tonic-gate if (bp->b_band != 0) 24360Sstevel@tonic-gate bp->b_band = 0; /* force to be correct */ 24370Sstevel@tonic-gate } else if (bp->b_band != 0) { 24380Sstevel@tonic-gate int i; 24390Sstevel@tonic-gate qband_t **qbpp; 24400Sstevel@tonic-gate 24410Sstevel@tonic-gate if (bp->b_band > q->q_nband) { 24420Sstevel@tonic-gate 24430Sstevel@tonic-gate /* 24440Sstevel@tonic-gate * The qband structure for this priority band is 24450Sstevel@tonic-gate * not on the queue yet, so we have to allocate 24460Sstevel@tonic-gate * one on the fly. It would be wasteful to 24470Sstevel@tonic-gate * associate the qband structures with every 24480Sstevel@tonic-gate * queue when the queues are allocated. 
This is 24490Sstevel@tonic-gate * because most queues will only need the normal 24500Sstevel@tonic-gate * band of flow which can be described entirely 24510Sstevel@tonic-gate * by the queue itself. 24520Sstevel@tonic-gate */ 24530Sstevel@tonic-gate qbpp = &q->q_bandp; 24540Sstevel@tonic-gate while (*qbpp) 24550Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 24560Sstevel@tonic-gate while (bp->b_band > q->q_nband) { 24570Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) { 24580Sstevel@tonic-gate if (freezer != curthread) 24590Sstevel@tonic-gate mutex_exit(QLOCK(q)); 24600Sstevel@tonic-gate return (0); 24610Sstevel@tonic-gate } 24620Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat; 24630Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat; 24640Sstevel@tonic-gate q->q_nband++; 24650Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 24660Sstevel@tonic-gate } 24670Sstevel@tonic-gate } 24680Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 24690Sstevel@tonic-gate qbp = q->q_bandp; 24700Sstevel@tonic-gate i = bp->b_band; 24710Sstevel@tonic-gate while (--i) 24720Sstevel@tonic-gate qbp = qbp->qb_next; 24730Sstevel@tonic-gate } 24740Sstevel@tonic-gate 24750Sstevel@tonic-gate /* 24760Sstevel@tonic-gate * If queue is empty, add the message and initialize the pointers. 24770Sstevel@tonic-gate * Otherwise, adjust message pointers and queue pointers based on 24780Sstevel@tonic-gate * the type of the message and where it belongs on the queue. Some 24790Sstevel@tonic-gate * code is duplicated to minimize the number of conditionals and 24800Sstevel@tonic-gate * hopefully minimize the amount of time this routine takes. 
24810Sstevel@tonic-gate */ 24820Sstevel@tonic-gate if (!q->q_first) { 24830Sstevel@tonic-gate bp->b_next = NULL; 24840Sstevel@tonic-gate bp->b_prev = NULL; 24850Sstevel@tonic-gate q->q_first = bp; 24860Sstevel@tonic-gate q->q_last = bp; 24870Sstevel@tonic-gate if (qbp) { 24880Sstevel@tonic-gate qbp->qb_first = bp; 24890Sstevel@tonic-gate qbp->qb_last = bp; 24900Sstevel@tonic-gate } 24910Sstevel@tonic-gate } else if (!qbp) { /* bp->b_band == 0 */ 24920Sstevel@tonic-gate 24930Sstevel@tonic-gate /* 24940Sstevel@tonic-gate * If queue class of message is less than or equal to 24950Sstevel@tonic-gate * that of the last one on the queue, tack on to the end. 24960Sstevel@tonic-gate */ 24970Sstevel@tonic-gate tmp = q->q_last; 24980Sstevel@tonic-gate if (mcls <= (int)queclass(tmp)) { 24990Sstevel@tonic-gate bp->b_next = NULL; 25000Sstevel@tonic-gate bp->b_prev = tmp; 25010Sstevel@tonic-gate tmp->b_next = bp; 25020Sstevel@tonic-gate q->q_last = bp; 25030Sstevel@tonic-gate } else { 25040Sstevel@tonic-gate tmp = q->q_first; 25050Sstevel@tonic-gate while ((int)queclass(tmp) >= mcls) 25060Sstevel@tonic-gate tmp = tmp->b_next; 25070Sstevel@tonic-gate 25080Sstevel@tonic-gate /* 25090Sstevel@tonic-gate * Insert bp before tmp. 25100Sstevel@tonic-gate */ 25110Sstevel@tonic-gate bp->b_next = tmp; 25120Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 25130Sstevel@tonic-gate if (tmp->b_prev) 25140Sstevel@tonic-gate tmp->b_prev->b_next = bp; 25150Sstevel@tonic-gate else 25160Sstevel@tonic-gate q->q_first = bp; 25170Sstevel@tonic-gate tmp->b_prev = bp; 25180Sstevel@tonic-gate } 25190Sstevel@tonic-gate } else { /* bp->b_band != 0 */ 25200Sstevel@tonic-gate if (qbp->qb_first) { 25210Sstevel@tonic-gate tmp = qbp->qb_last; 25220Sstevel@tonic-gate 25230Sstevel@tonic-gate /* 25240Sstevel@tonic-gate * Insert bp after the last message in this band. 
25250Sstevel@tonic-gate */ 25260Sstevel@tonic-gate bp->b_next = tmp->b_next; 25270Sstevel@tonic-gate if (tmp->b_next) 25280Sstevel@tonic-gate tmp->b_next->b_prev = bp; 25290Sstevel@tonic-gate else 25300Sstevel@tonic-gate q->q_last = bp; 25310Sstevel@tonic-gate bp->b_prev = tmp; 25320Sstevel@tonic-gate tmp->b_next = bp; 25330Sstevel@tonic-gate } else { 25340Sstevel@tonic-gate tmp = q->q_last; 25350Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) || 25360Sstevel@tonic-gate (bp->b_band <= tmp->b_band)) { 25370Sstevel@tonic-gate 25380Sstevel@tonic-gate /* 25390Sstevel@tonic-gate * Tack bp on end of queue. 25400Sstevel@tonic-gate */ 25410Sstevel@tonic-gate bp->b_next = NULL; 25420Sstevel@tonic-gate bp->b_prev = tmp; 25430Sstevel@tonic-gate tmp->b_next = bp; 25440Sstevel@tonic-gate q->q_last = bp; 25450Sstevel@tonic-gate } else { 25460Sstevel@tonic-gate tmp = q->q_first; 25470Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL) 25480Sstevel@tonic-gate tmp = tmp->b_next; 25490Sstevel@tonic-gate while (tmp->b_band >= bp->b_band) 25500Sstevel@tonic-gate tmp = tmp->b_next; 25510Sstevel@tonic-gate 25520Sstevel@tonic-gate /* 25530Sstevel@tonic-gate * Insert bp before tmp. 
25540Sstevel@tonic-gate */ 25550Sstevel@tonic-gate bp->b_next = tmp; 25560Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 25570Sstevel@tonic-gate if (tmp->b_prev) 25580Sstevel@tonic-gate tmp->b_prev->b_next = bp; 25590Sstevel@tonic-gate else 25600Sstevel@tonic-gate q->q_first = bp; 25610Sstevel@tonic-gate tmp->b_prev = bp; 25620Sstevel@tonic-gate } 25630Sstevel@tonic-gate qbp->qb_first = bp; 25640Sstevel@tonic-gate } 25650Sstevel@tonic-gate qbp->qb_last = bp; 25660Sstevel@tonic-gate } 25670Sstevel@tonic-gate 25680Sstevel@tonic-gate /* Get message byte count for q_count accounting */ 2569*6769Sja97890 bytecnt = mp_cont_len(bp, &mblkcnt); 2570741Smasputra 25710Sstevel@tonic-gate if (qbp) { 25720Sstevel@tonic-gate qbp->qb_count += bytecnt; 25730Sstevel@tonic-gate qbp->qb_mblkcnt += mblkcnt; 25740Sstevel@tonic-gate if ((qbp->qb_count >= qbp->qb_hiwat) || 25750Sstevel@tonic-gate (qbp->qb_mblkcnt >= qbp->qb_hiwat)) { 25760Sstevel@tonic-gate qbp->qb_flag |= QB_FULL; 25770Sstevel@tonic-gate } 25780Sstevel@tonic-gate } else { 25790Sstevel@tonic-gate q->q_count += bytecnt; 25800Sstevel@tonic-gate q->q_mblkcnt += mblkcnt; 25810Sstevel@tonic-gate if ((q->q_count >= q->q_hiwat) || 25820Sstevel@tonic-gate (q->q_mblkcnt >= q->q_hiwat)) { 25830Sstevel@tonic-gate q->q_flag |= QFULL; 25840Sstevel@tonic-gate } 25850Sstevel@tonic-gate } 25860Sstevel@tonic-gate 25870Sstevel@tonic-gate STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL); 25880Sstevel@tonic-gate 25890Sstevel@tonic-gate if ((mcls > QNORM) || 25900Sstevel@tonic-gate (canenable(q) && (q->q_flag & QWANTR || bp->b_band))) 25910Sstevel@tonic-gate qenable_locked(q); 25920Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 25930Sstevel@tonic-gate if (freezer != curthread) 25940Sstevel@tonic-gate mutex_exit(QLOCK(q)); 25950Sstevel@tonic-gate 25960Sstevel@tonic-gate return (1); 25970Sstevel@tonic-gate } 25980Sstevel@tonic-gate 25990Sstevel@tonic-gate /* 26000Sstevel@tonic-gate * Put stuff back at beginning of Q according to priority order. 
26010Sstevel@tonic-gate * See comment on putq above for details. 26020Sstevel@tonic-gate */ 26030Sstevel@tonic-gate int 26040Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp) 26050Sstevel@tonic-gate { 26060Sstevel@tonic-gate mblk_t *tmp; 26070Sstevel@tonic-gate qband_t *qbp = NULL; 26080Sstevel@tonic-gate int mcls = (int)queclass(bp); 26090Sstevel@tonic-gate kthread_id_t freezer; 26100Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0; 26110Sstevel@tonic-gate 26120Sstevel@tonic-gate ASSERT(q && bp); 26130Sstevel@tonic-gate ASSERT(bp->b_next == NULL); 26140Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer; 26150Sstevel@tonic-gate if (freezer == curthread) { 26160Sstevel@tonic-gate ASSERT(frozenstr(q)); 26170Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 26180Sstevel@tonic-gate } else 26190Sstevel@tonic-gate mutex_enter(QLOCK(q)); 26200Sstevel@tonic-gate 26210Sstevel@tonic-gate /* 26220Sstevel@tonic-gate * Make sanity checks and if qband structure is not yet 26230Sstevel@tonic-gate * allocated, do so. 
26240Sstevel@tonic-gate */ 26250Sstevel@tonic-gate if (mcls == QPCTL) { 26260Sstevel@tonic-gate if (bp->b_band != 0) 26270Sstevel@tonic-gate bp->b_band = 0; /* force to be correct */ 26280Sstevel@tonic-gate } else if (bp->b_band != 0) { 26290Sstevel@tonic-gate int i; 26300Sstevel@tonic-gate qband_t **qbpp; 26310Sstevel@tonic-gate 26320Sstevel@tonic-gate if (bp->b_band > q->q_nband) { 26330Sstevel@tonic-gate qbpp = &q->q_bandp; 26340Sstevel@tonic-gate while (*qbpp) 26350Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 26360Sstevel@tonic-gate while (bp->b_band > q->q_nband) { 26370Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) { 26380Sstevel@tonic-gate if (freezer != curthread) 26390Sstevel@tonic-gate mutex_exit(QLOCK(q)); 26400Sstevel@tonic-gate return (0); 26410Sstevel@tonic-gate } 26420Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat; 26430Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat; 26440Sstevel@tonic-gate q->q_nband++; 26450Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 26460Sstevel@tonic-gate } 26470Sstevel@tonic-gate } 26480Sstevel@tonic-gate qbp = q->q_bandp; 26490Sstevel@tonic-gate i = bp->b_band; 26500Sstevel@tonic-gate while (--i) 26510Sstevel@tonic-gate qbp = qbp->qb_next; 26520Sstevel@tonic-gate } 26530Sstevel@tonic-gate 26540Sstevel@tonic-gate /* 26550Sstevel@tonic-gate * If queue is empty or if message is high priority, 26560Sstevel@tonic-gate * place on the front of the queue. 
26570Sstevel@tonic-gate */ 26580Sstevel@tonic-gate tmp = q->q_first; 26590Sstevel@tonic-gate if ((!tmp) || (mcls == QPCTL)) { 26600Sstevel@tonic-gate bp->b_next = tmp; 26610Sstevel@tonic-gate if (tmp) 26620Sstevel@tonic-gate tmp->b_prev = bp; 26630Sstevel@tonic-gate else 26640Sstevel@tonic-gate q->q_last = bp; 26650Sstevel@tonic-gate q->q_first = bp; 26660Sstevel@tonic-gate bp->b_prev = NULL; 26670Sstevel@tonic-gate if (qbp) { 26680Sstevel@tonic-gate qbp->qb_first = bp; 26690Sstevel@tonic-gate qbp->qb_last = bp; 26700Sstevel@tonic-gate } 26710Sstevel@tonic-gate } else if (qbp) { /* bp->b_band != 0 */ 26720Sstevel@tonic-gate tmp = qbp->qb_first; 26730Sstevel@tonic-gate if (tmp) { 26740Sstevel@tonic-gate 26750Sstevel@tonic-gate /* 26760Sstevel@tonic-gate * Insert bp before the first message in this band. 26770Sstevel@tonic-gate */ 26780Sstevel@tonic-gate bp->b_next = tmp; 26790Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 26800Sstevel@tonic-gate if (tmp->b_prev) 26810Sstevel@tonic-gate tmp->b_prev->b_next = bp; 26820Sstevel@tonic-gate else 26830Sstevel@tonic-gate q->q_first = bp; 26840Sstevel@tonic-gate tmp->b_prev = bp; 26850Sstevel@tonic-gate } else { 26860Sstevel@tonic-gate tmp = q->q_last; 26870Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) || 26880Sstevel@tonic-gate (bp->b_band < tmp->b_band)) { 26890Sstevel@tonic-gate 26900Sstevel@tonic-gate /* 26910Sstevel@tonic-gate * Tack bp on end of queue. 26920Sstevel@tonic-gate */ 26930Sstevel@tonic-gate bp->b_next = NULL; 26940Sstevel@tonic-gate bp->b_prev = tmp; 26950Sstevel@tonic-gate tmp->b_next = bp; 26960Sstevel@tonic-gate q->q_last = bp; 26970Sstevel@tonic-gate } else { 26980Sstevel@tonic-gate tmp = q->q_first; 26990Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL) 27000Sstevel@tonic-gate tmp = tmp->b_next; 27010Sstevel@tonic-gate while (tmp->b_band > bp->b_band) 27020Sstevel@tonic-gate tmp = tmp->b_next; 27030Sstevel@tonic-gate 27040Sstevel@tonic-gate /* 27050Sstevel@tonic-gate * Insert bp before tmp. 
27060Sstevel@tonic-gate */ 27070Sstevel@tonic-gate bp->b_next = tmp; 27080Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 27090Sstevel@tonic-gate if (tmp->b_prev) 27100Sstevel@tonic-gate tmp->b_prev->b_next = bp; 27110Sstevel@tonic-gate else 27120Sstevel@tonic-gate q->q_first = bp; 27130Sstevel@tonic-gate tmp->b_prev = bp; 27140Sstevel@tonic-gate } 27150Sstevel@tonic-gate qbp->qb_last = bp; 27160Sstevel@tonic-gate } 27170Sstevel@tonic-gate qbp->qb_first = bp; 27180Sstevel@tonic-gate } else { /* bp->b_band == 0 && !QPCTL */ 27190Sstevel@tonic-gate 27200Sstevel@tonic-gate /* 27210Sstevel@tonic-gate * If the queue class or band is less than that of the last 27220Sstevel@tonic-gate * message on the queue, tack bp on the end of the queue. 27230Sstevel@tonic-gate */ 27240Sstevel@tonic-gate tmp = q->q_last; 27250Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) { 27260Sstevel@tonic-gate bp->b_next = NULL; 27270Sstevel@tonic-gate bp->b_prev = tmp; 27280Sstevel@tonic-gate tmp->b_next = bp; 27290Sstevel@tonic-gate q->q_last = bp; 27300Sstevel@tonic-gate } else { 27310Sstevel@tonic-gate tmp = q->q_first; 27320Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL) 27330Sstevel@tonic-gate tmp = tmp->b_next; 27340Sstevel@tonic-gate while (tmp->b_band > bp->b_band) 27350Sstevel@tonic-gate tmp = tmp->b_next; 27360Sstevel@tonic-gate 27370Sstevel@tonic-gate /* 27380Sstevel@tonic-gate * Insert bp before tmp. 
27390Sstevel@tonic-gate */ 27400Sstevel@tonic-gate bp->b_next = tmp; 27410Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 27420Sstevel@tonic-gate if (tmp->b_prev) 27430Sstevel@tonic-gate tmp->b_prev->b_next = bp; 27440Sstevel@tonic-gate else 27450Sstevel@tonic-gate q->q_first = bp; 27460Sstevel@tonic-gate tmp->b_prev = bp; 27470Sstevel@tonic-gate } 27480Sstevel@tonic-gate } 27490Sstevel@tonic-gate 27500Sstevel@tonic-gate /* Get message byte count for q_count accounting */ 2751*6769Sja97890 bytecnt = mp_cont_len(bp, &mblkcnt); 2752*6769Sja97890 27530Sstevel@tonic-gate if (qbp) { 27540Sstevel@tonic-gate qbp->qb_count += bytecnt; 27550Sstevel@tonic-gate qbp->qb_mblkcnt += mblkcnt; 27560Sstevel@tonic-gate if ((qbp->qb_count >= qbp->qb_hiwat) || 27570Sstevel@tonic-gate (qbp->qb_mblkcnt >= qbp->qb_hiwat)) { 27580Sstevel@tonic-gate qbp->qb_flag |= QB_FULL; 27590Sstevel@tonic-gate } 27600Sstevel@tonic-gate } else { 27610Sstevel@tonic-gate q->q_count += bytecnt; 27620Sstevel@tonic-gate q->q_mblkcnt += mblkcnt; 27630Sstevel@tonic-gate if ((q->q_count >= q->q_hiwat) || 27640Sstevel@tonic-gate (q->q_mblkcnt >= q->q_hiwat)) { 27650Sstevel@tonic-gate q->q_flag |= QFULL; 27660Sstevel@tonic-gate } 27670Sstevel@tonic-gate } 27680Sstevel@tonic-gate 27690Sstevel@tonic-gate STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL); 27700Sstevel@tonic-gate 27710Sstevel@tonic-gate if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR))) 27720Sstevel@tonic-gate qenable_locked(q); 27730Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 27740Sstevel@tonic-gate if (freezer != curthread) 27750Sstevel@tonic-gate mutex_exit(QLOCK(q)); 27760Sstevel@tonic-gate 27770Sstevel@tonic-gate return (1); 27780Sstevel@tonic-gate } 27790Sstevel@tonic-gate 27800Sstevel@tonic-gate /* 27810Sstevel@tonic-gate * Insert a message before an existing message on the queue. If the 27820Sstevel@tonic-gate * existing message is NULL, the new messages is placed on the end of 27830Sstevel@tonic-gate * the queue. 
 * The queue class of the new message is ignored. However,
 * the priority band of the new message must adhere to the following
 * ordering:
 *
 *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
 *
 * All flow control parameters are updated.
 *
 * insq can be called with the stream frozen, but other utility functions
 * holding QLOCK, and by streams modules without any locks/frozen.
 *
 * Returns 1 on success; returns 0 (with a warning) if the insertion
 * would violate the ordering above, or if a required qband structure
 * cannot be allocated.
 */
int
insq(queue_t *q, mblk_t *emp, mblk_t *mp)
{
	mblk_t *tmp;
	qband_t *qbp = NULL;
	int mcls = (int)queclass(mp);
	kthread_id_t freezer;
	int bytecnt = 0, mblkcnt = 0;

	/*
	 * Locking protocol: if the caller froze the stream or already
	 * holds QLOCK, do not drop the lock on exit; otherwise take
	 * QLOCK here and release it before returning.
	 */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else if (MUTEX_HELD(QLOCK(q))) {
		/* Don't drop lock on exit */
		freezer = curthread;
	} else
		mutex_enter(QLOCK(q));

	/*
	 * High-priority (QPCTL) messages always use band 0 and may not
	 * be inserted after an ordinary message.
	 */
	if (mcls == QPCTL) {
		if (mp->b_band != 0)
			mp->b_band = 0;		/* force to be correct */
		if (emp && emp->b_prev &&
		    (emp->b_prev->b_datap->db_type < QPCTL))
			goto badord;
	}

	/* Validate band ordering relative to the insertion point. */
	if (emp) {
		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
		    (emp->b_prev->b_band < mp->b_band))) {
			goto badord;
		}
	} else {
		/* Appending at the tail: new band must not exceed last's. */
		tmp = q->q_last;
		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
badord:
			cmn_err(CE_WARN,
			    "insq: attempt to insert message out of order "
			    "on q %p", (void *)q);
			if (freezer != curthread)
				mutex_exit(QLOCK(q));
			return (0);
		}
	}

	/*
	 * For a banded message, grow the qband list if needed and find
	 * the qband_t for mp->b_band (list position i corresponds to
	 * band i, 1-based).
	 */
	if (mp->b_band != 0) {
		int i;
		qband_t **qbpp;

		if (mp->b_band > q->q_nband) {
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;
			while (mp->b_band > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					if (freezer != curthread)
						mutex_exit(QLOCK(q));
					return (0);
				}
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}
		qbp = q->q_bandp;
		i = mp->b_band;
		while (--i)
			qbp = qbp->qb_next;
	}

	/* Link mp into the doubly linked message list before emp. */
	if ((mp->b_next = emp) != NULL) {
		if ((mp->b_prev = emp->b_prev) != NULL)
			emp->b_prev->b_next = mp;
		else
			q->q_first = mp;
		emp->b_prev = mp;
	} else {
		/* emp == NULL: append at the tail of the queue. */
		if ((mp->b_prev = q->q_last) != NULL)
			q->q_last->b_next = mp;
		else
			q->q_first = mp;
		q->q_last = mp;
	}

	/* Get mblk and byte count for q_count accounting */
	bytecnt = mp_cont_len(mp, &mblkcnt);

	if (qbp) {	/* adjust qband pointers and count */
		if (!qbp->qb_first) {
			qbp->qb_first = mp;
			qbp->qb_last = mp;
		} else {
			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
			    (mp->b_prev->b_band != mp->b_band)))
				qbp->qb_first = mp;
			else if (mp->b_next == NULL || (mp->b_next != NULL &&
			    (mp->b_next->b_band != mp->b_band)))
				qbp->qb_last = mp;
		}
		/*
		 * Both the byte count and the mblk count are checked
		 * against qb_hiwat when deciding QB_FULL.
		 */
		qbp->qb_count += bytecnt;
		qbp->qb_mblkcnt += mblkcnt;
		if ((qbp->qb_count >= qbp->qb_hiwat) ||
		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
			qbp->qb_flag |= QB_FULL;
		}
	} else {
		q->q_count += bytecnt;
		q->q_mblkcnt += mblkcnt;
		if ((q->q_count >= q->q_hiwat) ||
		    (q->q_mblkcnt >= q->q_hiwat)) {
			q->q_flag |= QFULL;
		}
	}

	STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL);

	/* Enable the queue if a reader is waiting and enabling is allowed. */
	if (canenable(q) && (q->q_flag & QWANTR))
		qenable_locked(q);

	ASSERT(MUTEX_HELD(QLOCK(q)));
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	return (1);
}

/*
 * Create and put a control message on queue.
 *
 * Allocates a zero-length message of the given type via put().
 * Returns 1 on success; 0 if `type' is a data message type (other than
 * M_DELAY) or if allocation fails.
 */
int
putctl(queue_t *q, int type)
{
	mblk_t *bp;

	if ((datamsg(type) && (type != M_DELAY)) ||
	    (bp = allocb_tryhard(0)) == NULL)
		return (0);
	bp->b_datap->db_type = (unsigned char)type;

	put(q, bp);

	return (1);
}

/*
 * Control message with a single-byte parameter.
 *
 * Same as putctl() but the message carries one byte of payload
 * (`param' truncated to unsigned char).
 */
int
putctl1(queue_t *q, int type, int param)
{
	mblk_t *bp;

	if ((datamsg(type) && (type != M_DELAY)) ||
	    (bp = allocb_tryhard(1)) == NULL)
		return (0);
	bp->b_datap->db_type = (unsigned char)type;
	*bp->b_wptr++ = (unsigned char)param;

	put(q, bp);

	return (1);
}

/*
 * Like putctl1(), but the message is sent to the next queue via
 * putnext() rather than put on `q' itself.
 */
int
putnextctl1(queue_t *q, int type, int param)
{
	mblk_t *bp;

	if ((datamsg(type) && (type != M_DELAY)) ||
	    ((bp = allocb_tryhard(1)) == NULL))
		return (0);

	bp->b_datap->db_type = (unsigned char)type;
	*bp->b_wptr++ = (unsigned char)param;

	putnext(q, bp);

	return (1);
}

/*
 * Like putctl(), but the message is sent to the next queue via
 * putnext() rather than put on `q' itself.
 */
int
putnextctl(queue_t *q, int type)
{
	mblk_t *bp;

	if ((datamsg(type) && (type != M_DELAY)) ||
	    ((bp = allocb_tryhard(0)) == NULL))
		return (0);
	bp->b_datap->db_type = (unsigned char)type;

	putnext(q, bp);

	return (1);
}

/*
 * Return the queue upstream from this one, or NULL if this is the
 * last queue in the direction of flow (no q_next on the other side).
 */
queue_t *
backq(queue_t *q)
{
	q = _OTHERQ(q);
	if (q->q_next) {
		q = q->q_next;
		return (_OTHERQ(q));
	}
	return (NULL);
}

/*
 * Send a block back up the queue in reverse from this
 * one (e.g.
to respond to ioctls) 30070Sstevel@tonic-gate */ 30080Sstevel@tonic-gate void 30090Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp) 30100Sstevel@tonic-gate { 30110Sstevel@tonic-gate ASSERT(q && bp); 30120Sstevel@tonic-gate 30130Sstevel@tonic-gate putnext(_OTHERQ(q), bp); 30140Sstevel@tonic-gate } 30150Sstevel@tonic-gate 30160Sstevel@tonic-gate /* 30170Sstevel@tonic-gate * Streams Queue Scheduling 30180Sstevel@tonic-gate * 30190Sstevel@tonic-gate * Queues are enabled through qenable() when they have messages to 30200Sstevel@tonic-gate * process. They are serviced by queuerun(), which runs each enabled 30210Sstevel@tonic-gate * queue's service procedure. The call to queuerun() is processor 30220Sstevel@tonic-gate * dependent - the general principle is that it be run whenever a queue 30230Sstevel@tonic-gate * is enabled but before returning to user level. For system calls, 30240Sstevel@tonic-gate * the function runqueues() is called if their action causes a queue 30250Sstevel@tonic-gate * to be enabled. For device interrupts, queuerun() should be 30260Sstevel@tonic-gate * called before returning from the last level of interrupt. Beyond 30270Sstevel@tonic-gate * this, no timing assumptions should be made about queue scheduling. 30280Sstevel@tonic-gate */ 30290Sstevel@tonic-gate 30300Sstevel@tonic-gate /* 30310Sstevel@tonic-gate * Enable a queue: put it on list of those whose service procedures are 30320Sstevel@tonic-gate * ready to run and set up the scheduling mechanism. 30330Sstevel@tonic-gate * The broadcast is done outside the mutex -> to avoid the woken thread 30340Sstevel@tonic-gate * from contending with the mutex. This is OK 'cos the queue has been 30350Sstevel@tonic-gate * enqueued on the runlist and flagged safely at this point. 
30360Sstevel@tonic-gate */ 30370Sstevel@tonic-gate void 30380Sstevel@tonic-gate qenable(queue_t *q) 30390Sstevel@tonic-gate { 30400Sstevel@tonic-gate mutex_enter(QLOCK(q)); 30410Sstevel@tonic-gate qenable_locked(q); 30420Sstevel@tonic-gate mutex_exit(QLOCK(q)); 30430Sstevel@tonic-gate } 30440Sstevel@tonic-gate /* 30450Sstevel@tonic-gate * Return number of messages on queue 30460Sstevel@tonic-gate */ 30470Sstevel@tonic-gate int 30480Sstevel@tonic-gate qsize(queue_t *qp) 30490Sstevel@tonic-gate { 30500Sstevel@tonic-gate int count = 0; 30510Sstevel@tonic-gate mblk_t *mp; 30520Sstevel@tonic-gate 30530Sstevel@tonic-gate mutex_enter(QLOCK(qp)); 30540Sstevel@tonic-gate for (mp = qp->q_first; mp; mp = mp->b_next) 30550Sstevel@tonic-gate count++; 30560Sstevel@tonic-gate mutex_exit(QLOCK(qp)); 30570Sstevel@tonic-gate return (count); 30580Sstevel@tonic-gate } 30590Sstevel@tonic-gate 30600Sstevel@tonic-gate /* 30610Sstevel@tonic-gate * noenable - set queue so that putq() will not enable it. 30620Sstevel@tonic-gate * enableok - set queue so that putq() can enable it. 30630Sstevel@tonic-gate */ 30640Sstevel@tonic-gate void 30650Sstevel@tonic-gate noenable(queue_t *q) 30660Sstevel@tonic-gate { 30670Sstevel@tonic-gate mutex_enter(QLOCK(q)); 30680Sstevel@tonic-gate q->q_flag |= QNOENB; 30690Sstevel@tonic-gate mutex_exit(QLOCK(q)); 30700Sstevel@tonic-gate } 30710Sstevel@tonic-gate 30720Sstevel@tonic-gate void 30730Sstevel@tonic-gate enableok(queue_t *q) 30740Sstevel@tonic-gate { 30750Sstevel@tonic-gate mutex_enter(QLOCK(q)); 30760Sstevel@tonic-gate q->q_flag &= ~QNOENB; 30770Sstevel@tonic-gate mutex_exit(QLOCK(q)); 30780Sstevel@tonic-gate } 30790Sstevel@tonic-gate 30800Sstevel@tonic-gate /* 30810Sstevel@tonic-gate * Set queue fields. 
 */
/*
 * Set one field of a queue (or, when `pri' is non-zero, of the qband
 * structure for that priority band, allocating bands as needed).
 *
 * Returns 0 on success, or:
 *	EINVAL	- unknown/unsupported field for this target
 *	EAGAIN	- a required qband could not be allocated
 *	EPERM	- field is read-only (QCOUNT/QFIRST/QLAST/QFLAG)
 *
 * May be called with the stream frozen by the caller (in which case
 * QLOCK is already held and is not dropped here).
 */
int
strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
{
	qband_t *qbp = NULL;
	queue_t *wrq;
	int error = 0;
	kthread_id_t freezer;

	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if (what >= QBAD) {
		error = EINVAL;
		goto done;
	}
	/*
	 * Locate (growing the band list if necessary) the qband_t for
	 * priority band `pri'; qbp stays NULL for the band-0 case.
	 */
	if (pri != 0) {
		int i;
		qband_t **qbpp;

		if (pri > q->q_nband) {
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;
			while (pri > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					error = EAGAIN;
					goto done;
				}
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}
		qbp = q->q_bandp;
		i = pri;
		while (--i)
			qbp = qbp->qb_next;
	}
	switch (what) {

	case QHIWAT:
		if (qbp)
			qbp->qb_hiwat = (size_t)val;
		else
			q->q_hiwat = (size_t)val;
		break;

	case QLOWAT:
		if (qbp)
			qbp->qb_lowat = (size_t)val;
		else
			q->q_lowat = (size_t)val;
		break;

	case QMAXPSZ:
		if (qbp)
			error = EINVAL;
		else
			q->q_maxpsz = (ssize_t)val;

		/*
		 * Performance concern, strwrite looks at the module below
		 * the stream head for the maxpsz each time it does a write
		 * we now cache it at the stream head.  Check to see if this
		 * queue is sitting directly below the stream head.
		 */
		wrq = STREAM(q)->sd_wrq;
		if (q != wrq->q_next)
			break;

		/*
		 * If the stream is not frozen drop the current QLOCK and
		 * acquire the sd_wrq QLOCK which protects sd_qn_*
		 */
		if (freezer != curthread) {
			mutex_exit(QLOCK(q));
			mutex_enter(QLOCK(wrq));
		}
		ASSERT(MUTEX_HELD(QLOCK(wrq)));

		/*
		 * Clamp the cached value: INFPSZ maps to strmsgsz, FIFOs
		 * are additionally limited to PIPE_BUF.
		 */
		if (strmsgsz != 0) {
			if (val == INFPSZ)
				val = strmsgsz;
			else {
				if (STREAM(q)->sd_vnode->v_type == VFIFO)
					val = MIN(PIPE_BUF, val);
				else
					val = MIN(strmsgsz, val);
			}
		}
		STREAM(q)->sd_qn_maxpsz = val;
		if (freezer != curthread) {
			mutex_exit(QLOCK(wrq));
			mutex_enter(QLOCK(q));
		}
		break;

	case QMINPSZ:
		if (qbp)
			error = EINVAL;
		else
			q->q_minpsz = (ssize_t)val;

		/*
		 * Performance concern, strwrite looks at the module below
		 * the stream head for the maxpsz each time it does a write
		 * we now cache it at the stream head.  Check to see if this
		 * queue is sitting directly below the stream head.
		 */
		wrq = STREAM(q)->sd_wrq;
		if (q != wrq->q_next)
			break;

		/*
		 * If the stream is not frozen drop the current QLOCK and
		 * acquire the sd_wrq QLOCK which protects sd_qn_*
		 */
		if (freezer != curthread) {
			mutex_exit(QLOCK(q));
			mutex_enter(QLOCK(wrq));
		}
		STREAM(q)->sd_qn_minpsz = (ssize_t)val;

		if (freezer != curthread) {
			mutex_exit(QLOCK(wrq));
			mutex_enter(QLOCK(q));
		}
		break;

	case QSTRUIOT:
		if (qbp)
			error = EINVAL;
		else
			q->q_struiot = (ushort_t)val;
		break;

	case QCOUNT:
	case QFIRST:
	case QLAST:
	case QFLAG:
		error = EPERM;
		break;

	default:
		error = EINVAL;
		break;
	}
done:
	if (freezer != curthread)
		mutex_exit(QLOCK(q));
	return (error);
}

/*
 * Get queue fields.
 *
 * Counterpart of strqset(): reads one field of the queue (or of the
 * qband for priority `pri') into *valp.  The caller must pass a valp
 * of the type matching the requested field (size_t for watermarks and
 * counts, ssize_t for packet sizes, mblk_t * for QFIRST/QLAST, uint_t
 * for QFLAG, short for QSTRUIOT).  Same locking and return-value
 * conventions as strqset().
 */
int
strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
{
	qband_t *qbp = NULL;
	int error = 0;
	kthread_id_t freezer;

	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));
	if (what >= QBAD) {
		error = EINVAL;
		goto done;
	}
	/*
	 * As in strqset(): find the qband for `pri', allocating any
	 * missing bands along the way.
	 */
	if (pri != 0) {
		int i;
		qband_t **qbpp;

		if (pri > q->q_nband) {
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;
			while (pri > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					error = EAGAIN;
					goto done;
				}
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}
		qbp = q->q_bandp;
		i = pri;
		while (--i)
			qbp = qbp->qb_next;
	}
	switch (what) {
	case QHIWAT:
		if (qbp)
			*(size_t *)valp = qbp->qb_hiwat;
		else
			*(size_t *)valp = q->q_hiwat;
		break;

	case QLOWAT:
		if (qbp)
			*(size_t *)valp = qbp->qb_lowat;
		else
			*(size_t *)valp = q->q_lowat;
		break;

	case QMAXPSZ:
		if (qbp)
			error = EINVAL;
		else
			*(ssize_t *)valp = q->q_maxpsz;
		break;

	case QMINPSZ:
		if (qbp)
			error = EINVAL;
		else
			*(ssize_t *)valp = q->q_minpsz;
		break;

	case QCOUNT:
		if (qbp)
			*(size_t *)valp = qbp->qb_count;
		else
			*(size_t *)valp = q->q_count;
		break;

	case QFIRST:
		if (qbp)
			*(mblk_t **)valp = qbp->qb_first;
		else
			*(mblk_t **)valp = q->q_first;
		break;

	case QLAST:
		if (qbp)
			*(mblk_t **)valp = qbp->qb_last;
		else
			*(mblk_t **)valp = q->q_last;
		break;

	case QFLAG:
		if (qbp)
			*(uint_t *)valp = qbp->qb_flag;
		else
			*(uint_t *)valp = q->q_flag;
		break;

	case QSTRUIOT:
		if (qbp)
			error = EINVAL;
		else
			*(short *)valp = q->q_struiot;
		break;

	default:
		error = EINVAL;
		break;
	}
done:
	if (freezer != curthread)
		mutex_exit(QLOCK(q));
	return (error);
}

/*
 * Function awakes all in cvwait/sigwait/pollwait, on one of:
 *	QWANTWSYNC or QWANTR or QWANTW,
 *
 * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
 * deferred wakeup will be done. Also if strpoll() in progress then a
 * deferred pollwakeup will be done.
 */
void
strwakeq(queue_t *q, int flag)
{
	stdata_t *stp = STREAM(q);
	pollhead_t *pl;

	mutex_enter(&stp->sd_lock);
	pl = &stp->sd_pollist;
	if (flag & QWANTWSYNC) {
		/* Synchronous-write wakeup: write side only. */
		ASSERT(!(q->q_flag & QREADR));
		if (stp->sd_flag & WSLEEP) {
			stp->sd_flag &= ~WSLEEP;
			cv_broadcast(&stp->sd_wrq->q_wait);
		} else {
			/* No sleeper yet - defer the wakeup. */
			stp->sd_wakeq |= WSLEEP;
		}

		/* sd_lock is dropped across pollwakeup() and retaken. */
		mutex_exit(&stp->sd_lock);
		pollwakeup(pl, POLLWRNORM);
		mutex_enter(&stp->sd_lock);

		if (stp->sd_sigflags & S_WRNORM)
			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
	} else if (flag & QWANTR) {
		/* Reader wakeup. */
		if (stp->sd_flag & RSLEEP) {
			stp->sd_flag &= ~RSLEEP;
			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
		} else {
			/* No sleeper yet - defer the wakeup. */
			stp->sd_wakeq |= RSLEEP;
		}

		mutex_exit(&stp->sd_lock);
		pollwakeup(pl, POLLIN | POLLRDNORM);
		mutex_enter(&stp->sd_lock);

		{
			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);

			if (events)
				strsendsig(stp->sd_siglist, events, 0, 0);
		}
	} else {
		/* Plain writer wakeup (QWANTW). */
		if (stp->sd_flag & WSLEEP) {
			stp->sd_flag &= ~WSLEEP;
			cv_broadcast(&stp->sd_wrq->q_wait);
		}

		mutex_exit(&stp->sd_lock);
		pollwakeup(pl, POLLWRNORM);
		mutex_enter(&stp->sd_lock);

		if (stp->sd_sigflags & S_WRNORM)
			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
	}
	mutex_exit(&stp->sd_lock);
}

/*
 * Copy data from the caller's uio into the struio-marked regions
 * (db_cksumstuff..db_cksumend of each STRUIO_SPEC mblk) of the chain,
 * marking each completed mblk STRUIO_DONE and advancing db_cksumstuff.
 *
 * When `noblock' is set, the copy is performed under on_trap() so a
 * page fault returns EWOULDBLOCK instead of blocking.  Returns 0 on
 * success, EWOULDBLOCK on a noblock fault, EIO for an unknown struio
 * type, or the uiomove() error.
 */
int
struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
{
	stdata_t *stp = STREAM(q);
	int typ = STRUIOT_STANDARD;
	uio_t *uiop = &dp->d_uio;
	dblk_t *dbp;
	ssize_t uiocnt;
	ssize_t cnt;
	unsigned char *ptr;
	ssize_t resid;
	int error = 0;
	on_trap_data_t otd;
	queue_t *stwrq;

	/*
	 * Plumbing may change while taking the type so store the
	 * queue in a temporary variable. It doesn't matter even
	 * if the we take the type from the previous plumbing,
	 * that's because if the plumbing has changed when we were
	 * holding the queue in a temporary variable, we can continue
	 * processing the message the way it would have been processed
	 * in the old plumbing, without any side effects but a bit
	 * extra processing for partial ip header checksum.
	 *
	 * This has been done to avoid holding the sd_lock which is
	 * very hot.
	 */

	stwrq = stp->sd_struiowrq;
	if (stwrq)
		typ = stwrq->q_struiot;

	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
		dbp = mp->b_datap;
		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
		cnt = MIN(uiocnt, uiop->uio_resid);
		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
			/*
			 * Either this mblk has already been processed
			 * or there is no more room in this mblk (?).
			 */
			continue;
		}
		switch (typ) {
		case STRUIOT_STANDARD:
			if (noblock) {
				if (on_trap(&otd, OT_DATA_ACCESS)) {
					no_trap();
					error = EWOULDBLOCK;
					goto out;
				}
			}
			/* Note: assignment within the condition. */
			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
				if (noblock)
					no_trap();
				goto out;
			}
			if (noblock)
				no_trap();
			break;

		default:
			error = EIO;
			goto out;
		}
		dbp->db_struioflag |= STRUIO_DONE;
		dbp->db_cksumstuff += cnt;
	}
out:
	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
		/*
		 * A fault has occurred and some bytes were moved to the
		 * current mblk, the uio_t has already been updated by
		 * the appropriate uio routine, so also update the mblk
		 * to reflect this in case this same mblk chain is used
		 * again (after the fault has been handled).
		 */
		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
		if (uiocnt >= resid)
			dbp->db_cksumstuff += resid;
	}
	return (error);
}

/*
 * Try to enter queue synchronously. Any attempt to enter a closing queue will
 * fails. The qp->q_rwcnt keeps track of the number of successful entries so
 * that removeq() will not try to close the queue while a thread is inside the
 * queue.
 *
 * Returns B_TRUE on success, B_FALSE if the queue is being closed
 * (QWCLOSE set).
 */
static boolean_t
rwnext_enter(queue_t *qp)
{
	mutex_enter(QLOCK(qp));
	if (qp->q_flag & QWCLOSE) {
		mutex_exit(QLOCK(qp));
		return (B_FALSE);
	}
	qp->q_rwcnt++;
	ASSERT(qp->q_rwcnt != 0);	/* wraparound check */
	mutex_exit(QLOCK(qp));
	return (B_TRUE);
}

/*
 * Decrease the count of threads running in sync stream queue and wake up any
 * threads blocked in removeq().
35340Sstevel@tonic-gate */ 35350Sstevel@tonic-gate static void 35360Sstevel@tonic-gate rwnext_exit(queue_t *qp) 35370Sstevel@tonic-gate { 35380Sstevel@tonic-gate mutex_enter(QLOCK(qp)); 35390Sstevel@tonic-gate qp->q_rwcnt--; 35400Sstevel@tonic-gate if (qp->q_flag & QWANTRMQSYNC) { 35410Sstevel@tonic-gate qp->q_flag &= ~QWANTRMQSYNC; 35420Sstevel@tonic-gate cv_broadcast(&qp->q_wait); 35430Sstevel@tonic-gate } 35440Sstevel@tonic-gate mutex_exit(QLOCK(qp)); 35450Sstevel@tonic-gate } 35460Sstevel@tonic-gate 35470Sstevel@tonic-gate /* 35480Sstevel@tonic-gate * The purpose of rwnext() is to call the rw procedure of the next 35490Sstevel@tonic-gate * (downstream) modules queue. 35500Sstevel@tonic-gate * 35510Sstevel@tonic-gate * treated as put entrypoint for perimeter syncronization. 35520Sstevel@tonic-gate * 35530Sstevel@tonic-gate * There's no need to grab sq_putlocks here (which only exist for CIPUT 35540Sstevel@tonic-gate * sync queues). If it is CIPUT sync queue sq_count is incremented and it does 35550Sstevel@tonic-gate * not matter if any regular put entrypoints have been already entered. We 35560Sstevel@tonic-gate * can't increment one of the sq_putcounts (instead of sq_count) because 35570Sstevel@tonic-gate * qwait_rw won't know which counter to decrement. 35580Sstevel@tonic-gate * 35590Sstevel@tonic-gate * It would be reasonable to add the lockless FASTPUT logic. 
 */
int
rwnext(queue_t *qp, struiod_t *dp)
{
	queue_t		*nqp;
	syncq_t		*sq;
	uint16_t	count;
	uint16_t	flags;
	struct qinit	*qi;
	int		(*proc)();
	struct stdata	*stp;
	int		isread;
	int		rval;

	stp = STREAM(qp);
	/*
	 * Prevent q_next from changing by holding sd_lock until acquiring
	 * SQLOCK. Note that a read-side rwnext from the streamhead will
	 * already have sd_lock acquired. In either case sd_lock is always
	 * released after acquiring SQLOCK.
	 *
	 * The streamhead read-side holding sd_lock when calling rwnext is
	 * required to prevent a race condition where M_DATA mblks flowing
	 * up the read-side of the stream could be bypassed by a rwnext()
	 * down-call. In this case sd_lock acts as the streamhead perimeter.
	 */
	if ((nqp = _WR(qp)) == qp) {
		/* Write side: advance to the next queue downstream. */
		isread = 0;
		mutex_enter(&stp->sd_lock);
		qp = nqp->q_next;
	} else {
		/* Read side: advance to the read queue of the next pair. */
		isread = 1;
		if (nqp != stp->sd_wrq)
			/* Not streamhead */
			mutex_enter(&stp->sd_lock);
		qp = _RD(nqp->q_next);
	}
	qi = qp->q_qinfo;
	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
		/*
		 * Not a synchronous module or no r/w procedure for this
		 * queue, so just return EINVAL and let the caller handle it.
		 */
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}

	if (rwnext_enter(qp) == B_FALSE) {
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}

	sq = qp->q_syncq;
	mutex_enter(SQLOCK(sq));
	mutex_exit(&stp->sd_lock);
	count = sq->sq_count;
	flags = sq->sq_flags;
	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));

	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
		/*
		 * if this queue is being closed, return.
		 */
		if (qp->q_flag & QWCLOSE) {
			mutex_exit(SQLOCK(sq));
			rwnext_exit(qp);
			return (EINVAL);
		}

		/*
		 * Wait until we can enter the inner perimeter.
		 */
		sq->sq_flags = flags | SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
		count = sq->sq_count;
		flags = sq->sq_flags;
	}

	/* Note: && binds tighter than ||, so each conjunct pairs up. */
	if (isread == 0 && stp->sd_struiowrq == NULL ||
	    isread == 1 && stp->sd_struiordq == NULL) {
		/*
		 * Stream plumbing changed while waiting for inner perimeter
		 * so just return EINVAL and let the caller handle it.
		 */
		mutex_exit(SQLOCK(sq));
		rwnext_exit(qp);
		return (EINVAL);
	}
	if (!(flags & SQ_CIPUT))
		sq->sq_flags = flags | SQ_EXCL;
	sq->sq_count = count + 1;
	ASSERT(sq->sq_count != 0);		/* Wraparound */
	/*
	 * Note: The only message ordering guarantee that rwnext() makes is
	 * for the write queue flow-control case. All others (r/w queue
	 * with q_count > 0 (or q_first != 0)) are the responsibility of
	 * the queue's rw procedure. This could be generalized here by
	 * running the queue's service procedure, but that wouldn't be
	 * the most efficient for all cases.
	 */
	mutex_exit(SQLOCK(sq));
	if (! isread && (qp->q_flag & QFULL)) {
		/*
		 * Write queue may be flow controlled. If so,
		 * mark the queue for wakeup when it's not.
		 * Re-check QFULL under QLOCK to close the race with the
		 * queue draining between the unlocked and locked tests.
		 */
		mutex_enter(QLOCK(qp));
		if (qp->q_flag & QFULL) {
			qp->q_flag |= QWANTWSYNC;
			mutex_exit(QLOCK(qp));
			rval = EWOULDBLOCK;
			goto out;
		}
		mutex_exit(QLOCK(qp));
	}

	/* Flow-trace the mblk (write side: before the down-call). */
	if (! isread && dp->d_mp)
		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
		    dp->d_mp->b_datap->db_base);

	rval = (*proc)(qp, dp);

	/* Flow-trace the mblk (read side: after the down-call). */
	if (isread && dp->d_mp)
		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
out:
	/*
	 * The queue is protected from being freed by sq_count, so it is
	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
	 */
	rwnext_exit(qp);

	mutex_enter(SQLOCK(sq));
	flags = sq->sq_flags;
	ASSERT(sq->sq_count != 0);
	sq->sq_count--;
	if (flags & SQ_TAIL) {
		putnext_tail(sq, qp, flags);
		/*
		 * The only purpose of this ASSERT is to preserve calling stack
		 * in DEBUG kernel.
		 */
		ASSERT(flags & SQ_TAIL);
		return (rval);
	}
	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
	/*
	 * Safe to always drop SQ_EXCL:
	 *	Not SQ_CIPUT means we set SQ_EXCL above
	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
	 *	did a qwriter(INNER) in which case nobody else
	 *	is in the inner perimeter and we are exiting.
	 *
	 * I would like to make the following assertion:
	 *
	 *	ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
	 *	    sq->sq_count == 0);
	 *
	 * which indicates that if we are both putshared and exclusive,
	 * we became exclusive while executing the putproc, and the only
	 * claim on the syncq was the one we dropped a few lines above.
	 * But other threads that enter putnext while the syncq is exclusive
	 * need to make a claim as they may need to drop SQLOCK in the
	 * has_writers case to avoid deadlocks.  If these threads are
	 * delayed or preempted, it is possible that the writer thread can
	 * find out that there are other claims making the (sq_count == 0)
	 * test invalid.
	 */

	sq->sq_flags = flags & ~SQ_EXCL;
	if (sq->sq_flags & SQ_WANTWAKEUP) {
		sq->sq_flags &= ~SQ_WANTWAKEUP;
		cv_broadcast(&sq->sq_wait);
	}
	mutex_exit(SQLOCK(sq));
	return (rval);
}

/*
 * The purpose of infonext() is to call the info procedure of the next
 * (downstream) module's queue.
 *
 * Treated as a put entrypoint for perimeter synchronization.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT
 * sync queues). If it is CIPUT sync queue regular sq_count is incremented and
 * it does not matter if any regular put entrypoints have been already
 * entered.
 */
int
infonext(queue_t *qp, infod_t *idp)
{
	queue_t		*nqp;
	syncq_t		*sq;
	uint16_t	count;
	uint16_t	flags;
	struct qinit	*qi;
	int		(*proc)();
	struct stdata	*stp;
	int		rval;

	stp = STREAM(qp);
	/*
	 * Prevent q_next from changing by holding sd_lock until
	 * acquiring SQLOCK.
	 */
	mutex_enter(&stp->sd_lock);
	if ((nqp = _WR(qp)) == qp) {
		qp = nqp->q_next;
	} else {
		qp = _RD(nqp->q_next);
	}
	qi = qp->q_qinfo;
	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
		/* Not a synchronous module or no info procedure. */
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}
	sq = qp->q_syncq;
	mutex_enter(SQLOCK(sq));
	mutex_exit(&stp->sd_lock);
	count = sq->sq_count;
	flags = sq->sq_flags;
	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));

	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
		/*
		 * Wait until we can enter the inner perimeter.
		 */
		sq->sq_flags = flags | SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
		count = sq->sq_count;
		flags = sq->sq_flags;
	}

	if (! (flags & SQ_CIPUT))
		sq->sq_flags = flags | SQ_EXCL;
	sq->sq_count = count + 1;
	ASSERT(sq->sq_count != 0);		/* Wraparound */
	mutex_exit(SQLOCK(sq));

	rval = (*proc)(qp, idp);

	mutex_enter(SQLOCK(sq));
	flags = sq->sq_flags;
	ASSERT(sq->sq_count != 0);
	sq->sq_count--;
	if (flags & SQ_TAIL) {
		putnext_tail(sq, qp, flags);
		/*
		 * The only purpose of this ASSERT is to preserve calling stack
		 * in DEBUG kernel.
		 */
		ASSERT(flags & SQ_TAIL);
		return (rval);
	}
	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
	/*
	 * XXXX
	 * I am not certain the next comment is correct here.  I need to
	 * consider why the infonext is called, and if dropping SQ_EXCL unless
	 * non-CIPUT might cause other problems.  It just might be safer to
	 * drop it if !SQ_CIPUT because that is when we set it.
	 */
	/*
	 * Safe to always drop SQ_EXCL:
	 *	Not SQ_CIPUT means we set SQ_EXCL above
	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
	 *	did a qwriter(INNER) in which case nobody else
	 *	is in the inner perimeter and we are exiting.
	 *
	 * I would like to make the following assertion:
	 *
	 *	ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
	 *	    sq->sq_count == 0);
	 *
	 * which indicates that if we are both putshared and exclusive,
	 * we became exclusive while executing the putproc, and the only
	 * claim on the syncq was the one we dropped a few lines above.
	 * But other threads that enter putnext while the syncq is exclusive
	 * need to make a claim as they may need to drop SQLOCK in the
	 * has_writers case to avoid deadlocks.  If these threads are
	 * delayed or preempted, it is possible that the writer thread can
	 * find out that there are other claims making the (sq_count == 0)
	 * test invalid.
	 */

	sq->sq_flags = flags & ~SQ_EXCL;
	mutex_exit(SQLOCK(sq));
	return (rval);
}

/*
 * Return nonzero if the queue is responsible for struio(), else return 0.
 */
int
isuioq(queue_t *q)
{
	if (q->q_flag & QREADR)
		return (STREAM(q)->sd_struiordq == q);
	else
		return (STREAM(q)->sd_struiowrq == q);
}

/*
 * Per-CPU putlocks are enabled by default only on sparc; elsewhere they are
 * disabled (tunable).
 */
#if defined(__sparc)
int disable_putlocks = 0;
#else
int disable_putlocks = 1;
#endif

/*
 * called by create_putlock.
 *
 * Allocate (at most once) a ciputctrl array for the syncq of q and, if the
 * queue pair has per-queue syncqs (QPERQ), for the other queue's syncq too.
 * Publication of sq_ciputctrl is lock-free on the reader (putnext) side, so
 * sq_nciputctrl must be visible before sq_ciputctrl is set; see the
 * membar_producer() below.
 */
static void
create_syncq_putlocks(queue_t *q)
{
	syncq_t		*sq = q->q_syncq;
	ciputctrl_t	*cip;
	int		i;

	ASSERT(sq != NULL);

	ASSERT(disable_putlocks == 0);
	ASSERT(n_ciputctrl >= min_n_ciputctrl);
	ASSERT(ciputctrl_cache != NULL);

	/* Putlocks only make sense for concurrent-put syncqs. */
	if (!(sq->sq_type & SQ_CIPUT))
		return;

	/* At most two iterations: this queue's syncq and its mate's. */
	for (i = 0; i <= 1; i++) {
		if (sq->sq_ciputctrl == NULL) {
			cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
			SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
			mutex_enter(SQLOCK(sq));
			if (sq->sq_ciputctrl != NULL) {
				/* Lost the race; discard our allocation. */
				mutex_exit(SQLOCK(sq));
				kmem_cache_free(ciputctrl_cache, cip);
			} else {
				ASSERT(sq->sq_nciputctrl == 0);
				sq->sq_nciputctrl = n_ciputctrl - 1;
				/*
				 * putnext checks sq_ciputctrl without holding
				 * SQLOCK. if it is not NULL putnext assumes
				 * sq_nciputctrl is initialized. membar below
				 * ensures that.
				 */
				membar_producer();
				sq->sq_ciputctrl = cip;
				mutex_exit(SQLOCK(sq));
			}
		}
		ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
		if (i == 1)
			break;
		q = _OTHERQ(q);
		if (!(q->q_flag & QPERQ)) {
			/* Both queues share one syncq; nothing more to do. */
			ASSERT(sq == q->q_syncq);
			break;
		}
		ASSERT(q->q_syncq != NULL);
		ASSERT(sq != q->q_syncq);
		sq = q->q_syncq;
		ASSERT(sq->sq_type & SQ_CIPUT);
	}
}

/*
 * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for
 * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for
 * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
 * starting from q and down to the driver.
 *
 * This should be called after the affected queues are part of stream
 * geometry. It should be called from driver/module open routine after
 * qprocson() call. It is also called from nfs syscall where it is known that
 * stream is configured and won't change its geometry during create_putlock
 * call.
 *
 * caller normally uses 0 value for the stream argument to speed up MT putnext
 * into the perimeter of q for example because its perimeter is per module
 * (e.g. IP).
 *
 * caller normally uses non 0 value for the stream argument to hint the system
 * that the stream of q is a very contended global system stream
 * (e.g. NFS/UDP) and the part of the stream from q to the driver is
 * particularly MT hot.
 *
 * Caller ensures stream plumbing won't happen while we are here and therefore
 * q_next can be safely used.
 */

void
create_putlocks(queue_t *q, int stream)
{
	ciputctrl_t	*cip;
	struct stdata	*stp = STREAM(q);

	/* Always walk the write side of the stream. */
	q = _WR(q);
	ASSERT(stp != NULL);

	if (disable_putlocks != 0)
		return;

	/* Not enough CPUs for per-CPU putlocks to pay off. */
	if (n_ciputctrl < min_n_ciputctrl)
		return;

	ASSERT(ciputctrl_cache != NULL);

	if (stream != 0 && stp->sd_ciputctrl == NULL) {
		/*
		 * Allocate the stream-head ciputctrl array.  Allocate before
		 * taking sd_lock, then re-check under the lock in case we
		 * raced with another thread doing the same.
		 */
		cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
		SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
		mutex_enter(&stp->sd_lock);
		if (stp->sd_ciputctrl != NULL) {
			/* Lost the race; discard our allocation. */
			mutex_exit(&stp->sd_lock);
			kmem_cache_free(ciputctrl_cache, cip);
		} else {
			ASSERT(stp->sd_nciputctrl == 0);
			stp->sd_nciputctrl = n_ciputctrl - 1;
			/*
			 * putnext checks sd_ciputctrl without holding
			 * sd_lock. if it is not NULL putnext assumes
			 * sd_nciputctrl is initialized. membar below
			 * ensures that.
			 */
			membar_producer();
			stp->sd_ciputctrl = cip;
			mutex_exit(&stp->sd_lock);
		}
	}

	ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);

	/*
	 * Create syncq putlocks for q; with stream != 0 continue down the
	 * write side to the driver (the final queue past _SAMESTR).
	 */
	while (_SAMESTR(q)) {
		create_syncq_putlocks(q);
		if (stream == 0)
			return;
		q = q->q_next;
	}
	ASSERT(q != NULL);
	create_syncq_putlocks(q);
}

/*
 * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
 * through a stream.
 *
 * Data currently record per event is a hrtime stamp, queue address, event
 * type, and a per type datum. Much of the STREAMS framework is instrumented
 * for automatic flow tracing (when enabled). Events can be defined and used
 * by STREAMS modules and drivers.
 *
 * Global objects:
 *
 *	str_ftevent() - Add a flow-trace event to a dblk.
 *	str_ftfree() - Free flow-trace data
 *
 * Local objects:
 *
 *	fthdr_cache - pointer to the kmem cache for trace header.
 *	ftblk_cache - pointer to the kmem cache for trace data blocks.
 */

int str_ftnever = 1;	/* Don't do STREAMS flow tracing */

/*
 * Append one flow-trace event (event code + datum, plus timestamp and
 * module id) to the ftblk chain hanging off hp.  Lock-free and MT safe:
 * slots are claimed with cas32 on bp->ix and new tail blocks are published
 * with casptr on hp->tail.  If p is a queue (evnt & FTEV_QMASK) the module
 * name is recorded, otherwise p itself is stored as the id.
 */
void
str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
{
	ftblk_t	*bp = hp->tail;
	ftblk_t	*nbp;
	ftevnt_t	*ep;
	int		ix, nix;

	ASSERT(hp != NULL);

	/* Claim a slot; loop until a CAS succeeds. */
	for (;;) {
		if ((ix = bp->ix) == FTBLK_EVNTS) {
			/*
			 * Tail doesn't have room, so need a new tail.
			 *
			 * To make this MT safe, first, allocate a new
			 * ftblk, and initialize it. To make life a
			 * little easier, reserve the first slot (mostly
			 * by making ix = 1). When we are finished with
			 * the initialization, CAS this pointer to the
			 * tail. If this succeeds, this is the new
			 * "next" block. Otherwise, another thread
			 * got here first, so free the block and start
			 * again.
			 */
			if (!(nbp = kmem_cache_alloc(ftblk_cache,
			    KM_NOSLEEP))) {
				/* no mem, so punt: disable tracing globally */
				str_ftnever++;
				/* free up all flow data? */
				return;
			}
			nbp->nxt = NULL;
			nbp->ix = 1;
			/*
			 * Just in case there is another thread about
			 * to get the next index, we need to make sure
			 * the value is there for it.
			 */
			membar_producer();
			if (casptr(&hp->tail, bp, nbp) == bp) {
				/* CAS was successful */
				bp->nxt = nbp;
				membar_producer();
				bp = nbp;
				ix = 0;	/* slot 0 was reserved above */
				goto cas_good;
			} else {
				/* Another thread installed a tail first. */
				kmem_cache_free(ftblk_cache, nbp);
				bp = hp->tail;
				continue;
			}
		}
		nix = ix + 1;
		if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) {
		cas_good:
			/* Tag context-switch and processor-switch events. */
			if (curthread != hp->thread) {
				hp->thread = curthread;
				evnt |= FTEV_CS;
			}
			if (CPU->cpu_seqid != hp->cpu_seqid) {
				hp->cpu_seqid = CPU->cpu_seqid;
				evnt |= FTEV_PS;
			}
			ep = &bp->ev[ix];
			break;
		}
	}

	if (evnt & FTEV_QMASK) {
		queue_t *qp = p;

		/*
		 * It is possible that the module info is broken
		 * (as is logsubr.c at this comment writing).
		 * Instead of panicking or doing other unmentionables,
		 * we shall put a dummy name as the mid, and continue.
		 */
		if (qp->q_qinfo == NULL)
			ep->mid = "NONAME";
		else
			ep->mid = qp->q_qinfo->qi_minfo->mi_idname;

		if (!(qp->q_flag & QREADR))
			evnt |= FTEV_ISWR;
	} else {
		ep->mid = (char *)p;
	}

	ep->ts = gethrtime();
	ep->evnt = evnt;
	ep->data = data;
	/* Fold the event into the header's running hash. */
	hp->hash = (hp->hash << 9) + hp->hash;
	hp->hash += (evnt << 16) | data;
	hp->hash += (uintptr_t)ep->mid;
}

/*
 * Free flow-trace data.
 *
 * Release the dblk's entire flow-trace chain: reset the embedded first
 * block, free any continuation ftblks, then free the header itself and
 * clear db_fthdr.
 */
void
str_ftfree(dblk_t *dbp)
{
	fthdr_t	*hp = dbp->db_fthdr;
	ftblk_t	*bp = &hp->first;
	ftblk_t	*nbp;

	if (bp != hp->tail || bp->ix != 0) {
		/*
		 * Clear out the hash, have the tail point to itself, and free
		 * any continuation blocks.
		 */
		bp = hp->first.nxt;
		hp->tail = &hp->first;
		hp->hash = 0;
		hp->first.nxt = NULL;
		hp->first.ix = 0;
		while (bp != NULL) {
			nbp = bp->nxt;
			kmem_cache_free(ftblk_cache, bp);
			bp = nbp;
		}
	}
	kmem_cache_free(fthdr_cache, hp);
	dbp->db_fthdr = NULL;
}