10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 70Sstevel@tonic-gate * with the License. 80Sstevel@tonic-gate * 90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 110Sstevel@tonic-gate * See the License for the specific language governing permissions 120Sstevel@tonic-gate * and limitations under the License. 130Sstevel@tonic-gate * 140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 190Sstevel@tonic-gate * 200Sstevel@tonic-gate * CDDL HEADER END 210Sstevel@tonic-gate */ 220Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 230Sstevel@tonic-gate /* All Rights Reserved */ 240Sstevel@tonic-gate 250Sstevel@tonic-gate 260Sstevel@tonic-gate /* 270Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 280Sstevel@tonic-gate * Use is subject to license terms. 
290Sstevel@tonic-gate */ 300Sstevel@tonic-gate 310Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 320Sstevel@tonic-gate 330Sstevel@tonic-gate #include <sys/types.h> 340Sstevel@tonic-gate #include <sys/param.h> 350Sstevel@tonic-gate #include <sys/thread.h> 360Sstevel@tonic-gate #include <sys/sysmacros.h> 370Sstevel@tonic-gate #include <sys/stropts.h> 380Sstevel@tonic-gate #include <sys/stream.h> 390Sstevel@tonic-gate #include <sys/strsubr.h> 400Sstevel@tonic-gate #include <sys/strsun.h> 410Sstevel@tonic-gate #include <sys/conf.h> 420Sstevel@tonic-gate #include <sys/debug.h> 430Sstevel@tonic-gate #include <sys/cmn_err.h> 440Sstevel@tonic-gate #include <sys/kmem.h> 450Sstevel@tonic-gate #include <sys/atomic.h> 460Sstevel@tonic-gate #include <sys/errno.h> 470Sstevel@tonic-gate #include <sys/vtrace.h> 480Sstevel@tonic-gate #include <sys/ftrace.h> 490Sstevel@tonic-gate #include <sys/ontrap.h> 500Sstevel@tonic-gate #include <sys/multidata.h> 510Sstevel@tonic-gate #include <sys/multidata_impl.h> 520Sstevel@tonic-gate #include <sys/sdt.h> 53*1110Smeem #include <sys/strft.h> 540Sstevel@tonic-gate 550Sstevel@tonic-gate #ifdef DEBUG 560Sstevel@tonic-gate #include <sys/kmem_impl.h> 570Sstevel@tonic-gate #endif 580Sstevel@tonic-gate 590Sstevel@tonic-gate /* 600Sstevel@tonic-gate * This file contains all the STREAMS utility routines that may 610Sstevel@tonic-gate * be used by modules and drivers. 620Sstevel@tonic-gate */ 630Sstevel@tonic-gate 640Sstevel@tonic-gate /* 650Sstevel@tonic-gate * STREAMS message allocator: principles of operation 660Sstevel@tonic-gate * 670Sstevel@tonic-gate * The streams message allocator consists of all the routines that 680Sstevel@tonic-gate * allocate, dup and free streams messages: allocb(), [d]esballoc[a], 690Sstevel@tonic-gate * dupb(), freeb() and freemsg(). What follows is a high-level view 700Sstevel@tonic-gate * of how the allocator works. 
 *
 * Every streams message consists of one or more mblks, a dblk, and data.
 * All mblks for all types of messages come from a common mblk_cache.
 * The dblk and data come in several flavors, depending on how the
 * message is allocated:
 *
 * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
 * fixed-size dblk/data caches. For message sizes that are multiples of
 * PAGESIZE, dblks are allocated separately from the buffer.
 * The associated buffer is allocated by the constructor using kmem_alloc().
 * For all other message sizes, dblk and its associated data is allocated
 * as a single contiguous chunk of memory.
 * Objects in these caches consist of a dblk plus its associated data.
 * allocb() determines the nearest-size cache by table lookup:
 * the dblk_cache[] array provides the mapping from size to dblk cache.
 *
 * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
 * kmem_alloc()'ing a buffer for the data and supplying that
 * buffer to gesballoc(), described below.
 *
 * (3) The four flavors of [d]esballoc[a] are all implemented by a
 * common routine, gesballoc() ("generic esballoc"). gesballoc()
 * allocates a dblk from the global dblk_esb_cache and sets db_base,
 * db_lim and db_frtnp to describe the caller-supplied buffer.
 *
 * While there are several routines to allocate messages, there is only
 * one routine to free messages: freeb(). freeb() simply invokes the
 * dblk's free method, dbp->db_free(), which is set at allocation time.
 *
 * dupb() creates a new reference to a message by allocating a new mblk,
 * incrementing the dblk reference count and setting the dblk's free
 * method to dblk_decref(). The dblk's original free method is retained
 * in db_lastfree. dblk_decref() decrements the reference count on each
 * freeb(). If this is not the last reference it just frees the mblk;
 * if this *is* the last reference, it restores db_free to db_lastfree,
 * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
 *
 * The implementation makes aggressive use of kmem object caching for
 * maximum performance. This makes the code simple and compact, but
 * also a bit abstruse in some places. The invariants that constitute a
 * message's constructed state, described below, are more subtle than usual.
 *
 * Every dblk has an "attached mblk" as part of its constructed state.
 * The mblk is allocated by the dblk's constructor and remains attached
 * until the message is either dup'ed or pulled up. In the dupb() case
 * the mblk association doesn't matter until the last free, at which time
 * dblk_decref() attaches the last mblk to the dblk. pullupmsg() affects
 * the mblk association because it swaps the leading mblks of two messages,
 * so it is responsible for swapping their db_mblk pointers accordingly.
 * From a constructed-state viewpoint it doesn't matter that a dblk's
 * attached mblk can change while the message is allocated; all that
 * matters is that the dblk has *some* attached mblk when it's freed.
 *
 * The sizes of the allocb() small-message caches are not magical.
 * They represent a good trade-off between internal and external
 * fragmentation for current workloads. They should be reevaluated
 * periodically, especially if allocations larger than DBLK_MAX_CACHE
 * become common. We use 64-byte alignment so that dblks don't
 * straddle cache lines unnecessarily.
 */
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64
#define	DBLK_MIN_SIZE		8
#define	DBLK_SIZE_SHIFT		3

/*
 * db_ref, db_type, db_flags and db_struioflag are adjacent single-byte
 * fields in dblk_t, so they can be read and written as one 32-bit word
 * (the "RTFU" word) via DBLK_RTFU_WORD().  DBLK_RTFU_SHIFT() computes
 * each field's bit position within that word for the host endianness.
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Compose a complete RTFU word from the four field values.  Note that
 * ((flags) | (ref - 1)) encodes DBLK_REFMIN into db_flags when ref > 1;
 * dblk_decref() relies on this to test "ref == refmin" without touching
 * the (possibly already freed) dblk itself.
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))

/* b_band, b_tag and b_flag are likewise accessed as a single 32-bit word. */
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))

/*
 * The fixed dblk/data cache sizes (see the block comment above for how
 * these were chosen).  The list is zero-terminated; sizes that are a
 * multiple of PAGESIZE get a separately allocated buffer, all others
 * embed the buffer directly after the dblk.
 */
static size_t dblk_sizes[] = {
#ifdef _LP64
	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3920,
	8192, 12112, 16384, 20304, 24576, 28496, 32768, 36688,
	40960, 44880, 49152, 53072, 57344, 61264, 65536, 69456,
#else
	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3968,
	8192, 12160, 16384, 20352, 24576, 28544, 32768, 36736,
	40960, 44928, 49152, 53120, 57344, 61312, 65536, 69504,
#endif
	DBLK_MAX_CACHE, 0
};

/* Size-to-cache lookup table and the shared caches themselves. */
static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_SIZE_SHIFT == 0 ?
	1 : DBLK_MAX_CACHE / DBLK_MIN_SIZE];
static struct kmem_cache *mblk_cache;
static struct kmem_cache *dblk_esb_cache;
static struct kmem_cache *fthdr_cache;
static struct kmem_cache *ftblk_cache;

static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
static mblk_t *allocb_oversize(size_t size, int flags);
static int allocb_tryhard_fails;
static void frnop_func(void *arg);
frtn_t frnop = { frnop_func };
static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);

static boolean_t rwnext_enter(queue_t *qp);
static void rwnext_exit(queue_t *qp);

/*
 * Patchable mblk/dblk kmem_cache flags.
 */
int dblk_kmem_flags = 0;
int mblk_kmem_flags = 0;

/*
 * kmem constructor for the fixed-size dblk caches.  cdrarg is the
 * message size the cache serves.  Establishes the dblk's constructed
 * state: an attached mblk, db_base/db_lim describing the buffer, and
 * the free method set to dblk_lastfree.  For sizes that are a multiple
 * of PAGESIZE the buffer is kmem_alloc()ed separately; otherwise it
 * lives immediately after the dblk itself.  Returns 0 on success, -1
 * if either the mblk or the separate buffer cannot be allocated.
 */
static int
dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;
	size_t index;

	ASSERT(msg_size != 0);

	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;

	ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	if ((msg_size & PAGEOFFSET) == 0) {
		/* page-multiple size: buffer allocated separately */
		dbp->db_base = kmem_alloc(msg_size, kmflags);
		if (dbp->db_base == NULL) {
			kmem_cache_free(mblk_cache, dbp->db_mblk);
			return (-1);
		}
	} else {
		/* buffer is contiguous with the dblk */
		dbp->db_base = (unsigned char *)&dbp[1];
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_cache[index];
	dbp->db_lim = dbp->db_base + msg_size;
	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

/*
 * kmem constructor for dblk_esb_cache (esballoc-style dblks).  The
 * data buffer is caller-supplied at gesballoc() time, so only the
 * attached mblk and the invariant fields are set up here.
 */
/*ARGSUSED*/
static int
dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_esb_cache;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

/*
 * kmem constructor for per-bcache dblks.  cdrarg is the bcache_t whose
 * buffer_cache supplies the data buffer; db_cache stores the bcache
 * pointer (not a kmem cache) and the free method is
 * bcache_dblk_lastfree.  Returns 0 on success, -1 on allocation failure.
 */
static int
bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	bcache_t *bcp = (bcache_t *)cdrarg;

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);

	if ((dbp->db_base = (unsigned char *)kmem_cache_alloc(bcp->buffer_cache,
	    kmflags)) == NULL) {
		kmem_cache_free(mblk_cache, dbp->db_mblk);
		return (-1);
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = (void *)bcp;
	dbp->db_lim = dbp->db_base + bcp->size;
	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

/*
 * kmem destructor for the fixed-size dblk caches: releases the attached
 * mblk and, for page-multiple sizes, the separately allocated buffer.
 * The ASSERTs verify that the dblk is back in its constructed state.
 */
/*ARGSUSED*/
static void
dblk_destructor(void *buf, void *cdrarg)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;

	ASSERT(dbp->db_mblk->b_datap == dbp);

	ASSERT(msg_size != 0);

	ASSERT(dbp->db_struioflag == 0);
	ASSERT(dbp->db_struioun.cksum.flags == 0);

	if ((msg_size & PAGEOFFSET) == 0) {
		/* buffer was allocated separately by the constructor */
		kmem_free(dbp->db_base, msg_size);
	}

	kmem_cache_free(mblk_cache, dbp->db_mblk);
}

/*
 * kmem destructor for per-bcache dblks: returns the data buffer to the
 * bcache's buffer_cache and the attached mblk to mblk_cache.
 */
static void
bcache_dblk_destructor(void *buf, void *cdrarg)
{
	dblk_t *dbp = buf;
	bcache_t *bcp = (bcache_t *)cdrarg;

	kmem_cache_free(bcp->buffer_cache, dbp->db_base);

	ASSERT(dbp->db_mblk->b_datap == dbp);

	ASSERT(dbp->db_struioflag == 0);
	ASSERT(dbp->db_struioun.cksum.flags == 0);

	kmem_cache_free(mblk_cache, dbp->db_mblk);
}

/*
 * One-time initialization of the STREAMS message allocator: creates the
 * mblk cache, one dblk cache per size in dblk_sizes[], the esballoc
 * dblk cache, the flow-trace caches, and fills in the size-to-cache
 * lookup table dblk_cache[].  Also initializes the Multidata caches.
 */
void
streams_msg_init(void)
{
	char name[40];
	size_t size;
	size_t lastsize = DBLK_MIN_SIZE;
	size_t *sizep;
	struct kmem_cache *cp;
	size_t tot_size;
	int offset;

	mblk_cache = kmem_cache_create("streams_mblk",
	    sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
	    mblk_kmem_flags);

	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {

		if ((offset = (size & PAGEOFFSET)) != 0) {
			/*
			 * We are in the middle of a page, dblk should
			 * be allocated on the same page
			 */
			tot_size = size + sizeof (dblk_t);
			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
			    < PAGESIZE);
			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);

		} else {

			/*
			 * buf size is multiple of page size, dblk and
			 * buffer are allocated separately.
			 */

			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
			tot_size = sizeof (dblk_t);
		}

		(void) sprintf(name, "streams_dblk_%ld", size);
		cp = kmem_cache_create(name, tot_size,
		    DBLK_CACHE_ALIGN, dblk_constructor,
		    dblk_destructor, NULL,
		    (void *)(size), NULL, dblk_kmem_flags);

		/*
		 * Every 8-byte bucket between the previous cache size
		 * and this one maps to this cache.
		 */
		while (lastsize <= size) {
			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
			lastsize += DBLK_MIN_SIZE;
		}
	}

	dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
	    sizeof (dblk_t), DBLK_CACHE_ALIGN,
	    dblk_esb_constructor, dblk_destructor, NULL,
	    (void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
	fthdr_cache = kmem_cache_create("streams_fthdr",
	    sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
	ftblk_cache = kmem_cache_create("streams_ftblk",
	    sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);

	/* Initialize Multidata caches */
	mmd_init();
}

/*
 * Allocate a message of at least 'size' bytes from the nearest-size
 * dblk cache (oversize requests go to allocb_oversize()).  Never
 * sleeps (KM_NOSLEEP); returns NULL on failure.  The priority
 * argument is historical and ignored.
 */
/*ARGSUSED*/
mblk_t *
allocb(size_t size, uint_t pri)
{
	dblk_t *dbp;
	mblk_t *mp;
	size_t index;

	index = (size - 1) >> DBLK_SIZE_SHIFT;

	/*
	 * Note: size == 0 wraps (size - 1) around to a huge index, so
	 * it lands here too and is served from the smallest cache.
	 */
	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
		if (size != 0) {
			mp = allocb_oversize(size, KM_NOSLEEP);
			goto out;
		}
		index = 0;
	}

	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
		mp = NULL;
		goto out;
	}

	mp = dbp->db_mblk;
	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = dbp->db_base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;
	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
out:
	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);

	return (mp);
}

/*
 * Like allocb(), but copies the credentials, cpid and db_type from a
 * template message into the new one.  Returns NULL if allocb() fails.
 */
mblk_t *
allocb_tmpl(size_t size, const mblk_t *tmpl)
{
	mblk_t *mp = allocb(size, 0);

	if (mp != NULL) {
		cred_t *cr = DB_CRED(tmpl);
		if (cr != NULL)
			crhold(mp->b_datap->db_credp = cr);
		DB_CPID(mp) = DB_CPID(tmpl);
		DB_TYPE(mp) = DB_TYPE(tmpl);
	}
	return (mp);
}

/*
 * Like allocb(), but attaches (and holds) the given credentials, if
 * any, to the new message.
 */
mblk_t *
allocb_cred(size_t size, cred_t *cr)
{
	mblk_t *mp = allocb(size, 0);

	if (mp != NULL && cr != NULL)
		crhold(mp->b_datap->db_credp = cr);

	return (mp);
}

/*
 * Like allocb_cred(), but may wait for memory via allocb_wait();
 * *error is set by allocb_wait() on failure.
 */
mblk_t *
allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr)
{
	mblk_t *mp = allocb_wait(size, 0, flags, error);

	if (mp != NULL && cr != NULL)
		crhold(mp->b_datap->db_credp = cr);

	return (mp);
}

/*
 * Free a single message block by invoking the dblk's free method
 * (set at allocation/dup time).  mp must not be on any list
 * (b_next/b_prev NULL).
 */
void
freeb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;

	ASSERT(dbp->db_ref > 0);
	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);

	dbp->db_free(mp, dbp);
}

/*
 * Free an entire message: walks the b_cont chain, freeing each block
 * as freeb() would.  NULL is accepted (the loop simply doesn't run).
 */
void
freemsg(mblk_t *mp)
{
	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
	while (mp) {
		dblk_t *dbp = mp->b_datap;
		mblk_t *mp_cont = mp->b_cont;

		ASSERT(dbp->db_ref > 0);
		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);

		dbp->db_free(mp, dbp);
		mp = mp_cont;
	}
}

/*
 * Reallocate a block for another use. Try hard to use the old block.
 * If the old data is wanted (copy), leave b_wptr at the end of the data,
 * otherwise return b_wptr = b_rptr.
 *
 * This routine is private and unstable.
 */
mblk_t *
reallocb(mblk_t *mp, size_t size, uint_t copy)
{
	mblk_t *mp1;
	unsigned char *old_rptr;
	ptrdiff_t cur_size;

	if (mp == NULL)
		return (allocb(size, BPRI_HI));

	cur_size = mp->b_wptr - mp->b_rptr;
	old_rptr = mp->b_rptr;

	ASSERT(mp->b_datap->db_ref != 0);

	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
		/*
		 * If the data is wanted and it will fit where it is, no
		 * work is required.
		 */
		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
			return (mp);

		/* reuse the block from the start of its buffer */
		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
		mp1 = mp;
	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
		/* XXX other mp state could be copied too, db_flags ... ? */
		mp1->b_cont = mp->b_cont;
	} else {
		return (NULL);
	}

	if (copy) {
		bcopy(old_rptr, mp1->b_rptr, cur_size);
		mp1->b_wptr = mp1->b_rptr + cur_size;
	}

	if (mp != mp1)
		freeb(mp);

	return (mp1);
}

/*
 * Last-reference free method for cache-allocated dblks: resets the
 * fields that may have been dirtied during the message's lifetime
 * (credentials, cpid, struio/cksum state, COOKED flag) back to
 * constructed state, then returns the dblk to its cache.
 */
static void
dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;

	/* Reset the struioflag and the checksum flag fields */
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	/* and the COOKED flag */
	dbp->db_flags &= ~DBLK_COOKED;

	kmem_cache_free(dbp->db_cache, dbp);
}

/*
 * Free method installed by dupb(): drops one reference to the dblk.
 * If it was not the last reference only the mblk is freed; on the last
 * reference the mblk is attached to the dblk (restoring constructed
 * state) and the original free method, db_lastfree, is invoked.
 */
static void
dblk_decref(mblk_t *mp, dblk_t *dbp)
{
	if (dbp->db_ref != 1) {
		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
		/*
		 * atomic_add_32_nv() just decremented db_ref, so we no longer
		 * have a reference to the dblk, which means another thread
		 * could free it. Therefore we cannot examine the dblk to
		 * determine whether ours was the last reference. Instead,
		 * we extract the new and minimum reference counts from rtfu.
		 * Note that all we're really saying is "if (ref != refmin)".
		 */
		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
			kmem_cache_free(mblk_cache, mp);
			return;
		}
	}
	dbp->db_mblk = mp;
	dbp->db_free = dbp->db_lastfree;
	dbp->db_lastfree(mp, dbp);
}

/*
 * Create a new reference to a message block: allocates a fresh mblk
 * sharing the same dblk, increments db_ref and installs dblk_decref
 * as the free method.  Returns NULL if the mblk cannot be allocated
 * or db_ref is already at DBLK_REFMAX.
 */
mblk_t *
dupb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;
	mblk_t *new_mp;
	uint32_t oldrtfu, newrtfu;

	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
		goto out;

	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
	new_mp->b_rptr = mp->b_rptr;
	new_mp->b_wptr = mp->b_wptr;
	new_mp->b_datap = dbp;
	new_mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);

	/*
	 * First-dup optimization. The enabling assumption is that there
	 * can never be a race (in correct code) to dup the first copy
	 * of a message. Therefore we don't need to do it atomically.
	 */
	if (dbp->db_free != dblk_decref) {
		dbp->db_free = dblk_decref;
		dbp->db_ref++;
		goto out;
	}

	do {
		ASSERT(dbp->db_ref > 0);
		oldrtfu = DBLK_RTFU_WORD(dbp);
		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
		/*
		 * If db_ref is maxed out we can't dup this message anymore.
		 */
		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
			kmem_cache_free(mblk_cache, new_mp);
			new_mp = NULL;
			goto out;
		}
	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);

out:
	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
	return (new_mp);
}

/*
 * Last-reference free method for desballoc'd dblks: invokes the
 * caller's free routine for the data buffer, resets the dirtied
 * fields, and returns the dblk to dblk_esb_cache.
 */
static void
dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
{
	frtn_t *frp = dbp->db_frtnp;

	ASSERT(dbp->db_mblk == mp);
	frp->free_func(frp->free_arg);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	kmem_cache_free(dbp->db_cache, dbp);
}

/* No-op free routine used when the caller supplies no free function. */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
}

/*
 * Generic esballoc used to implement the four flavors: [d]esballoc[a].
 * Wraps the caller-supplied buffer [base, base + size) in a dblk from
 * dblk_esb_cache, recording frp as the free routine and 'lastfree' as
 * the dblk's free method.  Returns NULL on allocation failure.
 */
static mblk_t *
gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
{
	dblk_t *dbp;
	mblk_t *mp;

	ASSERT(base != NULL && frp != NULL);

	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
		mp = NULL;
		goto out;
	}

	mp = dbp->db_mblk;
	dbp->db_base = base;
	dbp->db_lim = base + size;
	dbp->db_free = dbp->db_lastfree = lastfree;
	dbp->db_frtnp = frp;
	DBLK_RTFU_WORD(dbp) = db_rtfu;
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;

out:
	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
	return (mp);
}

/*
 * Allocate a message whose data buffer [base, base + size) is supplied
 * by the caller; frp describes the routine invoked when the buffer may
 * be reclaimed.  'pri' is unused.  Returns NULL on failure.
 */
/*ARGSUSED*/
mblk_t *
esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
		    frp, freebs_enqueue, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
	    frp, freebs_enqueue, KM_NOSLEEP));
}

/*
 * Same as esballoc() but sleeps waiting for memory.
 */
/*ARGSUSED*/
mblk_t *
esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
		    frp, freebs_enqueue, KM_SLEEP);

		/* KM_SLEEP allocations do not fail, so mp is non-NULL here */
		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
	    frp, freebs_enqueue, KM_SLEEP));
}

/*
 * Like esballoc(), but the buffer's free routine runs synchronously at
 * last-free (dblk_lastfree_desb) rather than being deferred.
 */
/*ARGSUSED*/
mblk_t *
desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
		    frp, dblk_lastfree_desb, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
	    frp, dblk_lastfree_desb, KM_NOSLEEP));
}

/*
 * 'a' flavor of esballoc(): identical except the dblk starts with a
 * reference count of two rather than one.
 */
/*ARGSUSED*/
mblk_t *
esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
		    frp, freebs_enqueue, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
	    frp, freebs_enqueue, KM_NOSLEEP));
}

/*
 * 'a' flavor of desballoc(): synchronous last-free, initial reference
 * count of two.
 */
/*ARGSUSED*/
mblk_t *
desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
		    frp, dblk_lastfree_desb, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
	    frp, dblk_lastfree_desb, KM_NOSLEEP));
}

/*
 * Last-reference free routine for dblks allocated from a driver-private
 * bcache: scrub identity fields, return the dblk to the bcache's dblk
 * cache, and complete a deferred bcache_destroy() if this was the last
 * outstanding allocation.
 */
static void
bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
	bcache_t *bcp = dbp->db_cache;

	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	mutex_enter(&bcp->mutex);
	kmem_cache_free(bcp->dblk_cache, dbp);
	bcp->alloc--;

	if (bcp->alloc == 0 && bcp->destroy != 0) {
		/* bcache_destroy() was deferred; finish the teardown now */
		kmem_cache_destroy(bcp->dblk_cache);
		kmem_cache_destroy(bcp->buffer_cache);
		mutex_exit(&bcp->mutex);
		mutex_destroy(&bcp->mutex);
		kmem_free(bcp, sizeof (bcache_t));
	} else {
mutex_exit(&bcp->mutex); 8390Sstevel@tonic-gate } 8400Sstevel@tonic-gate } 8410Sstevel@tonic-gate 8420Sstevel@tonic-gate bcache_t * 8430Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align) 8440Sstevel@tonic-gate { 8450Sstevel@tonic-gate bcache_t *bcp; 8460Sstevel@tonic-gate char buffer[255]; 8470Sstevel@tonic-gate 8480Sstevel@tonic-gate ASSERT((align & (align - 1)) == 0); 8490Sstevel@tonic-gate 8500Sstevel@tonic-gate if ((bcp = (bcache_t *)kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == 8510Sstevel@tonic-gate NULL) { 8520Sstevel@tonic-gate return (NULL); 8530Sstevel@tonic-gate } 8540Sstevel@tonic-gate 8550Sstevel@tonic-gate bcp->size = size; 8560Sstevel@tonic-gate bcp->align = align; 8570Sstevel@tonic-gate bcp->alloc = 0; 8580Sstevel@tonic-gate bcp->destroy = 0; 8590Sstevel@tonic-gate 8600Sstevel@tonic-gate mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL); 8610Sstevel@tonic-gate 8620Sstevel@tonic-gate (void) sprintf(buffer, "%s_buffer_cache", name); 8630Sstevel@tonic-gate bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL, 8640Sstevel@tonic-gate NULL, NULL, NULL, 0); 8650Sstevel@tonic-gate (void) sprintf(buffer, "%s_dblk_cache", name); 8660Sstevel@tonic-gate bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t), 8670Sstevel@tonic-gate DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor, 8680Sstevel@tonic-gate NULL, (void *)bcp, NULL, 0); 8690Sstevel@tonic-gate 8700Sstevel@tonic-gate return (bcp); 8710Sstevel@tonic-gate } 8720Sstevel@tonic-gate 8730Sstevel@tonic-gate void 8740Sstevel@tonic-gate bcache_destroy(bcache_t *bcp) 8750Sstevel@tonic-gate { 8760Sstevel@tonic-gate ASSERT(bcp != NULL); 8770Sstevel@tonic-gate 8780Sstevel@tonic-gate mutex_enter(&bcp->mutex); 8790Sstevel@tonic-gate if (bcp->alloc == 0) { 8800Sstevel@tonic-gate kmem_cache_destroy(bcp->dblk_cache); 8810Sstevel@tonic-gate kmem_cache_destroy(bcp->buffer_cache); 8820Sstevel@tonic-gate mutex_exit(&bcp->mutex); 8830Sstevel@tonic-gate 
		mutex_destroy(&bcp->mutex);
		kmem_free(bcp, sizeof (bcache_t));
	} else {
		/* dblks still outstanding: defer to bcache_dblk_lastfree() */
		bcp->destroy++;
		mutex_exit(&bcp->mutex);
	}
}

/*
 * Allocate a message from a driver-private bcache.  Fails (returns
 * NULL) if the bcache is being destroyed or the dblk cache is
 * exhausted.  'pri' is unused.
 */
/*ARGSUSED*/
mblk_t *
bcache_allocb(bcache_t *bcp, uint_t pri)
{
	dblk_t *dbp;
	mblk_t *mp = NULL;

	ASSERT(bcp != NULL);

	mutex_enter(&bcp->mutex);
	if (bcp->destroy != 0) {
		mutex_exit(&bcp->mutex);
		goto out;
	}

	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
		mutex_exit(&bcp->mutex);
		goto out;
	}
	bcp->alloc++;
	mutex_exit(&bcp->mutex);

	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);

	mp = dbp->db_mblk;
	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = dbp->db_base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;
	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
out:
	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);

	return (mp);
}

/*
 * Last-reference free routine for oversize allocations: the buffer was
 * kmem_alloc'd directly (see allocb_oversize()), so free it explicitly
 * before returning the dblk to its cache.
 */
static void
dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
{
	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
	kmem_cache_free(dbp->db_cache, dbp);
}

/*
 * Allocate a message larger than the biggest dblk cache size by
 * kmem_alloc'ing the buffer directly and wrapping it with gesballoc().
 */
static mblk_t *
allocb_oversize(size_t size, int kmflags)
{
	mblk_t *mp;
	void *buf;

	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
	if ((buf = kmem_alloc(size, kmflags)) == NULL)
		return (NULL);
	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
		kmem_free(buf, size);

	if (mp != NULL)
		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);

	return (mp);
}

/*
 * Try a range of sizes (target_size up to target_size + 512, stepping
 * by the cache alignment) in the hope that some dblk cache can satisfy
 * the request.  Returns NULL if all attempts fail.
 */
mblk_t *
allocb_tryhard(size_t target_size)
{
	size_t size;
	mblk_t *bp;

	for (size = target_size; size < target_size + 512;
	    size += DBLK_CACHE_ALIGN)
		if
		((bp = allocb(size, BPRI_HI)) != NULL)
			return (bp);
	allocb_tryhard_fails++;
	return (NULL);
}

/*
 * This routine is consolidation private for STREAMS internal use
 * This routine may only be called from sync routines (i.e., not
 * from put or service procedures).  It is located here (rather
 * than strsubr.c) so that we don't have to expose all of the
 * allocb() implementation details in header files.
 */
mblk_t *
allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
{
	dblk_t *dbp;
	mblk_t *mp;
	size_t index;

	index = (size -1) >> DBLK_SIZE_SHIFT;

	if (flags & STR_NOSIG) {
		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
			if (size != 0) {
				/* too big for any dblk cache */
				mp = allocb_oversize(size, KM_SLEEP);
				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
				    (uintptr_t)mp);
				return (mp);
			}
			/* size == 0: use the smallest cache */
			index = 0;
		}

		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
		mp = dbp->db_mblk;
		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
		mp->b_next = mp->b_prev = mp->b_cont = NULL;
		mp->b_rptr = mp->b_wptr = dbp->db_base;
		mp->b_queue = NULL;
		MBLK_BAND_FLAG_WORD(mp) = 0;
		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);

		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);

	} else {
		/* interruptible wait: strwaitbuf() may fail with an error */
		while ((mp = allocb(size, pri)) == NULL) {
			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
				return (NULL);
		}
	}

	return (mp);
}

/*
 * Call function 'func' with 'arg' when a class zero block can
 * be allocated with priority 'pri'.
 */
bufcall_id_t
esbbcall(uint_t pri, void (*func)(void *), void *arg)
{
	return (bufcall(1, pri, func, arg));
}

/*
 * Allocates an iocblk (M_IOCTL) block.  Properly sets the credentials
 * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
 * This provides consistency for all internal allocators of ioctl.
 */
mblk_t *
mkiocb(uint_t cmd)
{
	struct iocblk	*ioc;
	mblk_t		*mp;

	/*
	 * Allocate enough space for any of the ioctl related messages.
	 */
	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
		return (NULL);

	bzero(mp->b_rptr, sizeof (union ioctypes));

	/*
	 * Set the mblk_t information and ptrs correctly.
	 */
	mp->b_wptr += sizeof (struct iocblk);
	mp->b_datap->db_type = M_IOCTL;

	/*
	 * Fill in the fields.
	 */
	ioc = (struct iocblk *)mp->b_rptr;
	ioc->ioc_cmd = cmd;
	ioc->ioc_cr = kcred;
	ioc->ioc_id = getiocseqno();
	ioc->ioc_flag = IOC_NATIVE;
	return (mp);
}

/*
 * test if block of given size can be allocated with a request of
 * the given priority.
 * 'pri' is no longer used, but is retained for compatibility.
 */
/* ARGSUSED */
int
testb(size_t size, uint_t pri)
{
	return ((size + sizeof (dblk_t)) <= kmem_avail());
}

/*
 * Call function 'func' with argument 'arg' when there is a reasonably
 * good chance that a block of size 'size' can be allocated.
 * 'pri' is no longer used, but is retained for compatibility.
 */
/* ARGSUSED */
bufcall_id_t
bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
{
	static long bid = 1;	/* always odd to save checking for zero */
	bufcall_id_t bc_id;
	struct strbufcall *bcp;

	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
		return (0);

	bcp->bc_func = func;
	bcp->bc_arg = arg;
	bcp->bc_size = size;
	bcp->bc_next = NULL;
	bcp->bc_executor = NULL;

	mutex_enter(&strbcall_lock);
	/*
	 * After bcp is linked into strbcalls and strbcall_lock is dropped there
	 * should be no references to bcp since it may be freed by
	 * runbufcalls().  Since bc_id field is returned, we save its value in
	 * the local var.
	 */
	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */

	/*
	 * add newly allocated stream event to existing
	 * linked list of events.
	 */
	if (strbcalls.bc_head == NULL) {
		strbcalls.bc_head = strbcalls.bc_tail = bcp;
	} else {
		strbcalls.bc_tail->bc_next = bcp;
		strbcalls.bc_tail = bcp;
	}

	cv_signal(&strbcall_cv);
	mutex_exit(&strbcall_lock);
	return (bc_id);
}

/*
 * Cancel a bufcall request.  If the bufcall is currently executing on
 * another thread, wait for it to finish and recheck; once found idle
 * (or absent) it is unlinked and freed.
 */
void
unbufcall(bufcall_id_t id)
{
	strbufcall_t *bcp, *pbcp;

	mutex_enter(&strbcall_lock);
again:
	pbcp = NULL;
	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
		if (id == bcp->bc_id)
			break;
		pbcp = bcp;
	}
	if (bcp) {
		if (bcp->bc_executor != NULL) {
			if (bcp->bc_executor != curthread) {
				/* executing elsewhere; wait and rescan */
				cv_wait(&bcall_cv, &strbcall_lock);
				goto again;
			}
		} else {
			/* unlink from the singly-linked list and free */
			if (pbcp)
				pbcp->bc_next = bcp->bc_next;
			else
				strbcalls.bc_head = bcp->bc_next;
			if (bcp == strbcalls.bc_tail)
				strbcalls.bc_tail = pbcp;
			kmem_free(bcp, sizeof (strbufcall_t));
		}
	}
	mutex_exit(&strbcall_lock);
}

/*
 * Duplicate a message block by block (uses dupb), returning
 * a pointer to the duplicate message.
 * Returns a non-NULL value only if the entire message
 * was dup'd.
 */
mblk_t *
dupmsg(mblk_t *bp)
{
	mblk_t *head, *nbp;

	if (!bp || !(nbp = head = dupb(bp)))
		return (NULL);

	while (bp->b_cont) {
		if (!(nbp->b_cont = dupb(bp->b_cont))) {
			/* partial dup: free what we built and fail */
			freemsg(head);
			return (NULL);
		}
		nbp = nbp->b_cont;
		bp = bp->b_cont;
	}
	return (head);
}

/*
 * Copy (rather than dup) blocks whose dblk is marked STRUIO_ZC,
 * so zero-copy (loaned) buffers are never shared with the duplicate.
 */
#define	DUPB_NOLOAN(bp) \
	((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
	copyb((bp)) : dupb((bp)))

/*
 * Like dupmsg(), but only for M_DATA messages, and zero-copy blocks
 * are copied instead of duped (see DUPB_NOLOAN above).
 */
mblk_t *
dupmsg_noloan(mblk_t *bp)
{
	mblk_t *head, *nbp;

	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
		return (NULL);

	while (bp->b_cont) {
		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
			freemsg(head);
			return (NULL);
		}
		nbp = nbp->b_cont;
		bp = bp->b_cont;
	}
	return (head);
}

/*
 * Copy data from message and data block to newly allocated message and
 * data block.  Returns new message block pointer, or NULL if error.
 * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
 * as in the original even when db_base is not word aligned.
 * (bug 1052877)
 */
mblk_t *
copyb(mblk_t *bp)
{
	mblk_t	*nbp;
	dblk_t	*dp, *ndp;
	uchar_t *base;
	size_t	size;
	size_t	unaligned;

	ASSERT(bp->b_wptr >= bp->b_rptr);

	dp = bp->b_datap;
	if (dp->db_fthdr != NULL)
		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);

	/*
	 * Special handling for Multidata message; this should be
	 * removed once a copy-callback routine is made available.
	 */
	if (dp->db_type == M_MULTIDATA) {
		cred_t *cr;

		if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
			return (NULL);

		nbp->b_flag = bp->b_flag;
		nbp->b_band = bp->b_band;
		ndp = nbp->b_datap;

		/* See comments below on potential issues. */
		STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);

		ASSERT(ndp->db_type == dp->db_type);
		cr = dp->db_credp;
		if (cr != NULL)
			crhold(ndp->db_credp = cr);
		ndp->db_cpid = dp->db_cpid;
		return (nbp);
	}

	/*
	 * Over-allocate by the phase of db_base so the copy's rptr can
	 * be placed at the same word-alignment offset as the original.
	 */
	size = dp->db_lim - dp->db_base;
	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
		return (NULL);
	nbp->b_flag = bp->b_flag;
	nbp->b_band = bp->b_band;
	ndp = nbp->b_datap;

	/*
	 * Well, here is a potential issue.  If we are trying to
	 * trace a flow, and we copy the message, we might lose
	 * information about where this message might have been.
	 * So we should inherit the FT data.  On the other hand,
	 * a user might be interested only in alloc to free data.
	 * So I guess the real answer is to provide a tunable.
	 */
	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);

	base = ndp->db_base + unaligned;
	bcopy(dp->db_base, ndp->db_base + unaligned, size);

	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);

	return (nbp);
}

/*
 * Copy data from message to newly allocated message using new
 * data blocks.  Returns a pointer to the new message, or NULL if error.
 */
mblk_t *
copymsg(mblk_t *bp)
{
	mblk_t *head, *nbp;

	if (!bp || !(nbp = head = copyb(bp)))
		return (NULL);

	while (bp->b_cont) {
		if (!(nbp->b_cont = copyb(bp->b_cont))) {
			/* partial copy: free what we built and fail */
			freemsg(head);
			return (NULL);
		}
		nbp = nbp->b_cont;
		bp = bp->b_cont;
	}
	return (head);
}

/*
 * link a message block to tail of message
 */
void
linkb(mblk_t *mp, mblk_t *bp)
{
	ASSERT(mp && bp);

	for (; mp->b_cont; mp = mp->b_cont)
		;
	mp->b_cont = bp;
}

/*
 * unlink a message block from head of message
 * return pointer to new message.
 * NULL if message becomes empty.
 */
mblk_t *
unlinkb(mblk_t *bp)
{
	mblk_t *bp1;

	bp1 = bp->b_cont;
	bp->b_cont = NULL;
	return (bp1);
}

/*
 * remove a message block "bp" from message "mp"
 *
 * Return pointer to new message or NULL if no message remains.
 * Return -1 if bp is not found in message.
 */
mblk_t *
rmvb(mblk_t *mp, mblk_t *bp)
{
	mblk_t *tmp;
	mblk_t *lastp = NULL;

	ASSERT(mp && bp);
	for (tmp = mp; tmp; tmp = tmp->b_cont) {
		if (tmp == bp) {
			if (lastp)
				lastp->b_cont = tmp->b_cont;
			else
				mp = tmp->b_cont;
			tmp->b_cont = NULL;
			return (mp);
		}
		lastp = tmp;
	}
	return ((mblk_t *)-1);
}

/*
 * Concatenate and align first len bytes of common
 * message type.  Len == -1, means concat everything.
 * Returns 1 on success, 0 on failure
 * After the pullup, mp points to the pulled up data.
 */
int
pullupmsg(mblk_t *mp, ssize_t len)
{
	mblk_t *bp, *b_cont;
	dblk_t *dbp;
	ssize_t n;

	ASSERT(mp->b_datap->db_ref > 0);
	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (0);

	if (len == -1) {
		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
			return (1);
		len = xmsgsize(mp);
	} else {
		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
		ASSERT(first_mblk_len >= 0);
		/*
		 * If the length is less than that of the first mblk,
		 * we want to pull up the message into an aligned mblk.
		 * Though not part of the spec, some callers assume it.
		 */
		if (len <= first_mblk_len) {
			if (str_aligned(mp->b_rptr))
				return (1);
			len = first_mblk_len;
		} else if (xmsgsize(mp) < len)
			return (0);
	}

	if ((bp = allocb_tmpl(len, mp)) == NULL)
		return (0);

	/*
	 * Exchange mp's and bp's contents so that mp (which callers hold)
	 * ends up heading the new, pulled-up message while bp heads the
	 * old chain being consumed below.
	 */
	dbp = bp->b_datap;
	*bp = *mp;		/* swap mblks so bp heads the old msg... */
	mp->b_datap = dbp;	/* ... and mp heads the new message */
	mp->b_datap->db_mblk = mp;
	bp->b_datap->db_mblk = bp;
	mp->b_rptr = mp->b_wptr = dbp->db_base;

	do {
		ASSERT(bp->b_datap->db_ref > 0);
		ASSERT(bp->b_wptr >= bp->b_rptr);
		n = MIN(bp->b_wptr - bp->b_rptr, len);
		bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
		mp->b_wptr += n;
		bp->b_rptr += n;
		len -= n;
		/* stop early if this block wasn't fully consumed */
		if (bp->b_rptr != bp->b_wptr)
			break;
		b_cont = bp->b_cont;
		freeb(bp);
		bp = b_cont;
	} while (len && bp);

	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */

	return (1);
}

/*
 * Concatenate and align at least the first len bytes of common message
 * type.  Len == -1 means concatenate everything.  The original message is
 * unaltered.
Returns a pointer to a new message on success, otherwise 14480Sstevel@tonic-gate * returns NULL. 14490Sstevel@tonic-gate */ 14500Sstevel@tonic-gate mblk_t * 14510Sstevel@tonic-gate msgpullup(mblk_t *mp, ssize_t len) 14520Sstevel@tonic-gate { 14530Sstevel@tonic-gate mblk_t *newmp; 14540Sstevel@tonic-gate ssize_t totlen; 14550Sstevel@tonic-gate ssize_t n; 14560Sstevel@tonic-gate 14570Sstevel@tonic-gate /* 14580Sstevel@tonic-gate * We won't handle Multidata message, since it contains 14590Sstevel@tonic-gate * metadata which this function has no knowledge of; we 14600Sstevel@tonic-gate * assert on DEBUG, and return failure otherwise. 14610Sstevel@tonic-gate */ 14620Sstevel@tonic-gate ASSERT(mp->b_datap->db_type != M_MULTIDATA); 14630Sstevel@tonic-gate if (mp->b_datap->db_type == M_MULTIDATA) 14640Sstevel@tonic-gate return (NULL); 14650Sstevel@tonic-gate 14660Sstevel@tonic-gate totlen = xmsgsize(mp); 14670Sstevel@tonic-gate 14680Sstevel@tonic-gate if ((len > 0) && (len > totlen)) 14690Sstevel@tonic-gate return (NULL); 14700Sstevel@tonic-gate 14710Sstevel@tonic-gate /* 14720Sstevel@tonic-gate * Copy all of the first msg type into one new mblk, then dupmsg 14730Sstevel@tonic-gate * and link the rest onto this. 
14740Sstevel@tonic-gate */ 14750Sstevel@tonic-gate 14760Sstevel@tonic-gate len = totlen; 14770Sstevel@tonic-gate 14780Sstevel@tonic-gate if ((newmp = allocb_tmpl(len, mp)) == NULL) 14790Sstevel@tonic-gate return (NULL); 14800Sstevel@tonic-gate 14810Sstevel@tonic-gate newmp->b_flag = mp->b_flag; 14820Sstevel@tonic-gate newmp->b_band = mp->b_band; 14830Sstevel@tonic-gate 14840Sstevel@tonic-gate while (len > 0) { 14850Sstevel@tonic-gate n = mp->b_wptr - mp->b_rptr; 14860Sstevel@tonic-gate ASSERT(n >= 0); /* allow zero-length mblk_t's */ 14870Sstevel@tonic-gate if (n > 0) 14880Sstevel@tonic-gate bcopy(mp->b_rptr, newmp->b_wptr, n); 14890Sstevel@tonic-gate newmp->b_wptr += n; 14900Sstevel@tonic-gate len -= n; 14910Sstevel@tonic-gate mp = mp->b_cont; 14920Sstevel@tonic-gate } 14930Sstevel@tonic-gate 14940Sstevel@tonic-gate if (mp != NULL) { 14950Sstevel@tonic-gate newmp->b_cont = dupmsg(mp); 14960Sstevel@tonic-gate if (newmp->b_cont == NULL) { 14970Sstevel@tonic-gate freemsg(newmp); 14980Sstevel@tonic-gate return (NULL); 14990Sstevel@tonic-gate } 15000Sstevel@tonic-gate } 15010Sstevel@tonic-gate 15020Sstevel@tonic-gate return (newmp); 15030Sstevel@tonic-gate } 15040Sstevel@tonic-gate 15050Sstevel@tonic-gate /* 15060Sstevel@tonic-gate * Trim bytes from message 15070Sstevel@tonic-gate * len > 0, trim from head 15080Sstevel@tonic-gate * len < 0, trim from tail 15090Sstevel@tonic-gate * Returns 1 on success, 0 on failure. 
 */
int
adjmsg(mblk_t *mp, ssize_t len)
{
	mblk_t *bp;
	mblk_t *save_bp = NULL;
	mblk_t *prev_bp;
	mblk_t *bcont;
	unsigned char type;
	ssize_t n;
	int fromhead;
	int first;

	ASSERT(mp != NULL);
	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (0);

	if (len < 0) {
		fromhead = 0;
		len = -len;
	} else {
		fromhead = 1;
	}

	/* Fail without altering anything if the message is too short. */
	if (xmsgsize(mp) < len)
		return (0);


	if (fromhead) {
		first = 1;
		while (len) {
			ASSERT(mp->b_wptr >= mp->b_rptr);
			n = MIN(mp->b_wptr - mp->b_rptr, len);
			mp->b_rptr += n;
			len -= n;

			/*
			 * If this is not the first zero length
			 * message remove it
			 * (the head mblk itself is never freed, so the
			 * caller's mp stays valid even when emptied).
			 */
			if (!first && (mp->b_wptr == mp->b_rptr)) {
				bcont = mp->b_cont;
				freeb(mp);
				mp = save_bp->b_cont = bcont;
			} else {
				save_bp = mp;
				mp = mp->b_cont;
			}
			first = 0;
		}
	} else {
		/*
		 * Tail trim only considers the leading run of mblks whose
		 * db_type matches the head's; trimming stops at the first
		 * type change (per the adjmsg(9F) contract).
		 */
		type = mp->b_datap->db_type;
		while (len) {
			bp = mp;
			save_bp = NULL;

			/*
			 * Find the last message of same type
			 */

			while (bp && bp->b_datap->db_type == type) {
				ASSERT(bp->b_wptr >= bp->b_rptr);
				prev_bp = save_bp;
				save_bp = bp;
				bp = bp->b_cont;
			}
			if (save_bp == NULL)
				break;
			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
			save_bp->b_wptr -= n;
			len -= n;

			/*
			 * If this is not the first message
			 * and we have taken away everything
			 * from this message, remove it
			 * (save_bp != mp guarantees the scan ran at least
			 * twice, so prev_bp is initialized here).
			 */

			if ((save_bp != mp) &&
			    (save_bp->b_wptr == save_bp->b_rptr)) {
				bcont = save_bp->b_cont;
				freeb(save_bp);
				prev_bp->b_cont = bcont;
			}
		}
	}
	return (1);
}

/*
 * get number of data bytes in message
 */
size_t
msgdsize(mblk_t *bp) 16100Sstevel@tonic-gate { 16110Sstevel@tonic-gate size_t count = 0; 16120Sstevel@tonic-gate 16130Sstevel@tonic-gate for (; bp; bp = bp->b_cont) 16140Sstevel@tonic-gate if (bp->b_datap->db_type == M_DATA) { 16150Sstevel@tonic-gate ASSERT(bp->b_wptr >= bp->b_rptr); 16160Sstevel@tonic-gate count += bp->b_wptr - bp->b_rptr; 16170Sstevel@tonic-gate } 16180Sstevel@tonic-gate return (count); 16190Sstevel@tonic-gate } 16200Sstevel@tonic-gate 16210Sstevel@tonic-gate /* 16220Sstevel@tonic-gate * Get a message off head of queue 16230Sstevel@tonic-gate * 16240Sstevel@tonic-gate * If queue has no buffers then mark queue 16250Sstevel@tonic-gate * with QWANTR. (queue wants to be read by 16260Sstevel@tonic-gate * someone when data becomes available) 16270Sstevel@tonic-gate * 16280Sstevel@tonic-gate * If there is something to take off then do so. 16290Sstevel@tonic-gate * If queue falls below hi water mark turn off QFULL 16300Sstevel@tonic-gate * flag. Decrement weighted count of queue. 16310Sstevel@tonic-gate * Also turn off QWANTR because queue is being read. 16320Sstevel@tonic-gate * 16330Sstevel@tonic-gate * The queue count is maintained on a per-band basis. 16340Sstevel@tonic-gate * Priority band 0 (normal messages) uses q_count, 16350Sstevel@tonic-gate * q_lowat, etc. Non-zero priority bands use the 16360Sstevel@tonic-gate * fields in their respective qband structures 16370Sstevel@tonic-gate * (qb_count, qb_lowat, etc.) All messages appear 16380Sstevel@tonic-gate * on the same list, linked via their b_next pointers. 16390Sstevel@tonic-gate * q_first is the head of the list. q_count does 16400Sstevel@tonic-gate * not reflect the size of all the messages on the 16410Sstevel@tonic-gate * queue. It only reflects those messages in the 16420Sstevel@tonic-gate * normal band of flow. The one exception to this 16430Sstevel@tonic-gate * deals with high priority messages. 
They are in 16440Sstevel@tonic-gate * their own conceptual "band", but are accounted 16450Sstevel@tonic-gate * against q_count. 16460Sstevel@tonic-gate * 16470Sstevel@tonic-gate * If queue count is below the lo water mark and QWANTW 16480Sstevel@tonic-gate * is set, enable the closest backq which has a service 16490Sstevel@tonic-gate * procedure and turn off the QWANTW flag. 16500Sstevel@tonic-gate * 16510Sstevel@tonic-gate * getq could be built on top of rmvq, but isn't because 16520Sstevel@tonic-gate * of performance considerations. 16530Sstevel@tonic-gate * 16540Sstevel@tonic-gate * A note on the use of q_count and q_mblkcnt: 16550Sstevel@tonic-gate * q_count is the traditional byte count for messages that 16560Sstevel@tonic-gate * have been put on a queue. Documentation tells us that 16570Sstevel@tonic-gate * we shouldn't rely on that count, but some drivers/modules 16580Sstevel@tonic-gate * do. What was needed, however, is a mechanism to prevent 16590Sstevel@tonic-gate * runaway streams from consuming all of the resources, 16600Sstevel@tonic-gate * and particularly be able to flow control zero-length 16610Sstevel@tonic-gate * messages. q_mblkcnt is used for this purpose. It 16620Sstevel@tonic-gate * counts the number of mblk's that are being put on 16630Sstevel@tonic-gate * the queue. The intention here, is that each mblk should 16640Sstevel@tonic-gate * contain one byte of data and, for the purpose of 16650Sstevel@tonic-gate * flow-control, logically does. A queue will become 16660Sstevel@tonic-gate * full when EITHER of these values (q_count and q_mblkcnt) 16670Sstevel@tonic-gate * reach the highwater mark. It will clear when BOTH 16680Sstevel@tonic-gate * of them drop below the highwater mark. And it will 16690Sstevel@tonic-gate * backenable when BOTH of them drop below the lowwater 16700Sstevel@tonic-gate * mark. 
16710Sstevel@tonic-gate * With this algorithm, a driver/module might be able 16720Sstevel@tonic-gate * to find a reasonably accurate q_count, and the 16730Sstevel@tonic-gate * framework can still try and limit resource usage. 16740Sstevel@tonic-gate */ 16750Sstevel@tonic-gate mblk_t * 16760Sstevel@tonic-gate getq(queue_t *q) 16770Sstevel@tonic-gate { 16780Sstevel@tonic-gate mblk_t *bp; 1679235Smicheng uchar_t band = 0; 16800Sstevel@tonic-gate 16810Sstevel@tonic-gate bp = getq_noenab(q); 16820Sstevel@tonic-gate if (bp != NULL) 16830Sstevel@tonic-gate band = bp->b_band; 16840Sstevel@tonic-gate 16850Sstevel@tonic-gate /* 16860Sstevel@tonic-gate * Inlined from qbackenable(). 16870Sstevel@tonic-gate * Quick check without holding the lock. 16880Sstevel@tonic-gate */ 16890Sstevel@tonic-gate if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0) 16900Sstevel@tonic-gate return (bp); 16910Sstevel@tonic-gate 16920Sstevel@tonic-gate qbackenable(q, band); 16930Sstevel@tonic-gate return (bp); 16940Sstevel@tonic-gate } 16950Sstevel@tonic-gate 16960Sstevel@tonic-gate /* 1697741Smasputra * Calculate number of data bytes in a single data message block taking 1698741Smasputra * multidata messages into account. 1699741Smasputra */ 1700741Smasputra 1701741Smasputra #define ADD_MBLK_SIZE(mp, size) \ 1702741Smasputra if (DB_TYPE(mp) != M_MULTIDATA) { \ 1703741Smasputra (size) += MBLKL(mp); \ 1704741Smasputra } else { \ 1705741Smasputra uint_t pinuse; \ 1706741Smasputra \ 1707741Smasputra mmd_getsize(mmd_getmultidata(mp), NULL, &pinuse); \ 1708741Smasputra (size) += pinuse; \ 1709741Smasputra } 1710741Smasputra 1711741Smasputra /* 17120Sstevel@tonic-gate * Like getq() but does not backenable. This is used by the stream 17130Sstevel@tonic-gate * head when a putback() is likely. The caller must call qbackenable() 17140Sstevel@tonic-gate * after it is done with accessing the queue. 
 */
mblk_t *
getq_noenab(queue_t *q)
{
	mblk_t *bp;
	mblk_t *tmp;
	qband_t *qbp;
	kthread_id_t freezer;
	int bytecnt = 0, mblkcnt = 0;

	/* freezestr should allow its caller to call getq/putq */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		/* Stream frozen by this thread: QLOCK is already held. */
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if ((bp = q->q_first) == 0) {
		/* Empty queue: note that a reader is waiting for data. */
		q->q_flag |= QWANTR;
	} else {
		/* Unlink the head message from the queue's b_next list. */
		if ((q->q_first = bp->b_next) == NULL)
			q->q_last = NULL;
		else
			q->q_first->b_prev = NULL;

		/* Get message byte count for q_count accounting */
		for (tmp = bp; tmp; tmp = tmp->b_cont) {
			ADD_MBLK_SIZE(tmp, bytecnt);
			mblkcnt++;
		}

		if (bp->b_band == 0) {
			/* Normal band: account against q_count/q_mblkcnt. */
			q->q_count -= bytecnt;
			q->q_mblkcnt -= mblkcnt;
			if ((q->q_count < q->q_hiwat) &&
			    (q->q_mblkcnt < q->q_hiwat)) {
				q->q_flag &= ~QFULL;
			}
		} else {
			int i;

			ASSERT(bp->b_band <= q->q_nband);
			ASSERT(q->q_bandp != NULL);
			ASSERT(MUTEX_HELD(QLOCK(q)));
			/* Walk to this band's qband structure. */
			qbp = q->q_bandp;
			i = bp->b_band;
			while (--i > 0)
				qbp = qbp->qb_next;
			if (qbp->qb_first == qbp->qb_last) {
				/* Last message in this band. */
				qbp->qb_first = NULL;
				qbp->qb_last = NULL;
			} else {
				qbp->qb_first = bp->b_next;
			}
			qbp->qb_count -= bytecnt;
			qbp->qb_mblkcnt -= mblkcnt;
			if ((qbp->qb_count < qbp->qb_hiwat) &&
			    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
				qbp->qb_flag &= ~QB_FULL;
			}
		}
		q->q_flag &= ~QWANTR;
		bp->b_next = NULL;
		bp->b_prev = NULL;
	}
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	/* NOTE(review): bp may be NULL here; the macro appears to accept
	 * that — confirm against STR_FTEVENT_MSG's definition. */
	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);

	return (bp);
}

/*
 * Determine if a backenable is needed after removing a message in the
 * specified band.
 * NOTE: This routine assumes that something like getq_noenab() has been
 * already called.
 *
 * For the read side it is ok to hold sd_lock across calling this (and the
 * stream head often does).
 * But for the write side strwakeq might be invoked and it acquires sd_lock.
 */
void
qbackenable(queue_t *q, uchar_t band)
{
	int backenab = 0;	/* holds the flag bits that fired, not 0/1 */
	qband_t *qbp;
	kthread_id_t freezer;

	ASSERT(q);
	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));

	/*
	 * Quick check without holding the lock.
	 * OK since after getq() has lowered the q_count these flags
	 * would not change unless either the qbackenable() is done by
	 * another thread (which is ok) or the queue has gotten QFULL
	 * in which case another backenable will take place when the queue
	 * drops below q_lowat.
	 */
	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
		return;

	/* freezestr should allow its caller to call getq/putq */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if (band == 0) {
		/* Backenable when below lowat, or always if lowat is 0. */
		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
		    q->q_mblkcnt < q->q_lowat)) {
			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
		}
	} else {
		int i;

		ASSERT((unsigned)band <= q->q_nband);
		ASSERT(q->q_bandp != NULL);

		/* Walk to this band's qband structure. */
		qbp = q->q_bandp;
		i = band;
		while (--i > 0)
			qbp = qbp->qb_next;

		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
			backenab = qbp->qb_flag & QB_WANTW;
		}
	}

	if (backenab == 0) {
		if (freezer != curthread)
			mutex_exit(QLOCK(q));
		return;
	}

	/* Have to drop the lock across strwakeq and backenable */
	if (backenab & QWANTWSYNC)
		q->q_flag &= ~QWANTWSYNC;
	if (backenab & (QWANTW|QB_WANTW)) {
		if (band != 0)
			qbp->qb_flag &= ~QB_WANTW;
		else {
			q->q_flag &= ~QWANTW;
		}
	}

	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	/* Wakeups happen after the lock is dropped (see note above). */
	if (backenab & QWANTWSYNC)
		strwakeq(q, QWANTWSYNC);
	if (backenab & (QWANTW|QB_WANTW))
		backenable(q, band);
}

/*
 * Remove a message from a queue.  The queue count and other
 * flow control parameters are adjusted and the back queue
 * enabled if necessary.
 *
 * rmvq can be called with the stream frozen, but other utility functions
 * holding QLOCK, and by streams modules without any locks/frozen.
18830Sstevel@tonic-gate */ 18840Sstevel@tonic-gate void 18850Sstevel@tonic-gate rmvq(queue_t *q, mblk_t *mp) 18860Sstevel@tonic-gate { 18870Sstevel@tonic-gate ASSERT(mp != NULL); 18880Sstevel@tonic-gate 18890Sstevel@tonic-gate rmvq_noenab(q, mp); 18900Sstevel@tonic-gate if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) { 18910Sstevel@tonic-gate /* 18920Sstevel@tonic-gate * qbackenable can handle a frozen stream but not a "random" 18930Sstevel@tonic-gate * qlock being held. Drop lock across qbackenable. 18940Sstevel@tonic-gate */ 18950Sstevel@tonic-gate mutex_exit(QLOCK(q)); 18960Sstevel@tonic-gate qbackenable(q, mp->b_band); 18970Sstevel@tonic-gate mutex_enter(QLOCK(q)); 18980Sstevel@tonic-gate } else { 18990Sstevel@tonic-gate qbackenable(q, mp->b_band); 19000Sstevel@tonic-gate } 19010Sstevel@tonic-gate } 19020Sstevel@tonic-gate 19030Sstevel@tonic-gate /* 19040Sstevel@tonic-gate * Like rmvq() but without any backenabling. 19050Sstevel@tonic-gate * This exists to handle SR_CONSOL_DATA in strrput(). 
 */
void
rmvq_noenab(queue_t *q, mblk_t *mp)
{
	mblk_t *tmp;
	int i;
	qband_t *qbp = NULL;
	kthread_id_t freezer;
	int bytecnt = 0, mblkcnt = 0;

	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		/* Stream frozen by this thread: QLOCK is already held. */
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else if (MUTEX_HELD(QLOCK(q))) {
		/* Don't drop lock on exit */
		freezer = curthread;
	} else
		mutex_enter(QLOCK(q));

	ASSERT(mp->b_band <= q->q_nband);
	if (mp->b_band != 0) {		/* Adjust band pointers */
		ASSERT(q->q_bandp != NULL);
		/* Walk to this band's qband structure. */
		qbp = q->q_bandp;
		i = mp->b_band;
		while (--i > 0)
			qbp = qbp->qb_next;
		/*
		 * Fix qb_first/qb_last: a neighbor in the same band takes
		 * over the role; otherwise the band becomes empty.
		 */
		if (mp == qbp->qb_first) {
			if (mp->b_next && mp->b_band == mp->b_next->b_band)
				qbp->qb_first = mp->b_next;
			else
				qbp->qb_first = NULL;
		}
		if (mp == qbp->qb_last) {
			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
				qbp->qb_last = mp->b_prev;
			else
				qbp->qb_last = NULL;
		}
	}

	/*
	 * Remove the message from the list.
	 */
	if (mp->b_prev)
		mp->b_prev->b_next = mp->b_next;
	else
		q->q_first = mp->b_next;
	if (mp->b_next)
		mp->b_next->b_prev = mp->b_prev;
	else
		q->q_last = mp->b_prev;
	mp->b_next = NULL;
	mp->b_prev = NULL;

	/* Get the size of the message for q_count accounting */
	for (tmp = mp; tmp; tmp = tmp->b_cont) {
		ADD_MBLK_SIZE(tmp, bytecnt);
		mblkcnt++;
	}

	if (mp->b_band == 0) {		/* Perform q_count accounting */
		q->q_count -= bytecnt;
		q->q_mblkcnt -= mblkcnt;
		if ((q->q_count < q->q_hiwat) &&
		    (q->q_mblkcnt < q->q_hiwat)) {
			q->q_flag &= ~QFULL;
		}
	} else {			/* Perform qb_count accounting */
		qbp->qb_count -= bytecnt;
		qbp->qb_mblkcnt -= mblkcnt;
		if ((qbp->qb_count < qbp->qb_hiwat) &&
		    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
			qbp->qb_flag &= ~QB_FULL;
		}
	}
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
}

/*
 * Empty a queue.
 * If flag is set, remove all messages.  Otherwise, remove
 * only non-control messages.
 * If queue falls below its low
 * water mark, and QWANTW is set, enable the nearest upstream
 * service procedure.
 *
 * Historical note: when merging the M_FLUSH code in strrput with this
 * code one difference was discovered.  flushq did not have a check
 * for q_lowat == 0 in the backenabling test.
 *
 * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
 * if one exists on the queue.
 */
void
flushq_common(queue_t *q, int flag, int pcproto_flag)
{
	mblk_t *mp, *nmp;
	qband_t *qbp;
	int backenab = 0;
	unsigned char bpri;
	unsigned char qbf[NBAND];	/* band flushing backenable flags */

	if (q->q_first == NULL)
		return;

	/*
	 * Detach the entire list and zero all counts under QLOCK, then
	 * sort the messages outside the lock: survivors are requeued via
	 * putq(), which re-establishes the correct accounting.
	 */
	mutex_enter(QLOCK(q));
	mp = q->q_first;
	q->q_first = NULL;
	q->q_last = NULL;
	q->q_count = 0;
	q->q_mblkcnt = 0;
	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
		qbp->qb_first = NULL;
		qbp->qb_last = NULL;
		qbp->qb_count = 0;
		qbp->qb_mblkcnt = 0;
		qbp->qb_flag &= ~QB_FULL;
	}
	q->q_flag &= ~QFULL;
	mutex_exit(QLOCK(q));
	while (mp) {
		nmp = mp->b_next;
		mp->b_next = mp->b_prev = NULL;

		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);

		/*
		 * Keep M_PCPROTO if requested; free data messages (and
		 * everything if flag is set); requeue control messages.
		 */
		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
			(void) putq(q, mp);
		else if (flag || datamsg(mp->b_datap->db_type))
			freemsg(mp);
		else
			(void) putq(q, mp);
		mp = nmp;
	}
	/* Record, per band, whether a backenable is due (qbf[0] = band 0). */
	bpri = 1;
	mutex_enter(QLOCK(q));
	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
		if ((qbp->qb_flag & QB_WANTW) &&
		    (((qbp->qb_count < qbp->qb_lowat) &&
		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
		    qbp->qb_lowat == 0)) {
			qbp->qb_flag &= ~QB_WANTW;
			backenab = 1;
			qbf[bpri] = 1;
		} else
			qbf[bpri] = 0;
		bpri++;
	}
	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
	if ((q->q_flag & QWANTW) &&
	    (((q->q_count < q->q_lowat) &&
	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
		q->q_flag &= ~QWANTW;
		backenab = 1;
		qbf[0] = 1;
	} else
		qbf[0] = 0;

	/*
	 * If any band can now be written to, and there is a writer
	 * for that band, then backenable the closest service procedure.
	 */
	if (backenab) {
		mutex_exit(QLOCK(q));
		for (bpri = q->q_nband; bpri != 0; bpri--)
			if (qbf[bpri])
				backenable(q, bpri);
		if (qbf[0])
			backenable(q, 0);
	} else
		mutex_exit(QLOCK(q));
}

/*
 * The real flushing takes place in flushq_common.  This is done so that
 * a flag which specifies whether or not M_PCPROTO messages should be flushed
 * or not.  Currently the only place that uses this flag is the stream head.
 */
void
flushq(queue_t *q, int flag)
{
	flushq_common(q, flag, 0);
}

/*
 * Flush the queue of messages of the given priority band.
 * There is some duplication of code between flushq and flushband.
 * This is because we want to optimize the code as much as possible.
 * The assumption is that there will be more messages in the normal
 * (priority 0) band than in any other.
 *
 * Historical note: when merging the M_FLUSH code in strrput with this
 * code one difference was discovered.  flushband had an extra check for
 * (mp->b_datap->db_type < QPCTL) in the band 0 case that the strrput
 * flush code did not have.  That check does not match the man page for
 * flushband and was not in the strrput flush code hence it was removed.
21050Sstevel@tonic-gate */ 21060Sstevel@tonic-gate void 21070Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag) 21080Sstevel@tonic-gate { 21090Sstevel@tonic-gate mblk_t *mp; 21100Sstevel@tonic-gate mblk_t *nmp; 21110Sstevel@tonic-gate mblk_t *last; 21120Sstevel@tonic-gate qband_t *qbp; 21130Sstevel@tonic-gate int band; 21140Sstevel@tonic-gate 21150Sstevel@tonic-gate ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL)); 21160Sstevel@tonic-gate if (pri > q->q_nband) { 21170Sstevel@tonic-gate return; 21180Sstevel@tonic-gate } 21190Sstevel@tonic-gate mutex_enter(QLOCK(q)); 21200Sstevel@tonic-gate if (pri == 0) { 21210Sstevel@tonic-gate mp = q->q_first; 21220Sstevel@tonic-gate q->q_first = NULL; 21230Sstevel@tonic-gate q->q_last = NULL; 21240Sstevel@tonic-gate q->q_count = 0; 21250Sstevel@tonic-gate q->q_mblkcnt = 0; 21260Sstevel@tonic-gate for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) { 21270Sstevel@tonic-gate qbp->qb_first = NULL; 21280Sstevel@tonic-gate qbp->qb_last = NULL; 21290Sstevel@tonic-gate qbp->qb_count = 0; 21300Sstevel@tonic-gate qbp->qb_mblkcnt = 0; 21310Sstevel@tonic-gate qbp->qb_flag &= ~QB_FULL; 21320Sstevel@tonic-gate } 21330Sstevel@tonic-gate q->q_flag &= ~QFULL; 21340Sstevel@tonic-gate mutex_exit(QLOCK(q)); 21350Sstevel@tonic-gate while (mp) { 21360Sstevel@tonic-gate nmp = mp->b_next; 21370Sstevel@tonic-gate mp->b_next = mp->b_prev = NULL; 21380Sstevel@tonic-gate if ((mp->b_band == 0) && 21390Sstevel@tonic-gate ((flag == FLUSHALL) || 21400Sstevel@tonic-gate datamsg(mp->b_datap->db_type))) 21410Sstevel@tonic-gate freemsg(mp); 21420Sstevel@tonic-gate else 21430Sstevel@tonic-gate (void) putq(q, mp); 21440Sstevel@tonic-gate mp = nmp; 21450Sstevel@tonic-gate } 21460Sstevel@tonic-gate mutex_enter(QLOCK(q)); 21470Sstevel@tonic-gate if ((q->q_flag & QWANTW) && 21480Sstevel@tonic-gate (((q->q_count < q->q_lowat) && 21490Sstevel@tonic-gate (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) { 21500Sstevel@tonic-gate q->q_flag &= ~QWANTW; 
21510Sstevel@tonic-gate mutex_exit(QLOCK(q)); 21520Sstevel@tonic-gate 2153235Smicheng backenable(q, pri); 21540Sstevel@tonic-gate } else 21550Sstevel@tonic-gate mutex_exit(QLOCK(q)); 21560Sstevel@tonic-gate } else { /* pri != 0 */ 21570Sstevel@tonic-gate boolean_t flushed = B_FALSE; 21580Sstevel@tonic-gate band = pri; 21590Sstevel@tonic-gate 21600Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 21610Sstevel@tonic-gate qbp = q->q_bandp; 21620Sstevel@tonic-gate while (--band > 0) 21630Sstevel@tonic-gate qbp = qbp->qb_next; 21640Sstevel@tonic-gate mp = qbp->qb_first; 21650Sstevel@tonic-gate if (mp == NULL) { 21660Sstevel@tonic-gate mutex_exit(QLOCK(q)); 21670Sstevel@tonic-gate return; 21680Sstevel@tonic-gate } 21690Sstevel@tonic-gate last = qbp->qb_last->b_next; 21700Sstevel@tonic-gate /* 21710Sstevel@tonic-gate * rmvq_noenab() and freemsg() are called for each mblk that 21720Sstevel@tonic-gate * meets the criteria. The loop is executed until the last 21730Sstevel@tonic-gate * mblk has been processed. 21740Sstevel@tonic-gate */ 21750Sstevel@tonic-gate while (mp != last) { 21760Sstevel@tonic-gate ASSERT(mp->b_band == pri); 21770Sstevel@tonic-gate nmp = mp->b_next; 21780Sstevel@tonic-gate if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) { 21790Sstevel@tonic-gate rmvq_noenab(q, mp); 21800Sstevel@tonic-gate freemsg(mp); 21810Sstevel@tonic-gate flushed = B_TRUE; 21820Sstevel@tonic-gate } 21830Sstevel@tonic-gate mp = nmp; 21840Sstevel@tonic-gate } 21850Sstevel@tonic-gate mutex_exit(QLOCK(q)); 21860Sstevel@tonic-gate 21870Sstevel@tonic-gate /* 21880Sstevel@tonic-gate * If any mblk(s) has been freed, we know that qbackenable() 21890Sstevel@tonic-gate * will need to be called. 21900Sstevel@tonic-gate */ 21910Sstevel@tonic-gate if (flushed) 2192235Smicheng qbackenable(q, pri); 21930Sstevel@tonic-gate } 21940Sstevel@tonic-gate } 21950Sstevel@tonic-gate 21960Sstevel@tonic-gate /* 21970Sstevel@tonic-gate * Return 1 if the queue is not full. 
If the queue is full, return 21980Sstevel@tonic-gate * 0 (may not put message) and set QWANTW flag (caller wants to write 21990Sstevel@tonic-gate * to the queue). 22000Sstevel@tonic-gate */ 22010Sstevel@tonic-gate int 22020Sstevel@tonic-gate canput(queue_t *q) 22030Sstevel@tonic-gate { 22040Sstevel@tonic-gate TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q); 22050Sstevel@tonic-gate 22060Sstevel@tonic-gate /* this is for loopback transports, they should not do a canput */ 22070Sstevel@tonic-gate ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv)); 22080Sstevel@tonic-gate 22090Sstevel@tonic-gate /* Find next forward module that has a service procedure */ 22100Sstevel@tonic-gate q = q->q_nfsrv; 22110Sstevel@tonic-gate 22120Sstevel@tonic-gate if (!(q->q_flag & QFULL)) { 22130Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1); 22140Sstevel@tonic-gate return (1); 22150Sstevel@tonic-gate } 22160Sstevel@tonic-gate mutex_enter(QLOCK(q)); 22170Sstevel@tonic-gate if (q->q_flag & QFULL) { 22180Sstevel@tonic-gate q->q_flag |= QWANTW; 22190Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22200Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0); 22210Sstevel@tonic-gate return (0); 22220Sstevel@tonic-gate } 22230Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22240Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1); 22250Sstevel@tonic-gate return (1); 22260Sstevel@tonic-gate } 22270Sstevel@tonic-gate 22280Sstevel@tonic-gate /* 22290Sstevel@tonic-gate * This is the new canput for use with priority bands. Return 1 if the 22300Sstevel@tonic-gate * band is not full. If the band is full, return 0 (may not put message) 22310Sstevel@tonic-gate * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to 22320Sstevel@tonic-gate * write to the queue). 
22330Sstevel@tonic-gate */ 22340Sstevel@tonic-gate int 22350Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri) 22360Sstevel@tonic-gate { 22370Sstevel@tonic-gate qband_t *qbp; 22380Sstevel@tonic-gate 22390Sstevel@tonic-gate TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri); 22400Sstevel@tonic-gate if (!q) 22410Sstevel@tonic-gate return (0); 22420Sstevel@tonic-gate 22430Sstevel@tonic-gate /* Find next forward module that has a service procedure */ 22440Sstevel@tonic-gate q = q->q_nfsrv; 22450Sstevel@tonic-gate 22460Sstevel@tonic-gate mutex_enter(QLOCK(q)); 22470Sstevel@tonic-gate if (pri == 0) { 22480Sstevel@tonic-gate if (q->q_flag & QFULL) { 22490Sstevel@tonic-gate q->q_flag |= QWANTW; 22500Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22510Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 22520Sstevel@tonic-gate "bcanput:%p %X %d", q, pri, 0); 22530Sstevel@tonic-gate return (0); 22540Sstevel@tonic-gate } 22550Sstevel@tonic-gate } else { /* pri != 0 */ 22560Sstevel@tonic-gate if (pri > q->q_nband) { 22570Sstevel@tonic-gate /* 22580Sstevel@tonic-gate * No band exists yet, so return success. 
22590Sstevel@tonic-gate */ 22600Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22610Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 22620Sstevel@tonic-gate "bcanput:%p %X %d", q, pri, 1); 22630Sstevel@tonic-gate return (1); 22640Sstevel@tonic-gate } 22650Sstevel@tonic-gate qbp = q->q_bandp; 22660Sstevel@tonic-gate while (--pri) 22670Sstevel@tonic-gate qbp = qbp->qb_next; 22680Sstevel@tonic-gate if (qbp->qb_flag & QB_FULL) { 22690Sstevel@tonic-gate qbp->qb_flag |= QB_WANTW; 22700Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22710Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 22720Sstevel@tonic-gate "bcanput:%p %X %d", q, pri, 0); 22730Sstevel@tonic-gate return (0); 22740Sstevel@tonic-gate } 22750Sstevel@tonic-gate } 22760Sstevel@tonic-gate mutex_exit(QLOCK(q)); 22770Sstevel@tonic-gate TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 22780Sstevel@tonic-gate "bcanput:%p %X %d", q, pri, 1); 22790Sstevel@tonic-gate return (1); 22800Sstevel@tonic-gate } 22810Sstevel@tonic-gate 22820Sstevel@tonic-gate /* 22830Sstevel@tonic-gate * Put a message on a queue. 22840Sstevel@tonic-gate * 22850Sstevel@tonic-gate * Messages are enqueued on a priority basis. The priority classes 22860Sstevel@tonic-gate * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0), 22870Sstevel@tonic-gate * and B_NORMAL (type < QPCTL && band == 0). 22880Sstevel@tonic-gate * 22890Sstevel@tonic-gate * Add appropriate weighted data block sizes to queue count. 22900Sstevel@tonic-gate * If queue hits high water mark then set QFULL flag. 22910Sstevel@tonic-gate * 22920Sstevel@tonic-gate * If QNOENAB is not set (putq is allowed to enable the queue), 22930Sstevel@tonic-gate * enable the queue only if the message is PRIORITY, 22940Sstevel@tonic-gate * or the QWANTR flag is set (indicating that the service procedure 22950Sstevel@tonic-gate * is ready to read the queue. 
This implies that a service 22960Sstevel@tonic-gate * procedure must NEVER put a high priority message back on its own 22970Sstevel@tonic-gate * queue, as this would result in an infinite loop (!). 22980Sstevel@tonic-gate */ 22990Sstevel@tonic-gate int 23000Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp) 23010Sstevel@tonic-gate { 23020Sstevel@tonic-gate mblk_t *tmp; 23030Sstevel@tonic-gate qband_t *qbp = NULL; 23040Sstevel@tonic-gate int mcls = (int)queclass(bp); 23050Sstevel@tonic-gate kthread_id_t freezer; 23060Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0; 23070Sstevel@tonic-gate 23080Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer; 23090Sstevel@tonic-gate if (freezer == curthread) { 23100Sstevel@tonic-gate ASSERT(frozenstr(q)); 23110Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 23120Sstevel@tonic-gate } else 23130Sstevel@tonic-gate mutex_enter(QLOCK(q)); 23140Sstevel@tonic-gate 23150Sstevel@tonic-gate /* 23160Sstevel@tonic-gate * Make sanity checks and if qband structure is not yet 23170Sstevel@tonic-gate * allocated, do so. 23180Sstevel@tonic-gate */ 23190Sstevel@tonic-gate if (mcls == QPCTL) { 23200Sstevel@tonic-gate if (bp->b_band != 0) 23210Sstevel@tonic-gate bp->b_band = 0; /* force to be correct */ 23220Sstevel@tonic-gate } else if (bp->b_band != 0) { 23230Sstevel@tonic-gate int i; 23240Sstevel@tonic-gate qband_t **qbpp; 23250Sstevel@tonic-gate 23260Sstevel@tonic-gate if (bp->b_band > q->q_nband) { 23270Sstevel@tonic-gate 23280Sstevel@tonic-gate /* 23290Sstevel@tonic-gate * The qband structure for this priority band is 23300Sstevel@tonic-gate * not on the queue yet, so we have to allocate 23310Sstevel@tonic-gate * one on the fly. It would be wasteful to 23320Sstevel@tonic-gate * associate the qband structures with every 23330Sstevel@tonic-gate * queue when the queues are allocated. 
This is 23340Sstevel@tonic-gate * because most queues will only need the normal 23350Sstevel@tonic-gate * band of flow which can be described entirely 23360Sstevel@tonic-gate * by the queue itself. 23370Sstevel@tonic-gate */ 23380Sstevel@tonic-gate qbpp = &q->q_bandp; 23390Sstevel@tonic-gate while (*qbpp) 23400Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 23410Sstevel@tonic-gate while (bp->b_band > q->q_nband) { 23420Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) { 23430Sstevel@tonic-gate if (freezer != curthread) 23440Sstevel@tonic-gate mutex_exit(QLOCK(q)); 23450Sstevel@tonic-gate return (0); 23460Sstevel@tonic-gate } 23470Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat; 23480Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat; 23490Sstevel@tonic-gate q->q_nband++; 23500Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 23510Sstevel@tonic-gate } 23520Sstevel@tonic-gate } 23530Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 23540Sstevel@tonic-gate qbp = q->q_bandp; 23550Sstevel@tonic-gate i = bp->b_band; 23560Sstevel@tonic-gate while (--i) 23570Sstevel@tonic-gate qbp = qbp->qb_next; 23580Sstevel@tonic-gate } 23590Sstevel@tonic-gate 23600Sstevel@tonic-gate /* 23610Sstevel@tonic-gate * If queue is empty, add the message and initialize the pointers. 23620Sstevel@tonic-gate * Otherwise, adjust message pointers and queue pointers based on 23630Sstevel@tonic-gate * the type of the message and where it belongs on the queue. Some 23640Sstevel@tonic-gate * code is duplicated to minimize the number of conditionals and 23650Sstevel@tonic-gate * hopefully minimize the amount of time this routine takes. 
23660Sstevel@tonic-gate */ 23670Sstevel@tonic-gate if (!q->q_first) { 23680Sstevel@tonic-gate bp->b_next = NULL; 23690Sstevel@tonic-gate bp->b_prev = NULL; 23700Sstevel@tonic-gate q->q_first = bp; 23710Sstevel@tonic-gate q->q_last = bp; 23720Sstevel@tonic-gate if (qbp) { 23730Sstevel@tonic-gate qbp->qb_first = bp; 23740Sstevel@tonic-gate qbp->qb_last = bp; 23750Sstevel@tonic-gate } 23760Sstevel@tonic-gate } else if (!qbp) { /* bp->b_band == 0 */ 23770Sstevel@tonic-gate 23780Sstevel@tonic-gate /* 23790Sstevel@tonic-gate * If queue class of message is less than or equal to 23800Sstevel@tonic-gate * that of the last one on the queue, tack on to the end. 23810Sstevel@tonic-gate */ 23820Sstevel@tonic-gate tmp = q->q_last; 23830Sstevel@tonic-gate if (mcls <= (int)queclass(tmp)) { 23840Sstevel@tonic-gate bp->b_next = NULL; 23850Sstevel@tonic-gate bp->b_prev = tmp; 23860Sstevel@tonic-gate tmp->b_next = bp; 23870Sstevel@tonic-gate q->q_last = bp; 23880Sstevel@tonic-gate } else { 23890Sstevel@tonic-gate tmp = q->q_first; 23900Sstevel@tonic-gate while ((int)queclass(tmp) >= mcls) 23910Sstevel@tonic-gate tmp = tmp->b_next; 23920Sstevel@tonic-gate 23930Sstevel@tonic-gate /* 23940Sstevel@tonic-gate * Insert bp before tmp. 23950Sstevel@tonic-gate */ 23960Sstevel@tonic-gate bp->b_next = tmp; 23970Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 23980Sstevel@tonic-gate if (tmp->b_prev) 23990Sstevel@tonic-gate tmp->b_prev->b_next = bp; 24000Sstevel@tonic-gate else 24010Sstevel@tonic-gate q->q_first = bp; 24020Sstevel@tonic-gate tmp->b_prev = bp; 24030Sstevel@tonic-gate } 24040Sstevel@tonic-gate } else { /* bp->b_band != 0 */ 24050Sstevel@tonic-gate if (qbp->qb_first) { 24060Sstevel@tonic-gate tmp = qbp->qb_last; 24070Sstevel@tonic-gate 24080Sstevel@tonic-gate /* 24090Sstevel@tonic-gate * Insert bp after the last message in this band. 
24100Sstevel@tonic-gate */ 24110Sstevel@tonic-gate bp->b_next = tmp->b_next; 24120Sstevel@tonic-gate if (tmp->b_next) 24130Sstevel@tonic-gate tmp->b_next->b_prev = bp; 24140Sstevel@tonic-gate else 24150Sstevel@tonic-gate q->q_last = bp; 24160Sstevel@tonic-gate bp->b_prev = tmp; 24170Sstevel@tonic-gate tmp->b_next = bp; 24180Sstevel@tonic-gate } else { 24190Sstevel@tonic-gate tmp = q->q_last; 24200Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) || 24210Sstevel@tonic-gate (bp->b_band <= tmp->b_band)) { 24220Sstevel@tonic-gate 24230Sstevel@tonic-gate /* 24240Sstevel@tonic-gate * Tack bp on end of queue. 24250Sstevel@tonic-gate */ 24260Sstevel@tonic-gate bp->b_next = NULL; 24270Sstevel@tonic-gate bp->b_prev = tmp; 24280Sstevel@tonic-gate tmp->b_next = bp; 24290Sstevel@tonic-gate q->q_last = bp; 24300Sstevel@tonic-gate } else { 24310Sstevel@tonic-gate tmp = q->q_first; 24320Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL) 24330Sstevel@tonic-gate tmp = tmp->b_next; 24340Sstevel@tonic-gate while (tmp->b_band >= bp->b_band) 24350Sstevel@tonic-gate tmp = tmp->b_next; 24360Sstevel@tonic-gate 24370Sstevel@tonic-gate /* 24380Sstevel@tonic-gate * Insert bp before tmp. 
24390Sstevel@tonic-gate */ 24400Sstevel@tonic-gate bp->b_next = tmp; 24410Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 24420Sstevel@tonic-gate if (tmp->b_prev) 24430Sstevel@tonic-gate tmp->b_prev->b_next = bp; 24440Sstevel@tonic-gate else 24450Sstevel@tonic-gate q->q_first = bp; 24460Sstevel@tonic-gate tmp->b_prev = bp; 24470Sstevel@tonic-gate } 24480Sstevel@tonic-gate qbp->qb_first = bp; 24490Sstevel@tonic-gate } 24500Sstevel@tonic-gate qbp->qb_last = bp; 24510Sstevel@tonic-gate } 24520Sstevel@tonic-gate 24530Sstevel@tonic-gate /* Get message byte count for q_count accounting */ 24540Sstevel@tonic-gate for (tmp = bp; tmp; tmp = tmp->b_cont) { 2455741Smasputra ADD_MBLK_SIZE(tmp, bytecnt); 24560Sstevel@tonic-gate mblkcnt++; 24570Sstevel@tonic-gate } 2458741Smasputra 24590Sstevel@tonic-gate if (qbp) { 24600Sstevel@tonic-gate qbp->qb_count += bytecnt; 24610Sstevel@tonic-gate qbp->qb_mblkcnt += mblkcnt; 24620Sstevel@tonic-gate if ((qbp->qb_count >= qbp->qb_hiwat) || 24630Sstevel@tonic-gate (qbp->qb_mblkcnt >= qbp->qb_hiwat)) { 24640Sstevel@tonic-gate qbp->qb_flag |= QB_FULL; 24650Sstevel@tonic-gate } 24660Sstevel@tonic-gate } else { 24670Sstevel@tonic-gate q->q_count += bytecnt; 24680Sstevel@tonic-gate q->q_mblkcnt += mblkcnt; 24690Sstevel@tonic-gate if ((q->q_count >= q->q_hiwat) || 24700Sstevel@tonic-gate (q->q_mblkcnt >= q->q_hiwat)) { 24710Sstevel@tonic-gate q->q_flag |= QFULL; 24720Sstevel@tonic-gate } 24730Sstevel@tonic-gate } 24740Sstevel@tonic-gate 24750Sstevel@tonic-gate STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL); 24760Sstevel@tonic-gate 24770Sstevel@tonic-gate if ((mcls > QNORM) || 24780Sstevel@tonic-gate (canenable(q) && (q->q_flag & QWANTR || bp->b_band))) 24790Sstevel@tonic-gate qenable_locked(q); 24800Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 24810Sstevel@tonic-gate if (freezer != curthread) 24820Sstevel@tonic-gate mutex_exit(QLOCK(q)); 24830Sstevel@tonic-gate 24840Sstevel@tonic-gate return (1); 24850Sstevel@tonic-gate } 24860Sstevel@tonic-gate 
24870Sstevel@tonic-gate /* 24880Sstevel@tonic-gate * Put stuff back at beginning of Q according to priority order. 24890Sstevel@tonic-gate * See comment on putq above for details. 24900Sstevel@tonic-gate */ 24910Sstevel@tonic-gate int 24920Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp) 24930Sstevel@tonic-gate { 24940Sstevel@tonic-gate mblk_t *tmp; 24950Sstevel@tonic-gate qband_t *qbp = NULL; 24960Sstevel@tonic-gate int mcls = (int)queclass(bp); 24970Sstevel@tonic-gate kthread_id_t freezer; 24980Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0; 24990Sstevel@tonic-gate 25000Sstevel@tonic-gate ASSERT(q && bp); 25010Sstevel@tonic-gate ASSERT(bp->b_next == NULL); 25020Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer; 25030Sstevel@tonic-gate if (freezer == curthread) { 25040Sstevel@tonic-gate ASSERT(frozenstr(q)); 25050Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 25060Sstevel@tonic-gate } else 25070Sstevel@tonic-gate mutex_enter(QLOCK(q)); 25080Sstevel@tonic-gate 25090Sstevel@tonic-gate /* 25100Sstevel@tonic-gate * Make sanity checks and if qband structure is not yet 25110Sstevel@tonic-gate * allocated, do so. 
25120Sstevel@tonic-gate */ 25130Sstevel@tonic-gate if (mcls == QPCTL) { 25140Sstevel@tonic-gate if (bp->b_band != 0) 25150Sstevel@tonic-gate bp->b_band = 0; /* force to be correct */ 25160Sstevel@tonic-gate } else if (bp->b_band != 0) { 25170Sstevel@tonic-gate int i; 25180Sstevel@tonic-gate qband_t **qbpp; 25190Sstevel@tonic-gate 25200Sstevel@tonic-gate if (bp->b_band > q->q_nband) { 25210Sstevel@tonic-gate qbpp = &q->q_bandp; 25220Sstevel@tonic-gate while (*qbpp) 25230Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 25240Sstevel@tonic-gate while (bp->b_band > q->q_nband) { 25250Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) { 25260Sstevel@tonic-gate if (freezer != curthread) 25270Sstevel@tonic-gate mutex_exit(QLOCK(q)); 25280Sstevel@tonic-gate return (0); 25290Sstevel@tonic-gate } 25300Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat; 25310Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat; 25320Sstevel@tonic-gate q->q_nband++; 25330Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 25340Sstevel@tonic-gate } 25350Sstevel@tonic-gate } 25360Sstevel@tonic-gate qbp = q->q_bandp; 25370Sstevel@tonic-gate i = bp->b_band; 25380Sstevel@tonic-gate while (--i) 25390Sstevel@tonic-gate qbp = qbp->qb_next; 25400Sstevel@tonic-gate } 25410Sstevel@tonic-gate 25420Sstevel@tonic-gate /* 25430Sstevel@tonic-gate * If queue is empty or if message is high priority, 25440Sstevel@tonic-gate * place on the front of the queue. 
25450Sstevel@tonic-gate */ 25460Sstevel@tonic-gate tmp = q->q_first; 25470Sstevel@tonic-gate if ((!tmp) || (mcls == QPCTL)) { 25480Sstevel@tonic-gate bp->b_next = tmp; 25490Sstevel@tonic-gate if (tmp) 25500Sstevel@tonic-gate tmp->b_prev = bp; 25510Sstevel@tonic-gate else 25520Sstevel@tonic-gate q->q_last = bp; 25530Sstevel@tonic-gate q->q_first = bp; 25540Sstevel@tonic-gate bp->b_prev = NULL; 25550Sstevel@tonic-gate if (qbp) { 25560Sstevel@tonic-gate qbp->qb_first = bp; 25570Sstevel@tonic-gate qbp->qb_last = bp; 25580Sstevel@tonic-gate } 25590Sstevel@tonic-gate } else if (qbp) { /* bp->b_band != 0 */ 25600Sstevel@tonic-gate tmp = qbp->qb_first; 25610Sstevel@tonic-gate if (tmp) { 25620Sstevel@tonic-gate 25630Sstevel@tonic-gate /* 25640Sstevel@tonic-gate * Insert bp before the first message in this band. 25650Sstevel@tonic-gate */ 25660Sstevel@tonic-gate bp->b_next = tmp; 25670Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 25680Sstevel@tonic-gate if (tmp->b_prev) 25690Sstevel@tonic-gate tmp->b_prev->b_next = bp; 25700Sstevel@tonic-gate else 25710Sstevel@tonic-gate q->q_first = bp; 25720Sstevel@tonic-gate tmp->b_prev = bp; 25730Sstevel@tonic-gate } else { 25740Sstevel@tonic-gate tmp = q->q_last; 25750Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) || 25760Sstevel@tonic-gate (bp->b_band < tmp->b_band)) { 25770Sstevel@tonic-gate 25780Sstevel@tonic-gate /* 25790Sstevel@tonic-gate * Tack bp on end of queue. 25800Sstevel@tonic-gate */ 25810Sstevel@tonic-gate bp->b_next = NULL; 25820Sstevel@tonic-gate bp->b_prev = tmp; 25830Sstevel@tonic-gate tmp->b_next = bp; 25840Sstevel@tonic-gate q->q_last = bp; 25850Sstevel@tonic-gate } else { 25860Sstevel@tonic-gate tmp = q->q_first; 25870Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL) 25880Sstevel@tonic-gate tmp = tmp->b_next; 25890Sstevel@tonic-gate while (tmp->b_band > bp->b_band) 25900Sstevel@tonic-gate tmp = tmp->b_next; 25910Sstevel@tonic-gate 25920Sstevel@tonic-gate /* 25930Sstevel@tonic-gate * Insert bp before tmp. 
25940Sstevel@tonic-gate */ 25950Sstevel@tonic-gate bp->b_next = tmp; 25960Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 25970Sstevel@tonic-gate if (tmp->b_prev) 25980Sstevel@tonic-gate tmp->b_prev->b_next = bp; 25990Sstevel@tonic-gate else 26000Sstevel@tonic-gate q->q_first = bp; 26010Sstevel@tonic-gate tmp->b_prev = bp; 26020Sstevel@tonic-gate } 26030Sstevel@tonic-gate qbp->qb_last = bp; 26040Sstevel@tonic-gate } 26050Sstevel@tonic-gate qbp->qb_first = bp; 26060Sstevel@tonic-gate } else { /* bp->b_band == 0 && !QPCTL */ 26070Sstevel@tonic-gate 26080Sstevel@tonic-gate /* 26090Sstevel@tonic-gate * If the queue class or band is less than that of the last 26100Sstevel@tonic-gate * message on the queue, tack bp on the end of the queue. 26110Sstevel@tonic-gate */ 26120Sstevel@tonic-gate tmp = q->q_last; 26130Sstevel@tonic-gate if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) { 26140Sstevel@tonic-gate bp->b_next = NULL; 26150Sstevel@tonic-gate bp->b_prev = tmp; 26160Sstevel@tonic-gate tmp->b_next = bp; 26170Sstevel@tonic-gate q->q_last = bp; 26180Sstevel@tonic-gate } else { 26190Sstevel@tonic-gate tmp = q->q_first; 26200Sstevel@tonic-gate while (tmp->b_datap->db_type >= QPCTL) 26210Sstevel@tonic-gate tmp = tmp->b_next; 26220Sstevel@tonic-gate while (tmp->b_band > bp->b_band) 26230Sstevel@tonic-gate tmp = tmp->b_next; 26240Sstevel@tonic-gate 26250Sstevel@tonic-gate /* 26260Sstevel@tonic-gate * Insert bp before tmp. 
26270Sstevel@tonic-gate */ 26280Sstevel@tonic-gate bp->b_next = tmp; 26290Sstevel@tonic-gate bp->b_prev = tmp->b_prev; 26300Sstevel@tonic-gate if (tmp->b_prev) 26310Sstevel@tonic-gate tmp->b_prev->b_next = bp; 26320Sstevel@tonic-gate else 26330Sstevel@tonic-gate q->q_first = bp; 26340Sstevel@tonic-gate tmp->b_prev = bp; 26350Sstevel@tonic-gate } 26360Sstevel@tonic-gate } 26370Sstevel@tonic-gate 26380Sstevel@tonic-gate /* Get message byte count for q_count accounting */ 26390Sstevel@tonic-gate for (tmp = bp; tmp; tmp = tmp->b_cont) { 2640741Smasputra ADD_MBLK_SIZE(tmp, bytecnt); 26410Sstevel@tonic-gate mblkcnt++; 26420Sstevel@tonic-gate } 26430Sstevel@tonic-gate if (qbp) { 26440Sstevel@tonic-gate qbp->qb_count += bytecnt; 26450Sstevel@tonic-gate qbp->qb_mblkcnt += mblkcnt; 26460Sstevel@tonic-gate if ((qbp->qb_count >= qbp->qb_hiwat) || 26470Sstevel@tonic-gate (qbp->qb_mblkcnt >= qbp->qb_hiwat)) { 26480Sstevel@tonic-gate qbp->qb_flag |= QB_FULL; 26490Sstevel@tonic-gate } 26500Sstevel@tonic-gate } else { 26510Sstevel@tonic-gate q->q_count += bytecnt; 26520Sstevel@tonic-gate q->q_mblkcnt += mblkcnt; 26530Sstevel@tonic-gate if ((q->q_count >= q->q_hiwat) || 26540Sstevel@tonic-gate (q->q_mblkcnt >= q->q_hiwat)) { 26550Sstevel@tonic-gate q->q_flag |= QFULL; 26560Sstevel@tonic-gate } 26570Sstevel@tonic-gate } 26580Sstevel@tonic-gate 26590Sstevel@tonic-gate STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL); 26600Sstevel@tonic-gate 26610Sstevel@tonic-gate if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR))) 26620Sstevel@tonic-gate qenable_locked(q); 26630Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 26640Sstevel@tonic-gate if (freezer != curthread) 26650Sstevel@tonic-gate mutex_exit(QLOCK(q)); 26660Sstevel@tonic-gate 26670Sstevel@tonic-gate return (1); 26680Sstevel@tonic-gate } 26690Sstevel@tonic-gate 26700Sstevel@tonic-gate /* 26710Sstevel@tonic-gate * Insert a message before an existing message on the queue. 
If the 26720Sstevel@tonic-gate * existing message is NULL, the new messages is placed on the end of 26730Sstevel@tonic-gate * the queue. The queue class of the new message is ignored. However, 26740Sstevel@tonic-gate * the priority band of the new message must adhere to the following 26750Sstevel@tonic-gate * ordering: 26760Sstevel@tonic-gate * 26770Sstevel@tonic-gate * emp->b_prev->b_band >= mp->b_band >= emp->b_band. 26780Sstevel@tonic-gate * 26790Sstevel@tonic-gate * All flow control parameters are updated. 26800Sstevel@tonic-gate * 26810Sstevel@tonic-gate * insq can be called with the stream frozen, but other utility functions 26820Sstevel@tonic-gate * holding QLOCK, and by streams modules without any locks/frozen. 26830Sstevel@tonic-gate */ 26840Sstevel@tonic-gate int 26850Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp) 26860Sstevel@tonic-gate { 26870Sstevel@tonic-gate mblk_t *tmp; 26880Sstevel@tonic-gate qband_t *qbp = NULL; 26890Sstevel@tonic-gate int mcls = (int)queclass(mp); 26900Sstevel@tonic-gate kthread_id_t freezer; 26910Sstevel@tonic-gate int bytecnt = 0, mblkcnt = 0; 26920Sstevel@tonic-gate 26930Sstevel@tonic-gate freezer = STREAM(q)->sd_freezer; 26940Sstevel@tonic-gate if (freezer == curthread) { 26950Sstevel@tonic-gate ASSERT(frozenstr(q)); 26960Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 26970Sstevel@tonic-gate } else if (MUTEX_HELD(QLOCK(q))) { 26980Sstevel@tonic-gate /* Don't drop lock on exit */ 26990Sstevel@tonic-gate freezer = curthread; 27000Sstevel@tonic-gate } else 27010Sstevel@tonic-gate mutex_enter(QLOCK(q)); 27020Sstevel@tonic-gate 27030Sstevel@tonic-gate if (mcls == QPCTL) { 27040Sstevel@tonic-gate if (mp->b_band != 0) 27050Sstevel@tonic-gate mp->b_band = 0; /* force to be correct */ 27060Sstevel@tonic-gate if (emp && emp->b_prev && 27070Sstevel@tonic-gate (emp->b_prev->b_datap->db_type < QPCTL)) 27080Sstevel@tonic-gate goto badord; 27090Sstevel@tonic-gate } 27100Sstevel@tonic-gate if (emp) { 27110Sstevel@tonic-gate 
if (((mcls == QNORM) && (mp->b_band < emp->b_band)) || 27120Sstevel@tonic-gate (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) && 27130Sstevel@tonic-gate (emp->b_prev->b_band < mp->b_band))) { 27140Sstevel@tonic-gate goto badord; 27150Sstevel@tonic-gate } 27160Sstevel@tonic-gate } else { 27170Sstevel@tonic-gate tmp = q->q_last; 27180Sstevel@tonic-gate if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) { 27190Sstevel@tonic-gate badord: 27200Sstevel@tonic-gate cmn_err(CE_WARN, 27210Sstevel@tonic-gate "insq: attempt to insert message out of order " 27220Sstevel@tonic-gate "on q %p", (void *)q); 27230Sstevel@tonic-gate if (freezer != curthread) 27240Sstevel@tonic-gate mutex_exit(QLOCK(q)); 27250Sstevel@tonic-gate return (0); 27260Sstevel@tonic-gate } 27270Sstevel@tonic-gate } 27280Sstevel@tonic-gate 27290Sstevel@tonic-gate if (mp->b_band != 0) { 27300Sstevel@tonic-gate int i; 27310Sstevel@tonic-gate qband_t **qbpp; 27320Sstevel@tonic-gate 27330Sstevel@tonic-gate if (mp->b_band > q->q_nband) { 27340Sstevel@tonic-gate qbpp = &q->q_bandp; 27350Sstevel@tonic-gate while (*qbpp) 27360Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 27370Sstevel@tonic-gate while (mp->b_band > q->q_nband) { 27380Sstevel@tonic-gate if ((*qbpp = allocband()) == NULL) { 27390Sstevel@tonic-gate if (freezer != curthread) 27400Sstevel@tonic-gate mutex_exit(QLOCK(q)); 27410Sstevel@tonic-gate return (0); 27420Sstevel@tonic-gate } 27430Sstevel@tonic-gate (*qbpp)->qb_hiwat = q->q_hiwat; 27440Sstevel@tonic-gate (*qbpp)->qb_lowat = q->q_lowat; 27450Sstevel@tonic-gate q->q_nband++; 27460Sstevel@tonic-gate qbpp = &(*qbpp)->qb_next; 27470Sstevel@tonic-gate } 27480Sstevel@tonic-gate } 27490Sstevel@tonic-gate qbp = q->q_bandp; 27500Sstevel@tonic-gate i = mp->b_band; 27510Sstevel@tonic-gate while (--i) 27520Sstevel@tonic-gate qbp = qbp->qb_next; 27530Sstevel@tonic-gate } 27540Sstevel@tonic-gate 27550Sstevel@tonic-gate if ((mp->b_next = emp) != NULL) { 27560Sstevel@tonic-gate if ((mp->b_prev = 
emp->b_prev) != NULL) 27570Sstevel@tonic-gate emp->b_prev->b_next = mp; 27580Sstevel@tonic-gate else 27590Sstevel@tonic-gate q->q_first = mp; 27600Sstevel@tonic-gate emp->b_prev = mp; 27610Sstevel@tonic-gate } else { 27620Sstevel@tonic-gate if ((mp->b_prev = q->q_last) != NULL) 27630Sstevel@tonic-gate q->q_last->b_next = mp; 27640Sstevel@tonic-gate else 27650Sstevel@tonic-gate q->q_first = mp; 27660Sstevel@tonic-gate q->q_last = mp; 27670Sstevel@tonic-gate } 27680Sstevel@tonic-gate 27690Sstevel@tonic-gate /* Get mblk and byte count for q_count accounting */ 27700Sstevel@tonic-gate for (tmp = mp; tmp; tmp = tmp->b_cont) { 2771741Smasputra ADD_MBLK_SIZE(tmp, bytecnt); 27720Sstevel@tonic-gate mblkcnt++; 27730Sstevel@tonic-gate } 27740Sstevel@tonic-gate 27750Sstevel@tonic-gate if (qbp) { /* adjust qband pointers and count */ 27760Sstevel@tonic-gate if (!qbp->qb_first) { 27770Sstevel@tonic-gate qbp->qb_first = mp; 27780Sstevel@tonic-gate qbp->qb_last = mp; 27790Sstevel@tonic-gate } else { 27800Sstevel@tonic-gate if (mp->b_prev == NULL || (mp->b_prev != NULL && 27810Sstevel@tonic-gate (mp->b_prev->b_band != mp->b_band))) 27820Sstevel@tonic-gate qbp->qb_first = mp; 27830Sstevel@tonic-gate else if (mp->b_next == NULL || (mp->b_next != NULL && 27840Sstevel@tonic-gate (mp->b_next->b_band != mp->b_band))) 27850Sstevel@tonic-gate qbp->qb_last = mp; 27860Sstevel@tonic-gate } 27870Sstevel@tonic-gate qbp->qb_count += bytecnt; 27880Sstevel@tonic-gate qbp->qb_mblkcnt += mblkcnt; 27890Sstevel@tonic-gate if ((qbp->qb_count >= qbp->qb_hiwat) || 27900Sstevel@tonic-gate (qbp->qb_mblkcnt >= qbp->qb_hiwat)) { 27910Sstevel@tonic-gate qbp->qb_flag |= QB_FULL; 27920Sstevel@tonic-gate } 27930Sstevel@tonic-gate } else { 27940Sstevel@tonic-gate q->q_count += bytecnt; 27950Sstevel@tonic-gate q->q_mblkcnt += mblkcnt; 27960Sstevel@tonic-gate if ((q->q_count >= q->q_hiwat) || 27970Sstevel@tonic-gate (q->q_mblkcnt >= q->q_hiwat)) { 27980Sstevel@tonic-gate q->q_flag |= QFULL; 27990Sstevel@tonic-gate 
} 28000Sstevel@tonic-gate } 28010Sstevel@tonic-gate 28020Sstevel@tonic-gate STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL); 28030Sstevel@tonic-gate 28040Sstevel@tonic-gate if (canenable(q) && (q->q_flag & QWANTR)) 28050Sstevel@tonic-gate qenable_locked(q); 28060Sstevel@tonic-gate 28070Sstevel@tonic-gate ASSERT(MUTEX_HELD(QLOCK(q))); 28080Sstevel@tonic-gate if (freezer != curthread) 28090Sstevel@tonic-gate mutex_exit(QLOCK(q)); 28100Sstevel@tonic-gate 28110Sstevel@tonic-gate return (1); 28120Sstevel@tonic-gate } 28130Sstevel@tonic-gate 28140Sstevel@tonic-gate /* 28150Sstevel@tonic-gate * Create and put a control message on queue. 28160Sstevel@tonic-gate */ 28170Sstevel@tonic-gate int 28180Sstevel@tonic-gate putctl(queue_t *q, int type) 28190Sstevel@tonic-gate { 28200Sstevel@tonic-gate mblk_t *bp; 28210Sstevel@tonic-gate 28220Sstevel@tonic-gate if ((datamsg(type) && (type != M_DELAY)) || 28230Sstevel@tonic-gate (bp = allocb_tryhard(0)) == NULL) 28240Sstevel@tonic-gate return (0); 28250Sstevel@tonic-gate bp->b_datap->db_type = (unsigned char) type; 28260Sstevel@tonic-gate 28270Sstevel@tonic-gate put(q, bp); 28280Sstevel@tonic-gate 28290Sstevel@tonic-gate return (1); 28300Sstevel@tonic-gate } 28310Sstevel@tonic-gate 28320Sstevel@tonic-gate /* 28330Sstevel@tonic-gate * Control message with a single-byte parameter 28340Sstevel@tonic-gate */ 28350Sstevel@tonic-gate int 28360Sstevel@tonic-gate putctl1(queue_t *q, int type, int param) 28370Sstevel@tonic-gate { 28380Sstevel@tonic-gate mblk_t *bp; 28390Sstevel@tonic-gate 28400Sstevel@tonic-gate if ((datamsg(type) && (type != M_DELAY)) || 28410Sstevel@tonic-gate (bp = allocb_tryhard(1)) == NULL) 28420Sstevel@tonic-gate return (0); 28430Sstevel@tonic-gate bp->b_datap->db_type = (unsigned char)type; 28440Sstevel@tonic-gate *bp->b_wptr++ = (unsigned char)param; 28450Sstevel@tonic-gate 28460Sstevel@tonic-gate put(q, bp); 28470Sstevel@tonic-gate 28480Sstevel@tonic-gate return (1); 28490Sstevel@tonic-gate } 28500Sstevel@tonic-gate 
/*
 * As putctl1(), but the message is delivered to the next queue via
 * putnext() rather than to 'q' itself.  Returns 1 on success, 0 on
 * bad message type or allocation failure.
 */
int
putnextctl1(queue_t *q, int type, int param)
{
	mblk_t *bp;

	if ((datamsg(type) && (type != M_DELAY)) ||
	    ((bp = allocb_tryhard(1)) == NULL))
		return (0);

	bp->b_datap->db_type = (unsigned char)type;
	*bp->b_wptr++ = (unsigned char)param;

	putnext(q, bp);

	return (1);
}

/*
 * As putctl(), but the zero-length control message is delivered to the
 * next queue via putnext().  Returns 1 on success, 0 on failure.
 */
int
putnextctl(queue_t *q, int type)
{
	mblk_t *bp;

	if ((datamsg(type) && (type != M_DELAY)) ||
	    ((bp = allocb_tryhard(0)) == NULL))
		return (0);
	bp->b_datap->db_type = (unsigned char)type;

	putnext(q, bp);

	return (1);
}

/*
 * Return the queue upstream from this one
 *
 * "Upstream" is found by crossing to the partner queue, stepping to
 * its q_next, and crossing back.  Returns NULL when there is no
 * upstream queue (e.g. at the end of the stream).
 */
queue_t *
backq(queue_t *q)
{
	q = _OTHERQ(q);
	if (q->q_next) {
		q = q->q_next;
		return (_OTHERQ(q));
	}
	return (NULL);
}

/*
 * Send a block back up the queue in reverse from this
 * one (e.g. to respond to ioctls)
 */
void
qreply(queue_t *q, mblk_t *bp)
{
	ASSERT(q && bp);

	/* Reverse direction: put to the next queue of our partner. */
	putnext(_OTHERQ(q), bp);
}

/*
 * Streams Queue Scheduling
 *
 * Queues are enabled through qenable() when they have messages to
 * process. They are serviced by queuerun(), which runs each enabled
 * queue's service procedure. The call to queuerun() is processor
 * dependent - the general principle is that it be run whenever a queue
 * is enabled but before returning to user level. For system calls,
 * the function runqueues() is called if their action causes a queue
 * to be enabled. For device interrupts, queuerun() should be
 * called before returning from the last level of interrupt. Beyond
 * this, no timing assumptions should be made about queue scheduling.
 */

/*
 * Enable a queue: put it on list of those whose service procedures are
 * ready to run and set up the scheduling mechanism.
 * The broadcast is done outside the mutex -> to avoid the woken thread
 * from contending with the mutex. This is OK 'cos the queue has been
 * enqueued on the runlist and flagged safely at this point.
 */
void
qenable(queue_t *q)
{
	mutex_enter(QLOCK(q));
	qenable_locked(q);
	mutex_exit(QLOCK(q));
}
/*
 * Return number of messages on queue
 *
 * Walks the q_first list under QLOCK; cost is linear in the number of
 * messages queued.
 */
int
qsize(queue_t *qp)
{
	int count = 0;
	mblk_t *mp;

	mutex_enter(QLOCK(qp));
	for (mp = qp->q_first; mp; mp = mp->b_next)
		count++;
	mutex_exit(QLOCK(qp));
	return (count);
}

/*
 * noenable - set queue so that putq() will not enable it.
 * enableok - set queue so that putq() can enable it.
 */
void
noenable(queue_t *q)
{
	mutex_enter(QLOCK(q));
	q->q_flag |= QNOENB;
	mutex_exit(QLOCK(q));
}

void
enableok(queue_t *q)
{
	mutex_enter(QLOCK(q));
	q->q_flag &= ~QNOENB;
	mutex_exit(QLOCK(q));
}

/*
 * Set queue fields.
 *
 * 'what' selects the field (QHIWAT, QLOWAT, QMAXPSZ, ...); 'pri'
 * selects a priority band (0 means the queue itself, > 0 a qband,
 * allocating bands as needed); 'val' is the new value.  Returns 0 on
 * success or EINVAL/EAGAIN/EPERM on failure.
 */
int
strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
{
	qband_t *qbp = NULL;
	queue_t *wrq;
	int error = 0;
	kthread_id_t freezer;

	/*
	 * If the stream is frozen by the current thread, QLOCK is
	 * already held by the caller (asserted below); otherwise take
	 * it here.
	 */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if (what >= QBAD) {
		error = EINVAL;
		goto done;
	}
	if (pri != 0) {
		int i;
		qband_t **qbpp;

		/*
		 * Grow the band list out to band 'pri' if necessary,
		 * inheriting the queue's default watermarks for every
		 * newly allocated band.
		 */
		if (pri > q->q_nband) {
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;
			while (pri > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					error = EAGAIN;
					goto done;
				}
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}
		/* Walk to the band selected by 'pri'. */
		qbp = q->q_bandp;
		i = pri;
		while (--i)
			qbp = qbp->qb_next;
	}
	switch (what) {

	case QHIWAT:
		if (qbp)
			qbp->qb_hiwat = (size_t)val;
		else
			q->q_hiwat = (size_t)val;
		break;

	case QLOWAT:
		if (qbp)
			qbp->qb_lowat = (size_t)val;
		else
			q->q_lowat = (size_t)val;
		break;

	case QMAXPSZ:
		if (qbp)
			error = EINVAL;
		else
			q->q_maxpsz = (ssize_t)val;

		/*
		 * Performance concern, strwrite looks at the module below
		 * the stream head for the maxpsz each time it does a write
		 * we now cache it at the stream head. Check to see if this
		 * queue is sitting directly below the stream head.
		 */
		wrq = STREAM(q)->sd_wrq;
		if (q != wrq->q_next)
			break;

		/*
		 * If the stream is not frozen drop the current QLOCK and
		 * acquire the sd_wrq QLOCK which protects sd_qn_*
		 */
		if (freezer != curthread) {
			mutex_exit(QLOCK(q));
			mutex_enter(QLOCK(wrq));
		}
		ASSERT(MUTEX_HELD(QLOCK(wrq)));

		/*
		 * Clamp the cached value: INFPSZ becomes strmsgsz, and
		 * FIFOs are additionally limited to PIPE_BUF.
		 */
		if (strmsgsz != 0) {
			if (val == INFPSZ)
				val = strmsgsz;
			else {
				if (STREAM(q)->sd_vnode->v_type == VFIFO)
					val = MIN(PIPE_BUF, val);
				else
					val = MIN(strmsgsz, val);
			}
		}

		STREAM(q)->sd_qn_maxpsz = val;
		/* Restore the original QLOCK before falling out. */
		if (freezer != curthread) {
			mutex_exit(QLOCK(wrq));
			mutex_enter(QLOCK(q));
		}
		break;

	case QMINPSZ:
		if (qbp)
			error = EINVAL;
		else
			q->q_minpsz = (ssize_t)val;

		/*
		 * Performance concern, strwrite looks at the module below
		 * the stream head for the maxpsz each time it does a write
		 * we now cache it at the stream head. Check to see if this
		 * queue is sitting directly below the stream head.
		 */
		wrq = STREAM(q)->sd_wrq;
		if (q != wrq->q_next)
			break;

		/*
		 * If the stream is not frozen drop the current QLOCK and
		 * acquire the sd_wrq QLOCK which protects sd_qn_*
		 */
		if (freezer != curthread) {
			mutex_exit(QLOCK(q));
			mutex_enter(QLOCK(wrq));
		}
		STREAM(q)->sd_qn_minpsz = (ssize_t)val;

		if (freezer != curthread) {
			mutex_exit(QLOCK(wrq));
			mutex_enter(QLOCK(q));
		}
		break;

	case QSTRUIOT:
		if (qbp)
			error = EINVAL;
		else
			q->q_struiot = (ushort_t)val;
		break;

	case QCOUNT:
	case QFIRST:
	case QLAST:
	case QFLAG:
		/* Read-only fields; only strqget() may report them. */
		error = EPERM;
		break;

	default:
		error = EINVAL;
		break;
	}
done:
	if (freezer != curthread)
		mutex_exit(QLOCK(q));
	return (error);
}

/*
 * Get queue fields.
 *
 * Mirror of strqset(): reads the field selected by 'what' (for the
 * band selected by 'pri', or the queue itself when pri == 0) into
 * *valp.  Note that, like strqset(), this allocates missing bands up
 * to 'pri' as a side effect.  Returns 0 or EINVAL/EAGAIN.
 */
int
strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
{
	qband_t *qbp = NULL;
	int error = 0;
	kthread_id_t freezer;

	/* Same frozen-stream QLOCK protocol as strqset(). */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));
	if (what >= QBAD) {
		error = EINVAL;
		goto done;
	}
	if (pri != 0) {
		int i;
		qband_t **qbpp;

		/* Extend the band list to 'pri' if it is short. */
		if (pri > q->q_nband) {
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;
			while (pri > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					error = EAGAIN;
					goto done;
				}
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}
		qbp = q->q_bandp;
		i = pri;
		while (--i)
			qbp = qbp->qb_next;
	}
	switch (what) {
	case QHIWAT:
		if (qbp)
			*(size_t *)valp = qbp->qb_hiwat;
		else
			*(size_t *)valp = q->q_hiwat;
		break;

	case QLOWAT:
		if (qbp)
			*(size_t *)valp = qbp->qb_lowat;
		else
			*(size_t *)valp = q->q_lowat;
		break;

	case QMAXPSZ:
		if (qbp)
			error = EINVAL;
		else
			*(ssize_t *)valp = q->q_maxpsz;
		break;

	case QMINPSZ:
		if (qbp)
			error = EINVAL;
		else
			*(ssize_t *)valp = q->q_minpsz;
		break;

	case QCOUNT:
		if (qbp)
			*(size_t *)valp = qbp->qb_count;
		else
			*(size_t *)valp = q->q_count;
		break;

	case QFIRST:
		if (qbp)
			*(mblk_t **)valp = qbp->qb_first;
		else
			*(mblk_t **)valp = q->q_first;
		break;

	case QLAST:
		if (qbp)
			*(mblk_t **)valp = qbp->qb_last;
		else
			*(mblk_t **)valp = q->q_last;
		break;

	case QFLAG:
		if (qbp)
			*(uint_t *)valp = qbp->qb_flag;
		else
			*(uint_t *)valp = q->q_flag;
		break;

	case QSTRUIOT:
		if (qbp)
			error = EINVAL;
		else
			*(short *)valp = q->q_struiot;
		break;

	default:
		error = EINVAL;
		break;
	}
done:
	if (freezer != curthread)
		mutex_exit(QLOCK(q));
	return (error);
}

/*
 * Function awakes all in cvwait/sigwait/pollwait, on one of:
 *	QWANTWSYNC or QWANTR or QWANTW,
 *
 * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
 * deferred wakeup will be done. Also if strpoll() in progress then a
 * deferred pollwakeup will be done.
 */
void
strwakeq(queue_t *q, int flag)
{
	stdata_t *stp = STREAM(q);
	pollhead_t *pl;

	mutex_enter(&stp->sd_lock);
	pl = &stp->sd_pollist;
	if (flag & QWANTWSYNC) {
		/* Synchronous-write wakeup: only valid on a write queue. */
		ASSERT(!(q->q_flag & QREADR));
		if (stp->sd_flag & WSLEEP) {
			stp->sd_flag &= ~WSLEEP;
			cv_broadcast(&stp->sd_wrq->q_wait);
		} else {
			/* No sleeper yet; record a deferred wakeup. */
			stp->sd_wakeq |= WSLEEP;
		}

		/*
		 * sd_lock is dropped across pollwakeup() — presumably to
		 * avoid holding it through the pollhead callbacks; confirm
		 * the lock-ordering requirement against pollwakeup().
		 */
		mutex_exit(&stp->sd_lock);
		pollwakeup(pl, POLLWRNORM);
		mutex_enter(&stp->sd_lock);

		if (stp->sd_sigflags & S_WRNORM)
			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
	} else if (flag & QWANTR) {
		/* Reader wakeup. */
		if (stp->sd_flag & RSLEEP) {
			stp->sd_flag &= ~RSLEEP;
			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
		} else {
			/* No sleeper yet; record a deferred wakeup. */
			stp->sd_wakeq |= RSLEEP;
		}

		mutex_exit(&stp->sd_lock);
		pollwakeup(pl, POLLIN | POLLRDNORM);
		mutex_enter(&stp->sd_lock);

		{
			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);

			if (events)
				strsendsig(stp->sd_siglist, events, 0, 0);
		}
	} else {
		/* Plain writer wakeup (QWANTW). */
		if (stp->sd_flag & WSLEEP) {
			stp->sd_flag &= ~WSLEEP;
			cv_broadcast(&stp->sd_wrq->q_wait);
		}

		mutex_exit(&stp->sd_lock);
		pollwakeup(pl, POLLWRNORM);
		mutex_enter(&stp->sd_lock);

		if (stp->sd_sigflags & S_WRNORM)
			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
	}
	mutex_exit(&stp->sd_lock);
}

/*
 * Copy data from the uio in 'dp' into the STRUIO_SPEC-marked regions of
 * the mblk chain 'mp' (synchronous-stream "uioget").  With 'noblock'
 * set, page faults are trapped via on_trap() and reported as
 * EWOULDBLOCK instead of sleeping.  Returns 0 or an errno value.
 */
int
struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
{
	stdata_t *stp = STREAM(q);
	int typ = STRUIOT_STANDARD;
	uio_t *uiop = &dp->d_uio;
	dblk_t *dbp;
	ssize_t uiocnt;
	ssize_t cnt;
	unsigned char *ptr;
	ssize_t resid;
	int error = 0;
	on_trap_data_t otd;
	queue_t *stwrq;

	/*
	 * Plumbing may change while taking the type so store the
	 * queue in a temporary variable. It doesn't matter even
	 * if the we take the type from the previous plumbing,
	 * that's because if the plumbing has changed when we were
	 * holding the queue in a temporary variable, we can continue
	 * processing the message the way it would have been processed
	 * in the old plumbing, without any side effects but a bit
	 * extra processing for partial ip header checksum.
	 *
	 * This has been done to avoid holding the sd_lock which is
	 * very hot.
	 */

	stwrq = stp->sd_struiowrq;
	if (stwrq)
		typ = stwrq->q_struiot;

	/* Walk the chain until the uio is drained or the chain ends. */
	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
		dbp = mp->b_datap;
		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
		cnt = MIN(uiocnt, uiop->uio_resid);
		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
			/*
			 * Either this mblk has already been processed
			 * or there is no more room in this mblk (?).
			 */
			continue;
		}
		switch (typ) {
		case STRUIOT_STANDARD:
			if (noblock) {
				/*
				 * Trap data-access faults so a pagein
				 * cannot block us; report EWOULDBLOCK
				 * instead.
				 */
				if (on_trap(&otd, OT_DATA_ACCESS)) {
					no_trap();
					error = EWOULDBLOCK;
					goto out;
				}
			}
			/* Assignment in condition is intentional. */
			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
				if (noblock)
					no_trap();
				goto out;
			}
			if (noblock)
				no_trap();
			break;

		default:
			error = EIO;
			goto out;
		}
		dbp->db_struioflag |= STRUIO_DONE;
		dbp->db_cksumstuff += cnt;
	}
out:
	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
		/*
		 * A fault has occured and some bytes were moved to the
		 * current mblk, the uio_t has already been updated by
		 * the appropriate uio routine, so also update the mblk
		 * to reflect this in case this same mblk chain is used
		 * again (after the fault has been handled).
		 */
		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
		if (uiocnt >= resid)
			dbp->db_cksumstuff += resid;
	}
	return (error);
}

/*
 * Try to enter queue synchronously. Any attempt to enter a closing queue will
 * fails. The qp->q_rwcnt keeps track of the number of successful entries so
 * that removeq() will not try to close the queue while a thread is inside the
 * queue.
 *
 * Returns B_TRUE on entry (q_rwcnt incremented), B_FALSE if the queue
 * is closing (QWCLOSE set).
 */
static boolean_t
rwnext_enter(queue_t *qp)
{
	mutex_enter(QLOCK(qp));
	if (qp->q_flag & QWCLOSE) {
		mutex_exit(QLOCK(qp));
		return (B_FALSE);
	}
	qp->q_rwcnt++;
	ASSERT(qp->q_rwcnt != 0);	/* guard against counter wraparound */
	mutex_exit(QLOCK(qp));
	return (B_TRUE);
}

/*
 * Decrease the count of threads running in sync stream queue and wake up any
 * threads blocked in removeq().
 */
static void
rwnext_exit(queue_t *qp)
{
	mutex_enter(QLOCK(qp));
	qp->q_rwcnt--;
	/* removeq() sets QWANTRMQSYNC while waiting for q_rwcnt to drain. */
	if (qp->q_flag & QWANTRMQSYNC) {
		qp->q_flag &= ~QWANTRMQSYNC;
		cv_broadcast(&qp->q_wait);
	}
	mutex_exit(QLOCK(qp));
}

/*
 * The purpose of rwnext() is to call the rw procedure of the next
 * (downstream) modules queue.
 *
 * treated as put entrypoint for perimeter syncronization.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT
 * sync queues). If it is CIPUT sync queue sq_count is incremented and it does
 * not matter if any regular put entrypoints have been already entered. We
 * can't increment one of the sq_putcounts (instead of sq_count) because
 * qwait_rw won't know which counter to decrement.
 *
 * It would be reasonable to add the lockless FASTPUT logic.
 *
 * Returns EINVAL when there is no synchronous r/w procedure downstream
 * (or the plumbing changed / queue is closing), EWOULDBLOCK when the
 * write side is flow-controlled, otherwise the rw procedure's result.
 */
int
rwnext(queue_t *qp, struiod_t *dp)
{
	queue_t *nqp;
	syncq_t *sq;
	uint16_t count;
	uint16_t flags;
	struct qinit *qi;
	int (*proc)();
	struct stdata *stp;
	int isread;
	int rval;

	stp = STREAM(qp);
	/*
	 * Prevent q_next from changing by holding sd_lock until acquiring
	 * SQLOCK. Note that a read-side rwnext from the streamhead will
	 * already have sd_lock acquired. In either case sd_lock is always
	 * released after acquiring SQLOCK.
	 *
	 * The streamhead read-side holding sd_lock when calling rwnext is
	 * required to prevent a race condition were M_DATA mblks flowing
	 * up the read-side of the stream could be bypassed by a rwnext()
	 * down-call. In this case sd_lock acts as the streamhead perimeter.
	 */
	if ((nqp = _WR(qp)) == qp) {
		/* Write side: target is the downstream write queue. */
		isread = 0;
		mutex_enter(&stp->sd_lock);
		qp = nqp->q_next;
	} else {
		/* Read side: target is the partner of the next write queue. */
		isread = 1;
		if (nqp != stp->sd_wrq)
			/* Not streamhead */
			mutex_enter(&stp->sd_lock);
		qp = _RD(nqp->q_next);
	}
	qi = qp->q_qinfo;
	if (qp->q_struiot == STRUIOT_NONE || !(proc = qi->qi_rwp)) {
		/*
		 * Not a synchronous module or no r/w procedure for this
		 * queue, so just return EINVAL and let the caller handle it.
		 */
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}

	if (rwnext_enter(qp) == B_FALSE) {
		mutex_exit(&stp->sd_lock);
		return (EINVAL);
	}

	sq = qp->q_syncq;
	mutex_enter(SQLOCK(sq));
	mutex_exit(&stp->sd_lock);
	count = sq->sq_count;
	flags = sq->sq_flags;
	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));

	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
		/*
		 * if this queue is being closed, return.
		 */
		if (qp->q_flag & QWCLOSE) {
			mutex_exit(SQLOCK(sq));
			rwnext_exit(qp);
			return (EINVAL);
		}

		/*
		 * Wait until we can enter the inner perimeter.
		 */
		sq->sq_flags = flags | SQ_WANTWAKEUP;
		cv_wait(&sq->sq_wait, SQLOCK(sq));
		count = sq->sq_count;
		flags = sq->sq_flags;
	}

	if (isread == 0 && stp->sd_struiowrq == NULL ||
	    isread == 1 && stp->sd_struiordq == NULL) {
		/*
		 * Stream plumbing changed while waiting for inner perimeter
		 * so just return EINVAL and let the caller handle it.
		 */
		mutex_exit(SQLOCK(sq));
		rwnext_exit(qp);
		return (EINVAL);
	}
	if (!(flags & SQ_CIPUT))
		sq->sq_flags = flags | SQ_EXCL;
	sq->sq_count = count + 1;
	ASSERT(sq->sq_count != 0);		/* Wraparound */
	/*
	 * Note: The only message ordering guarantee that rwnext() makes is
	 * for the write queue flow-control case. All others (r/w queue
	 * with q_count > 0 (or q_first != 0)) are the resposibilty of
	 * the queue's rw procedure. This could be genralized here buy
	 * running the queue's service procedure, but that wouldn't be
	 * the most efficent for all cases.
	 */
	mutex_exit(SQLOCK(sq));
	if (! isread && (qp->q_flag & QFULL)) {
		/*
		 * Write queue may be flow controlled. If so,
		 * mark the queue for wakeup when it's not.
		 */
		mutex_enter(QLOCK(qp));
		if (qp->q_flag & QFULL) {
			qp->q_flag |= QWANTWSYNC;
			mutex_exit(QLOCK(qp));
			rval = EWOULDBLOCK;
			goto out;
		}
		mutex_exit(QLOCK(qp));
	}

	if (! isread && dp->d_mp)
		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
		    dp->d_mp->b_datap->db_base);

	rval = (*proc)(qp, dp);

	if (isread && dp->d_mp)
		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
out:
	/*
	 * The queue is protected from being freed by sq_count, so it is
	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
	 */
	rwnext_exit(qp);

	mutex_enter(SQLOCK(sq));
	flags = sq->sq_flags;
	ASSERT(sq->sq_count != 0);
	sq->sq_count--;
	if (flags & SQ_TAIL) {
		putnext_tail(sq, qp, flags);
		/*
		 * The only purpose of this ASSERT is to preserve calling stack
		 * in DEBUG kernel.
		 */
		ASSERT(flags & SQ_TAIL);
		return (rval);
	}
	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
	/*
	 * Safe to always drop SQ_EXCL:
	 *	Not SQ_CIPUT means we set SQ_EXCL above
	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
	 *	did a qwriter(INNER) in which case nobody else
	 *	is in the inner perimeter and we are exiting.
	 *
	 * I would like to make the following assertion:
	 *
	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
	 *	sq->sq_count == 0);
	 *
	 * which indicates that if we are both putshared and exclusive,
	 * we became exclusive while executing the putproc, and the only
	 * claim on the syncq was the one we dropped a few lines above.
	 * But other threads that enter putnext while the syncq is exclusive
	 * need to make a claim as they may need to drop SQLOCK in the
	 * has_writers case to avoid deadlocks. If these threads are
	 * delayed or preempted, it is possible that the writer thread can
	 * find out that there are other claims making the (sq_count == 0)
	 * test invalid.
36200Sstevel@tonic-gate */ 36210Sstevel@tonic-gate 36220Sstevel@tonic-gate sq->sq_flags = flags & ~SQ_EXCL; 36230Sstevel@tonic-gate if (sq->sq_flags & SQ_WANTWAKEUP) { 36240Sstevel@tonic-gate sq->sq_flags &= ~SQ_WANTWAKEUP; 36250Sstevel@tonic-gate cv_broadcast(&sq->sq_wait); 36260Sstevel@tonic-gate } 36270Sstevel@tonic-gate mutex_exit(SQLOCK(sq)); 36280Sstevel@tonic-gate return (rval); 36290Sstevel@tonic-gate } 36300Sstevel@tonic-gate 36310Sstevel@tonic-gate /* 36320Sstevel@tonic-gate * The purpose of infonext() is to call the info procedure of the next 36330Sstevel@tonic-gate * (downstream) modules queue. 36340Sstevel@tonic-gate * 36350Sstevel@tonic-gate * treated as put entrypoint for perimeter syncronization. 36360Sstevel@tonic-gate * 36370Sstevel@tonic-gate * There's no need to grab sq_putlocks here (which only exist for CIPUT 36380Sstevel@tonic-gate * sync queues). If it is CIPUT sync queue regular sq_count is incremented and 36390Sstevel@tonic-gate * it does not matter if any regular put entrypoints have been already 36400Sstevel@tonic-gate * entered. 36410Sstevel@tonic-gate */ 36420Sstevel@tonic-gate int 36430Sstevel@tonic-gate infonext(queue_t *qp, infod_t *idp) 36440Sstevel@tonic-gate { 36450Sstevel@tonic-gate queue_t *nqp; 36460Sstevel@tonic-gate syncq_t *sq; 36470Sstevel@tonic-gate uint16_t count; 36480Sstevel@tonic-gate uint16_t flags; 36490Sstevel@tonic-gate struct qinit *qi; 36500Sstevel@tonic-gate int (*proc)(); 36510Sstevel@tonic-gate struct stdata *stp; 36520Sstevel@tonic-gate int rval; 36530Sstevel@tonic-gate 36540Sstevel@tonic-gate stp = STREAM(qp); 36550Sstevel@tonic-gate /* 36560Sstevel@tonic-gate * Prevent q_next from changing by holding sd_lock until 36570Sstevel@tonic-gate * acquiring SQLOCK. 
36580Sstevel@tonic-gate */ 36590Sstevel@tonic-gate mutex_enter(&stp->sd_lock); 36600Sstevel@tonic-gate if ((nqp = _WR(qp)) == qp) { 36610Sstevel@tonic-gate qp = nqp->q_next; 36620Sstevel@tonic-gate } else { 36630Sstevel@tonic-gate qp = _RD(nqp->q_next); 36640Sstevel@tonic-gate } 36650Sstevel@tonic-gate qi = qp->q_qinfo; 36660Sstevel@tonic-gate if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) { 36670Sstevel@tonic-gate mutex_exit(&stp->sd_lock); 36680Sstevel@tonic-gate return (EINVAL); 36690Sstevel@tonic-gate } 36700Sstevel@tonic-gate sq = qp->q_syncq; 36710Sstevel@tonic-gate mutex_enter(SQLOCK(sq)); 36720Sstevel@tonic-gate mutex_exit(&stp->sd_lock); 36730Sstevel@tonic-gate count = sq->sq_count; 36740Sstevel@tonic-gate flags = sq->sq_flags; 36750Sstevel@tonic-gate ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT)); 36760Sstevel@tonic-gate 36770Sstevel@tonic-gate while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) { 36780Sstevel@tonic-gate /* 36790Sstevel@tonic-gate * Wait until we can enter the inner perimeter. 36800Sstevel@tonic-gate */ 36810Sstevel@tonic-gate sq->sq_flags = flags | SQ_WANTWAKEUP; 36820Sstevel@tonic-gate cv_wait(&sq->sq_wait, SQLOCK(sq)); 36830Sstevel@tonic-gate count = sq->sq_count; 36840Sstevel@tonic-gate flags = sq->sq_flags; 36850Sstevel@tonic-gate } 36860Sstevel@tonic-gate 36870Sstevel@tonic-gate if (! 
(flags & SQ_CIPUT)) 36880Sstevel@tonic-gate sq->sq_flags = flags | SQ_EXCL; 36890Sstevel@tonic-gate sq->sq_count = count + 1; 36900Sstevel@tonic-gate ASSERT(sq->sq_count != 0); /* Wraparound */ 36910Sstevel@tonic-gate mutex_exit(SQLOCK(sq)); 36920Sstevel@tonic-gate 36930Sstevel@tonic-gate rval = (*proc)(qp, idp); 36940Sstevel@tonic-gate 36950Sstevel@tonic-gate mutex_enter(SQLOCK(sq)); 36960Sstevel@tonic-gate flags = sq->sq_flags; 36970Sstevel@tonic-gate ASSERT(sq->sq_count != 0); 36980Sstevel@tonic-gate sq->sq_count--; 36990Sstevel@tonic-gate if (flags & SQ_TAIL) { 37000Sstevel@tonic-gate putnext_tail(sq, qp, flags); 37010Sstevel@tonic-gate /* 37020Sstevel@tonic-gate * The only purpose of this ASSERT is to preserve calling stack 37030Sstevel@tonic-gate * in DEBUG kernel. 37040Sstevel@tonic-gate */ 37050Sstevel@tonic-gate ASSERT(flags & SQ_TAIL); 37060Sstevel@tonic-gate return (rval); 37070Sstevel@tonic-gate } 37080Sstevel@tonic-gate ASSERT(flags & (SQ_EXCL|SQ_CIPUT)); 37090Sstevel@tonic-gate /* 37100Sstevel@tonic-gate * XXXX 37110Sstevel@tonic-gate * I am not certain the next comment is correct here. I need to consider 37120Sstevel@tonic-gate * why the infonext is called, and if dropping SQ_EXCL unless non-CIPUT 37130Sstevel@tonic-gate * might cause other problems. It just might be safer to drop it if 37140Sstevel@tonic-gate * !SQ_CIPUT because that is when we set it. 37150Sstevel@tonic-gate */ 37160Sstevel@tonic-gate /* 37170Sstevel@tonic-gate * Safe to always drop SQ_EXCL: 37180Sstevel@tonic-gate * Not SQ_CIPUT means we set SQ_EXCL above 37190Sstevel@tonic-gate * For SQ_CIPUT SQ_EXCL will only be set if the put procedure 37200Sstevel@tonic-gate * did a qwriter(INNER) in which case nobody else 37210Sstevel@tonic-gate * is in the inner perimeter and we are exiting. 
37220Sstevel@tonic-gate * 37230Sstevel@tonic-gate * I would like to make the following assertion: 37240Sstevel@tonic-gate * 37250Sstevel@tonic-gate * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) || 37260Sstevel@tonic-gate * sq->sq_count == 0); 37270Sstevel@tonic-gate * 37280Sstevel@tonic-gate * which indicates that if we are both putshared and exclusive, 37290Sstevel@tonic-gate * we became exclusive while executing the putproc, and the only 37300Sstevel@tonic-gate * claim on the syncq was the one we dropped a few lines above. 37310Sstevel@tonic-gate * But other threads that enter putnext while the syncq is exclusive 37320Sstevel@tonic-gate * need to make a claim as they may need to drop SQLOCK in the 37330Sstevel@tonic-gate * has_writers case to avoid deadlocks. If these threads are 37340Sstevel@tonic-gate * delayed or preempted, it is possible that the writer thread can 37350Sstevel@tonic-gate * find out that there are other claims making the (sq_count == 0) 37360Sstevel@tonic-gate * test invalid. 37370Sstevel@tonic-gate */ 37380Sstevel@tonic-gate 37390Sstevel@tonic-gate sq->sq_flags = flags & ~SQ_EXCL; 37400Sstevel@tonic-gate mutex_exit(SQLOCK(sq)); 37410Sstevel@tonic-gate return (rval); 37420Sstevel@tonic-gate } 37430Sstevel@tonic-gate 37440Sstevel@tonic-gate /* 37450Sstevel@tonic-gate * Return nonzero if the queue is responsible for struio(), else return 0. 
37460Sstevel@tonic-gate */ 37470Sstevel@tonic-gate int 37480Sstevel@tonic-gate isuioq(queue_t *q) 37490Sstevel@tonic-gate { 37500Sstevel@tonic-gate if (q->q_flag & QREADR) 37510Sstevel@tonic-gate return (STREAM(q)->sd_struiordq == q); 37520Sstevel@tonic-gate else 37530Sstevel@tonic-gate return (STREAM(q)->sd_struiowrq == q); 37540Sstevel@tonic-gate } 37550Sstevel@tonic-gate 37560Sstevel@tonic-gate #if defined(__sparc) 37570Sstevel@tonic-gate int disable_putlocks = 0; 37580Sstevel@tonic-gate #else 37590Sstevel@tonic-gate int disable_putlocks = 1; 37600Sstevel@tonic-gate #endif 37610Sstevel@tonic-gate 37620Sstevel@tonic-gate /* 37630Sstevel@tonic-gate * called by create_putlock. 37640Sstevel@tonic-gate */ 37650Sstevel@tonic-gate static void 37660Sstevel@tonic-gate create_syncq_putlocks(queue_t *q) 37670Sstevel@tonic-gate { 37680Sstevel@tonic-gate syncq_t *sq = q->q_syncq; 37690Sstevel@tonic-gate ciputctrl_t *cip; 37700Sstevel@tonic-gate int i; 37710Sstevel@tonic-gate 37720Sstevel@tonic-gate ASSERT(sq != NULL); 37730Sstevel@tonic-gate 37740Sstevel@tonic-gate ASSERT(disable_putlocks == 0); 37750Sstevel@tonic-gate ASSERT(n_ciputctrl >= min_n_ciputctrl); 37760Sstevel@tonic-gate ASSERT(ciputctrl_cache != NULL); 37770Sstevel@tonic-gate 37780Sstevel@tonic-gate if (!(sq->sq_type & SQ_CIPUT)) 37790Sstevel@tonic-gate return; 37800Sstevel@tonic-gate 37810Sstevel@tonic-gate for (i = 0; i <= 1; i++) { 37820Sstevel@tonic-gate if (sq->sq_ciputctrl == NULL) { 37830Sstevel@tonic-gate cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP); 37840Sstevel@tonic-gate SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0); 37850Sstevel@tonic-gate mutex_enter(SQLOCK(sq)); 37860Sstevel@tonic-gate if (sq->sq_ciputctrl != NULL) { 37870Sstevel@tonic-gate mutex_exit(SQLOCK(sq)); 37880Sstevel@tonic-gate kmem_cache_free(ciputctrl_cache, cip); 37890Sstevel@tonic-gate } else { 37900Sstevel@tonic-gate ASSERT(sq->sq_nciputctrl == 0); 37910Sstevel@tonic-gate sq->sq_nciputctrl = n_ciputctrl - 1; 
37920Sstevel@tonic-gate /* 37930Sstevel@tonic-gate * putnext checks sq_ciputctrl without holding 37940Sstevel@tonic-gate * SQLOCK. if it is not NULL putnext assumes 37950Sstevel@tonic-gate * sq_nciputctrl is initialized. membar below 37960Sstevel@tonic-gate * insures that. 37970Sstevel@tonic-gate */ 37980Sstevel@tonic-gate membar_producer(); 37990Sstevel@tonic-gate sq->sq_ciputctrl = cip; 38000Sstevel@tonic-gate mutex_exit(SQLOCK(sq)); 38010Sstevel@tonic-gate } 38020Sstevel@tonic-gate } 38030Sstevel@tonic-gate ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1); 38040Sstevel@tonic-gate if (i == 1) 38050Sstevel@tonic-gate break; 38060Sstevel@tonic-gate q = _OTHERQ(q); 38070Sstevel@tonic-gate if (!(q->q_flag & QPERQ)) { 38080Sstevel@tonic-gate ASSERT(sq == q->q_syncq); 38090Sstevel@tonic-gate break; 38100Sstevel@tonic-gate } 38110Sstevel@tonic-gate ASSERT(q->q_syncq != NULL); 38120Sstevel@tonic-gate ASSERT(sq != q->q_syncq); 38130Sstevel@tonic-gate sq = q->q_syncq; 38140Sstevel@tonic-gate ASSERT(sq->sq_type & SQ_CIPUT); 38150Sstevel@tonic-gate } 38160Sstevel@tonic-gate } 38170Sstevel@tonic-gate 38180Sstevel@tonic-gate /* 38190Sstevel@tonic-gate * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for 38200Sstevel@tonic-gate * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for 38210Sstevel@tonic-gate * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's 38220Sstevel@tonic-gate * starting from q and down to the driver. 38230Sstevel@tonic-gate * 38240Sstevel@tonic-gate * This should be called after the affected queues are part of stream 38250Sstevel@tonic-gate * geometry. It should be called from driver/module open routine after 38260Sstevel@tonic-gate * qprocson() call. It is also called from nfs syscall where it is known that 38270Sstevel@tonic-gate * stream is configured and won't change its geometry during create_putlock 38280Sstevel@tonic-gate * call. 
38290Sstevel@tonic-gate * 38300Sstevel@tonic-gate * caller normally uses 0 value for the stream argument to speed up MT putnext 38310Sstevel@tonic-gate * into the perimeter of q for example because its perimeter is per module 38320Sstevel@tonic-gate * (e.g. IP). 38330Sstevel@tonic-gate * 38340Sstevel@tonic-gate * caller normally uses non 0 value for the stream argument to hint the system 38350Sstevel@tonic-gate * that the stream of q is a very contended global system stream 38360Sstevel@tonic-gate * (e.g. NFS/UDP) and the part of the stream from q to the driver is 38370Sstevel@tonic-gate * particularly MT hot. 38380Sstevel@tonic-gate * 38390Sstevel@tonic-gate * Caller insures stream plumbing won't happen while we are here and therefore 38400Sstevel@tonic-gate * q_next can be safely used. 38410Sstevel@tonic-gate */ 38420Sstevel@tonic-gate 38430Sstevel@tonic-gate void 38440Sstevel@tonic-gate create_putlocks(queue_t *q, int stream) 38450Sstevel@tonic-gate { 38460Sstevel@tonic-gate ciputctrl_t *cip; 38470Sstevel@tonic-gate struct stdata *stp = STREAM(q); 38480Sstevel@tonic-gate 38490Sstevel@tonic-gate q = _WR(q); 38500Sstevel@tonic-gate ASSERT(stp != NULL); 38510Sstevel@tonic-gate 38520Sstevel@tonic-gate if (disable_putlocks != 0) 38530Sstevel@tonic-gate return; 38540Sstevel@tonic-gate 38550Sstevel@tonic-gate if (n_ciputctrl < min_n_ciputctrl) 38560Sstevel@tonic-gate return; 38570Sstevel@tonic-gate 38580Sstevel@tonic-gate ASSERT(ciputctrl_cache != NULL); 38590Sstevel@tonic-gate 38600Sstevel@tonic-gate if (stream != 0 && stp->sd_ciputctrl == NULL) { 38610Sstevel@tonic-gate cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP); 38620Sstevel@tonic-gate SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0); 38630Sstevel@tonic-gate mutex_enter(&stp->sd_lock); 38640Sstevel@tonic-gate if (stp->sd_ciputctrl != NULL) { 38650Sstevel@tonic-gate mutex_exit(&stp->sd_lock); 38660Sstevel@tonic-gate kmem_cache_free(ciputctrl_cache, cip); 38670Sstevel@tonic-gate } else { 
38680Sstevel@tonic-gate ASSERT(stp->sd_nciputctrl == 0); 38690Sstevel@tonic-gate stp->sd_nciputctrl = n_ciputctrl - 1; 38700Sstevel@tonic-gate /* 38710Sstevel@tonic-gate * putnext checks sd_ciputctrl without holding 38720Sstevel@tonic-gate * sd_lock. if it is not NULL putnext assumes 38730Sstevel@tonic-gate * sd_nciputctrl is initialized. membar below 38740Sstevel@tonic-gate * insures that. 38750Sstevel@tonic-gate */ 38760Sstevel@tonic-gate membar_producer(); 38770Sstevel@tonic-gate stp->sd_ciputctrl = cip; 38780Sstevel@tonic-gate mutex_exit(&stp->sd_lock); 38790Sstevel@tonic-gate } 38800Sstevel@tonic-gate } 38810Sstevel@tonic-gate 38820Sstevel@tonic-gate ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1); 38830Sstevel@tonic-gate 38840Sstevel@tonic-gate while (_SAMESTR(q)) { 38850Sstevel@tonic-gate create_syncq_putlocks(q); 38860Sstevel@tonic-gate if (stream == 0) 38870Sstevel@tonic-gate return; 38880Sstevel@tonic-gate q = q->q_next; 38890Sstevel@tonic-gate } 38900Sstevel@tonic-gate ASSERT(q != NULL); 38910Sstevel@tonic-gate create_syncq_putlocks(q); 38920Sstevel@tonic-gate } 38930Sstevel@tonic-gate 38940Sstevel@tonic-gate /* 38950Sstevel@tonic-gate * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows 38960Sstevel@tonic-gate * through a stream. 38970Sstevel@tonic-gate * 38980Sstevel@tonic-gate * Data currently record per event is a hrtime stamp, queue address, event 38990Sstevel@tonic-gate * type, and a per type datum. Much of the STREAMS framework is instrumented 39000Sstevel@tonic-gate * for automatic flow tracing (when enabled). Events can be defined and used 39010Sstevel@tonic-gate * by STREAMS modules and drivers. 39020Sstevel@tonic-gate * 39030Sstevel@tonic-gate * Global objects: 39040Sstevel@tonic-gate * 39050Sstevel@tonic-gate * str_ftevent() - Add a flow-trace event to a dblk. 
39060Sstevel@tonic-gate * str_ftfree() - Free flow-trace data 39070Sstevel@tonic-gate * 39080Sstevel@tonic-gate * Local objects: 39090Sstevel@tonic-gate * 39100Sstevel@tonic-gate * fthdr_cache - pointer to the kmem cache for trace header. 39110Sstevel@tonic-gate * ftblk_cache - pointer to the kmem cache for trace data blocks. 39120Sstevel@tonic-gate */ 39130Sstevel@tonic-gate 39140Sstevel@tonic-gate int str_ftnever = 1; /* Don't do STREAMS flow tracing */ 39150Sstevel@tonic-gate 39160Sstevel@tonic-gate void 39170Sstevel@tonic-gate str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data) 39180Sstevel@tonic-gate { 39190Sstevel@tonic-gate ftblk_t *bp = hp->tail; 39200Sstevel@tonic-gate ftblk_t *nbp; 39210Sstevel@tonic-gate ftevnt_t *ep; 39220Sstevel@tonic-gate int ix, nix; 39230Sstevel@tonic-gate 39240Sstevel@tonic-gate ASSERT(hp != NULL); 39250Sstevel@tonic-gate 39260Sstevel@tonic-gate for (;;) { 39270Sstevel@tonic-gate if ((ix = bp->ix) == FTBLK_EVNTS) { 39280Sstevel@tonic-gate /* 39290Sstevel@tonic-gate * Tail doesn't have room, so need a new tail. 39300Sstevel@tonic-gate * 39310Sstevel@tonic-gate * To make this MT safe, first, allocate a new 39320Sstevel@tonic-gate * ftblk, and initialize it. To make life a 39330Sstevel@tonic-gate * little easier, reserve the first slot (mostly 39340Sstevel@tonic-gate * by making ix = 1). When we are finished with 39350Sstevel@tonic-gate * the initialization, CAS this pointer to the 39360Sstevel@tonic-gate * tail. If this succeeds, this is the new 39370Sstevel@tonic-gate * "next" block. Otherwise, another thread 39380Sstevel@tonic-gate * got here first, so free the block and start 39390Sstevel@tonic-gate * again. 39400Sstevel@tonic-gate */ 39410Sstevel@tonic-gate if (!(nbp = kmem_cache_alloc(ftblk_cache, 39420Sstevel@tonic-gate KM_NOSLEEP))) { 39430Sstevel@tonic-gate /* no mem, so punt */ 39440Sstevel@tonic-gate str_ftnever++; 39450Sstevel@tonic-gate /* free up all flow data? 
*/ 39460Sstevel@tonic-gate return; 39470Sstevel@tonic-gate } 39480Sstevel@tonic-gate nbp->nxt = NULL; 39490Sstevel@tonic-gate nbp->ix = 1; 39500Sstevel@tonic-gate /* 39510Sstevel@tonic-gate * Just in case there is another thread about 39520Sstevel@tonic-gate * to get the next index, we need to make sure 39530Sstevel@tonic-gate * the value is there for it. 39540Sstevel@tonic-gate */ 39550Sstevel@tonic-gate membar_producer(); 39560Sstevel@tonic-gate if (casptr(&hp->tail, bp, nbp) == bp) { 39570Sstevel@tonic-gate /* CAS was successful */ 39580Sstevel@tonic-gate bp->nxt = nbp; 39590Sstevel@tonic-gate membar_producer(); 39600Sstevel@tonic-gate bp = nbp; 39610Sstevel@tonic-gate ix = 0; 39620Sstevel@tonic-gate goto cas_good; 39630Sstevel@tonic-gate } else { 39640Sstevel@tonic-gate kmem_cache_free(ftblk_cache, nbp); 39650Sstevel@tonic-gate bp = hp->tail; 39660Sstevel@tonic-gate continue; 39670Sstevel@tonic-gate } 39680Sstevel@tonic-gate } 39690Sstevel@tonic-gate nix = ix + 1; 39700Sstevel@tonic-gate if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) { 39710Sstevel@tonic-gate cas_good: 39720Sstevel@tonic-gate if (curthread != hp->thread) { 39730Sstevel@tonic-gate hp->thread = curthread; 39740Sstevel@tonic-gate evnt |= FTEV_CS; 39750Sstevel@tonic-gate } 39760Sstevel@tonic-gate if (CPU->cpu_seqid != hp->cpu_seqid) { 39770Sstevel@tonic-gate hp->cpu_seqid = CPU->cpu_seqid; 39780Sstevel@tonic-gate evnt |= FTEV_PS; 39790Sstevel@tonic-gate } 39800Sstevel@tonic-gate ep = &bp->ev[ix]; 39810Sstevel@tonic-gate break; 39820Sstevel@tonic-gate } 39830Sstevel@tonic-gate } 39840Sstevel@tonic-gate 39850Sstevel@tonic-gate if (evnt & FTEV_QMASK) { 39860Sstevel@tonic-gate queue_t *qp = p; 39870Sstevel@tonic-gate 39880Sstevel@tonic-gate /* 39890Sstevel@tonic-gate * It is possible that the module info is broke 39900Sstevel@tonic-gate * (as is logsubr.c at this comment writing). 
39910Sstevel@tonic-gate * Instead of panicing or doing other unmentionables, 39920Sstevel@tonic-gate * we shall put a dummy name as the mid, and continue. 39930Sstevel@tonic-gate */ 39940Sstevel@tonic-gate if (qp->q_qinfo == NULL) 39950Sstevel@tonic-gate ep->mid = "NONAME"; 39960Sstevel@tonic-gate else 39970Sstevel@tonic-gate ep->mid = qp->q_qinfo->qi_minfo->mi_idname; 39980Sstevel@tonic-gate 39990Sstevel@tonic-gate if (!(qp->q_flag & QREADR)) 40000Sstevel@tonic-gate evnt |= FTEV_ISWR; 40010Sstevel@tonic-gate } else { 40020Sstevel@tonic-gate ep->mid = (char *)p; 40030Sstevel@tonic-gate } 40040Sstevel@tonic-gate 40050Sstevel@tonic-gate ep->ts = gethrtime(); 40060Sstevel@tonic-gate ep->evnt = evnt; 40070Sstevel@tonic-gate ep->data = data; 40080Sstevel@tonic-gate hp->hash = (hp->hash << 9) + hp->hash; 40090Sstevel@tonic-gate hp->hash += (evnt << 16) | data; 40100Sstevel@tonic-gate hp->hash += (uintptr_t)ep->mid; 40110Sstevel@tonic-gate } 40120Sstevel@tonic-gate 40130Sstevel@tonic-gate /* 40140Sstevel@tonic-gate * Free flow-trace data. 40150Sstevel@tonic-gate */ 40160Sstevel@tonic-gate void 40170Sstevel@tonic-gate str_ftfree(dblk_t *dbp) 40180Sstevel@tonic-gate { 40190Sstevel@tonic-gate fthdr_t *hp = dbp->db_fthdr; 40200Sstevel@tonic-gate ftblk_t *bp = &hp->first; 40210Sstevel@tonic-gate ftblk_t *nbp; 40220Sstevel@tonic-gate 40230Sstevel@tonic-gate if (bp != hp->tail || bp->ix != 0) { 40240Sstevel@tonic-gate /* 40250Sstevel@tonic-gate * Clear out the hash, have the tail point to itself, and free 40260Sstevel@tonic-gate * any continuation blocks. 
40270Sstevel@tonic-gate */ 40280Sstevel@tonic-gate bp = hp->first.nxt; 40290Sstevel@tonic-gate hp->tail = &hp->first; 40300Sstevel@tonic-gate hp->hash = 0; 40310Sstevel@tonic-gate hp->first.nxt = NULL; 40320Sstevel@tonic-gate hp->first.ix = 0; 40330Sstevel@tonic-gate while (bp != NULL) { 40340Sstevel@tonic-gate nbp = bp->nxt; 40350Sstevel@tonic-gate kmem_cache_free(ftblk_cache, bp); 40360Sstevel@tonic-gate bp = nbp; 40370Sstevel@tonic-gate } 40380Sstevel@tonic-gate } 40390Sstevel@tonic-gate kmem_cache_free(fthdr_cache, hp); 40400Sstevel@tonic-gate dbp->db_fthdr = NULL; 40410Sstevel@tonic-gate } 4042