xref: /onnv-gate/usr/src/uts/common/io/multidata.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate /*
30*0Sstevel@tonic-gate  * Multidata, as described in the following papers:
31*0Sstevel@tonic-gate  *
32*0Sstevel@tonic-gate  * Adi Masputra,
33*0Sstevel@tonic-gate  * Multidata V.2: VA-Disjoint Packet Extents Framework Interface
34*0Sstevel@tonic-gate  * Design Specification.  August 2004.
35*0Sstevel@tonic-gate  * Available as http://sac.sfbay/PSARC/2004/594/materials/mmd2.pdf.
36*0Sstevel@tonic-gate  *
37*0Sstevel@tonic-gate  * Adi Masputra,
38*0Sstevel@tonic-gate  * Multidata Interface Design Specification.  Sep 2002.
39*0Sstevel@tonic-gate  * Available as http://sac.sfbay/PSARC/2002/276/materials/mmd.pdf.
40*0Sstevel@tonic-gate  *
41*0Sstevel@tonic-gate  * Adi Masputra, Frank DiMambro, Kacheong Poon,
42*0Sstevel@tonic-gate  * An Efficient Networking Transmit Mechanism for Solaris:
43*0Sstevel@tonic-gate  * Multidata Transmit (MDT).  May 2002.
44*0Sstevel@tonic-gate  * Available as http://sac.sfbay/PSARC/2002/276/materials/mdt.pdf.
45*0Sstevel@tonic-gate  */
46*0Sstevel@tonic-gate 
47*0Sstevel@tonic-gate #include <sys/types.h>
48*0Sstevel@tonic-gate #include <sys/stream.h>
49*0Sstevel@tonic-gate #include <sys/dlpi.h>
50*0Sstevel@tonic-gate #include <sys/stropts.h>
51*0Sstevel@tonic-gate #include <sys/strsun.h>
52*0Sstevel@tonic-gate #include <sys/strlog.h>
53*0Sstevel@tonic-gate #include <sys/strsubr.h>
54*0Sstevel@tonic-gate #include <sys/sysmacros.h>
55*0Sstevel@tonic-gate #include <sys/cmn_err.h>
56*0Sstevel@tonic-gate #include <sys/debug.h>
57*0Sstevel@tonic-gate #include <sys/kmem.h>
58*0Sstevel@tonic-gate #include <sys/atomic.h>
59*0Sstevel@tonic-gate 
60*0Sstevel@tonic-gate #include <sys/multidata.h>
61*0Sstevel@tonic-gate #include <sys/multidata_impl.h>
62*0Sstevel@tonic-gate 
63*0Sstevel@tonic-gate extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *);
64*0Sstevel@tonic-gate 
65*0Sstevel@tonic-gate static int mmd_constructor(void *, void *, int);
66*0Sstevel@tonic-gate static void mmd_destructor(void *, void *);
67*0Sstevel@tonic-gate static int pdslab_constructor(void *, void *, int);
68*0Sstevel@tonic-gate static void pdslab_destructor(void *, void *);
69*0Sstevel@tonic-gate static int pattbl_constructor(void *, void *, int);
70*0Sstevel@tonic-gate static void pattbl_destructor(void *, void *);
71*0Sstevel@tonic-gate static void mmd_esballoc_free(caddr_t);
72*0Sstevel@tonic-gate static int mmd_copy_pattbl(patbkt_t *, multidata_t *, pdesc_t *, int);
73*0Sstevel@tonic-gate 
74*0Sstevel@tonic-gate static boolean_t pbuf_ref_valid(multidata_t *, pdescinfo_t *);
75*0Sstevel@tonic-gate #pragma inline(pbuf_ref_valid)
76*0Sstevel@tonic-gate 
77*0Sstevel@tonic-gate static boolean_t pdi_in_range(pdescinfo_t *, pdescinfo_t *);
78*0Sstevel@tonic-gate #pragma inline(pdi_in_range)
79*0Sstevel@tonic-gate 
80*0Sstevel@tonic-gate static pdesc_t *mmd_addpdesc_int(multidata_t *, pdescinfo_t *, int *, int);
81*0Sstevel@tonic-gate #pragma inline(mmd_addpdesc_int)
82*0Sstevel@tonic-gate 
83*0Sstevel@tonic-gate static void mmd_destroy_pattbl(patbkt_t **);
84*0Sstevel@tonic-gate #pragma inline(mmd_destroy_pattbl)
85*0Sstevel@tonic-gate 
86*0Sstevel@tonic-gate static pattr_t *mmd_find_pattr(patbkt_t *, uint_t);
87*0Sstevel@tonic-gate #pragma inline(mmd_find_pattr)
88*0Sstevel@tonic-gate 
89*0Sstevel@tonic-gate static pdesc_t *mmd_destroy_pdesc(multidata_t *, pdesc_t *);
90*0Sstevel@tonic-gate #pragma inline(mmd_destroy_pdesc)
91*0Sstevel@tonic-gate 
92*0Sstevel@tonic-gate static pdesc_t *mmd_getpdesc(multidata_t *, pdesc_t *, pdescinfo_t *, uint_t,
93*0Sstevel@tonic-gate     boolean_t);
94*0Sstevel@tonic-gate #pragma inline(mmd_getpdesc)
95*0Sstevel@tonic-gate 
96*0Sstevel@tonic-gate static struct kmem_cache *mmd_cache;
97*0Sstevel@tonic-gate static struct kmem_cache *pd_slab_cache;
98*0Sstevel@tonic-gate static struct kmem_cache *pattbl_cache;
99*0Sstevel@tonic-gate 
/*
 * Debug message switch.  While mmd_debug is greater than zero, MMD_DEBUG()
 * hands its argument to cmn_err(); otherwise it does nothing.  The
 * argument must carry its own parentheses, i.e.
 * MMD_DEBUG((CE_WARN, "fmt", args)).
 *
 * The expansion is wrapped in do { } while (0) so that it forms a single
 * statement and cannot capture an "else" that follows the invocation.
 */
int mmd_debug = 1;
#define	MMD_DEBUG(s)	do {					\
	if (mmd_debug > 0)					\
		cmn_err s;					\
} while (0)
102*0Sstevel@tonic-gate 
/*
 * Set this to true to bypass pdesc bounds checking.
 */
106*0Sstevel@tonic-gate boolean_t mmd_speed_over_safety = B_FALSE;
107*0Sstevel@tonic-gate 
108*0Sstevel@tonic-gate /*
109*0Sstevel@tonic-gate  * Patchable kmem_cache flags.
110*0Sstevel@tonic-gate  */
111*0Sstevel@tonic-gate int mmd_kmem_flags = 0;
112*0Sstevel@tonic-gate int pdslab_kmem_flags = 0;
113*0Sstevel@tonic-gate int pattbl_kmem_flags = 0;
114*0Sstevel@tonic-gate 
115*0Sstevel@tonic-gate /*
116*0Sstevel@tonic-gate  * Alignment (in bytes) of our kmem caches.
117*0Sstevel@tonic-gate  */
118*0Sstevel@tonic-gate #define	MULTIDATA_CACHE_ALIGN	64
119*0Sstevel@tonic-gate 
120*0Sstevel@tonic-gate /*
121*0Sstevel@tonic-gate  * Default number of packet descriptors per descriptor slab.  Making
122*0Sstevel@tonic-gate  * this too small will trigger more descriptor slab allocation; making
123*0Sstevel@tonic-gate  * it too large will create too many unclaimed descriptors.
124*0Sstevel@tonic-gate  */
125*0Sstevel@tonic-gate #define	PDSLAB_SZ	15
126*0Sstevel@tonic-gate uint_t pdslab_sz = PDSLAB_SZ;
127*0Sstevel@tonic-gate 
128*0Sstevel@tonic-gate /*
129*0Sstevel@tonic-gate  * Default attribute hash table size.  It's okay to set this to a small
130*0Sstevel@tonic-gate  * value (even to 1) because there aren't that many attributes currently
131*0Sstevel@tonic-gate  * defined, and because we assume there won't be many attributes associated
132*0Sstevel@tonic-gate  * with a Multidata at a given time.  Increasing the size will reduce
133*0Sstevel@tonic-gate  * attribute search time (given a large number of attributes in a Multidata),
134*0Sstevel@tonic-gate  * and decreasing it will reduce the memory footprints and the overhead
135*0Sstevel@tonic-gate  * associated with managing the table.
136*0Sstevel@tonic-gate  */
137*0Sstevel@tonic-gate #define	PATTBL_SZ	1
138*0Sstevel@tonic-gate uint_t pattbl_sz = PATTBL_SZ;
139*0Sstevel@tonic-gate 
140*0Sstevel@tonic-gate /*
141*0Sstevel@tonic-gate  * Attribute hash key.
142*0Sstevel@tonic-gate  */
143*0Sstevel@tonic-gate #define	PATTBL_HASH(x, sz)	((x) % (sz))
144*0Sstevel@tonic-gate 
145*0Sstevel@tonic-gate /*
146*0Sstevel@tonic-gate  * Structure that precedes each Multidata metadata.
147*0Sstevel@tonic-gate  */
148*0Sstevel@tonic-gate struct mmd_buf_info {
149*0Sstevel@tonic-gate 	frtn_t	frp;		/* free routine */
150*0Sstevel@tonic-gate 	uint_t	buf_len;	/* length of kmem buffer */
151*0Sstevel@tonic-gate };
152*0Sstevel@tonic-gate 
153*0Sstevel@tonic-gate /*
154*0Sstevel@tonic-gate  * The size of each metadata buffer.
155*0Sstevel@tonic-gate  */
156*0Sstevel@tonic-gate #define	MMD_CACHE_SIZE	\
157*0Sstevel@tonic-gate 	(sizeof (struct mmd_buf_info) + sizeof (multidata_t))
158*0Sstevel@tonic-gate 
159*0Sstevel@tonic-gate /*
160*0Sstevel@tonic-gate  * Called during startup in order to create the Multidata kmem caches.
161*0Sstevel@tonic-gate  */
162*0Sstevel@tonic-gate void
163*0Sstevel@tonic-gate mmd_init(void)
164*0Sstevel@tonic-gate {
165*0Sstevel@tonic-gate 	pdslab_sz = MAX(1, pdslab_sz);	/* at least 1 descriptor */
166*0Sstevel@tonic-gate 	pattbl_sz = MAX(1, pattbl_sz);	/* at least 1 bucket */
167*0Sstevel@tonic-gate 
168*0Sstevel@tonic-gate 	mmd_cache = kmem_cache_create("multidata", MMD_CACHE_SIZE,
169*0Sstevel@tonic-gate 	    MULTIDATA_CACHE_ALIGN, mmd_constructor, mmd_destructor,
170*0Sstevel@tonic-gate 	    NULL, NULL, NULL, mmd_kmem_flags);
171*0Sstevel@tonic-gate 
172*0Sstevel@tonic-gate 	pd_slab_cache = kmem_cache_create("multidata_pdslab",
173*0Sstevel@tonic-gate 	    PDESC_SLAB_SIZE(pdslab_sz), MULTIDATA_CACHE_ALIGN,
174*0Sstevel@tonic-gate 	    pdslab_constructor, pdslab_destructor, NULL,
175*0Sstevel@tonic-gate 	    (void *)(uintptr_t)pdslab_sz, NULL, pdslab_kmem_flags);
176*0Sstevel@tonic-gate 
177*0Sstevel@tonic-gate 	pattbl_cache = kmem_cache_create("multidata_pattbl",
178*0Sstevel@tonic-gate 	    sizeof (patbkt_t) * pattbl_sz, MULTIDATA_CACHE_ALIGN,
179*0Sstevel@tonic-gate 	    pattbl_constructor, pattbl_destructor, NULL,
180*0Sstevel@tonic-gate 	    (void *)(uintptr_t)pattbl_sz, NULL, pattbl_kmem_flags);
181*0Sstevel@tonic-gate }
182*0Sstevel@tonic-gate 
183*0Sstevel@tonic-gate /*
184*0Sstevel@tonic-gate  * Create a Multidata message block.
185*0Sstevel@tonic-gate  */
186*0Sstevel@tonic-gate multidata_t *
187*0Sstevel@tonic-gate mmd_alloc(mblk_t *hdr_mp, mblk_t **mmd_mp, int kmflags)
188*0Sstevel@tonic-gate {
189*0Sstevel@tonic-gate 	uchar_t *buf;
190*0Sstevel@tonic-gate 	multidata_t *mmd;
191*0Sstevel@tonic-gate 	uint_t mmd_mplen;
192*0Sstevel@tonic-gate 	struct mmd_buf_info *buf_info;
193*0Sstevel@tonic-gate 
194*0Sstevel@tonic-gate 	ASSERT(hdr_mp != NULL);
195*0Sstevel@tonic-gate 	ASSERT(mmd_mp != NULL);
196*0Sstevel@tonic-gate 
197*0Sstevel@tonic-gate 	/*
198*0Sstevel@tonic-gate 	 * Caller should never pass in a chain of mblks since we
199*0Sstevel@tonic-gate 	 * only care about the first one, hence the assertions.
200*0Sstevel@tonic-gate 	 */
201*0Sstevel@tonic-gate 	ASSERT(hdr_mp->b_cont == NULL);
202*0Sstevel@tonic-gate 
203*0Sstevel@tonic-gate 	if ((buf = kmem_cache_alloc(mmd_cache, kmflags)) == NULL)
204*0Sstevel@tonic-gate 		return (NULL);
205*0Sstevel@tonic-gate 
206*0Sstevel@tonic-gate 	buf_info = (struct mmd_buf_info *)buf;
207*0Sstevel@tonic-gate 	buf_info->frp.free_arg = (caddr_t)buf;
208*0Sstevel@tonic-gate 
209*0Sstevel@tonic-gate 	mmd = (multidata_t *)(buf_info + 1);
210*0Sstevel@tonic-gate 	mmd_mplen = sizeof (*mmd);
211*0Sstevel@tonic-gate 
212*0Sstevel@tonic-gate 	if ((*mmd_mp = desballoc((uchar_t *)mmd, mmd_mplen, BPRI_HI,
213*0Sstevel@tonic-gate 	    &(buf_info->frp))) == NULL) {
214*0Sstevel@tonic-gate 		kmem_cache_free(mmd_cache, buf);
215*0Sstevel@tonic-gate 		return (NULL);
216*0Sstevel@tonic-gate 	}
217*0Sstevel@tonic-gate 
218*0Sstevel@tonic-gate 	DB_TYPE(*mmd_mp) = M_MULTIDATA;
219*0Sstevel@tonic-gate 	(*mmd_mp)->b_wptr += mmd_mplen;
220*0Sstevel@tonic-gate 	mmd->mmd_dp = (*mmd_mp)->b_datap;
221*0Sstevel@tonic-gate 	mmd->mmd_hbuf = hdr_mp;
222*0Sstevel@tonic-gate 
223*0Sstevel@tonic-gate 	return (mmd);
224*0Sstevel@tonic-gate }
225*0Sstevel@tonic-gate 
226*0Sstevel@tonic-gate /*
227*0Sstevel@tonic-gate  * Associate additional payload buffer to the Multidata.
228*0Sstevel@tonic-gate  */
229*0Sstevel@tonic-gate int
230*0Sstevel@tonic-gate mmd_addpldbuf(multidata_t *mmd, mblk_t *pld_mp)
231*0Sstevel@tonic-gate {
232*0Sstevel@tonic-gate 	int i;
233*0Sstevel@tonic-gate 
234*0Sstevel@tonic-gate 	ASSERT(mmd != NULL);
235*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
236*0Sstevel@tonic-gate 	ASSERT(pld_mp != NULL);
237*0Sstevel@tonic-gate 
238*0Sstevel@tonic-gate 	mutex_enter(&mmd->mmd_pd_slab_lock);
239*0Sstevel@tonic-gate 	for (i = 0; i < MULTIDATA_MAX_PBUFS &&
240*0Sstevel@tonic-gate 	    mmd->mmd_pbuf_cnt < MULTIDATA_MAX_PBUFS; i++) {
241*0Sstevel@tonic-gate 		if (mmd->mmd_pbuf[i] == pld_mp) {
242*0Sstevel@tonic-gate 			/* duplicate entry */
243*0Sstevel@tonic-gate 			MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding "
244*0Sstevel@tonic-gate 			    "pld 0x%p to mmd 0x%p since it has been "
245*0Sstevel@tonic-gate 			    "previously added into slot %d (total %d)\n",
246*0Sstevel@tonic-gate 			    (void *)pld_mp, (void *)mmd, i, mmd->mmd_pbuf_cnt));
247*0Sstevel@tonic-gate 			mutex_exit(&mmd->mmd_pd_slab_lock);
248*0Sstevel@tonic-gate 			return (-1);
249*0Sstevel@tonic-gate 		} else if (mmd->mmd_pbuf[i] == NULL) {
250*0Sstevel@tonic-gate 			mmd->mmd_pbuf[i] = pld_mp;
251*0Sstevel@tonic-gate 			mmd->mmd_pbuf_cnt++;
252*0Sstevel@tonic-gate 			mutex_exit(&mmd->mmd_pd_slab_lock);
253*0Sstevel@tonic-gate 			return (i);
254*0Sstevel@tonic-gate 		}
255*0Sstevel@tonic-gate 	}
256*0Sstevel@tonic-gate 
257*0Sstevel@tonic-gate 	/* all slots are taken */
258*0Sstevel@tonic-gate 	MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding pld 0x%p to mmd 0x%p "
259*0Sstevel@tonic-gate 	    "since no slot space is left (total %d max %d)\n", (void *)pld_mp,
260*0Sstevel@tonic-gate 	    (void *)mmd, mmd->mmd_pbuf_cnt, MULTIDATA_MAX_PBUFS));
261*0Sstevel@tonic-gate 	mutex_exit(&mmd->mmd_pd_slab_lock);
262*0Sstevel@tonic-gate 
263*0Sstevel@tonic-gate 	return (-1);
264*0Sstevel@tonic-gate }
265*0Sstevel@tonic-gate 
266*0Sstevel@tonic-gate /*
267*0Sstevel@tonic-gate  * Multidata metadata kmem cache constructor routine.
268*0Sstevel@tonic-gate  */
269*0Sstevel@tonic-gate /* ARGSUSED */
270*0Sstevel@tonic-gate static int
271*0Sstevel@tonic-gate mmd_constructor(void *buf, void *cdrarg, int kmflags)
272*0Sstevel@tonic-gate {
273*0Sstevel@tonic-gate 	struct mmd_buf_info *buf_info;
274*0Sstevel@tonic-gate 	multidata_t *mmd;
275*0Sstevel@tonic-gate 
276*0Sstevel@tonic-gate 	bzero((void *)buf, MMD_CACHE_SIZE);
277*0Sstevel@tonic-gate 
278*0Sstevel@tonic-gate 	buf_info = (struct mmd_buf_info *)buf;
279*0Sstevel@tonic-gate 	buf_info->frp.free_func = mmd_esballoc_free;
280*0Sstevel@tonic-gate 	buf_info->buf_len = MMD_CACHE_SIZE;
281*0Sstevel@tonic-gate 
282*0Sstevel@tonic-gate 	mmd = (multidata_t *)(buf_info + 1);
283*0Sstevel@tonic-gate 	mmd->mmd_magic = MULTIDATA_MAGIC;
284*0Sstevel@tonic-gate 
285*0Sstevel@tonic-gate 	mutex_init(&(mmd->mmd_pd_slab_lock), NULL, MUTEX_DRIVER, NULL);
286*0Sstevel@tonic-gate 	QL_INIT(&(mmd->mmd_pd_slab_q));
287*0Sstevel@tonic-gate 	QL_INIT(&(mmd->mmd_pd_q));
288*0Sstevel@tonic-gate 
289*0Sstevel@tonic-gate 	return (0);
290*0Sstevel@tonic-gate }
291*0Sstevel@tonic-gate 
292*0Sstevel@tonic-gate /*
293*0Sstevel@tonic-gate  * Multidata metadata kmem cache destructor routine.
294*0Sstevel@tonic-gate  */
295*0Sstevel@tonic-gate /* ARGSUSED */
296*0Sstevel@tonic-gate static void
297*0Sstevel@tonic-gate mmd_destructor(void *buf, void *cdrarg)
298*0Sstevel@tonic-gate {
299*0Sstevel@tonic-gate 	multidata_t *mmd;
300*0Sstevel@tonic-gate #ifdef DEBUG
301*0Sstevel@tonic-gate 	int i;
302*0Sstevel@tonic-gate #endif
303*0Sstevel@tonic-gate 
304*0Sstevel@tonic-gate 	mmd = (multidata_t *)((uchar_t *)buf + sizeof (struct mmd_buf_info));
305*0Sstevel@tonic-gate 
306*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
307*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_dp == NULL);
308*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_hbuf == NULL);
309*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pbuf_cnt == 0);
310*0Sstevel@tonic-gate #ifdef DEBUG
311*0Sstevel@tonic-gate 	for (i = 0; i < MULTIDATA_MAX_PBUFS; i++)
312*0Sstevel@tonic-gate 		ASSERT(mmd->mmd_pbuf[i] == NULL);
313*0Sstevel@tonic-gate #endif
314*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pattbl == NULL);
315*0Sstevel@tonic-gate 
316*0Sstevel@tonic-gate 	mutex_destroy(&(mmd->mmd_pd_slab_lock));
317*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
318*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_slab_cnt == 0);
319*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
320*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pd_cnt == 0);
321*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_hbuf_ref == 0);
322*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pbuf_ref == 0);
323*0Sstevel@tonic-gate }
324*0Sstevel@tonic-gate 
325*0Sstevel@tonic-gate /*
326*0Sstevel@tonic-gate  * Multidata message block free callback routine.
327*0Sstevel@tonic-gate  */
328*0Sstevel@tonic-gate static void
329*0Sstevel@tonic-gate mmd_esballoc_free(caddr_t buf)
330*0Sstevel@tonic-gate {
331*0Sstevel@tonic-gate 	multidata_t *mmd;
332*0Sstevel@tonic-gate 	pdesc_t *pd;
333*0Sstevel@tonic-gate 	pdesc_slab_t *slab;
334*0Sstevel@tonic-gate 	int i;
335*0Sstevel@tonic-gate 
336*0Sstevel@tonic-gate 	ASSERT(buf != NULL);
337*0Sstevel@tonic-gate 	ASSERT(((struct mmd_buf_info *)buf)->buf_len == MMD_CACHE_SIZE);
338*0Sstevel@tonic-gate 
339*0Sstevel@tonic-gate 	mmd = (multidata_t *)(buf + sizeof (struct mmd_buf_info));
340*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
341*0Sstevel@tonic-gate 
342*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_dp != NULL);
343*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_dp->db_ref == 1);
344*0Sstevel@tonic-gate 
345*0Sstevel@tonic-gate 	/* remove all packet descriptors and private attributes */
346*0Sstevel@tonic-gate 	pd = Q2PD(mmd->mmd_pd_q.ql_next);
347*0Sstevel@tonic-gate 	while (pd != Q2PD(&(mmd->mmd_pd_q)))
348*0Sstevel@tonic-gate 		pd = mmd_destroy_pdesc(mmd, pd);
349*0Sstevel@tonic-gate 
350*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
351*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pd_cnt == 0);
352*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_hbuf_ref == 0);
353*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pbuf_ref == 0);
354*0Sstevel@tonic-gate 
355*0Sstevel@tonic-gate 	/* remove all global attributes */
356*0Sstevel@tonic-gate 	if (mmd->mmd_pattbl != NULL)
357*0Sstevel@tonic-gate 		mmd_destroy_pattbl(&(mmd->mmd_pattbl));
358*0Sstevel@tonic-gate 
359*0Sstevel@tonic-gate 	/* remove all descriptor slabs */
360*0Sstevel@tonic-gate 	slab = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_next);
361*0Sstevel@tonic-gate 	while (slab != Q2PDSLAB(&(mmd->mmd_pd_slab_q))) {
362*0Sstevel@tonic-gate 		pdesc_slab_t *slab_next = Q2PDSLAB(slab->pds_next);
363*0Sstevel@tonic-gate 
364*0Sstevel@tonic-gate 		remque(&(slab->pds_next));
365*0Sstevel@tonic-gate 		slab->pds_next = NULL;
366*0Sstevel@tonic-gate 		slab->pds_prev = NULL;
367*0Sstevel@tonic-gate 		slab->pds_mmd = NULL;
368*0Sstevel@tonic-gate 		slab->pds_used = 0;
369*0Sstevel@tonic-gate 		kmem_cache_free(pd_slab_cache, slab);
370*0Sstevel@tonic-gate 
371*0Sstevel@tonic-gate 		ASSERT(mmd->mmd_slab_cnt > 0);
372*0Sstevel@tonic-gate 		mmd->mmd_slab_cnt--;
373*0Sstevel@tonic-gate 		slab = slab_next;
374*0Sstevel@tonic-gate 	}
375*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
376*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_slab_cnt == 0);
377*0Sstevel@tonic-gate 
378*0Sstevel@tonic-gate 	mmd->mmd_dp = NULL;
379*0Sstevel@tonic-gate 
380*0Sstevel@tonic-gate 	/* finally, free all associated message blocks */
381*0Sstevel@tonic-gate 	if (mmd->mmd_hbuf != NULL) {
382*0Sstevel@tonic-gate 		freeb(mmd->mmd_hbuf);
383*0Sstevel@tonic-gate 		mmd->mmd_hbuf = NULL;
384*0Sstevel@tonic-gate 	}
385*0Sstevel@tonic-gate 
386*0Sstevel@tonic-gate 	for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) {
387*0Sstevel@tonic-gate 		if (mmd->mmd_pbuf[i] != NULL) {
388*0Sstevel@tonic-gate 			freeb(mmd->mmd_pbuf[i]);
389*0Sstevel@tonic-gate 			mmd->mmd_pbuf[i] = NULL;
390*0Sstevel@tonic-gate 			ASSERT(mmd->mmd_pbuf_cnt > 0);
391*0Sstevel@tonic-gate 			mmd->mmd_pbuf_cnt--;
392*0Sstevel@tonic-gate 		}
393*0Sstevel@tonic-gate 	}
394*0Sstevel@tonic-gate 
395*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_pbuf_cnt == 0);
396*0Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&(mmd->mmd_pd_slab_lock)));
397*0Sstevel@tonic-gate 	kmem_cache_free(mmd_cache, buf);
398*0Sstevel@tonic-gate }
399*0Sstevel@tonic-gate 
400*0Sstevel@tonic-gate /*
401*0Sstevel@tonic-gate  * Multidata message block copy routine, called by copyb() when it
402*0Sstevel@tonic-gate  * encounters a M_MULTIDATA data block type.  This routine should
403*0Sstevel@tonic-gate  * not be called by anyone other than copyb(), since it may go away
404*0Sstevel@tonic-gate  * (read: become static to this module) once some sort of copy callback
405*0Sstevel@tonic-gate  * routine is made available.
406*0Sstevel@tonic-gate  */
407*0Sstevel@tonic-gate mblk_t *
408*0Sstevel@tonic-gate mmd_copy(mblk_t *bp, int kmflags)
409*0Sstevel@tonic-gate {
410*0Sstevel@tonic-gate 	multidata_t *mmd, *n_mmd;
411*0Sstevel@tonic-gate 	mblk_t *n_hbuf = NULL, *n_pbuf[MULTIDATA_MAX_PBUFS];
412*0Sstevel@tonic-gate 	mblk_t **pmp_last = &n_pbuf[MULTIDATA_MAX_PBUFS - 1];
413*0Sstevel@tonic-gate 	mblk_t **pmp;
414*0Sstevel@tonic-gate 	mblk_t *n_bp = NULL;
415*0Sstevel@tonic-gate 	pdesc_t *pd;
416*0Sstevel@tonic-gate 	uint_t n_pbuf_cnt = 0;
417*0Sstevel@tonic-gate 	int idx, i;
418*0Sstevel@tonic-gate 
419*0Sstevel@tonic-gate #define	FREE_PBUFS() {					\
420*0Sstevel@tonic-gate 	for (pmp = &n_pbuf[0]; pmp <= pmp_last; pmp++)	\
421*0Sstevel@tonic-gate 		if (*pmp != NULL) freeb(*pmp);		\
422*0Sstevel@tonic-gate }
423*0Sstevel@tonic-gate 
424*0Sstevel@tonic-gate #define	REL_OFF(p, base, n_base)			\
425*0Sstevel@tonic-gate 	((uchar_t *)(n_base) + ((uchar_t *)(p) - (uchar_t *)base))
426*0Sstevel@tonic-gate 
427*0Sstevel@tonic-gate 	ASSERT(bp != NULL && DB_TYPE(bp) == M_MULTIDATA);
428*0Sstevel@tonic-gate 	mmd = mmd_getmultidata(bp);
429*0Sstevel@tonic-gate 
430*0Sstevel@tonic-gate 	/* copy the header buffer */
431*0Sstevel@tonic-gate 	if (mmd->mmd_hbuf != NULL && (n_hbuf = copyb(mmd->mmd_hbuf)) == NULL)
432*0Sstevel@tonic-gate 		return (NULL);
433*0Sstevel@tonic-gate 
434*0Sstevel@tonic-gate 	/* copy the payload buffer(s) */
435*0Sstevel@tonic-gate 	mutex_enter(&mmd->mmd_pd_slab_lock);
436*0Sstevel@tonic-gate 	bzero((void *)&n_pbuf[0], sizeof (mblk_t *) * MULTIDATA_MAX_PBUFS);
437*0Sstevel@tonic-gate 	n_pbuf_cnt = mmd->mmd_pbuf_cnt;
438*0Sstevel@tonic-gate 	for (i = 0; i < n_pbuf_cnt; i++) {
439*0Sstevel@tonic-gate 		ASSERT(mmd->mmd_pbuf[i] != NULL);
440*0Sstevel@tonic-gate 		n_pbuf[i] = copyb(mmd->mmd_pbuf[i]);
441*0Sstevel@tonic-gate 		if (n_pbuf[i] == NULL) {
442*0Sstevel@tonic-gate 			FREE_PBUFS();
443*0Sstevel@tonic-gate 			mutex_exit(&mmd->mmd_pd_slab_lock);
444*0Sstevel@tonic-gate 			return (NULL);
445*0Sstevel@tonic-gate 		}
446*0Sstevel@tonic-gate 	}
447*0Sstevel@tonic-gate 
448*0Sstevel@tonic-gate 	/* allocate new Multidata */
449*0Sstevel@tonic-gate 	n_mmd = mmd_alloc(n_hbuf, &n_bp, kmflags);
450*0Sstevel@tonic-gate 	if (n_mmd == NULL) {
451*0Sstevel@tonic-gate 		if (n_hbuf != NULL)
452*0Sstevel@tonic-gate 			freeb(n_hbuf);
453*0Sstevel@tonic-gate 		if (n_pbuf_cnt != 0)
454*0Sstevel@tonic-gate 			FREE_PBUFS();
455*0Sstevel@tonic-gate 		mutex_exit(&mmd->mmd_pd_slab_lock);
456*0Sstevel@tonic-gate 		return (NULL);
457*0Sstevel@tonic-gate 	}
458*0Sstevel@tonic-gate 
459*0Sstevel@tonic-gate 	/*
460*0Sstevel@tonic-gate 	 * Add payload buffer(s); upon success, leave n_pbuf array
461*0Sstevel@tonic-gate 	 * alone, as the newly-created Multidata had already contained
462*0Sstevel@tonic-gate 	 * the mblk pointers stored in the array.  These will be freed
463*0Sstevel@tonic-gate 	 * along with the Multidata itself.
464*0Sstevel@tonic-gate 	 */
465*0Sstevel@tonic-gate 	for (i = 0, pmp = &n_pbuf[0]; i < n_pbuf_cnt; i++, pmp++) {
466*0Sstevel@tonic-gate 		idx = mmd_addpldbuf(n_mmd, *pmp);
467*0Sstevel@tonic-gate 		if (idx < 0) {
468*0Sstevel@tonic-gate 			FREE_PBUFS();
469*0Sstevel@tonic-gate 			freeb(n_bp);
470*0Sstevel@tonic-gate 			mutex_exit(&mmd->mmd_pd_slab_lock);
471*0Sstevel@tonic-gate 			return (NULL);
472*0Sstevel@tonic-gate 		}
473*0Sstevel@tonic-gate 	}
474*0Sstevel@tonic-gate 
475*0Sstevel@tonic-gate 	/* copy over global attributes */
476*0Sstevel@tonic-gate 	if (mmd->mmd_pattbl != NULL &&
477*0Sstevel@tonic-gate 	    mmd_copy_pattbl(mmd->mmd_pattbl, n_mmd, NULL, kmflags) < 0) {
478*0Sstevel@tonic-gate 		freeb(n_bp);
479*0Sstevel@tonic-gate 		mutex_exit(&mmd->mmd_pd_slab_lock);
480*0Sstevel@tonic-gate 		return (NULL);
481*0Sstevel@tonic-gate 	}
482*0Sstevel@tonic-gate 
483*0Sstevel@tonic-gate 	/* copy over packet descriptors and their atttributes */
484*0Sstevel@tonic-gate 	pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE);	/* first pdesc */
485*0Sstevel@tonic-gate 	while (pd != NULL) {
486*0Sstevel@tonic-gate 		pdesc_t *n_pd;
487*0Sstevel@tonic-gate 		pdescinfo_t *pdi, n_pdi;
488*0Sstevel@tonic-gate 		uchar_t *n_base, *base;
489*0Sstevel@tonic-gate 		pdesc_t *pd_next;
490*0Sstevel@tonic-gate 
491*0Sstevel@tonic-gate 		/* next pdesc */
492*0Sstevel@tonic-gate 		pd_next = mmd_getpdesc(pd->pd_slab->pds_mmd, pd, NULL,
493*0Sstevel@tonic-gate 		    1, B_TRUE);
494*0Sstevel@tonic-gate 
495*0Sstevel@tonic-gate 		/* skip if already removed */
496*0Sstevel@tonic-gate 		if (pd->pd_flags & PDESC_REM_DEFER) {
497*0Sstevel@tonic-gate 			pd = pd_next;
498*0Sstevel@tonic-gate 			continue;
499*0Sstevel@tonic-gate 		}
500*0Sstevel@tonic-gate 
501*0Sstevel@tonic-gate 		pdi = &(pd->pd_pdi);
502*0Sstevel@tonic-gate 		bzero(&n_pdi, sizeof (n_pdi));
503*0Sstevel@tonic-gate 
504*0Sstevel@tonic-gate 		/*
505*0Sstevel@tonic-gate 		 * Calculate new descriptor values based on the offset of
506*0Sstevel@tonic-gate 		 * each pointer relative to the associated buffer(s).
507*0Sstevel@tonic-gate 		 */
508*0Sstevel@tonic-gate 		ASSERT(pdi->flags & PDESC_HAS_REF);
509*0Sstevel@tonic-gate 		if (pdi->flags & PDESC_HBUF_REF) {
510*0Sstevel@tonic-gate 			n_base = n_mmd->mmd_hbuf->b_rptr;
511*0Sstevel@tonic-gate 			base = mmd->mmd_hbuf->b_rptr;
512*0Sstevel@tonic-gate 
513*0Sstevel@tonic-gate 			n_pdi.flags |= PDESC_HBUF_REF;
514*0Sstevel@tonic-gate 			n_pdi.hdr_base = REL_OFF(pdi->hdr_base, base, n_base);
515*0Sstevel@tonic-gate 			n_pdi.hdr_rptr = REL_OFF(pdi->hdr_rptr, base, n_base);
516*0Sstevel@tonic-gate 			n_pdi.hdr_wptr = REL_OFF(pdi->hdr_wptr, base, n_base);
517*0Sstevel@tonic-gate 			n_pdi.hdr_lim = REL_OFF(pdi->hdr_lim, base, n_base);
518*0Sstevel@tonic-gate 		}
519*0Sstevel@tonic-gate 
520*0Sstevel@tonic-gate 		if (pdi->flags & PDESC_PBUF_REF) {
521*0Sstevel@tonic-gate 			n_pdi.flags |= PDESC_PBUF_REF;
522*0Sstevel@tonic-gate 			n_pdi.pld_cnt = pdi->pld_cnt;
523*0Sstevel@tonic-gate 
524*0Sstevel@tonic-gate 			for (i = 0; i < pdi->pld_cnt; i++) {
525*0Sstevel@tonic-gate 				idx = pdi->pld_ary[i].pld_pbuf_idx;
526*0Sstevel@tonic-gate 				ASSERT(idx < MULTIDATA_MAX_PBUFS);
527*0Sstevel@tonic-gate 				ASSERT(n_mmd->mmd_pbuf[idx] != NULL);
528*0Sstevel@tonic-gate 				ASSERT(mmd->mmd_pbuf[idx] != NULL);
529*0Sstevel@tonic-gate 
530*0Sstevel@tonic-gate 				n_base = n_mmd->mmd_pbuf[idx]->b_rptr;
531*0Sstevel@tonic-gate 				base = mmd->mmd_pbuf[idx]->b_rptr;
532*0Sstevel@tonic-gate 
533*0Sstevel@tonic-gate 				n_pdi.pld_ary[i].pld_pbuf_idx = idx;
534*0Sstevel@tonic-gate 
535*0Sstevel@tonic-gate 				/*
536*0Sstevel@tonic-gate 				 * We can't copy the pointers just like that,
537*0Sstevel@tonic-gate 				 * so calculate the relative offset.
538*0Sstevel@tonic-gate 				 */
539*0Sstevel@tonic-gate 				n_pdi.pld_ary[i].pld_rptr =
540*0Sstevel@tonic-gate 				    REL_OFF(pdi->pld_ary[i].pld_rptr,
541*0Sstevel@tonic-gate 					base, n_base);
542*0Sstevel@tonic-gate 				n_pdi.pld_ary[i].pld_wptr =
543*0Sstevel@tonic-gate 				    REL_OFF(pdi->pld_ary[i].pld_wptr,
544*0Sstevel@tonic-gate 					base, n_base);
545*0Sstevel@tonic-gate 			}
546*0Sstevel@tonic-gate 		}
547*0Sstevel@tonic-gate 
548*0Sstevel@tonic-gate 		/* add the new descriptor to the new Multidata */
549*0Sstevel@tonic-gate 		n_pd = mmd_addpdesc_int(n_mmd, &n_pdi, NULL, kmflags);
550*0Sstevel@tonic-gate 
551*0Sstevel@tonic-gate 		if (n_pd == NULL || (pd->pd_pattbl != NULL &&
552*0Sstevel@tonic-gate 		    mmd_copy_pattbl(pd->pd_pattbl, n_mmd, n_pd, kmflags) < 0)) {
553*0Sstevel@tonic-gate 			freeb(n_bp);
554*0Sstevel@tonic-gate 			mutex_exit(&mmd->mmd_pd_slab_lock);
555*0Sstevel@tonic-gate 			return (NULL);
556*0Sstevel@tonic-gate 		}
557*0Sstevel@tonic-gate 
558*0Sstevel@tonic-gate 		pd = pd_next;
559*0Sstevel@tonic-gate 	}
560*0Sstevel@tonic-gate #undef REL_OFF
561*0Sstevel@tonic-gate #undef FREE_PBUFS
562*0Sstevel@tonic-gate 
563*0Sstevel@tonic-gate 	mutex_exit(&mmd->mmd_pd_slab_lock);
564*0Sstevel@tonic-gate 	return (n_bp);
565*0Sstevel@tonic-gate }
566*0Sstevel@tonic-gate 
567*0Sstevel@tonic-gate /*
568*0Sstevel@tonic-gate  * Given a Multidata message block, return the Multidata metadata handle.
569*0Sstevel@tonic-gate  */
570*0Sstevel@tonic-gate multidata_t *
571*0Sstevel@tonic-gate mmd_getmultidata(mblk_t *mp)
572*0Sstevel@tonic-gate {
573*0Sstevel@tonic-gate 	multidata_t *mmd;
574*0Sstevel@tonic-gate 
575*0Sstevel@tonic-gate 	ASSERT(mp != NULL);
576*0Sstevel@tonic-gate 
577*0Sstevel@tonic-gate 	if (DB_TYPE(mp) != M_MULTIDATA)
578*0Sstevel@tonic-gate 		return (NULL);
579*0Sstevel@tonic-gate 
580*0Sstevel@tonic-gate 	mmd = (multidata_t *)mp->b_rptr;
581*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
582*0Sstevel@tonic-gate 
583*0Sstevel@tonic-gate 	return (mmd);
584*0Sstevel@tonic-gate }
585*0Sstevel@tonic-gate 
586*0Sstevel@tonic-gate /*
587*0Sstevel@tonic-gate  * Return the start and end addresses of the associated buffer(s).
588*0Sstevel@tonic-gate  */
589*0Sstevel@tonic-gate void
590*0Sstevel@tonic-gate mmd_getregions(multidata_t *mmd, mbufinfo_t *mbi)
591*0Sstevel@tonic-gate {
592*0Sstevel@tonic-gate 	int i;
593*0Sstevel@tonic-gate 
594*0Sstevel@tonic-gate 	ASSERT(mmd != NULL);
595*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
596*0Sstevel@tonic-gate 	ASSERT(mbi != NULL);
597*0Sstevel@tonic-gate 
598*0Sstevel@tonic-gate 	bzero((void *)mbi, sizeof (mbufinfo_t));
599*0Sstevel@tonic-gate 
600*0Sstevel@tonic-gate 	if (mmd->mmd_hbuf != NULL) {
601*0Sstevel@tonic-gate 		mbi->hbuf_rptr = mmd->mmd_hbuf->b_rptr;
602*0Sstevel@tonic-gate 		mbi->hbuf_wptr = mmd->mmd_hbuf->b_wptr;
603*0Sstevel@tonic-gate 	}
604*0Sstevel@tonic-gate 
605*0Sstevel@tonic-gate 	mutex_enter(&mmd->mmd_pd_slab_lock);
606*0Sstevel@tonic-gate 	for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
607*0Sstevel@tonic-gate 		ASSERT(mmd->mmd_pbuf[i] != NULL);
608*0Sstevel@tonic-gate 		mbi->pbuf_ary[i].pbuf_rptr = mmd->mmd_pbuf[i]->b_rptr;
609*0Sstevel@tonic-gate 		mbi->pbuf_ary[i].pbuf_wptr = mmd->mmd_pbuf[i]->b_wptr;
610*0Sstevel@tonic-gate 
611*0Sstevel@tonic-gate 	}
612*0Sstevel@tonic-gate 	mbi->pbuf_cnt = mmd->mmd_pbuf_cnt;
613*0Sstevel@tonic-gate 	mutex_exit(&mmd->mmd_pd_slab_lock);
614*0Sstevel@tonic-gate }
615*0Sstevel@tonic-gate 
616*0Sstevel@tonic-gate /*
617*0Sstevel@tonic-gate  * Return the Multidata statistics.
618*0Sstevel@tonic-gate  */
619*0Sstevel@tonic-gate uint_t
620*0Sstevel@tonic-gate mmd_getcnt(multidata_t *mmd, uint_t *hbuf_ref, uint_t *pbuf_ref)
621*0Sstevel@tonic-gate {
622*0Sstevel@tonic-gate 	uint_t pd_cnt;
623*0Sstevel@tonic-gate 
624*0Sstevel@tonic-gate 	ASSERT(mmd != NULL);
625*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
626*0Sstevel@tonic-gate 
627*0Sstevel@tonic-gate 	mutex_enter(&(mmd->mmd_pd_slab_lock));
628*0Sstevel@tonic-gate 	if (hbuf_ref != NULL)
629*0Sstevel@tonic-gate 		*hbuf_ref = mmd->mmd_hbuf_ref;
630*0Sstevel@tonic-gate 	if (pbuf_ref != NULL)
631*0Sstevel@tonic-gate 		*pbuf_ref = mmd->mmd_pbuf_ref;
632*0Sstevel@tonic-gate 	pd_cnt = mmd->mmd_pd_cnt;
633*0Sstevel@tonic-gate 	mutex_exit(&(mmd->mmd_pd_slab_lock));
634*0Sstevel@tonic-gate 
635*0Sstevel@tonic-gate 	return (pd_cnt);
636*0Sstevel@tonic-gate }
637*0Sstevel@tonic-gate 
/*
 * Bounds check the header area described by pdi against the Multidata
 * header buffer.  Evaluates to non-zero only when the Multidata has a
 * header buffer, all four header pointers are non-NULL and ordered as
 * hdr_base <= hdr_rptr <= hdr_wptr <= hdr_lim, hdr_base does not precede
 * the header buffer's b_rptr, and MBLKIN() accepts the span of
 * PDESC_HDRSIZE(pdi) bytes starting at hdr_base.
 *
 * Both arguments are evaluated several times, so they must be free of
 * side effects.  Every use of an argument is parenthesized so that any
 * pointer-valued expression may be passed.
 */
#define	HBUF_REF_VALID(mmd, pdi)					\
	((mmd)->mmd_hbuf != NULL && (pdi)->hdr_rptr != NULL &&		\
	(pdi)->hdr_wptr != NULL && (pdi)->hdr_base != NULL &&		\
	(pdi)->hdr_lim != NULL && (pdi)->hdr_lim >= (pdi)->hdr_base &&	\
	(pdi)->hdr_wptr >= (pdi)->hdr_rptr &&				\
	(pdi)->hdr_base <= (pdi)->hdr_rptr &&				\
	(pdi)->hdr_lim >= (pdi)->hdr_wptr &&				\
	(pdi)->hdr_base >= (mmd)->mmd_hbuf->b_rptr &&			\
	MBLKIN((mmd)->mmd_hbuf,						\
	((pdi)->hdr_base - (mmd)->mmd_hbuf->b_rptr),			\
	PDESC_HDRSIZE(pdi)))
649*0Sstevel@tonic-gate 
650*0Sstevel@tonic-gate /*
651*0Sstevel@tonic-gate  * Bounds check payload area(s).
652*0Sstevel@tonic-gate  */
653*0Sstevel@tonic-gate static boolean_t
654*0Sstevel@tonic-gate pbuf_ref_valid(multidata_t *mmd, pdescinfo_t *pdi)
655*0Sstevel@tonic-gate {
656*0Sstevel@tonic-gate 	int i = 0, idx;
657*0Sstevel@tonic-gate 	boolean_t valid = B_TRUE;
658*0Sstevel@tonic-gate 	struct pld_ary_s *pa;
659*0Sstevel@tonic-gate 
660*0Sstevel@tonic-gate 	mutex_enter(&mmd->mmd_pd_slab_lock);
661*0Sstevel@tonic-gate 	if (pdi->pld_cnt == 0 || pdi->pld_cnt > mmd->mmd_pbuf_cnt) {
662*0Sstevel@tonic-gate 		mutex_exit(&mmd->mmd_pd_slab_lock);
663*0Sstevel@tonic-gate 		return (B_FALSE);
664*0Sstevel@tonic-gate 	}
665*0Sstevel@tonic-gate 
666*0Sstevel@tonic-gate 	pa = &pdi->pld_ary[0];
667*0Sstevel@tonic-gate 	while (valid && i < pdi->pld_cnt) {
668*0Sstevel@tonic-gate 		valid = (((idx = pa->pld_pbuf_idx) < mmd->mmd_pbuf_cnt) &&
669*0Sstevel@tonic-gate 		    pa->pld_rptr != NULL && pa->pld_wptr != NULL &&
670*0Sstevel@tonic-gate 		    pa->pld_wptr >= pa->pld_rptr &&
671*0Sstevel@tonic-gate 		    pa->pld_rptr >= mmd->mmd_pbuf[idx]->b_rptr &&
672*0Sstevel@tonic-gate 		    MBLKIN(mmd->mmd_pbuf[idx], (pa->pld_rptr -
673*0Sstevel@tonic-gate 			mmd->mmd_pbuf[idx]->b_rptr),
674*0Sstevel@tonic-gate 			PDESC_PLD_SPAN_SIZE(pdi, i)));
675*0Sstevel@tonic-gate 
676*0Sstevel@tonic-gate 		if (!valid) {
677*0Sstevel@tonic-gate 			MMD_DEBUG((CE_WARN,
678*0Sstevel@tonic-gate 			    "pbuf_ref_valid: pdi 0x%p pld out of bound; "
679*0Sstevel@tonic-gate 			    "index %d has pld_cnt %d pbuf_idx %d "
680*0Sstevel@tonic-gate 			    "(mmd_pbuf_cnt %d), "
681*0Sstevel@tonic-gate 			    "pld_rptr 0x%p pld_wptr 0x%p len %d "
682*0Sstevel@tonic-gate 			    "(valid 0x%p-0x%p len %d)\n", (void *)pdi,
683*0Sstevel@tonic-gate 			    i, pdi->pld_cnt, idx, mmd->mmd_pbuf_cnt,
684*0Sstevel@tonic-gate 			    (void *)pa->pld_rptr,
685*0Sstevel@tonic-gate 			    (void *)pa->pld_wptr,
686*0Sstevel@tonic-gate 			    (int)PDESC_PLD_SPAN_SIZE(pdi, i),
687*0Sstevel@tonic-gate 			    (void *)mmd->mmd_pbuf[idx]->b_rptr,
688*0Sstevel@tonic-gate 			    (void *)mmd->mmd_pbuf[idx]->b_wptr,
689*0Sstevel@tonic-gate 			    (int)MBLKL(mmd->mmd_pbuf[idx])));
690*0Sstevel@tonic-gate 		}
691*0Sstevel@tonic-gate 
692*0Sstevel@tonic-gate 		/* advance to next entry */
693*0Sstevel@tonic-gate 		i++;
694*0Sstevel@tonic-gate 		pa++;
695*0Sstevel@tonic-gate 	}
696*0Sstevel@tonic-gate 
697*0Sstevel@tonic-gate 	mutex_exit(&mmd->mmd_pd_slab_lock);
698*0Sstevel@tonic-gate 	return (valid);
699*0Sstevel@tonic-gate }
700*0Sstevel@tonic-gate 
701*0Sstevel@tonic-gate /*
702*0Sstevel@tonic-gate  * Add a packet descriptor to the Multidata.
703*0Sstevel@tonic-gate  */
704*0Sstevel@tonic-gate pdesc_t *
705*0Sstevel@tonic-gate mmd_addpdesc(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags)
706*0Sstevel@tonic-gate {
707*0Sstevel@tonic-gate 	ASSERT(mmd != NULL);
708*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
709*0Sstevel@tonic-gate 	ASSERT(pdi != NULL);
710*0Sstevel@tonic-gate 	ASSERT(pdi->flags & PDESC_HAS_REF);
711*0Sstevel@tonic-gate 
712*0Sstevel@tonic-gate 	/* do the references refer to invalid memory regions? */
713*0Sstevel@tonic-gate 	if (!mmd_speed_over_safety &&
714*0Sstevel@tonic-gate 	    (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) ||
715*0Sstevel@tonic-gate 	    ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) {
716*0Sstevel@tonic-gate 		if (err != NULL)
717*0Sstevel@tonic-gate 			*err = EINVAL;
718*0Sstevel@tonic-gate 		return (NULL);
719*0Sstevel@tonic-gate 	}
720*0Sstevel@tonic-gate 
721*0Sstevel@tonic-gate 	return (mmd_addpdesc_int(mmd, pdi, err, kmflags));
722*0Sstevel@tonic-gate }
723*0Sstevel@tonic-gate 
724*0Sstevel@tonic-gate /*
725*0Sstevel@tonic-gate  * Internal routine to add a packet descriptor, called when mmd_addpdesc
726*0Sstevel@tonic-gate  * or mmd_copy tries to allocate and add a descriptor to a Multidata.
727*0Sstevel@tonic-gate  */
728*0Sstevel@tonic-gate static pdesc_t *
729*0Sstevel@tonic-gate mmd_addpdesc_int(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags)
730*0Sstevel@tonic-gate {
731*0Sstevel@tonic-gate 	pdesc_slab_t *slab, *slab_last;
732*0Sstevel@tonic-gate 	pdesc_t *pd;
733*0Sstevel@tonic-gate 
734*0Sstevel@tonic-gate 	ASSERT(pdi->flags & PDESC_HAS_REF);
735*0Sstevel@tonic-gate 	ASSERT(!(pdi->flags & PDESC_HBUF_REF) || HBUF_REF_VALID(mmd, pdi));
736*0Sstevel@tonic-gate 	ASSERT(!(pdi->flags & PDESC_PBUF_REF) || pbuf_ref_valid(mmd, pdi));
737*0Sstevel@tonic-gate 
738*0Sstevel@tonic-gate 	if (err != NULL)
739*0Sstevel@tonic-gate 		*err = 0;
740*0Sstevel@tonic-gate 
741*0Sstevel@tonic-gate 	mutex_enter(&(mmd->mmd_pd_slab_lock));
742*0Sstevel@tonic-gate 	/*
743*0Sstevel@tonic-gate 	 * Is slab list empty or the last-added slab is full?  If so,
744*0Sstevel@tonic-gate 	 * allocate new slab for the descriptor; otherwise, use the
745*0Sstevel@tonic-gate 	 * last-added slab instead.
746*0Sstevel@tonic-gate 	 */
747*0Sstevel@tonic-gate 	slab_last = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_prev);
748*0Sstevel@tonic-gate 	if (mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q) ||
749*0Sstevel@tonic-gate 	    slab_last->pds_used == slab_last->pds_sz) {
750*0Sstevel@tonic-gate 		slab = kmem_cache_alloc(pd_slab_cache, kmflags);
751*0Sstevel@tonic-gate 		if (slab == NULL) {
752*0Sstevel@tonic-gate 			if (err != NULL)
753*0Sstevel@tonic-gate 				*err = ENOMEM;
754*0Sstevel@tonic-gate 			mutex_exit(&(mmd->mmd_pd_slab_lock));
755*0Sstevel@tonic-gate 			return (NULL);
756*0Sstevel@tonic-gate 		}
757*0Sstevel@tonic-gate 		slab->pds_mmd = mmd;
758*0Sstevel@tonic-gate 
759*0Sstevel@tonic-gate 		ASSERT(slab->pds_used == 0);
760*0Sstevel@tonic-gate 		ASSERT(slab->pds_next == NULL && slab->pds_prev == NULL);
761*0Sstevel@tonic-gate 
762*0Sstevel@tonic-gate 		/* insert slab at end of list */
763*0Sstevel@tonic-gate 		insque(&(slab->pds_next), mmd->mmd_pd_slab_q.ql_prev);
764*0Sstevel@tonic-gate 		mmd->mmd_slab_cnt++;
765*0Sstevel@tonic-gate 	} else {
766*0Sstevel@tonic-gate 		slab = slab_last;
767*0Sstevel@tonic-gate 	}
768*0Sstevel@tonic-gate 	ASSERT(slab->pds_used < slab->pds_sz);
769*0Sstevel@tonic-gate 	pd = &(slab->pds_free_desc[slab->pds_used++]);
770*0Sstevel@tonic-gate 	ASSERT(pd->pd_magic == PDESC_MAGIC);
771*0Sstevel@tonic-gate 	pd->pd_next = NULL;
772*0Sstevel@tonic-gate 	pd->pd_prev = NULL;
773*0Sstevel@tonic-gate 	pd->pd_slab = slab;
774*0Sstevel@tonic-gate 	pd->pd_pattbl = NULL;
775*0Sstevel@tonic-gate 
776*0Sstevel@tonic-gate 	/* copy over the descriptor info from caller */
777*0Sstevel@tonic-gate 	PDI_COPY(pdi, &(pd->pd_pdi));
778*0Sstevel@tonic-gate 
779*0Sstevel@tonic-gate 	if (pd->pd_flags & PDESC_HBUF_REF)
780*0Sstevel@tonic-gate 		mmd->mmd_hbuf_ref++;
781*0Sstevel@tonic-gate 	if (pd->pd_flags & PDESC_PBUF_REF)
782*0Sstevel@tonic-gate 		mmd->mmd_pbuf_ref += pd->pd_pdi.pld_cnt;
783*0Sstevel@tonic-gate 	mmd->mmd_pd_cnt++;
784*0Sstevel@tonic-gate 
785*0Sstevel@tonic-gate 	/* insert descriptor at end of list */
786*0Sstevel@tonic-gate 	insque(&(pd->pd_next), mmd->mmd_pd_q.ql_prev);
787*0Sstevel@tonic-gate 	mutex_exit(&(mmd->mmd_pd_slab_lock));
788*0Sstevel@tonic-gate 
789*0Sstevel@tonic-gate 	return (pd);
790*0Sstevel@tonic-gate }
791*0Sstevel@tonic-gate 
792*0Sstevel@tonic-gate /*
793*0Sstevel@tonic-gate  * Packet descriptor slab kmem cache constructor routine.
794*0Sstevel@tonic-gate  */
795*0Sstevel@tonic-gate /* ARGSUSED */
796*0Sstevel@tonic-gate static int
797*0Sstevel@tonic-gate pdslab_constructor(void *buf, void *cdrarg, int kmflags)
798*0Sstevel@tonic-gate {
799*0Sstevel@tonic-gate 	pdesc_slab_t *slab;
800*0Sstevel@tonic-gate 	uint_t cnt = (uint_t)(uintptr_t)cdrarg;
801*0Sstevel@tonic-gate 	int i;
802*0Sstevel@tonic-gate 
803*0Sstevel@tonic-gate 	ASSERT(cnt > 0);	/* slab size can't be zero */
804*0Sstevel@tonic-gate 
805*0Sstevel@tonic-gate 	slab = (pdesc_slab_t *)buf;
806*0Sstevel@tonic-gate 	slab->pds_next = NULL;
807*0Sstevel@tonic-gate 	slab->pds_prev = NULL;
808*0Sstevel@tonic-gate 	slab->pds_mmd = NULL;
809*0Sstevel@tonic-gate 	slab->pds_used = 0;
810*0Sstevel@tonic-gate 	slab->pds_sz = cnt;
811*0Sstevel@tonic-gate 
812*0Sstevel@tonic-gate 	for (i = 0; i < cnt; i++) {
813*0Sstevel@tonic-gate 		pdesc_t *pd = &(slab->pds_free_desc[i]);
814*0Sstevel@tonic-gate 		pd->pd_magic = PDESC_MAGIC;
815*0Sstevel@tonic-gate 	}
816*0Sstevel@tonic-gate 	return (0);
817*0Sstevel@tonic-gate }
818*0Sstevel@tonic-gate 
819*0Sstevel@tonic-gate /*
820*0Sstevel@tonic-gate  * Packet descriptor slab kmem cache destructor routine.
821*0Sstevel@tonic-gate  */
822*0Sstevel@tonic-gate /* ARGSUSED */
823*0Sstevel@tonic-gate static void
824*0Sstevel@tonic-gate pdslab_destructor(void *buf, void *cdrarg)
825*0Sstevel@tonic-gate {
826*0Sstevel@tonic-gate 	pdesc_slab_t *slab;
827*0Sstevel@tonic-gate 
828*0Sstevel@tonic-gate 	slab = (pdesc_slab_t *)buf;
829*0Sstevel@tonic-gate 	ASSERT(slab->pds_next == NULL);
830*0Sstevel@tonic-gate 	ASSERT(slab->pds_prev == NULL);
831*0Sstevel@tonic-gate 	ASSERT(slab->pds_mmd == NULL);
832*0Sstevel@tonic-gate 	ASSERT(slab->pds_used == 0);
833*0Sstevel@tonic-gate 	ASSERT(slab->pds_sz > 0);
834*0Sstevel@tonic-gate }
835*0Sstevel@tonic-gate 
836*0Sstevel@tonic-gate /*
837*0Sstevel@tonic-gate  * Remove a packet descriptor from the in-use descriptor list,
838*0Sstevel@tonic-gate  * called by mmd_rempdesc or during free.
839*0Sstevel@tonic-gate  */
840*0Sstevel@tonic-gate static pdesc_t *
841*0Sstevel@tonic-gate mmd_destroy_pdesc(multidata_t *mmd, pdesc_t *pd)
842*0Sstevel@tonic-gate {
843*0Sstevel@tonic-gate 	pdesc_t *pd_next;
844*0Sstevel@tonic-gate 
845*0Sstevel@tonic-gate 	pd_next = Q2PD(pd->pd_next);
846*0Sstevel@tonic-gate 	remque(&(pd->pd_next));
847*0Sstevel@tonic-gate 
848*0Sstevel@tonic-gate 	/* remove all local attributes */
849*0Sstevel@tonic-gate 	if (pd->pd_pattbl != NULL)
850*0Sstevel@tonic-gate 		mmd_destroy_pattbl(&(pd->pd_pattbl));
851*0Sstevel@tonic-gate 
852*0Sstevel@tonic-gate 	/* don't decrease counts for a removed descriptor */
853*0Sstevel@tonic-gate 	if (!(pd->pd_flags & PDESC_REM_DEFER)) {
854*0Sstevel@tonic-gate 		if (pd->pd_flags & PDESC_HBUF_REF) {
855*0Sstevel@tonic-gate 			ASSERT(mmd->mmd_hbuf_ref > 0);
856*0Sstevel@tonic-gate 			mmd->mmd_hbuf_ref--;
857*0Sstevel@tonic-gate 		}
858*0Sstevel@tonic-gate 		if (pd->pd_flags & PDESC_PBUF_REF) {
859*0Sstevel@tonic-gate 			ASSERT(mmd->mmd_pbuf_ref > 0);
860*0Sstevel@tonic-gate 			mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt;
861*0Sstevel@tonic-gate 		}
862*0Sstevel@tonic-gate 		ASSERT(mmd->mmd_pd_cnt > 0);
863*0Sstevel@tonic-gate 		mmd->mmd_pd_cnt--;
864*0Sstevel@tonic-gate 	}
865*0Sstevel@tonic-gate 	return (pd_next);
866*0Sstevel@tonic-gate }
867*0Sstevel@tonic-gate 
868*0Sstevel@tonic-gate /*
869*0Sstevel@tonic-gate  * Remove a packet descriptor from the Multidata.
870*0Sstevel@tonic-gate  */
871*0Sstevel@tonic-gate void
872*0Sstevel@tonic-gate mmd_rempdesc(pdesc_t *pd)
873*0Sstevel@tonic-gate {
874*0Sstevel@tonic-gate 	multidata_t *mmd;
875*0Sstevel@tonic-gate 
876*0Sstevel@tonic-gate 	ASSERT(pd->pd_magic == PDESC_MAGIC);
877*0Sstevel@tonic-gate 	ASSERT(pd->pd_slab != NULL);
878*0Sstevel@tonic-gate 
879*0Sstevel@tonic-gate 	mmd = pd->pd_slab->pds_mmd;
880*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
881*0Sstevel@tonic-gate 
882*0Sstevel@tonic-gate 	mutex_enter(&(mmd->mmd_pd_slab_lock));
883*0Sstevel@tonic-gate 	/*
884*0Sstevel@tonic-gate 	 * We can't deallocate the associated resources if the Multidata
885*0Sstevel@tonic-gate 	 * is shared with other threads, because it's possible that the
886*0Sstevel@tonic-gate 	 * descriptor handle value is held by those threads.  That's why
887*0Sstevel@tonic-gate 	 * we simply mark the entry as "removed" and decrement the counts.
888*0Sstevel@tonic-gate 	 * If there are no other threads, then we free the descriptor.
889*0Sstevel@tonic-gate 	 */
890*0Sstevel@tonic-gate 	if (mmd->mmd_dp->db_ref > 1) {
891*0Sstevel@tonic-gate 		pd->pd_flags |= PDESC_REM_DEFER;
892*0Sstevel@tonic-gate 		if (pd->pd_flags & PDESC_HBUF_REF) {
893*0Sstevel@tonic-gate 			ASSERT(mmd->mmd_hbuf_ref > 0);
894*0Sstevel@tonic-gate 			mmd->mmd_hbuf_ref--;
895*0Sstevel@tonic-gate 		}
896*0Sstevel@tonic-gate 		if (pd->pd_flags & PDESC_PBUF_REF) {
897*0Sstevel@tonic-gate 			ASSERT(mmd->mmd_pbuf_ref > 0);
898*0Sstevel@tonic-gate 			mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt;
899*0Sstevel@tonic-gate 		}
900*0Sstevel@tonic-gate 		ASSERT(mmd->mmd_pd_cnt > 0);
901*0Sstevel@tonic-gate 		mmd->mmd_pd_cnt--;
902*0Sstevel@tonic-gate 	} else {
903*0Sstevel@tonic-gate 		(void) mmd_destroy_pdesc(mmd, pd);
904*0Sstevel@tonic-gate 	}
905*0Sstevel@tonic-gate 	mutex_exit(&(mmd->mmd_pd_slab_lock));
906*0Sstevel@tonic-gate }
907*0Sstevel@tonic-gate 
908*0Sstevel@tonic-gate /*
909*0Sstevel@tonic-gate  * A generic routine to traverse the packet descriptor in-use list.
910*0Sstevel@tonic-gate  */
911*0Sstevel@tonic-gate static pdesc_t *
912*0Sstevel@tonic-gate mmd_getpdesc(multidata_t *mmd, pdesc_t *pd, pdescinfo_t *pdi, uint_t forw,
913*0Sstevel@tonic-gate     boolean_t mutex_held)
914*0Sstevel@tonic-gate {
915*0Sstevel@tonic-gate 	pdesc_t *pd_head;
916*0Sstevel@tonic-gate 
917*0Sstevel@tonic-gate 	ASSERT(pd == NULL || pd->pd_slab->pds_mmd == mmd);
918*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
919*0Sstevel@tonic-gate 	ASSERT(!mutex_held || MUTEX_HELD(&(mmd->mmd_pd_slab_lock)));
920*0Sstevel@tonic-gate 
921*0Sstevel@tonic-gate 	if (!mutex_held)
922*0Sstevel@tonic-gate 		mutex_enter(&(mmd->mmd_pd_slab_lock));
923*0Sstevel@tonic-gate 	pd_head = Q2PD(&(mmd->mmd_pd_q));
924*0Sstevel@tonic-gate 
925*0Sstevel@tonic-gate 	if (pd == NULL) {
926*0Sstevel@tonic-gate 		/*
927*0Sstevel@tonic-gate 		 * We're called by mmd_get{first,last}pdesc, and so
928*0Sstevel@tonic-gate 		 * return either the first or last list element.
929*0Sstevel@tonic-gate 		 */
930*0Sstevel@tonic-gate 		pd = forw ? Q2PD(mmd->mmd_pd_q.ql_next) :
931*0Sstevel@tonic-gate 		    Q2PD(mmd->mmd_pd_q.ql_prev);
932*0Sstevel@tonic-gate 	} else {
933*0Sstevel@tonic-gate 		/*
934*0Sstevel@tonic-gate 		 * We're called by mmd_get{next,prev}pdesc, and so
935*0Sstevel@tonic-gate 		 * return either the next or previous list element.
936*0Sstevel@tonic-gate 		 */
937*0Sstevel@tonic-gate 		pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev);
938*0Sstevel@tonic-gate 	}
939*0Sstevel@tonic-gate 
940*0Sstevel@tonic-gate 	while (pd != pd_head) {
941*0Sstevel@tonic-gate 		/* skip element if it has been removed */
942*0Sstevel@tonic-gate 		if (!(pd->pd_flags & PDESC_REM_DEFER))
943*0Sstevel@tonic-gate 			break;
944*0Sstevel@tonic-gate 		pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev);
945*0Sstevel@tonic-gate 	}
946*0Sstevel@tonic-gate 	if (!mutex_held)
947*0Sstevel@tonic-gate 		mutex_exit(&(mmd->mmd_pd_slab_lock));
948*0Sstevel@tonic-gate 
949*0Sstevel@tonic-gate 	/* return NULL if we're back at the beginning */
950*0Sstevel@tonic-gate 	if (pd == pd_head)
951*0Sstevel@tonic-gate 		pd = NULL;
952*0Sstevel@tonic-gate 
953*0Sstevel@tonic-gate 	/* got an entry; copy descriptor info to caller */
954*0Sstevel@tonic-gate 	if (pd != NULL && pdi != NULL)
955*0Sstevel@tonic-gate 		PDI_COPY(&(pd->pd_pdi), pdi);
956*0Sstevel@tonic-gate 
957*0Sstevel@tonic-gate 	ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC);
958*0Sstevel@tonic-gate 	return (pd);
959*0Sstevel@tonic-gate 
960*0Sstevel@tonic-gate }
961*0Sstevel@tonic-gate 
962*0Sstevel@tonic-gate /*
963*0Sstevel@tonic-gate  * Return the first packet descriptor in the in-use list.
964*0Sstevel@tonic-gate  */
965*0Sstevel@tonic-gate pdesc_t *
966*0Sstevel@tonic-gate mmd_getfirstpdesc(multidata_t *mmd, pdescinfo_t *pdi)
967*0Sstevel@tonic-gate {
968*0Sstevel@tonic-gate 	return (mmd_getpdesc(mmd, NULL, pdi, 1, B_FALSE));
969*0Sstevel@tonic-gate }
970*0Sstevel@tonic-gate 
971*0Sstevel@tonic-gate /*
972*0Sstevel@tonic-gate  * Return the last packet descriptor in the in-use list.
973*0Sstevel@tonic-gate  */
974*0Sstevel@tonic-gate pdesc_t *
975*0Sstevel@tonic-gate mmd_getlastpdesc(multidata_t *mmd, pdescinfo_t *pdi)
976*0Sstevel@tonic-gate {
977*0Sstevel@tonic-gate 	return (mmd_getpdesc(mmd, NULL, pdi, 0, B_FALSE));
978*0Sstevel@tonic-gate }
979*0Sstevel@tonic-gate 
980*0Sstevel@tonic-gate /*
981*0Sstevel@tonic-gate  * Return the next packet descriptor in the in-use list.
982*0Sstevel@tonic-gate  */
983*0Sstevel@tonic-gate pdesc_t *
984*0Sstevel@tonic-gate mmd_getnextpdesc(pdesc_t *pd, pdescinfo_t *pdi)
985*0Sstevel@tonic-gate {
986*0Sstevel@tonic-gate 	return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 1, B_FALSE));
987*0Sstevel@tonic-gate }
988*0Sstevel@tonic-gate 
989*0Sstevel@tonic-gate /*
990*0Sstevel@tonic-gate  * Return the previous packet descriptor in the in-use list.
991*0Sstevel@tonic-gate  */
992*0Sstevel@tonic-gate pdesc_t *
993*0Sstevel@tonic-gate mmd_getprevpdesc(pdesc_t *pd, pdescinfo_t *pdi)
994*0Sstevel@tonic-gate {
995*0Sstevel@tonic-gate 	return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 0, B_FALSE));
996*0Sstevel@tonic-gate }
997*0Sstevel@tonic-gate 
998*0Sstevel@tonic-gate /*
999*0Sstevel@tonic-gate  * Check to see if pdi stretches over c_pdi; used to ensure that a packet
1000*0Sstevel@tonic-gate  * descriptor's header and payload span may not be extended beyond the
1001*0Sstevel@tonic-gate  * current boundaries.
1002*0Sstevel@tonic-gate  */
1003*0Sstevel@tonic-gate static boolean_t
1004*0Sstevel@tonic-gate pdi_in_range(pdescinfo_t *pdi, pdescinfo_t *c_pdi)
1005*0Sstevel@tonic-gate {
1006*0Sstevel@tonic-gate 	int i;
1007*0Sstevel@tonic-gate 	struct pld_ary_s *pa = &pdi->pld_ary[0];
1008*0Sstevel@tonic-gate 	struct pld_ary_s *c_pa = &c_pdi->pld_ary[0];
1009*0Sstevel@tonic-gate 
1010*0Sstevel@tonic-gate 	if (pdi->hdr_base < c_pdi->hdr_base || pdi->hdr_lim > c_pdi->hdr_lim)
1011*0Sstevel@tonic-gate 		return (B_FALSE);
1012*0Sstevel@tonic-gate 
1013*0Sstevel@tonic-gate 	/*
1014*0Sstevel@tonic-gate 	 * We don't allow the number of span to be reduced, for the sake
1015*0Sstevel@tonic-gate 	 * of simplicity.  Instead, we provide PDESC_PLD_SPAN_CLEAR() to
1016*0Sstevel@tonic-gate 	 * clear a packet descriptor.  Note that we allow the span count to
1017*0Sstevel@tonic-gate 	 * be increased, and the bounds check for the new one happens
1018*0Sstevel@tonic-gate 	 * in pbuf_ref_valid.
1019*0Sstevel@tonic-gate 	 */
1020*0Sstevel@tonic-gate 	if (pdi->pld_cnt < c_pdi->pld_cnt)
1021*0Sstevel@tonic-gate 		return (B_FALSE);
1022*0Sstevel@tonic-gate 
1023*0Sstevel@tonic-gate 	/* compare only those which are currently defined */
1024*0Sstevel@tonic-gate 	for (i = 0; i < c_pdi->pld_cnt; i++, pa++, c_pa++) {
1025*0Sstevel@tonic-gate 		if (pa->pld_pbuf_idx != c_pa->pld_pbuf_idx ||
1026*0Sstevel@tonic-gate 		    pa->pld_rptr < c_pa->pld_rptr ||
1027*0Sstevel@tonic-gate 		    pa->pld_wptr > c_pa->pld_wptr)
1028*0Sstevel@tonic-gate 			return (B_FALSE);
1029*0Sstevel@tonic-gate 	}
1030*0Sstevel@tonic-gate 	return (B_TRUE);
1031*0Sstevel@tonic-gate }
1032*0Sstevel@tonic-gate 
1033*0Sstevel@tonic-gate /*
1034*0Sstevel@tonic-gate  * Modify the layout of a packet descriptor.
1035*0Sstevel@tonic-gate  */
1036*0Sstevel@tonic-gate pdesc_t *
1037*0Sstevel@tonic-gate mmd_adjpdesc(pdesc_t *pd, pdescinfo_t *pdi)
1038*0Sstevel@tonic-gate {
1039*0Sstevel@tonic-gate 	multidata_t *mmd;
1040*0Sstevel@tonic-gate 	pdescinfo_t *c_pdi;
1041*0Sstevel@tonic-gate 
1042*0Sstevel@tonic-gate 	ASSERT(pd != NULL);
1043*0Sstevel@tonic-gate 	ASSERT(pdi != NULL);
1044*0Sstevel@tonic-gate 	ASSERT(pd->pd_magic == PDESC_MAGIC);
1045*0Sstevel@tonic-gate 
1046*0Sstevel@tonic-gate 	mmd = pd->pd_slab->pds_mmd;
1047*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1048*0Sstevel@tonic-gate 
1049*0Sstevel@tonic-gate 	/* entry has been removed */
1050*0Sstevel@tonic-gate 	if (pd->pd_flags & PDESC_REM_DEFER)
1051*0Sstevel@tonic-gate 		return (NULL);
1052*0Sstevel@tonic-gate 
1053*0Sstevel@tonic-gate 	/* caller doesn't intend to specify any buffer reference? */
1054*0Sstevel@tonic-gate 	if (!(pdi->flags & PDESC_HAS_REF))
1055*0Sstevel@tonic-gate 		return (NULL);
1056*0Sstevel@tonic-gate 
1057*0Sstevel@tonic-gate 	/* do the references refer to invalid memory regions? */
1058*0Sstevel@tonic-gate 	if (!mmd_speed_over_safety &&
1059*0Sstevel@tonic-gate 	    (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) ||
1060*0Sstevel@tonic-gate 	    ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi))))
1061*0Sstevel@tonic-gate 		return (NULL);
1062*0Sstevel@tonic-gate 
1063*0Sstevel@tonic-gate 	/* they're not subsets of current references? */
1064*0Sstevel@tonic-gate 	c_pdi = &(pd->pd_pdi);
1065*0Sstevel@tonic-gate 	if (!pdi_in_range(pdi, c_pdi))
1066*0Sstevel@tonic-gate 		return (NULL);
1067*0Sstevel@tonic-gate 
1068*0Sstevel@tonic-gate 	/* copy over the descriptor info from caller */
1069*0Sstevel@tonic-gate 	PDI_COPY(pdi, c_pdi);
1070*0Sstevel@tonic-gate 
1071*0Sstevel@tonic-gate 	return (pd);
1072*0Sstevel@tonic-gate }
1073*0Sstevel@tonic-gate 
1074*0Sstevel@tonic-gate /*
1075*0Sstevel@tonic-gate  * Copy the contents of a packet descriptor into a new buffer.  If the
1076*0Sstevel@tonic-gate  * descriptor points to more than one buffer fragments, the contents
1077*0Sstevel@tonic-gate  * of both fragments will be joined, with the header buffer fragment
1078*0Sstevel@tonic-gate  * preceding the payload buffer fragment(s).
1079*0Sstevel@tonic-gate  */
1080*0Sstevel@tonic-gate mblk_t *
1081*0Sstevel@tonic-gate mmd_transform(pdesc_t *pd)
1082*0Sstevel@tonic-gate {
1083*0Sstevel@tonic-gate 	multidata_t *mmd;
1084*0Sstevel@tonic-gate 	pdescinfo_t *pdi;
1085*0Sstevel@tonic-gate 	mblk_t *mp;
1086*0Sstevel@tonic-gate 	int h_size = 0, p_size = 0;
1087*0Sstevel@tonic-gate 	int i, len;
1088*0Sstevel@tonic-gate 
1089*0Sstevel@tonic-gate 	ASSERT(pd != NULL);
1090*0Sstevel@tonic-gate 	ASSERT(pd->pd_magic == PDESC_MAGIC);
1091*0Sstevel@tonic-gate 
1092*0Sstevel@tonic-gate 	mmd = pd->pd_slab->pds_mmd;
1093*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1094*0Sstevel@tonic-gate 
1095*0Sstevel@tonic-gate 	/* entry has been removed */
1096*0Sstevel@tonic-gate 	if (pd->pd_flags & PDESC_REM_DEFER)
1097*0Sstevel@tonic-gate 		return (NULL);
1098*0Sstevel@tonic-gate 
1099*0Sstevel@tonic-gate 	mutex_enter(&mmd->mmd_pd_slab_lock);
1100*0Sstevel@tonic-gate 	pdi = &(pd->pd_pdi);
1101*0Sstevel@tonic-gate 	if (pdi->flags & PDESC_HBUF_REF)
1102*0Sstevel@tonic-gate 		h_size = PDESC_HDRL(pdi);
1103*0Sstevel@tonic-gate 	if (pdi->flags & PDESC_PBUF_REF) {
1104*0Sstevel@tonic-gate 		for (i = 0; i < pdi->pld_cnt; i++)
1105*0Sstevel@tonic-gate 			p_size += PDESC_PLD_SPAN_SIZE(pdi, i);
1106*0Sstevel@tonic-gate 	}
1107*0Sstevel@tonic-gate 
1108*0Sstevel@tonic-gate 	/* allocate space large enough to hold the fragment(s) */
1109*0Sstevel@tonic-gate 	ASSERT(h_size + p_size >= 0);
1110*0Sstevel@tonic-gate 	if ((mp = allocb(h_size + p_size, BPRI_HI)) == NULL) {
1111*0Sstevel@tonic-gate 		mutex_exit(&mmd->mmd_pd_slab_lock);
1112*0Sstevel@tonic-gate 		return (NULL);
1113*0Sstevel@tonic-gate 	}
1114*0Sstevel@tonic-gate 
1115*0Sstevel@tonic-gate 	/* copy over the header fragment */
1116*0Sstevel@tonic-gate 	if ((pdi->flags & PDESC_HBUF_REF) && h_size > 0) {
1117*0Sstevel@tonic-gate 		bcopy(pdi->hdr_rptr, mp->b_wptr, h_size);
1118*0Sstevel@tonic-gate 		mp->b_wptr += h_size;
1119*0Sstevel@tonic-gate 	}
1120*0Sstevel@tonic-gate 
1121*0Sstevel@tonic-gate 	/* copy over the payload fragment */
1122*0Sstevel@tonic-gate 	if ((pdi->flags & PDESC_PBUF_REF) && p_size > 0) {
1123*0Sstevel@tonic-gate 		for (i = 0; i < pdi->pld_cnt; i++) {
1124*0Sstevel@tonic-gate 			len = PDESC_PLD_SPAN_SIZE(pdi, i);
1125*0Sstevel@tonic-gate 			if (len > 0) {
1126*0Sstevel@tonic-gate 				bcopy(pdi->pld_ary[i].pld_rptr,
1127*0Sstevel@tonic-gate 				    mp->b_wptr, len);
1128*0Sstevel@tonic-gate 				mp->b_wptr += len;
1129*0Sstevel@tonic-gate 			}
1130*0Sstevel@tonic-gate 		}
1131*0Sstevel@tonic-gate 	}
1132*0Sstevel@tonic-gate 
1133*0Sstevel@tonic-gate 	mutex_exit(&mmd->mmd_pd_slab_lock);
1134*0Sstevel@tonic-gate 	return (mp);
1135*0Sstevel@tonic-gate }
1136*0Sstevel@tonic-gate 
1137*0Sstevel@tonic-gate /*
1138*0Sstevel@tonic-gate  * Return a chain of mblks representing the Multidata packet.
1139*0Sstevel@tonic-gate  */
1140*0Sstevel@tonic-gate mblk_t *
1141*0Sstevel@tonic-gate mmd_transform_link(pdesc_t *pd)
1142*0Sstevel@tonic-gate {
1143*0Sstevel@tonic-gate 	multidata_t *mmd;
1144*0Sstevel@tonic-gate 	pdescinfo_t *pdi;
1145*0Sstevel@tonic-gate 	mblk_t *nmp = NULL;
1146*0Sstevel@tonic-gate 
1147*0Sstevel@tonic-gate 	ASSERT(pd != NULL);
1148*0Sstevel@tonic-gate 	ASSERT(pd->pd_magic == PDESC_MAGIC);
1149*0Sstevel@tonic-gate 
1150*0Sstevel@tonic-gate 	mmd = pd->pd_slab->pds_mmd;
1151*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1152*0Sstevel@tonic-gate 
1153*0Sstevel@tonic-gate 	/* entry has been removed */
1154*0Sstevel@tonic-gate 	if (pd->pd_flags & PDESC_REM_DEFER)
1155*0Sstevel@tonic-gate 		return (NULL);
1156*0Sstevel@tonic-gate 
1157*0Sstevel@tonic-gate 	pdi = &(pd->pd_pdi);
1158*0Sstevel@tonic-gate 
1159*0Sstevel@tonic-gate 	/* duplicate header buffer */
1160*0Sstevel@tonic-gate 	if ((pdi->flags & PDESC_HBUF_REF)) {
1161*0Sstevel@tonic-gate 		if ((nmp = dupb(mmd->mmd_hbuf)) == NULL)
1162*0Sstevel@tonic-gate 			return (NULL);
1163*0Sstevel@tonic-gate 		nmp->b_rptr = pdi->hdr_rptr;
1164*0Sstevel@tonic-gate 		nmp->b_wptr = pdi->hdr_wptr;
1165*0Sstevel@tonic-gate 	}
1166*0Sstevel@tonic-gate 
1167*0Sstevel@tonic-gate 	/* duplicate payload buffer(s) */
1168*0Sstevel@tonic-gate 	if (pdi->flags & PDESC_PBUF_REF) {
1169*0Sstevel@tonic-gate 		int i;
1170*0Sstevel@tonic-gate 		mblk_t *mp;
1171*0Sstevel@tonic-gate 		struct pld_ary_s *pa = &pdi->pld_ary[0];
1172*0Sstevel@tonic-gate 
1173*0Sstevel@tonic-gate 		mutex_enter(&mmd->mmd_pd_slab_lock);
1174*0Sstevel@tonic-gate 		for (i = 0; i < pdi->pld_cnt; i++, pa++) {
1175*0Sstevel@tonic-gate 			ASSERT(mmd->mmd_pbuf[pa->pld_pbuf_idx] != NULL);
1176*0Sstevel@tonic-gate 
1177*0Sstevel@tonic-gate 			/* skip empty ones */
1178*0Sstevel@tonic-gate 			if (PDESC_PLD_SPAN_SIZE(pdi, i) == 0)
1179*0Sstevel@tonic-gate 				continue;
1180*0Sstevel@tonic-gate 
1181*0Sstevel@tonic-gate 			mp = dupb(mmd->mmd_pbuf[pa->pld_pbuf_idx]);
1182*0Sstevel@tonic-gate 			if (mp == NULL) {
1183*0Sstevel@tonic-gate 				if (nmp != NULL)
1184*0Sstevel@tonic-gate 					freemsg(nmp);
1185*0Sstevel@tonic-gate 				mutex_exit(&mmd->mmd_pd_slab_lock);
1186*0Sstevel@tonic-gate 				return (NULL);
1187*0Sstevel@tonic-gate 			}
1188*0Sstevel@tonic-gate 			mp->b_rptr = pa->pld_rptr;
1189*0Sstevel@tonic-gate 			mp->b_wptr = pa->pld_wptr;
1190*0Sstevel@tonic-gate 			if (nmp == NULL)
1191*0Sstevel@tonic-gate 				nmp = mp;
1192*0Sstevel@tonic-gate 			else
1193*0Sstevel@tonic-gate 				linkb(nmp, mp);
1194*0Sstevel@tonic-gate 		}
1195*0Sstevel@tonic-gate 		mutex_exit(&mmd->mmd_pd_slab_lock);
1196*0Sstevel@tonic-gate 	}
1197*0Sstevel@tonic-gate 
1198*0Sstevel@tonic-gate 	return (nmp);
1199*0Sstevel@tonic-gate }
1200*0Sstevel@tonic-gate 
1201*0Sstevel@tonic-gate /*
1202*0Sstevel@tonic-gate  * Return duplicate message block(s) of the associated buffer(s).
1203*0Sstevel@tonic-gate  */
1204*0Sstevel@tonic-gate int
1205*0Sstevel@tonic-gate mmd_dupbufs(multidata_t *mmd, mblk_t **hmp, mblk_t **pmp)
1206*0Sstevel@tonic-gate {
1207*0Sstevel@tonic-gate 	ASSERT(mmd != NULL);
1208*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1209*0Sstevel@tonic-gate 
1210*0Sstevel@tonic-gate 	if (hmp != NULL) {
1211*0Sstevel@tonic-gate 		*hmp = NULL;
1212*0Sstevel@tonic-gate 		if (mmd->mmd_hbuf != NULL &&
1213*0Sstevel@tonic-gate 		    (*hmp = dupb(mmd->mmd_hbuf)) == NULL)
1214*0Sstevel@tonic-gate 			return (-1);
1215*0Sstevel@tonic-gate 	}
1216*0Sstevel@tonic-gate 
1217*0Sstevel@tonic-gate 	if (pmp != NULL) {
1218*0Sstevel@tonic-gate 		int i;
1219*0Sstevel@tonic-gate 		mblk_t *mp;
1220*0Sstevel@tonic-gate 
1221*0Sstevel@tonic-gate 		mutex_enter(&mmd->mmd_pd_slab_lock);
1222*0Sstevel@tonic-gate 		*pmp = NULL;
1223*0Sstevel@tonic-gate 		for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
1224*0Sstevel@tonic-gate 			ASSERT(mmd->mmd_pbuf[i] != NULL);
1225*0Sstevel@tonic-gate 			mp = dupb(mmd->mmd_pbuf[i]);
1226*0Sstevel@tonic-gate 			if (mp == NULL) {
1227*0Sstevel@tonic-gate 				if (hmp != NULL && *hmp != NULL)
1228*0Sstevel@tonic-gate 					freeb(*hmp);
1229*0Sstevel@tonic-gate 				if (*pmp != NULL)
1230*0Sstevel@tonic-gate 					freemsg(*pmp);
1231*0Sstevel@tonic-gate 				mutex_exit(&mmd->mmd_pd_slab_lock);
1232*0Sstevel@tonic-gate 				return (-1);
1233*0Sstevel@tonic-gate 			}
1234*0Sstevel@tonic-gate 			if (*pmp == NULL)
1235*0Sstevel@tonic-gate 				*pmp = mp;
1236*0Sstevel@tonic-gate 			else
1237*0Sstevel@tonic-gate 				linkb(*pmp, mp);
1238*0Sstevel@tonic-gate 		}
1239*0Sstevel@tonic-gate 		mutex_exit(&mmd->mmd_pd_slab_lock);
1240*0Sstevel@tonic-gate 	}
1241*0Sstevel@tonic-gate 
1242*0Sstevel@tonic-gate 	return (0);
1243*0Sstevel@tonic-gate }
1244*0Sstevel@tonic-gate 
1245*0Sstevel@tonic-gate /*
1246*0Sstevel@tonic-gate  * Return the layout of a packet descriptor.
1247*0Sstevel@tonic-gate  */
1248*0Sstevel@tonic-gate int
1249*0Sstevel@tonic-gate mmd_getpdescinfo(pdesc_t *pd, pdescinfo_t *pdi)
1250*0Sstevel@tonic-gate {
1251*0Sstevel@tonic-gate 	ASSERT(pd != NULL);
1252*0Sstevel@tonic-gate 	ASSERT(pd->pd_magic == PDESC_MAGIC);
1253*0Sstevel@tonic-gate 	ASSERT(pd->pd_slab != NULL);
1254*0Sstevel@tonic-gate 	ASSERT(pd->pd_slab->pds_mmd->mmd_magic == MULTIDATA_MAGIC);
1255*0Sstevel@tonic-gate 	ASSERT(pdi != NULL);
1256*0Sstevel@tonic-gate 
1257*0Sstevel@tonic-gate 	/* entry has been removed */
1258*0Sstevel@tonic-gate 	if (pd->pd_flags & PDESC_REM_DEFER)
1259*0Sstevel@tonic-gate 		return (-1);
1260*0Sstevel@tonic-gate 
1261*0Sstevel@tonic-gate 	/* copy descriptor info to caller */
1262*0Sstevel@tonic-gate 	PDI_COPY(&(pd->pd_pdi), pdi);
1263*0Sstevel@tonic-gate 
1264*0Sstevel@tonic-gate 	return (0);
1265*0Sstevel@tonic-gate }
1266*0Sstevel@tonic-gate 
1267*0Sstevel@tonic-gate /*
1268*0Sstevel@tonic-gate  * Add a global or local attribute to a Multidata.  Global attribute
1269*0Sstevel@tonic-gate  * association is specified by a NULL packet descriptor.
1270*0Sstevel@tonic-gate  */
1271*0Sstevel@tonic-gate pattr_t *
1272*0Sstevel@tonic-gate mmd_addpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai,
1273*0Sstevel@tonic-gate     boolean_t persistent, int kmflags)
1274*0Sstevel@tonic-gate {
1275*0Sstevel@tonic-gate 	patbkt_t **tbl_p;
1276*0Sstevel@tonic-gate 	patbkt_t *tbl, *o_tbl;
1277*0Sstevel@tonic-gate 	patbkt_t *bkt;
1278*0Sstevel@tonic-gate 	pattr_t *pa;
1279*0Sstevel@tonic-gate 	uint_t size;
1280*0Sstevel@tonic-gate 
1281*0Sstevel@tonic-gate 	ASSERT(mmd != NULL);
1282*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1283*0Sstevel@tonic-gate 	ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC);
1284*0Sstevel@tonic-gate 	ASSERT(pai != NULL);
1285*0Sstevel@tonic-gate 
1286*0Sstevel@tonic-gate 	/* pointer to the attribute hash table (local or global) */
1287*0Sstevel@tonic-gate 	tbl_p = pd != NULL ? &(pd->pd_pattbl) : &(mmd->mmd_pattbl);
1288*0Sstevel@tonic-gate 
1289*0Sstevel@tonic-gate 	/*
1290*0Sstevel@tonic-gate 	 * See if the hash table has not yet been created; if so,
1291*0Sstevel@tonic-gate 	 * we create the table and store its address atomically.
1292*0Sstevel@tonic-gate 	 */
1293*0Sstevel@tonic-gate 	if ((tbl = *tbl_p) == NULL) {
1294*0Sstevel@tonic-gate 		tbl = kmem_cache_alloc(pattbl_cache, kmflags);
1295*0Sstevel@tonic-gate 		if (tbl == NULL)
1296*0Sstevel@tonic-gate 			return (NULL);
1297*0Sstevel@tonic-gate 
1298*0Sstevel@tonic-gate 		/* if someone got there first, use his table instead */
1299*0Sstevel@tonic-gate 		if ((o_tbl = casptr(tbl_p, NULL, tbl)) != NULL) {
1300*0Sstevel@tonic-gate 			kmem_cache_free(pattbl_cache, tbl);
1301*0Sstevel@tonic-gate 			tbl = o_tbl;
1302*0Sstevel@tonic-gate 		}
1303*0Sstevel@tonic-gate 	}
1304*0Sstevel@tonic-gate 
1305*0Sstevel@tonic-gate 	ASSERT(tbl->pbkt_tbl_sz > 0);
1306*0Sstevel@tonic-gate 	bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]);
1307*0Sstevel@tonic-gate 
1308*0Sstevel@tonic-gate 	/* attribute of the same type already exists? */
1309*0Sstevel@tonic-gate 	if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL)
1310*0Sstevel@tonic-gate 		return (NULL);
1311*0Sstevel@tonic-gate 
1312*0Sstevel@tonic-gate 	size = sizeof (*pa) + pai->len;
1313*0Sstevel@tonic-gate 	if ((pa = kmem_zalloc(size, kmflags)) == NULL)
1314*0Sstevel@tonic-gate 		return (NULL);
1315*0Sstevel@tonic-gate 
1316*0Sstevel@tonic-gate 	pa->pat_magic = PATTR_MAGIC;
1317*0Sstevel@tonic-gate 	pa->pat_lock = &(bkt->pbkt_lock);
1318*0Sstevel@tonic-gate 	pa->pat_mmd = mmd;
1319*0Sstevel@tonic-gate 	pa->pat_buflen = size;
1320*0Sstevel@tonic-gate 	pa->pat_type = pai->type;
1321*0Sstevel@tonic-gate 	pai->buf = pai->len > 0 ? ((uchar_t *)(pa + 1)) : NULL;
1322*0Sstevel@tonic-gate 
1323*0Sstevel@tonic-gate 	if (persistent)
1324*0Sstevel@tonic-gate 		pa->pat_flags = PATTR_PERSIST;
1325*0Sstevel@tonic-gate 
1326*0Sstevel@tonic-gate 	/* insert attribute at end of hash chain */
1327*0Sstevel@tonic-gate 	mutex_enter(&(bkt->pbkt_lock));
1328*0Sstevel@tonic-gate 	insque(&(pa->pat_next), bkt->pbkt_pattr_q.ql_prev);
1329*0Sstevel@tonic-gate 	mutex_exit(&(bkt->pbkt_lock));
1330*0Sstevel@tonic-gate 
1331*0Sstevel@tonic-gate 	return (pa);
1332*0Sstevel@tonic-gate }
1333*0Sstevel@tonic-gate 
1334*0Sstevel@tonic-gate /*
1335*0Sstevel@tonic-gate  * Attribute hash table kmem cache constructor routine.
1336*0Sstevel@tonic-gate  */
1337*0Sstevel@tonic-gate /* ARGSUSED */
1338*0Sstevel@tonic-gate static int
1339*0Sstevel@tonic-gate pattbl_constructor(void *buf, void *cdrarg, int kmflags)
1340*0Sstevel@tonic-gate {
1341*0Sstevel@tonic-gate 	patbkt_t *bkt;
1342*0Sstevel@tonic-gate 	uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg;
1343*0Sstevel@tonic-gate 	uint_t i;
1344*0Sstevel@tonic-gate 
1345*0Sstevel@tonic-gate 	ASSERT(tbl_sz > 0);	/* table size can't be zero */
1346*0Sstevel@tonic-gate 
1347*0Sstevel@tonic-gate 	for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) {
1348*0Sstevel@tonic-gate 		mutex_init(&(bkt->pbkt_lock), NULL, MUTEX_DRIVER, NULL);
1349*0Sstevel@tonic-gate 		QL_INIT(&(bkt->pbkt_pattr_q));
1350*0Sstevel@tonic-gate 
1351*0Sstevel@tonic-gate 		/* first bucket contains the table size */
1352*0Sstevel@tonic-gate 		bkt->pbkt_tbl_sz = i == 0 ? tbl_sz : 0;
1353*0Sstevel@tonic-gate 	}
1354*0Sstevel@tonic-gate 	return (0);
1355*0Sstevel@tonic-gate }
1356*0Sstevel@tonic-gate 
1357*0Sstevel@tonic-gate /*
1358*0Sstevel@tonic-gate  * Attribute hash table kmem cache destructor routine.
1359*0Sstevel@tonic-gate  */
1360*0Sstevel@tonic-gate /* ARGSUSED */
1361*0Sstevel@tonic-gate static void
1362*0Sstevel@tonic-gate pattbl_destructor(void *buf, void *cdrarg)
1363*0Sstevel@tonic-gate {
1364*0Sstevel@tonic-gate 	patbkt_t *bkt;
1365*0Sstevel@tonic-gate 	uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg;
1366*0Sstevel@tonic-gate 	uint_t i;
1367*0Sstevel@tonic-gate 
1368*0Sstevel@tonic-gate 	ASSERT(tbl_sz > 0);	/* table size can't be zero */
1369*0Sstevel@tonic-gate 
1370*0Sstevel@tonic-gate 	for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) {
1371*0Sstevel@tonic-gate 		mutex_destroy(&(bkt->pbkt_lock));
1372*0Sstevel@tonic-gate 		ASSERT(bkt->pbkt_pattr_q.ql_next == &(bkt->pbkt_pattr_q));
1373*0Sstevel@tonic-gate 		ASSERT(i > 0 || bkt->pbkt_tbl_sz == tbl_sz);
1374*0Sstevel@tonic-gate 	}
1375*0Sstevel@tonic-gate }
1376*0Sstevel@tonic-gate 
1377*0Sstevel@tonic-gate /*
1378*0Sstevel@tonic-gate  * Destroy an attribute hash table, called by mmd_rempdesc or during free.
1379*0Sstevel@tonic-gate  */
1380*0Sstevel@tonic-gate static void
1381*0Sstevel@tonic-gate mmd_destroy_pattbl(patbkt_t **tbl)
1382*0Sstevel@tonic-gate {
1383*0Sstevel@tonic-gate 	patbkt_t *bkt;
1384*0Sstevel@tonic-gate 	pattr_t *pa, *pa_next;
1385*0Sstevel@tonic-gate 	uint_t i, tbl_sz;
1386*0Sstevel@tonic-gate 
1387*0Sstevel@tonic-gate 	ASSERT(tbl != NULL);
1388*0Sstevel@tonic-gate 	bkt = *tbl;
1389*0Sstevel@tonic-gate 	tbl_sz = bkt->pbkt_tbl_sz;
1390*0Sstevel@tonic-gate 
1391*0Sstevel@tonic-gate 	/* make sure caller passes in the first bucket */
1392*0Sstevel@tonic-gate 	ASSERT(tbl_sz > 0);
1393*0Sstevel@tonic-gate 
1394*0Sstevel@tonic-gate 	/* destroy the contents of each bucket */
1395*0Sstevel@tonic-gate 	for (i = 0; i < tbl_sz; i++, bkt++) {
1396*0Sstevel@tonic-gate 		/* we ought to be exclusive at this point */
1397*0Sstevel@tonic-gate 		ASSERT(MUTEX_NOT_HELD(&(bkt->pbkt_lock)));
1398*0Sstevel@tonic-gate 
1399*0Sstevel@tonic-gate 		pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1400*0Sstevel@tonic-gate 		while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) {
1401*0Sstevel@tonic-gate 			ASSERT(pa->pat_magic == PATTR_MAGIC);
1402*0Sstevel@tonic-gate 			pa_next = Q2PATTR(pa->pat_next);
1403*0Sstevel@tonic-gate 			remque(&(pa->pat_next));
1404*0Sstevel@tonic-gate 			kmem_free(pa, pa->pat_buflen);
1405*0Sstevel@tonic-gate 			pa = pa_next;
1406*0Sstevel@tonic-gate 		}
1407*0Sstevel@tonic-gate 	}
1408*0Sstevel@tonic-gate 
1409*0Sstevel@tonic-gate 	kmem_cache_free(pattbl_cache, *tbl);
1410*0Sstevel@tonic-gate 	*tbl = NULL;
1411*0Sstevel@tonic-gate 
1412*0Sstevel@tonic-gate 	/* commit all previous stores */
1413*0Sstevel@tonic-gate 	membar_producer();
1414*0Sstevel@tonic-gate }
1415*0Sstevel@tonic-gate 
1416*0Sstevel@tonic-gate /*
1417*0Sstevel@tonic-gate  * Copy the contents of an attribute hash table, called by mmd_copy.
1418*0Sstevel@tonic-gate  */
1419*0Sstevel@tonic-gate static int
1420*0Sstevel@tonic-gate mmd_copy_pattbl(patbkt_t *src_tbl, multidata_t *n_mmd, pdesc_t *n_pd,
1421*0Sstevel@tonic-gate     int kmflags)
1422*0Sstevel@tonic-gate {
1423*0Sstevel@tonic-gate 	patbkt_t *bkt;
1424*0Sstevel@tonic-gate 	pattr_t *pa;
1425*0Sstevel@tonic-gate 	pattrinfo_t pai;
1426*0Sstevel@tonic-gate 	uint_t i, tbl_sz;
1427*0Sstevel@tonic-gate 
1428*0Sstevel@tonic-gate 	ASSERT(src_tbl != NULL);
1429*0Sstevel@tonic-gate 	bkt = src_tbl;
1430*0Sstevel@tonic-gate 	tbl_sz = bkt->pbkt_tbl_sz;
1431*0Sstevel@tonic-gate 
1432*0Sstevel@tonic-gate 	/* make sure caller passes in the first bucket */
1433*0Sstevel@tonic-gate 	ASSERT(tbl_sz > 0);
1434*0Sstevel@tonic-gate 
1435*0Sstevel@tonic-gate 	for (i = 0; i < tbl_sz; i++, bkt++) {
1436*0Sstevel@tonic-gate 		mutex_enter(&(bkt->pbkt_lock));
1437*0Sstevel@tonic-gate 		pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1438*0Sstevel@tonic-gate 		while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) {
1439*0Sstevel@tonic-gate 			pattr_t *pa_next = Q2PATTR(pa->pat_next);
1440*0Sstevel@tonic-gate 
1441*0Sstevel@tonic-gate 			/* skip if it's removed */
1442*0Sstevel@tonic-gate 			if (pa->pat_flags & PATTR_REM_DEFER) {
1443*0Sstevel@tonic-gate 				pa = pa_next;
1444*0Sstevel@tonic-gate 				continue;
1445*0Sstevel@tonic-gate 			}
1446*0Sstevel@tonic-gate 
1447*0Sstevel@tonic-gate 			pai.type = pa->pat_type;
1448*0Sstevel@tonic-gate 			pai.len = pa->pat_buflen - sizeof (*pa);
1449*0Sstevel@tonic-gate 			if (mmd_addpattr(n_mmd, n_pd, &pai, (pa->pat_flags &
1450*0Sstevel@tonic-gate 			    PATTR_PERSIST) != 0, kmflags) == NULL) {
1451*0Sstevel@tonic-gate 				mutex_exit(&(bkt->pbkt_lock));
1452*0Sstevel@tonic-gate 				return (-1);
1453*0Sstevel@tonic-gate 			}
1454*0Sstevel@tonic-gate 
1455*0Sstevel@tonic-gate 			/* copy over the contents */
1456*0Sstevel@tonic-gate 			if (pai.buf != NULL)
1457*0Sstevel@tonic-gate 				bcopy(pa + 1, pai.buf, pai.len);
1458*0Sstevel@tonic-gate 
1459*0Sstevel@tonic-gate 			pa = pa_next;
1460*0Sstevel@tonic-gate 		}
1461*0Sstevel@tonic-gate 		mutex_exit(&(bkt->pbkt_lock));
1462*0Sstevel@tonic-gate 	}
1463*0Sstevel@tonic-gate 
1464*0Sstevel@tonic-gate 	return (0);
1465*0Sstevel@tonic-gate }
1466*0Sstevel@tonic-gate 
1467*0Sstevel@tonic-gate /*
1468*0Sstevel@tonic-gate  * Search for an attribute type within an attribute hash bucket.
1469*0Sstevel@tonic-gate  */
1470*0Sstevel@tonic-gate static pattr_t *
1471*0Sstevel@tonic-gate mmd_find_pattr(patbkt_t *bkt, uint_t type)
1472*0Sstevel@tonic-gate {
1473*0Sstevel@tonic-gate 	pattr_t *pa_head, *pa;
1474*0Sstevel@tonic-gate 
1475*0Sstevel@tonic-gate 	mutex_enter(&(bkt->pbkt_lock));
1476*0Sstevel@tonic-gate 	pa_head = Q2PATTR(&(bkt->pbkt_pattr_q));
1477*0Sstevel@tonic-gate 	pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1478*0Sstevel@tonic-gate 
1479*0Sstevel@tonic-gate 	while (pa != pa_head) {
1480*0Sstevel@tonic-gate 		ASSERT(pa->pat_magic == PATTR_MAGIC);
1481*0Sstevel@tonic-gate 
1482*0Sstevel@tonic-gate 		/* return a match; we treat removed entry as non-existent */
1483*0Sstevel@tonic-gate 		if (pa->pat_type == type && !(pa->pat_flags & PATTR_REM_DEFER))
1484*0Sstevel@tonic-gate 			break;
1485*0Sstevel@tonic-gate 		pa = Q2PATTR(pa->pat_next);
1486*0Sstevel@tonic-gate 	}
1487*0Sstevel@tonic-gate 	mutex_exit(&(bkt->pbkt_lock));
1488*0Sstevel@tonic-gate 
1489*0Sstevel@tonic-gate 	return (pa == pa_head ? NULL : pa);
1490*0Sstevel@tonic-gate }
1491*0Sstevel@tonic-gate 
1492*0Sstevel@tonic-gate /*
1493*0Sstevel@tonic-gate  * Remove an attribute from a Multidata.
1494*0Sstevel@tonic-gate  */
1495*0Sstevel@tonic-gate void
1496*0Sstevel@tonic-gate mmd_rempattr(pattr_t *pa)
1497*0Sstevel@tonic-gate {
1498*0Sstevel@tonic-gate 	kmutex_t *pat_lock = pa->pat_lock;
1499*0Sstevel@tonic-gate 
1500*0Sstevel@tonic-gate 	ASSERT(pa->pat_magic == PATTR_MAGIC);
1501*0Sstevel@tonic-gate 
1502*0Sstevel@tonic-gate 	/* ignore if attribute was marked as persistent */
1503*0Sstevel@tonic-gate 	if ((pa->pat_flags & PATTR_PERSIST) != 0)
1504*0Sstevel@tonic-gate 		return;
1505*0Sstevel@tonic-gate 
1506*0Sstevel@tonic-gate 	mutex_enter(pat_lock);
1507*0Sstevel@tonic-gate 	/*
1508*0Sstevel@tonic-gate 	 * We can't deallocate the associated resources if the Multidata
1509*0Sstevel@tonic-gate 	 * is shared with other threads, because it's possible that the
1510*0Sstevel@tonic-gate 	 * attribute handle value is held by those threads.  That's why
1511*0Sstevel@tonic-gate 	 * we simply mark the entry as "removed".  If there are no other
1512*0Sstevel@tonic-gate 	 * threads, then we free the attribute.
1513*0Sstevel@tonic-gate 	 */
1514*0Sstevel@tonic-gate 	if (pa->pat_mmd->mmd_dp->db_ref > 1) {
1515*0Sstevel@tonic-gate 		pa->pat_flags |= PATTR_REM_DEFER;
1516*0Sstevel@tonic-gate 	} else {
1517*0Sstevel@tonic-gate 		remque(&(pa->pat_next));
1518*0Sstevel@tonic-gate 		kmem_free(pa, pa->pat_buflen);
1519*0Sstevel@tonic-gate 	}
1520*0Sstevel@tonic-gate 	mutex_exit(pat_lock);
1521*0Sstevel@tonic-gate }
1522*0Sstevel@tonic-gate 
1523*0Sstevel@tonic-gate /*
1524*0Sstevel@tonic-gate  * Find an attribute (according to its type) and return its handle.
1525*0Sstevel@tonic-gate  */
1526*0Sstevel@tonic-gate pattr_t *
1527*0Sstevel@tonic-gate mmd_getpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai)
1528*0Sstevel@tonic-gate {
1529*0Sstevel@tonic-gate 	patbkt_t *tbl, *bkt;
1530*0Sstevel@tonic-gate 	pattr_t *pa;
1531*0Sstevel@tonic-gate 
1532*0Sstevel@tonic-gate 	ASSERT(mmd != NULL);
1533*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1534*0Sstevel@tonic-gate 	ASSERT(pai != NULL);
1535*0Sstevel@tonic-gate 
1536*0Sstevel@tonic-gate 	/* get the right attribute hash table (local or global) */
1537*0Sstevel@tonic-gate 	tbl = pd != NULL ? pd->pd_pattbl : mmd->mmd_pattbl;
1538*0Sstevel@tonic-gate 
1539*0Sstevel@tonic-gate 	/* attribute hash table doesn't exist? */
1540*0Sstevel@tonic-gate 	if (tbl == NULL)
1541*0Sstevel@tonic-gate 		return (NULL);
1542*0Sstevel@tonic-gate 
1543*0Sstevel@tonic-gate 	ASSERT(tbl->pbkt_tbl_sz > 0);
1544*0Sstevel@tonic-gate 	bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]);
1545*0Sstevel@tonic-gate 
1546*0Sstevel@tonic-gate 	if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) {
1547*0Sstevel@tonic-gate 		ASSERT(pa->pat_buflen >= sizeof (*pa));
1548*0Sstevel@tonic-gate 		pai->len = pa->pat_buflen - sizeof (*pa);
1549*0Sstevel@tonic-gate 		pai->buf = pai->len > 0 ?
1550*0Sstevel@tonic-gate 		    (uchar_t *)pa + sizeof (pattr_t) : NULL;
1551*0Sstevel@tonic-gate 	}
1552*0Sstevel@tonic-gate 	ASSERT(pa == NULL || pa->pat_magic == PATTR_MAGIC);
1553*0Sstevel@tonic-gate 	return (pa);
1554*0Sstevel@tonic-gate }
1555*0Sstevel@tonic-gate 
1556*0Sstevel@tonic-gate /*
1557*0Sstevel@tonic-gate  * Return total size of buffers and total size of areas referenced
1558*0Sstevel@tonic-gate  * by all in-use (unremoved) packet descriptors.
1559*0Sstevel@tonic-gate  */
1560*0Sstevel@tonic-gate void
1561*0Sstevel@tonic-gate mmd_getsize(multidata_t *mmd, uint_t *ptotal, uint_t *pinuse)
1562*0Sstevel@tonic-gate {
1563*0Sstevel@tonic-gate 	pdesc_t *pd;
1564*0Sstevel@tonic-gate 	pdescinfo_t *pdi;
1565*0Sstevel@tonic-gate 	int i;
1566*0Sstevel@tonic-gate 
1567*0Sstevel@tonic-gate 	ASSERT(mmd != NULL);
1568*0Sstevel@tonic-gate 	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1569*0Sstevel@tonic-gate 
1570*0Sstevel@tonic-gate 	mutex_enter(&mmd->mmd_pd_slab_lock);
1571*0Sstevel@tonic-gate 	if (ptotal != NULL) {
1572*0Sstevel@tonic-gate 		*ptotal = 0;
1573*0Sstevel@tonic-gate 
1574*0Sstevel@tonic-gate 		if (mmd->mmd_hbuf != NULL)
1575*0Sstevel@tonic-gate 			*ptotal += MBLKL(mmd->mmd_hbuf);
1576*0Sstevel@tonic-gate 
1577*0Sstevel@tonic-gate 		for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
1578*0Sstevel@tonic-gate 			ASSERT(mmd->mmd_pbuf[i] != NULL);
1579*0Sstevel@tonic-gate 			*ptotal += MBLKL(mmd->mmd_pbuf[i]);
1580*0Sstevel@tonic-gate 		}
1581*0Sstevel@tonic-gate 	}
1582*0Sstevel@tonic-gate 	if (pinuse != NULL) {
1583*0Sstevel@tonic-gate 		*pinuse = 0;
1584*0Sstevel@tonic-gate 
1585*0Sstevel@tonic-gate 		/* first pdesc */
1586*0Sstevel@tonic-gate 		pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE);
1587*0Sstevel@tonic-gate 		while (pd != NULL) {
1588*0Sstevel@tonic-gate 			pdi = &pd->pd_pdi;
1589*0Sstevel@tonic-gate 
1590*0Sstevel@tonic-gate 			/* next pdesc */
1591*0Sstevel@tonic-gate 			pd = mmd_getpdesc(mmd, pd, NULL, 1, B_TRUE);
1592*0Sstevel@tonic-gate 
1593*0Sstevel@tonic-gate 			/* skip over removed descriptor */
1594*0Sstevel@tonic-gate 			if (pdi->flags & PDESC_REM_DEFER)
1595*0Sstevel@tonic-gate 				continue;
1596*0Sstevel@tonic-gate 
1597*0Sstevel@tonic-gate 			if (pdi->flags & PDESC_HBUF_REF)
1598*0Sstevel@tonic-gate 				*pinuse += PDESC_HDRL(pdi);
1599*0Sstevel@tonic-gate 
1600*0Sstevel@tonic-gate 			if (pdi->flags & PDESC_PBUF_REF) {
1601*0Sstevel@tonic-gate 				for (i = 0; i < pdi->pld_cnt; i++)
1602*0Sstevel@tonic-gate 					*pinuse += PDESC_PLDL(pdi, i);
1603*0Sstevel@tonic-gate 			}
1604*0Sstevel@tonic-gate 		}
1605*0Sstevel@tonic-gate 	}
1606*0Sstevel@tonic-gate 	mutex_exit(&mmd->mmd_pd_slab_lock);
1607*0Sstevel@tonic-gate }
1608