/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Multidata, as described in the following papers:
 *
 * Adi Masputra,
 * Multidata V.2: VA-Disjoint Packet Extents Framework Interface
 * Design Specification. August 2004.
 * Available as http://sac.sfbay/PSARC/2004/594/materials/mmd2.pdf.
 *
 * Adi Masputra,
 * Multidata Interface Design Specification. Sep 2002.
 * Available as http://sac.sfbay/PSARC/2002/276/materials/mmd.pdf.
 *
 * Adi Masputra, Frank DiMambro, Kacheong Poon,
 * An Efficient Networking Transmit Mechanism for Solaris:
 * Multidata Transmit (MDT). May 2002.
 * Available as http://sac.sfbay/PSARC/2002/276/materials/mdt.pdf.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/atomic.h>

#include <sys/multidata.h>
#include <sys/multidata_impl.h>

static int mmd_constructor(void *, void *, int);
static void mmd_destructor(void *, void *);
static int pdslab_constructor(void *, void *, int);
static void pdslab_destructor(void *, void *);
static int pattbl_constructor(void *, void *, int);
static void pattbl_destructor(void *, void *);
static void mmd_esballoc_free(caddr_t);
static int mmd_copy_pattbl(patbkt_t *, multidata_t *, pdesc_t *, int);

static boolean_t pbuf_ref_valid(multidata_t *, pdescinfo_t *);
#pragma inline(pbuf_ref_valid)

static boolean_t pdi_in_range(pdescinfo_t *, pdescinfo_t *);
#pragma inline(pdi_in_range)

static pdesc_t *mmd_addpdesc_int(multidata_t *, pdescinfo_t *, int *, int);
#pragma inline(mmd_addpdesc_int)

static void mmd_destroy_pattbl(patbkt_t **);
#pragma inline(mmd_destroy_pattbl)

static pattr_t *mmd_find_pattr(patbkt_t *, uint_t);
#pragma inline(mmd_find_pattr)

static pdesc_t *mmd_destroy_pdesc(multidata_t *, pdesc_t *);
#pragma inline(mmd_destroy_pdesc)

static pdesc_t *mmd_getpdesc(multidata_t *, pdesc_t *, pdescinfo_t *, uint_t,
    boolean_t);
#pragma inline(mmd_getpdesc)

static struct kmem_cache *mmd_cache;
static struct kmem_cache *pd_slab_cache;
static struct kmem_cache *pattbl_cache;

int mmd_debug = 1;
#define MMD_DEBUG(s) if (mmd_debug > 0) cmn_err s

/*
 * Set this to B_TRUE to bypass pdesc bounds checking.
 */
boolean_t mmd_speed_over_safety = B_FALSE;

/*
 * Patchable kmem_cache flags.
 */
int mmd_kmem_flags = 0;
int pdslab_kmem_flags = 0;
int pattbl_kmem_flags = 0;

/*
 * Alignment (in bytes) of our kmem caches.
 */
#define MULTIDATA_CACHE_ALIGN 64

/*
 * Default number of packet descriptors per descriptor slab. Making
 * this too small will trigger more descriptor slab allocation; making
 * it too large will create too many unclaimed descriptors.
 */
#define PDSLAB_SZ 15
uint_t pdslab_sz = PDSLAB_SZ;

/*
 * Default attribute hash table size. It's okay to set this to a small
 * value (even to 1) because there aren't that many attributes currently
 * defined, and because we assume there won't be many attributes associated
 * with a Multidata at a given time. Increasing the size will reduce
 * attribute search time (given a large number of attributes in a Multidata),
 * and decreasing it will reduce the memory footprint and the overhead
 * associated with managing the table.
 */
#define PATTBL_SZ 1
uint_t pattbl_sz = PATTBL_SZ;

/*
 * Attribute hash key.
 */
#define PATTBL_HASH(x, sz) ((x) % (sz))

/*
 * Structure that precedes each Multidata metadata buffer.
 */
struct mmd_buf_info {
	frtn_t	frp;		/* free routine */
	uint_t	buf_len;	/* length of kmem buffer */
};

/*
 * The size of each metadata buffer.
 */
#define MMD_CACHE_SIZE \
	(sizeof (struct mmd_buf_info) + sizeof (multidata_t))

/*
 * Called during startup in order to create the Multidata kmem caches.
 */
void
mmd_init(void)
{
	pdslab_sz = MAX(1, pdslab_sz);	/* at least 1 descriptor */
	pattbl_sz = MAX(1, pattbl_sz);	/* at least 1 bucket */

	mmd_cache = kmem_cache_create("multidata", MMD_CACHE_SIZE,
	    MULTIDATA_CACHE_ALIGN, mmd_constructor, mmd_destructor,
	    NULL, NULL, NULL, mmd_kmem_flags);

	pd_slab_cache = kmem_cache_create("multidata_pdslab",
	    PDESC_SLAB_SIZE(pdslab_sz), MULTIDATA_CACHE_ALIGN,
	    pdslab_constructor, pdslab_destructor, NULL,
	    (void *)(uintptr_t)pdslab_sz, NULL, pdslab_kmem_flags);

	pattbl_cache = kmem_cache_create("multidata_pattbl",
	    sizeof (patbkt_t) * pattbl_sz, MULTIDATA_CACHE_ALIGN,
	    pattbl_constructor, pattbl_destructor, NULL,
	    (void *)(uintptr_t)pattbl_sz, NULL, pattbl_kmem_flags);
}

/*
 * Create a Multidata message block.
 */
multidata_t *
mmd_alloc(mblk_t *hdr_mp, mblk_t **mmd_mp, int kmflags)
{
	uchar_t *buf;
	multidata_t *mmd;
	uint_t mmd_mplen;
	struct mmd_buf_info *buf_info;

	ASSERT(hdr_mp != NULL);
	ASSERT(mmd_mp != NULL);

	/*
	 * Caller should never pass in a chain of mblks since we
	 * only care about the first one, hence the assertions.
	 */
	ASSERT(hdr_mp->b_cont == NULL);

	if ((buf = kmem_cache_alloc(mmd_cache, kmflags)) == NULL)
		return (NULL);

	buf_info = (struct mmd_buf_info *)buf;
	buf_info->frp.free_arg = (caddr_t)buf;

	mmd = (multidata_t *)(buf_info + 1);
	mmd_mplen = sizeof (*mmd);

	if ((*mmd_mp = desballoc((uchar_t *)mmd, mmd_mplen, BPRI_HI,
	    &(buf_info->frp))) == NULL) {
		kmem_cache_free(mmd_cache, buf);
		return (NULL);
	}

	DB_TYPE(*mmd_mp) = M_MULTIDATA;
	(*mmd_mp)->b_wptr += mmd_mplen;
	mmd->mmd_dp = (*mmd_mp)->b_datap;
	mmd->mmd_hbuf = hdr_mp;

	return (mmd);
}
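
/*
 * Illustrative sketch (not compiled in): a typical way a client might
 * create a Multidata from a preallocated header buffer.  The buffer
 * size and the "hdr_mp" naming here are assumptions for illustration
 * only; real callers size the header buffer for their own headers.
 *
 *	mblk_t *hdr_mp, *mmd_mp;
 *	multidata_t *mmd;
 *
 *	if ((hdr_mp = allocb(1024, BPRI_HI)) == NULL)
 *		return (ENOMEM);
 *	if ((mmd = mmd_alloc(hdr_mp, &mmd_mp, KM_NOSLEEP)) == NULL) {
 *		freeb(hdr_mp);
 *		return (ENOMEM);
 *	}
 *	... add payload buffers and packet descriptors to mmd, then hand
 *	... mmd_mp (an M_MULTIDATA mblk) downstream or freeb() it, which
 *	... releases descriptors, attributes and buffers via
 *	... mmd_esballoc_free().
 */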

/*
 * Associate an additional payload buffer with the Multidata.
 */
int
mmd_addpldbuf(multidata_t *mmd, mblk_t *pld_mp)
{
	int i;

	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(pld_mp != NULL);

	mutex_enter(&mmd->mmd_pd_slab_lock);
	for (i = 0; i < MULTIDATA_MAX_PBUFS &&
	    mmd->mmd_pbuf_cnt < MULTIDATA_MAX_PBUFS; i++) {
		if (mmd->mmd_pbuf[i] == pld_mp) {
			/* duplicate entry */
			MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding "
			    "pld 0x%p to mmd 0x%p since it has been "
			    "previously added into slot %d (total %d)\n",
			    (void *)pld_mp, (void *)mmd, i, mmd->mmd_pbuf_cnt));
			mutex_exit(&mmd->mmd_pd_slab_lock);
			return (-1);
		} else if (mmd->mmd_pbuf[i] == NULL) {
			mmd->mmd_pbuf[i] = pld_mp;
			mmd->mmd_pbuf_cnt++;
			mutex_exit(&mmd->mmd_pd_slab_lock);
			return (i);
		}
	}

	/* all slots are taken */
	MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding pld 0x%p to mmd 0x%p "
	    "since no slot space is left (total %d max %d)\n", (void *)pld_mp,
	    (void *)mmd, mmd->mmd_pbuf_cnt, MULTIDATA_MAX_PBUFS));
	mutex_exit(&mmd->mmd_pd_slab_lock);

	return (-1);
}
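
/*
 * Illustrative sketch (not compiled in): attaching a payload buffer.
 * The return value is the slot index later referenced from a packet
 * descriptor's pld_pbuf_idx; -1 means the buffer is a duplicate or all
 * MULTIDATA_MAX_PBUFS slots are in use.  "pld_mp" is a caller-supplied
 * payload mblk and is an assumption of this example.
 *
 *	int idx;
 *
 *	if ((idx = mmd_addpldbuf(mmd, pld_mp)) < 0) {
 *		freeb(pld_mp);		... the Multidata did not take it ...
 *		return (ENOSPC);
 *	}
 *	... record idx for use in pdescinfo_t pld_ary[].pld_pbuf_idx ...
 */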

/*
 * Multidata metadata kmem cache constructor routine.
 */
/* ARGSUSED */
static int
mmd_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct mmd_buf_info *buf_info;
	multidata_t *mmd;

	bzero((void *)buf, MMD_CACHE_SIZE);

	buf_info = (struct mmd_buf_info *)buf;
	buf_info->frp.free_func = mmd_esballoc_free;
	buf_info->buf_len = MMD_CACHE_SIZE;

	mmd = (multidata_t *)(buf_info + 1);
	mmd->mmd_magic = MULTIDATA_MAGIC;

	mutex_init(&(mmd->mmd_pd_slab_lock), NULL, MUTEX_DRIVER, NULL);
	QL_INIT(&(mmd->mmd_pd_slab_q));
	QL_INIT(&(mmd->mmd_pd_q));

	return (0);
}

/*
 * Multidata metadata kmem cache destructor routine.
 */
/* ARGSUSED */
static void
mmd_destructor(void *buf, void *cdrarg)
{
	multidata_t *mmd;
#ifdef DEBUG
	int i;
#endif

	mmd = (multidata_t *)((uchar_t *)buf + sizeof (struct mmd_buf_info));

	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(mmd->mmd_dp == NULL);
	ASSERT(mmd->mmd_hbuf == NULL);
	ASSERT(mmd->mmd_pbuf_cnt == 0);
#ifdef DEBUG
	for (i = 0; i < MULTIDATA_MAX_PBUFS; i++)
		ASSERT(mmd->mmd_pbuf[i] == NULL);
#endif
	ASSERT(mmd->mmd_pattbl == NULL);

	mutex_destroy(&(mmd->mmd_pd_slab_lock));
	ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
	ASSERT(mmd->mmd_slab_cnt == 0);
	ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
	ASSERT(mmd->mmd_pd_cnt == 0);
	ASSERT(mmd->mmd_hbuf_ref == 0);
	ASSERT(mmd->mmd_pbuf_ref == 0);
}

/*
 * Multidata message block free callback routine.
 */
static void
mmd_esballoc_free(caddr_t buf)
{
	multidata_t *mmd;
	pdesc_t *pd;
	pdesc_slab_t *slab;
	int i;

	ASSERT(buf != NULL);
	ASSERT(((struct mmd_buf_info *)buf)->buf_len == MMD_CACHE_SIZE);

	mmd = (multidata_t *)(buf + sizeof (struct mmd_buf_info));
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	ASSERT(mmd->mmd_dp != NULL);
	ASSERT(mmd->mmd_dp->db_ref == 1);

	/* remove all packet descriptors and private attributes */
	pd = Q2PD(mmd->mmd_pd_q.ql_next);
	while (pd != Q2PD(&(mmd->mmd_pd_q)))
		pd = mmd_destroy_pdesc(mmd, pd);

	ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
	ASSERT(mmd->mmd_pd_cnt == 0);
	ASSERT(mmd->mmd_hbuf_ref == 0);
	ASSERT(mmd->mmd_pbuf_ref == 0);

	/* remove all global attributes */
	if (mmd->mmd_pattbl != NULL)
		mmd_destroy_pattbl(&(mmd->mmd_pattbl));

	/* remove all descriptor slabs */
	slab = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_next);
	while (slab != Q2PDSLAB(&(mmd->mmd_pd_slab_q))) {
		pdesc_slab_t *slab_next = Q2PDSLAB(slab->pds_next);

		remque(&(slab->pds_next));
		slab->pds_next = NULL;
		slab->pds_prev = NULL;
		slab->pds_mmd = NULL;
		slab->pds_used = 0;
		kmem_cache_free(pd_slab_cache, slab);

		ASSERT(mmd->mmd_slab_cnt > 0);
		mmd->mmd_slab_cnt--;
		slab = slab_next;
	}
	ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
	ASSERT(mmd->mmd_slab_cnt == 0);

	mmd->mmd_dp = NULL;

	/* finally, free all associated message blocks */
	if (mmd->mmd_hbuf != NULL) {
		freeb(mmd->mmd_hbuf);
		mmd->mmd_hbuf = NULL;
	}

	for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) {
		if (mmd->mmd_pbuf[i] != NULL) {
			freeb(mmd->mmd_pbuf[i]);
			mmd->mmd_pbuf[i] = NULL;
			ASSERT(mmd->mmd_pbuf_cnt > 0);
			mmd->mmd_pbuf_cnt--;
		}
	}

	ASSERT(mmd->mmd_pbuf_cnt == 0);
	ASSERT(MUTEX_NOT_HELD(&(mmd->mmd_pd_slab_lock)));
	kmem_cache_free(mmd_cache, buf);
}

/*
 * Multidata message block copy routine, called by copyb() when it
 * encounters an M_MULTIDATA data block type.  This routine should
 * not be called by anyone other than copyb(), since it may go away
 * (read: become static to this module) once some sort of copy callback
 * routine is made available.
 */
mblk_t *
mmd_copy(mblk_t *bp, int kmflags)
{
	multidata_t *mmd, *n_mmd;
	mblk_t *n_hbuf = NULL, *n_pbuf[MULTIDATA_MAX_PBUFS];
	mblk_t **pmp_last = &n_pbuf[MULTIDATA_MAX_PBUFS - 1];
	mblk_t **pmp;
	mblk_t *n_bp = NULL;
	pdesc_t *pd;
	uint_t n_pbuf_cnt = 0;
	int idx, i;

#define	FREE_PBUFS() {						\
	for (pmp = &n_pbuf[0]; pmp <= pmp_last; pmp++)		\
		if (*pmp != NULL) freeb(*pmp);			\
}

#define	REL_OFF(p, base, n_base)				\
	((uchar_t *)(n_base) + ((uchar_t *)(p) - (uchar_t *)base))

	ASSERT(bp != NULL && DB_TYPE(bp) == M_MULTIDATA);
	mmd = mmd_getmultidata(bp);

	/* copy the header buffer */
	if (mmd->mmd_hbuf != NULL && (n_hbuf = copyb(mmd->mmd_hbuf)) == NULL)
		return (NULL);

	/* copy the payload buffer(s) */
	mutex_enter(&mmd->mmd_pd_slab_lock);
	bzero((void *)&n_pbuf[0], sizeof (mblk_t *) * MULTIDATA_MAX_PBUFS);
	n_pbuf_cnt = mmd->mmd_pbuf_cnt;
	for (i = 0; i < n_pbuf_cnt; i++) {
		ASSERT(mmd->mmd_pbuf[i] != NULL);
		n_pbuf[i] = copyb(mmd->mmd_pbuf[i]);
		if (n_pbuf[i] == NULL) {
			FREE_PBUFS();
			mutex_exit(&mmd->mmd_pd_slab_lock);
			return (NULL);
		}
	}

	/* allocate new Multidata */
	n_mmd = mmd_alloc(n_hbuf, &n_bp, kmflags);
	if (n_mmd == NULL) {
		if (n_hbuf != NULL)
			freeb(n_hbuf);
		if (n_pbuf_cnt != 0)
			FREE_PBUFS();
		mutex_exit(&mmd->mmd_pd_slab_lock);
		return (NULL);
	}

	/*
	 * Add payload buffer(s); upon success, leave the n_pbuf array
	 * alone, as the newly-created Multidata now contains the mblk
	 * pointers stored in the array.  These will be freed along with
	 * the Multidata itself.
	 */
	for (i = 0, pmp = &n_pbuf[0]; i < n_pbuf_cnt; i++, pmp++) {
		idx = mmd_addpldbuf(n_mmd, *pmp);
		if (idx < 0) {
			FREE_PBUFS();
			freeb(n_bp);
			mutex_exit(&mmd->mmd_pd_slab_lock);
			return (NULL);
		}
	}

	/* copy over global attributes */
	if (mmd->mmd_pattbl != NULL &&
	    mmd_copy_pattbl(mmd->mmd_pattbl, n_mmd, NULL, kmflags) < 0) {
		freeb(n_bp);
		mutex_exit(&mmd->mmd_pd_slab_lock);
		return (NULL);
	}

	/* copy over packet descriptors and their attributes */
	pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE);	/* first pdesc */
	while (pd != NULL) {
		pdesc_t *n_pd;
		pdescinfo_t *pdi, n_pdi;
		uchar_t *n_base, *base;
		pdesc_t *pd_next;

		/* next pdesc */
		pd_next = mmd_getpdesc(pd->pd_slab->pds_mmd, pd, NULL,
		    1, B_TRUE);

		/* skip if already removed */
		if (pd->pd_flags & PDESC_REM_DEFER) {
			pd = pd_next;
			continue;
		}

		pdi = &(pd->pd_pdi);
		bzero(&n_pdi, sizeof (n_pdi));

		/*
		 * Calculate new descriptor values based on the offset of
		 * each pointer relative to the associated buffer(s).
		 */
		ASSERT(pdi->flags & PDESC_HAS_REF);
		if (pdi->flags & PDESC_HBUF_REF) {
			n_base = n_mmd->mmd_hbuf->b_rptr;
			base = mmd->mmd_hbuf->b_rptr;

			n_pdi.flags |= PDESC_HBUF_REF;
			n_pdi.hdr_base = REL_OFF(pdi->hdr_base, base, n_base);
			n_pdi.hdr_rptr = REL_OFF(pdi->hdr_rptr, base, n_base);
			n_pdi.hdr_wptr = REL_OFF(pdi->hdr_wptr, base, n_base);
			n_pdi.hdr_lim = REL_OFF(pdi->hdr_lim, base, n_base);
		}

		if (pdi->flags & PDESC_PBUF_REF) {
			n_pdi.flags |= PDESC_PBUF_REF;
			n_pdi.pld_cnt = pdi->pld_cnt;

			for (i = 0; i < pdi->pld_cnt; i++) {
				idx = pdi->pld_ary[i].pld_pbuf_idx;
				ASSERT(idx < MULTIDATA_MAX_PBUFS);
				ASSERT(n_mmd->mmd_pbuf[idx] != NULL);
				ASSERT(mmd->mmd_pbuf[idx] != NULL);

				n_base = n_mmd->mmd_pbuf[idx]->b_rptr;
				base = mmd->mmd_pbuf[idx]->b_rptr;

				n_pdi.pld_ary[i].pld_pbuf_idx = idx;

				/*
				 * We can't copy the pointers just like that,
				 * so calculate the relative offset.
				 */
				n_pdi.pld_ary[i].pld_rptr =
				    REL_OFF(pdi->pld_ary[i].pld_rptr,
				    base, n_base);
				n_pdi.pld_ary[i].pld_wptr =
				    REL_OFF(pdi->pld_ary[i].pld_wptr,
				    base, n_base);
			}
		}

		/* add the new descriptor to the new Multidata */
		n_pd = mmd_addpdesc_int(n_mmd, &n_pdi, NULL, kmflags);

		if (n_pd == NULL || (pd->pd_pattbl != NULL &&
		    mmd_copy_pattbl(pd->pd_pattbl, n_mmd, n_pd, kmflags) < 0)) {
			freeb(n_bp);
			mutex_exit(&mmd->mmd_pd_slab_lock);
			return (NULL);
		}

		pd = pd_next;
	}
#undef REL_OFF
#undef FREE_PBUFS

	mutex_exit(&mmd->mmd_pd_slab_lock);
	return (n_bp);
}

/*
 * Given a Multidata message block, return the Multidata metadata handle.
 */
multidata_t *
mmd_getmultidata(mblk_t *mp)
{
	multidata_t *mmd;

	ASSERT(mp != NULL);

	if (DB_TYPE(mp) != M_MULTIDATA)
		return (NULL);

	mmd = (multidata_t *)mp->b_rptr;
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	return (mmd);
}

/*
 * Return the start and end addresses of the associated buffer(s).
 */
void
mmd_getregions(multidata_t *mmd, mbufinfo_t *mbi)
{
	int i;

	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(mbi != NULL);

	bzero((void *)mbi, sizeof (mbufinfo_t));

	if (mmd->mmd_hbuf != NULL) {
		mbi->hbuf_rptr = mmd->mmd_hbuf->b_rptr;
		mbi->hbuf_wptr = mmd->mmd_hbuf->b_wptr;
	}

	mutex_enter(&mmd->mmd_pd_slab_lock);
	for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
		ASSERT(mmd->mmd_pbuf[i] != NULL);
		mbi->pbuf_ary[i].pbuf_rptr = mmd->mmd_pbuf[i]->b_rptr;
		mbi->pbuf_ary[i].pbuf_wptr = mmd->mmd_pbuf[i]->b_wptr;
	}
	mbi->pbuf_cnt = mmd->mmd_pbuf_cnt;
	mutex_exit(&mmd->mmd_pd_slab_lock);
}

/*
 * Return the Multidata statistics.
 */
uint_t
mmd_getcnt(multidata_t *mmd, uint_t *hbuf_ref, uint_t *pbuf_ref)
{
	uint_t pd_cnt;

	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	mutex_enter(&(mmd->mmd_pd_slab_lock));
	if (hbuf_ref != NULL)
		*hbuf_ref = mmd->mmd_hbuf_ref;
	if (pbuf_ref != NULL)
		*pbuf_ref = mmd->mmd_pbuf_ref;
	pd_cnt = mmd->mmd_pd_cnt;
	mutex_exit(&(mmd->mmd_pd_slab_lock));

	return (pd_cnt);
}

#define	HBUF_REF_VALID(mmd, pdi)					\
	((mmd)->mmd_hbuf != NULL && (pdi)->hdr_rptr != NULL &&		\
	(pdi)->hdr_wptr != NULL && (pdi)->hdr_base != NULL &&		\
	(pdi)->hdr_lim != NULL && (pdi)->hdr_lim >= (pdi)->hdr_base &&	\
	(pdi)->hdr_wptr >= (pdi)->hdr_rptr &&				\
	(pdi)->hdr_base <= (pdi)->hdr_rptr &&				\
	(pdi)->hdr_lim >= (pdi)->hdr_wptr &&				\
	(pdi)->hdr_base >= (mmd)->mmd_hbuf->b_rptr &&			\
	MBLKIN((mmd)->mmd_hbuf,						\
	(pdi->hdr_base - (mmd)->mmd_hbuf->b_rptr),			\
	PDESC_HDRSIZE(pdi)))

/*
 * Bounds check payload area(s).
 */
static boolean_t
pbuf_ref_valid(multidata_t *mmd, pdescinfo_t *pdi)
{
	int i = 0, idx;
	boolean_t valid = B_TRUE;
	struct pld_ary_s *pa;

	mutex_enter(&mmd->mmd_pd_slab_lock);
	if (pdi->pld_cnt == 0 || pdi->pld_cnt > mmd->mmd_pbuf_cnt) {
		mutex_exit(&mmd->mmd_pd_slab_lock);
		return (B_FALSE);
	}

	pa = &pdi->pld_ary[0];
	while (valid && i < pdi->pld_cnt) {
		valid = (((idx = pa->pld_pbuf_idx) < mmd->mmd_pbuf_cnt) &&
		    pa->pld_rptr != NULL && pa->pld_wptr != NULL &&
		    pa->pld_wptr >= pa->pld_rptr &&
		    pa->pld_rptr >= mmd->mmd_pbuf[idx]->b_rptr &&
		    MBLKIN(mmd->mmd_pbuf[idx], (pa->pld_rptr -
		    mmd->mmd_pbuf[idx]->b_rptr),
		    PDESC_PLD_SPAN_SIZE(pdi, i)));

		if (!valid) {
			MMD_DEBUG((CE_WARN,
			    "pbuf_ref_valid: pdi 0x%p pld out of bound; "
			    "index %d has pld_cnt %d pbuf_idx %d "
			    "(mmd_pbuf_cnt %d), "
			    "pld_rptr 0x%p pld_wptr 0x%p len %d "
			    "(valid 0x%p-0x%p len %d)\n", (void *)pdi,
			    i, pdi->pld_cnt, idx, mmd->mmd_pbuf_cnt,
			    (void *)pa->pld_rptr,
			    (void *)pa->pld_wptr,
			    (int)PDESC_PLD_SPAN_SIZE(pdi, i),
			    (void *)mmd->mmd_pbuf[idx]->b_rptr,
			    (void *)mmd->mmd_pbuf[idx]->b_wptr,
			    (int)MBLKL(mmd->mmd_pbuf[idx])));
		}

		/* advance to next entry */
		i++;
		pa++;
	}

	mutex_exit(&mmd->mmd_pd_slab_lock);
	return (valid);
}

/*
 * Add a packet descriptor to the Multidata.
 */
pdesc_t *
mmd_addpdesc(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags)
{
	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(pdi != NULL);
	ASSERT(pdi->flags & PDESC_HAS_REF);

	/* do the references refer to invalid memory regions? */
	if (!mmd_speed_over_safety &&
	    (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) ||
	    ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) {
		if (err != NULL)
			*err = EINVAL;
		return (NULL);
	}

	return (mmd_addpdesc_int(mmd, pdi, err, kmflags));
}
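
/*
 * Illustrative sketch (not compiled in): describing one packet whose
 * header lives in the header buffer and whose payload is a single span
 * within payload buffer slot "idx" (as returned by mmd_addpldbuf).
 * "hdr_mp", "pld_mp", "hdr_len", "pld_off" and "pld_len" are made-up
 * names; real callers derive these from their own header and payload
 * layout.  The flag usage follows how this file tests pdescinfo flags.
 *
 *	pdescinfo_t pdi;
 *	pdesc_t *pd;
 *	int err;
 *
 *	bzero(&pdi, sizeof (pdi));
 *	pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF;
 *	pdi.hdr_base = hdr_mp->b_rptr;
 *	pdi.hdr_rptr = pdi.hdr_base;
 *	pdi.hdr_wptr = pdi.hdr_rptr + hdr_len;
 *	pdi.hdr_lim = pdi.hdr_wptr;
 *	pdi.pld_cnt = 1;
 *	pdi.pld_ary[0].pld_pbuf_idx = idx;
 *	pdi.pld_ary[0].pld_rptr = pld_mp->b_rptr + pld_off;
 *	pdi.pld_ary[0].pld_wptr = pdi.pld_ary[0].pld_rptr + pld_len;
 *
 *	if ((pd = mmd_addpdesc(mmd, &pdi, &err, KM_NOSLEEP)) == NULL)
 *		return (err);	... EINVAL (bad bounds) or ENOMEM ...
 */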

/*
 * Internal routine to add a packet descriptor, called when mmd_addpdesc
 * or mmd_copy tries to allocate and add a descriptor to a Multidata.
 */
static pdesc_t *
mmd_addpdesc_int(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags)
{
	pdesc_slab_t *slab, *slab_last;
	pdesc_t *pd;

	ASSERT(pdi->flags & PDESC_HAS_REF);
	ASSERT(!(pdi->flags & PDESC_HBUF_REF) || HBUF_REF_VALID(mmd, pdi));
	ASSERT(!(pdi->flags & PDESC_PBUF_REF) || pbuf_ref_valid(mmd, pdi));

	if (err != NULL)
		*err = 0;

	mutex_enter(&(mmd->mmd_pd_slab_lock));
	/*
	 * Is the slab list empty, or is the last-added slab full?  If so,
	 * allocate a new slab for the descriptor; otherwise, use the
	 * last-added slab instead.
	 */
	slab_last = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_prev);
	if (mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q) ||
	    slab_last->pds_used == slab_last->pds_sz) {
		slab = kmem_cache_alloc(pd_slab_cache, kmflags);
		if (slab == NULL) {
			if (err != NULL)
				*err = ENOMEM;
			mutex_exit(&(mmd->mmd_pd_slab_lock));
			return (NULL);
		}
		slab->pds_mmd = mmd;

		ASSERT(slab->pds_used == 0);
		ASSERT(slab->pds_next == NULL && slab->pds_prev == NULL);

		/* insert slab at end of list */
		insque(&(slab->pds_next), mmd->mmd_pd_slab_q.ql_prev);
		mmd->mmd_slab_cnt++;
	} else {
		slab = slab_last;
	}
	ASSERT(slab->pds_used < slab->pds_sz);
	pd = &(slab->pds_free_desc[slab->pds_used++]);
	ASSERT(pd->pd_magic == PDESC_MAGIC);
	pd->pd_next = NULL;
	pd->pd_prev = NULL;
	pd->pd_slab = slab;
	pd->pd_pattbl = NULL;

	/* copy over the descriptor info from caller */
	PDI_COPY(pdi, &(pd->pd_pdi));

	if (pd->pd_flags & PDESC_HBUF_REF)
		mmd->mmd_hbuf_ref++;
	if (pd->pd_flags & PDESC_PBUF_REF)
		mmd->mmd_pbuf_ref += pd->pd_pdi.pld_cnt;
	mmd->mmd_pd_cnt++;

	/* insert descriptor at end of list */
	insque(&(pd->pd_next), mmd->mmd_pd_q.ql_prev);
	mutex_exit(&(mmd->mmd_pd_slab_lock));

	return (pd);
}

/*
 * Packet descriptor slab kmem cache constructor routine.
 */
/* ARGSUSED */
static int
pdslab_constructor(void *buf, void *cdrarg, int kmflags)
{
	pdesc_slab_t *slab;
	uint_t cnt = (uint_t)(uintptr_t)cdrarg;
	int i;

	ASSERT(cnt > 0);	/* slab size can't be zero */

	slab = (pdesc_slab_t *)buf;
	slab->pds_next = NULL;
	slab->pds_prev = NULL;
	slab->pds_mmd = NULL;
	slab->pds_used = 0;
	slab->pds_sz = cnt;

	for (i = 0; i < cnt; i++) {
		pdesc_t *pd = &(slab->pds_free_desc[i]);
		pd->pd_magic = PDESC_MAGIC;
	}
	return (0);
}

/*
 * Packet descriptor slab kmem cache destructor routine.
 */
/* ARGSUSED */
static void
pdslab_destructor(void *buf, void *cdrarg)
{
	pdesc_slab_t *slab;

	slab = (pdesc_slab_t *)buf;
	ASSERT(slab->pds_next == NULL);
	ASSERT(slab->pds_prev == NULL);
	ASSERT(slab->pds_mmd == NULL);
	ASSERT(slab->pds_used == 0);
	ASSERT(slab->pds_sz > 0);
}

/*
 * Remove a packet descriptor from the in-use descriptor list,
 * called by mmd_rempdesc or during free.
 */
static pdesc_t *
mmd_destroy_pdesc(multidata_t *mmd, pdesc_t *pd)
{
	pdesc_t *pd_next;

	pd_next = Q2PD(pd->pd_next);
	remque(&(pd->pd_next));

	/* remove all local attributes */
	if (pd->pd_pattbl != NULL)
		mmd_destroy_pattbl(&(pd->pd_pattbl));

	/* don't decrease counts for a removed descriptor */
	if (!(pd->pd_flags & PDESC_REM_DEFER)) {
		if (pd->pd_flags & PDESC_HBUF_REF) {
			ASSERT(mmd->mmd_hbuf_ref > 0);
			mmd->mmd_hbuf_ref--;
		}
		if (pd->pd_flags & PDESC_PBUF_REF) {
			ASSERT(mmd->mmd_pbuf_ref > 0);
			mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt;
		}
		ASSERT(mmd->mmd_pd_cnt > 0);
		mmd->mmd_pd_cnt--;
	}
	return (pd_next);
}

/*
 * Remove a packet descriptor from the Multidata.
 */
void
mmd_rempdesc(pdesc_t *pd)
{
	multidata_t *mmd;

	ASSERT(pd->pd_magic == PDESC_MAGIC);
	ASSERT(pd->pd_slab != NULL);

	mmd = pd->pd_slab->pds_mmd;
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	mutex_enter(&(mmd->mmd_pd_slab_lock));
	/*
	 * We can't deallocate the associated resources if the Multidata
	 * is shared with other threads, because it's possible that the
	 * descriptor handle value is held by those threads.  That's why
	 * we simply mark the entry as "removed" and decrement the counts.
	 * If there are no other threads, then we free the descriptor.
	 */
	if (mmd->mmd_dp->db_ref > 1) {
		pd->pd_flags |= PDESC_REM_DEFER;
		if (pd->pd_flags & PDESC_HBUF_REF) {
			ASSERT(mmd->mmd_hbuf_ref > 0);
			mmd->mmd_hbuf_ref--;
		}
		if (pd->pd_flags & PDESC_PBUF_REF) {
			ASSERT(mmd->mmd_pbuf_ref > 0);
			mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt;
		}
		ASSERT(mmd->mmd_pd_cnt > 0);
		mmd->mmd_pd_cnt--;
	} else {
		(void) mmd_destroy_pdesc(mmd, pd);
	}
	mutex_exit(&(mmd->mmd_pd_slab_lock));
}

/*
 * A generic routine to traverse the packet descriptor in-use list.
 */
static pdesc_t *
mmd_getpdesc(multidata_t *mmd, pdesc_t *pd, pdescinfo_t *pdi, uint_t forw,
    boolean_t mutex_held)
{
	pdesc_t *pd_head;

	ASSERT(pd == NULL || pd->pd_slab->pds_mmd == mmd);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(!mutex_held || MUTEX_HELD(&(mmd->mmd_pd_slab_lock)));

	if (!mutex_held)
		mutex_enter(&(mmd->mmd_pd_slab_lock));
	pd_head = Q2PD(&(mmd->mmd_pd_q));

	if (pd == NULL) {
		/*
		 * We're called by mmd_get{first,last}pdesc, and so
		 * return either the first or last list element.
		 */
		pd = forw ? Q2PD(mmd->mmd_pd_q.ql_next) :
		    Q2PD(mmd->mmd_pd_q.ql_prev);
	} else {
		/*
		 * We're called by mmd_get{next,prev}pdesc, and so
		 * return either the next or previous list element.
		 */
		pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev);
	}

	while (pd != pd_head) {
		/* skip element if it has been removed */
		if (!(pd->pd_flags & PDESC_REM_DEFER))
			break;
		pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev);
	}
	if (!mutex_held)
		mutex_exit(&(mmd->mmd_pd_slab_lock));

	/* return NULL if we're back at the beginning */
	if (pd == pd_head)
		pd = NULL;

	/* got an entry; copy descriptor info to caller */
	if (pd != NULL && pdi != NULL)
		PDI_COPY(&(pd->pd_pdi), pdi);

	ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC);
	return (pd);
}

/*
 * Return the first packet descriptor in the in-use list.
 */
pdesc_t *
mmd_getfirstpdesc(multidata_t *mmd, pdescinfo_t *pdi)
{
	return (mmd_getpdesc(mmd, NULL, pdi, 1, B_FALSE));
}

/*
 * Return the last packet descriptor in the in-use list.
 */
pdesc_t *
mmd_getlastpdesc(multidata_t *mmd, pdescinfo_t *pdi)
{
	return (mmd_getpdesc(mmd, NULL, pdi, 0, B_FALSE));
}

/*
 * Return the next packet descriptor in the in-use list.
 */
pdesc_t *
mmd_getnextpdesc(pdesc_t *pd, pdescinfo_t *pdi)
{
	return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 1, B_FALSE));
}

/*
 * Return the previous packet descriptor in the in-use list.
 */
pdesc_t *
mmd_getprevpdesc(pdesc_t *pd, pdescinfo_t *pdi)
{
	return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 0, B_FALSE));
}
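
/*
 * Illustrative sketch (not compiled in): walking every in-use (not
 * deferred-removed) packet descriptor of a Multidata.  Each call copies
 * the descriptor layout into the caller-supplied pdescinfo_t.
 *
 *	pdesc_t *pd;
 *	pdescinfo_t pdi;
 *
 *	pd = mmd_getfirstpdesc(mmd, &pdi);
 *	while (pd != NULL) {
 *		... inspect pdi.hdr_rptr/pdi.hdr_wptr and
 *		... pdi.pld_ary[0 .. pdi.pld_cnt - 1] ...
 *		pd = mmd_getnextpdesc(pd, &pdi);
 *	}
 */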

/*
 * Check to see if pdi stretches over c_pdi; used to ensure that a packet
 * descriptor's header and payload spans are not extended beyond their
 * current boundaries.
 */
static boolean_t
pdi_in_range(pdescinfo_t *pdi, pdescinfo_t *c_pdi)
{
	int i;
	struct pld_ary_s *pa = &pdi->pld_ary[0];
	struct pld_ary_s *c_pa = &c_pdi->pld_ary[0];

	if (pdi->hdr_base < c_pdi->hdr_base || pdi->hdr_lim > c_pdi->hdr_lim)
		return (B_FALSE);

	/*
	 * We don't allow the number of spans to be reduced, for the sake
	 * of simplicity.  Instead, we provide PDESC_PLD_SPAN_CLEAR() to
	 * clear a packet descriptor.  Note that we allow the span count to
	 * be increased, and the bounds check for the new one happens
	 * in pbuf_ref_valid.
	 */
	if (pdi->pld_cnt < c_pdi->pld_cnt)
		return (B_FALSE);

	/* compare only those which are currently defined */
	for (i = 0; i < c_pdi->pld_cnt; i++, pa++, c_pa++) {
		if (pa->pld_pbuf_idx != c_pa->pld_pbuf_idx ||
		    pa->pld_rptr < c_pa->pld_rptr ||
		    pa->pld_wptr > c_pa->pld_wptr)
			return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * Modify the layout of a packet descriptor.
 */
pdesc_t *
mmd_adjpdesc(pdesc_t *pd, pdescinfo_t *pdi)
{
	multidata_t *mmd;
	pdescinfo_t *c_pdi;

	ASSERT(pd != NULL);
	ASSERT(pdi != NULL);
	ASSERT(pd->pd_magic == PDESC_MAGIC);

	mmd = pd->pd_slab->pds_mmd;
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	/* entry has been removed */
	if (pd->pd_flags & PDESC_REM_DEFER)
		return (NULL);

	/* caller doesn't intend to specify any buffer reference? */
	if (!(pdi->flags & PDESC_HAS_REF))
		return (NULL);

	/* do the references refer to invalid memory regions? */
	if (!mmd_speed_over_safety &&
	    (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) ||
	    ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi))))
		return (NULL);

	/* they're not subsets of current references? */
	c_pdi = &(pd->pd_pdi);
	if (!pdi_in_range(pdi, c_pdi))
		return (NULL);

	/* copy over the descriptor info from caller */
	PDI_COPY(pdi, c_pdi);

	return (pd);
}
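
/*
 * Illustrative sketch (not compiled in): trimming a descriptor's payload
 * span in place.  mmd_adjpdesc() only accepts a layout that stays within
 * the descriptor's current bounds (see pdi_in_range), so shrinking a
 * span is allowed while growing one is not.  The "trim" amount below is
 * an assumption for illustration.
 *
 *	pdescinfo_t pdi;
 *
 *	if (mmd_getpdescinfo(pd, &pdi) == 0) {
 *		pdi.pld_ary[0].pld_wptr -= trim;
 *		if (mmd_adjpdesc(pd, &pdi) == NULL)
 *			... layout rejected; descriptor left unchanged ...
 *	}
 */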

/*
 * Copy the contents of a packet descriptor into a new buffer.  If the
 * descriptor points to more than one buffer fragment, the contents of
 * all fragments are joined, with the header buffer fragment preceding
 * the payload buffer fragment(s).
 */
mblk_t *
mmd_transform(pdesc_t *pd)
{
	multidata_t *mmd;
	pdescinfo_t *pdi;
	mblk_t *mp;
	int h_size = 0, p_size = 0;
	int i, len;

	ASSERT(pd != NULL);
	ASSERT(pd->pd_magic == PDESC_MAGIC);

	mmd = pd->pd_slab->pds_mmd;
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	/* entry has been removed */
	if (pd->pd_flags & PDESC_REM_DEFER)
		return (NULL);

	mutex_enter(&mmd->mmd_pd_slab_lock);
	pdi = &(pd->pd_pdi);
	if (pdi->flags & PDESC_HBUF_REF)
		h_size = PDESC_HDRL(pdi);
	if (pdi->flags & PDESC_PBUF_REF) {
		for (i = 0; i < pdi->pld_cnt; i++)
			p_size += PDESC_PLD_SPAN_SIZE(pdi, i);
	}

	/* allocate space large enough to hold the fragment(s) */
	ASSERT(h_size + p_size >= 0);
	if ((mp = allocb(h_size + p_size, BPRI_HI)) == NULL) {
		mutex_exit(&mmd->mmd_pd_slab_lock);
		return (NULL);
	}

	/* copy over the header fragment */
	if ((pdi->flags & PDESC_HBUF_REF) && h_size > 0) {
		bcopy(pdi->hdr_rptr, mp->b_wptr, h_size);
		mp->b_wptr += h_size;
	}

	/* copy over the payload fragment */
	if ((pdi->flags & PDESC_PBUF_REF) && p_size > 0) {
		for (i = 0; i < pdi->pld_cnt; i++) {
			len = PDESC_PLD_SPAN_SIZE(pdi, i);
			if (len > 0) {
				bcopy(pdi->pld_ary[i].pld_rptr,
				    mp->b_wptr, len);
				mp->b_wptr += len;
			}
		}
	}

	mutex_exit(&mmd->mmd_pd_slab_lock);
	return (mp);
}
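
/*
 * Illustrative sketch (not compiled in): materializing a single packet
 * from its descriptor.  mmd_transform() flattens the header and payload
 * spans into one freshly allocated mblk (a bcopy), whereas
 * mmd_transform_link() below builds a b_cont chain of dupb()'d blocks
 * that still reference the original buffers.
 *
 *	mblk_t *mp;
 *
 *	if ((mp = mmd_transform(pd)) == NULL)
 *		return (ENOMEM);	... or the pdesc was removed ...
 *	... MBLKL(mp) now equals header length plus payload length ...
 *	freemsg(mp);
 */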

/*
 * Return a chain of mblks representing the Multidata packet.
 */
mblk_t *
mmd_transform_link(pdesc_t *pd)
{
	multidata_t *mmd;
	pdescinfo_t *pdi;
	mblk_t *nmp = NULL;

	ASSERT(pd != NULL);
	ASSERT(pd->pd_magic == PDESC_MAGIC);

	mmd = pd->pd_slab->pds_mmd;
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	/* entry has been removed */
	if (pd->pd_flags & PDESC_REM_DEFER)
		return (NULL);

	pdi = &(pd->pd_pdi);

	/* duplicate header buffer */
	if ((pdi->flags & PDESC_HBUF_REF)) {
		if ((nmp = dupb(mmd->mmd_hbuf)) == NULL)
			return (NULL);
		nmp->b_rptr = pdi->hdr_rptr;
		nmp->b_wptr = pdi->hdr_wptr;
	}

	/* duplicate payload buffer(s) */
	if (pdi->flags & PDESC_PBUF_REF) {
		int i;
		mblk_t *mp;
		struct pld_ary_s *pa = &pdi->pld_ary[0];

		mutex_enter(&mmd->mmd_pd_slab_lock);
		for (i = 0; i < pdi->pld_cnt; i++, pa++) {
			ASSERT(mmd->mmd_pbuf[pa->pld_pbuf_idx] != NULL);

			/* skip empty ones */
			if (PDESC_PLD_SPAN_SIZE(pdi, i) == 0)
				continue;

			mp = dupb(mmd->mmd_pbuf[pa->pld_pbuf_idx]);
			if (mp == NULL) {
				if (nmp != NULL)
					freemsg(nmp);
				mutex_exit(&mmd->mmd_pd_slab_lock);
				return (NULL);
			}
			mp->b_rptr = pa->pld_rptr;
			mp->b_wptr = pa->pld_wptr;
			if (nmp == NULL)
				nmp = mp;
			else
				linkb(nmp, mp);
		}
		mutex_exit(&mmd->mmd_pd_slab_lock);
	}

	return (nmp);
}

/*
 * Return duplicate message block(s) of the associated buffer(s).
 */
int
mmd_dupbufs(multidata_t *mmd, mblk_t **hmp, mblk_t **pmp)
{
	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	if (hmp != NULL) {
		*hmp = NULL;
		if (mmd->mmd_hbuf != NULL &&
		    (*hmp = dupb(mmd->mmd_hbuf)) == NULL)
			return (-1);
	}

	if (pmp != NULL) {
		int i;
		mblk_t *mp;

		mutex_enter(&mmd->mmd_pd_slab_lock);
		*pmp = NULL;
		for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
			ASSERT(mmd->mmd_pbuf[i] != NULL);
			mp = dupb(mmd->mmd_pbuf[i]);
			if (mp == NULL) {
				if (hmp != NULL && *hmp != NULL)
					freeb(*hmp);
				if (*pmp != NULL)
					freemsg(*pmp);
				mutex_exit(&mmd->mmd_pd_slab_lock);
				return (-1);
			}
			if (*pmp == NULL)
				*pmp = mp;
			else
				linkb(*pmp, mp);
		}
		mutex_exit(&mmd->mmd_pd_slab_lock);
	}

	return (0);
}

/*
 * Return the layout of a packet descriptor.
 */
int
mmd_getpdescinfo(pdesc_t *pd, pdescinfo_t *pdi)
{
	ASSERT(pd != NULL);
	ASSERT(pd->pd_magic == PDESC_MAGIC);
	ASSERT(pd->pd_slab != NULL);
	ASSERT(pd->pd_slab->pds_mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(pdi != NULL);

	/* entry has been removed */
	if (pd->pd_flags & PDESC_REM_DEFER)
		return (-1);

	/* copy descriptor info to caller */
	PDI_COPY(&(pd->pd_pdi), pdi);

	return (0);
}

/*
 * Add a global or local attribute to a Multidata.  Global attribute
 * association is specified by a NULL packet descriptor.
 */
pattr_t *
mmd_addpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai,
    boolean_t persistent, int kmflags)
{
	patbkt_t **tbl_p;
	patbkt_t *tbl, *o_tbl;
	patbkt_t *bkt;
	pattr_t *pa;
	uint_t size;

	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC);
	ASSERT(pai != NULL);

	/* pointer to the attribute hash table (local or global) */
	tbl_p = pd != NULL ? &(pd->pd_pattbl) : &(mmd->mmd_pattbl);

	/*
	 * See if the hash table has not yet been created; if so,
	 * we create the table and store its address atomically.
	 */
	if ((tbl = *tbl_p) == NULL) {
		tbl = kmem_cache_alloc(pattbl_cache, kmflags);
		if (tbl == NULL)
			return (NULL);

		/* if someone got there first, use that table instead */
		if ((o_tbl = casptr(tbl_p, NULL, tbl)) != NULL) {
			kmem_cache_free(pattbl_cache, tbl);
			tbl = o_tbl;
		}
	}

	ASSERT(tbl->pbkt_tbl_sz > 0);
	bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]);

	/* attribute of the same type already exists? */
	if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL)
		return (NULL);

	size = sizeof (*pa) + pai->len;
	if ((pa = kmem_zalloc(size, kmflags)) == NULL)
		return (NULL);

	pa->pat_magic = PATTR_MAGIC;
	pa->pat_lock = &(bkt->pbkt_lock);
	pa->pat_mmd = mmd;
	pa->pat_buflen = size;
	pa->pat_type = pai->type;
	pai->buf = pai->len > 0 ? ((uchar_t *)(pa + 1)) : NULL;

	if (persistent)
		pa->pat_flags = PATTR_PERSIST;

	/* insert attribute at end of hash chain */
	mutex_enter(&(bkt->pbkt_lock));
	insque(&(pa->pat_next), bkt->pbkt_pattr_q.ql_prev);
	mutex_exit(&(bkt->pbkt_lock));

	return (pa);
}
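
/*
 * Illustrative sketch (not compiled in): attaching a per-packet (local)
 * attribute to a descriptor.  "MY_ATTR_TYPE", "my_attr_t" and "my_attr"
 * are hypothetical names; real attribute types and payload layouts are
 * defined by Multidata clients.  On success, pai.buf points at pai.len
 * bytes of zeroed storage that follows the pattr_t itself.
 *
 *	pattrinfo_t pai;
 *	pattr_t *pa;
 *
 *	pai.type = MY_ATTR_TYPE;
 *	pai.len = sizeof (my_attr_t);
 *	pa = mmd_addpattr(mmd, pd, &pai, B_TRUE, KM_NOSLEEP);
 *	if (pa == NULL)
 *		return (ENOMEM);   ... or an attribute of this type exists ...
 *	bcopy(&my_attr, pai.buf, sizeof (my_attr_t));
 */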

/*
 * Attribute hash table kmem cache constructor routine.
 */
/* ARGSUSED */
static int
pattbl_constructor(void *buf, void *cdrarg, int kmflags)
{
	patbkt_t *bkt;
	uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg;
	uint_t i;

	ASSERT(tbl_sz > 0);	/* table size can't be zero */

	for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) {
		mutex_init(&(bkt->pbkt_lock), NULL, MUTEX_DRIVER, NULL);
		QL_INIT(&(bkt->pbkt_pattr_q));

		/* first bucket contains the table size */
		bkt->pbkt_tbl_sz = i == 0 ? tbl_sz : 0;
	}
	return (0);
}

/*
 * Attribute hash table kmem cache destructor routine.
 */
/* ARGSUSED */
static void
pattbl_destructor(void *buf, void *cdrarg)
{
	patbkt_t *bkt;
	uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg;
	uint_t i;

	ASSERT(tbl_sz > 0);	/* table size can't be zero */

	for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) {
		mutex_destroy(&(bkt->pbkt_lock));
		ASSERT(bkt->pbkt_pattr_q.ql_next == &(bkt->pbkt_pattr_q));
		ASSERT(i > 0 || bkt->pbkt_tbl_sz == tbl_sz);
	}
}

/*
 * Destroy an attribute hash table, called by mmd_rempdesc or during free.
 */
static void
mmd_destroy_pattbl(patbkt_t **tbl)
{
	patbkt_t *bkt;
	pattr_t *pa, *pa_next;
	uint_t i, tbl_sz;

	ASSERT(tbl != NULL);
	bkt = *tbl;
	tbl_sz = bkt->pbkt_tbl_sz;

	/* make sure caller passes in the first bucket */
	ASSERT(tbl_sz > 0);

	/* destroy the contents of each bucket */
	for (i = 0; i < tbl_sz; i++, bkt++) {
		/* we ought to be exclusive at this point */
		ASSERT(MUTEX_NOT_HELD(&(bkt->pbkt_lock)));

		pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
		while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) {
			ASSERT(pa->pat_magic == PATTR_MAGIC);
			pa_next = Q2PATTR(pa->pat_next);
			remque(&(pa->pat_next));
			kmem_free(pa, pa->pat_buflen);
			pa = pa_next;
		}
	}

	kmem_cache_free(pattbl_cache, *tbl);
	*tbl = NULL;

	/* commit all previous stores */
	membar_producer();
}

/*
 * Copy the contents of an attribute hash table, called by mmd_copy.
 */
static int
mmd_copy_pattbl(patbkt_t *src_tbl, multidata_t *n_mmd, pdesc_t *n_pd,
    int kmflags)
{
	patbkt_t *bkt;
	pattr_t *pa;
	pattrinfo_t pai;
	uint_t i, tbl_sz;

	ASSERT(src_tbl != NULL);
	bkt = src_tbl;
	tbl_sz = bkt->pbkt_tbl_sz;

	/* make sure caller passes in the first bucket */
	ASSERT(tbl_sz > 0);

	for (i = 0; i < tbl_sz; i++, bkt++) {
		mutex_enter(&(bkt->pbkt_lock));
		pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
		while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) {
			pattr_t *pa_next = Q2PATTR(pa->pat_next);

			/* skip if it's removed */
			if (pa->pat_flags & PATTR_REM_DEFER) {
				pa = pa_next;
				continue;
			}

			pai.type = pa->pat_type;
			pai.len = pa->pat_buflen - sizeof (*pa);
			if (mmd_addpattr(n_mmd, n_pd, &pai, (pa->pat_flags &
			    PATTR_PERSIST) != 0, kmflags) == NULL) {
				mutex_exit(&(bkt->pbkt_lock));
				return (-1);
			}

			/* copy over the contents */
			if (pai.buf != NULL)
				bcopy(pa + 1, pai.buf, pai.len);

			pa = pa_next;
		}
		mutex_exit(&(bkt->pbkt_lock));
	}

	return (0);
}

/*
 * Search for an attribute type within an attribute hash bucket.
 */
static pattr_t *
mmd_find_pattr(patbkt_t *bkt, uint_t type)
{
	pattr_t *pa_head, *pa;

	mutex_enter(&(bkt->pbkt_lock));
	pa_head = Q2PATTR(&(bkt->pbkt_pattr_q));
	pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);

	while (pa != pa_head) {
		ASSERT(pa->pat_magic == PATTR_MAGIC);

		/* return a match; we treat removed entry as non-existent */
		if (pa->pat_type == type && !(pa->pat_flags & PATTR_REM_DEFER))
			break;
		pa = Q2PATTR(pa->pat_next);
	}
	mutex_exit(&(bkt->pbkt_lock));

	return (pa == pa_head ? NULL : pa);
}

/*
 * Remove an attribute from a Multidata.
 */
void
mmd_rempattr(pattr_t *pa)
{
	kmutex_t *pat_lock = pa->pat_lock;

	ASSERT(pa->pat_magic == PATTR_MAGIC);

	/* ignore if attribute was marked as persistent */
	if ((pa->pat_flags & PATTR_PERSIST) != 0)
		return;

	mutex_enter(pat_lock);
	/*
	 * We can't deallocate the associated resources if the Multidata
	 * is shared with other threads, because it's possible that the
	 * attribute handle value is held by those threads.  That's why
	 * we simply mark the entry as "removed".  If there are no other
	 * threads, then we free the attribute.
	 */
	if (pa->pat_mmd->mmd_dp->db_ref > 1) {
		pa->pat_flags |= PATTR_REM_DEFER;
	} else {
		remque(&(pa->pat_next));
		kmem_free(pa, pa->pat_buflen);
	}
	mutex_exit(pat_lock);
}

/*
 * Find an attribute (according to its type) and return its handle.
 */
pattr_t *
mmd_getpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai)
{
	patbkt_t *tbl, *bkt;
	pattr_t *pa;

	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(pai != NULL);

	/* get the right attribute hash table (local or global) */
	tbl = pd != NULL ? pd->pd_pattbl : mmd->mmd_pattbl;

	/* attribute hash table doesn't exist? */
	if (tbl == NULL)
		return (NULL);

	ASSERT(tbl->pbkt_tbl_sz > 0);
	bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]);

	if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) {
		ASSERT(pa->pat_buflen >= sizeof (*pa));
		pai->len = pa->pat_buflen - sizeof (*pa);
		pai->buf = pai->len > 0 ?
		    (uchar_t *)pa + sizeof (pattr_t) : NULL;
	}
	ASSERT(pa == NULL || pa->pat_magic == PATTR_MAGIC);
	return (pa);
}
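
/*
 * Illustrative sketch (not compiled in): looking up an attribute by
 * type.  Passing a NULL pdesc_t searches the Multidata's global table;
 * passing a descriptor searches that descriptor's local table.
 * "MY_ATTR_TYPE" and "my_attr_t" are hypothetical, as above.
 *
 *	pattrinfo_t pai;
 *	pattr_t *pa;
 *
 *	pai.type = MY_ATTR_TYPE;
 *	if ((pa = mmd_getpattr(mmd, NULL, &pai)) != NULL) {
 *		my_attr_t *ap = (my_attr_t *)pai.buf;
 *		... pai.len bytes of attribute payload are valid here ...
 *	}
 */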

/*
 * Return total size of buffers and total size of areas referenced
 * by all in-use (unremoved) packet descriptors.
 */
void
mmd_getsize(multidata_t *mmd, uint_t *ptotal, uint_t *pinuse)
{
	pdesc_t *pd;
	pdescinfo_t *pdi;
	int i;

	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);

	mutex_enter(&mmd->mmd_pd_slab_lock);
	if (ptotal != NULL) {
		*ptotal = 0;

		if (mmd->mmd_hbuf != NULL)
			*ptotal += MBLKL(mmd->mmd_hbuf);

		for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
			ASSERT(mmd->mmd_pbuf[i] != NULL);
			*ptotal += MBLKL(mmd->mmd_pbuf[i]);
		}
	}
	if (pinuse != NULL) {
		*pinuse = 0;

		/* first pdesc */
		pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE);
		while (pd != NULL) {
			pdi = &pd->pd_pdi;

			/* next pdesc */
			pd = mmd_getpdesc(mmd, pd, NULL, 1, B_TRUE);

			/* skip over removed descriptor */
			if (pdi->flags & PDESC_REM_DEFER)
				continue;

			if (pdi->flags & PDESC_HBUF_REF)
				*pinuse += PDESC_HDRL(pdi);

			if (pdi->flags & PDESC_PBUF_REF) {
				for (i = 0; i < pdi->pld_cnt; i++)
					*pinuse += PDESC_PLDL(pdi, i);
			}
		}
	}
	mutex_exit(&mmd->mmd_pd_slab_lock);
}
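
/*
 * Illustrative sketch (not compiled in): comparing how much buffer space
 * a Multidata carries with how much of it the in-use descriptors actually
 * reference, e.g. to judge whether the packing was efficient.
 *
 *	uint_t total, inuse, pd_cnt, hbuf_ref, pbuf_ref;
 *
 *	mmd_getsize(mmd, &total, &inuse);
 *	pd_cnt = mmd_getcnt(mmd, &hbuf_ref, &pbuf_ref);
 *	... "total" is the byte count of the header and payload mblks,
 *	... "inuse" the bytes spanned by the pd_cnt live descriptors ...
 */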