/*	$OpenBSD: uipc_mbuf.c,v 1.286 2023/05/16 20:09:27 mvs Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

/*
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include "pf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/mbuf.h>
#include <sys/pool.h>
#include <sys/percpu.h>
#include <sys/sysctl.h>

#include <sys/socket.h>
#include <net/if.h>

#include <uvm/uvm_extern.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

#if NPF > 0
#include <net/pfvar.h>
#endif	/* NPF > 0 */
/* mbuf stats */
COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);
struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);
/* mbuf pools */
struct	pool mbpool;
struct	pool mtagpool;

/* mbuf cluster pools */
u_int	mclsizes[MCLPOOLS] = {
	MCLBYTES,	/* must be at slot 0 */
	MCLBYTES + 2,	/* ETHER_ALIGNED 2k mbufs */
	4 * 1024,
	8 * 1024,
	9 * 1024,
	12 * 1024,
	16 * 1024,
	64 * 1024
};
static	char mclnames[MCLPOOLS][8];
struct	pool mclpools[MCLPOOLS];

struct pool *m_clpool(u_int);

int max_linkhdr;		/* largest link-level header */
int max_protohdr;		/* largest protocol header */
int max_hdr;			/* largest link+protocol header */

struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);

void	m_extfree(struct mbuf *);
void	m_zero(struct mbuf *);

unsigned long mbuf_mem_limit;	/* how much memory can be allocated */
unsigned long mbuf_mem_alloc;	/* how much memory has been allocated */

void	*m_pool_alloc(struct pool *, int, int *);
void	m_pool_free(struct pool *, void *);

struct pool_allocator m_pool_allocator = {
	m_pool_alloc,
	m_pool_free,
	0 /* will be copied from pool_allocator_multi */
};

static void (*mextfree_fns[4])(caddr_t, u_int, void *);
static u_int num_extfree_fns;

#define M_DATABUF(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \
			(m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
#define M_SIZE(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_size : \
			(m)->m_flags & M_PKTHDR ? MHLEN : MLEN)

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{
	int i, error;
	unsigned int lowbits;

	CTASSERT(MSIZE == sizeof(struct mbuf));

	m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;

	mbuf_mem_alloc = 0;

#if DIAGNOSTIC
	if (mclsizes[0] != MCLBYTES)
		panic("mbinit: the smallest cluster size != MCLBYTES");
	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
		panic("mbinit: the largest cluster size != MAXMCLBYTES");
#endif

	m_pool_init(&mbpool, MSIZE, 64, "mbufpl");

	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
	    IPL_NET, 0, "mtagpl", NULL);

	for (i = 0; i < nitems(mclsizes); i++) {
		lowbits = mclsizes[i] & ((1 << 10) - 1);
		if (lowbits) {
			snprintf(mclnames[i], sizeof(mclnames[0]),
			    "mcl%dk%u", mclsizes[i] >> 10, lowbits);
		} else {
			snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
			    mclsizes[i] >> 10);
		}

		m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
	}

	error = nmbclust_update(nmbclust);
	KASSERT(error == 0);

	(void)mextfree_register(m_extfree_pool);
	KASSERT(num_extfree_fns == 1);
}

void
mbcpuinit(void)
{
	int i;

	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT);

	pool_cache_init(&mbpool);
	pool_cache_init(&mtagpool);

	for (i = 0; i < nitems(mclsizes); i++)
		pool_cache_init(&mclpools[i]);
}

int
nmbclust_update(long newval)
{
	int i;

	if (newval < 0 || newval > LONG_MAX / MCLBYTES)
		return ERANGE;
	/* update the global mbuf memory limit */
	nmbclust = newval;
	mbuf_mem_limit = nmbclust * MCLBYTES;

	pool_wakeup(&mbpool);
	for (i = 0; i < nitems(mclsizes); i++)
		pool_wakeup(&mclpools[i]);

	return 0;
}

/*
 * Space allocation routines.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[type]++;
	counters_leave(&cr, mbstat);
	splx(s);

	m->m_type = type;
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return (m);
}

/*
 * ATTN: When changing anything here, check m_inithdr() and m_defrag();
 * those may need to change as well.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[type]++;
	counters_leave(&cr, mbstat);
	splx(s);

	m->m_type = type;

	return (m_inithdr(m));
}

struct mbuf *
m_inithdr(struct mbuf *m)
{
	/* keep in sync with m_gethdr */
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;

	return (m);
}

static inline void
m_clearhdr(struct mbuf *m)
{
	/* delete all mbuf tags to reset the state */
	m_tag_delete_chain(m);
#if NPF > 0
	pf_mbuf_unlink_state_key(m);
	pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */

	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
}

void
m_removehdr(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m_clearhdr(m);
	m->m_flags &= ~M_PKTHDR;
}

void
m_resethdr(struct mbuf *m)
{
	int len = m->m_pkthdr.len;
	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;

	KASSERT(m->m_flags & M_PKTHDR);
	m->m_flags &= (M_EXT|M_PKTHDR|M_EOR|M_EXTWR|M_ZEROIZE);
	m_clearhdr(m);
	/* like m_inithdr(), but keep any associated data and mbufs */
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
	m->m_pkthdr.len = len;
	m->m_pkthdr.ph_loopcnt = loopcnt;
}

void
m_calchdrlen(struct mbuf *m)
{
	struct mbuf *n;
	int plen = 0;

	KASSERT(m->m_flags & M_PKTHDR);
	for (n = m; n; n = n->m_next)
		plen += n->m_len;
	m->m_pkthdr.len = plen;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == NULL)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

struct pool *
m_clpool(u_int pktlen)
{
	struct pool *pp;
	int pi;

	for (pi = 0; pi < nitems(mclpools); pi++) {
		pp = &mclpools[pi];
		if (pktlen <= pp->pr_size)
			return (pp);
	}

	return (NULL);
}

struct mbuf *
m_clget(struct mbuf *m, int how, u_int pktlen)
{
	struct mbuf *m0 = NULL;
	struct pool *pp;
	caddr_t buf;

	pp = m_clpool(pktlen);
#ifdef DIAGNOSTIC
	if (pp == NULL)
		panic("m_clget: request for %u byte cluster", pktlen);
#endif

	if (m == NULL) {
		m0 = m_gethdr(how, MT_DATA);
		if (m0 == NULL)
			return (NULL);

		m = m0;
	}
	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (buf == NULL) {
		m_freem(m0);
		return (NULL);
	}

	MEXTADD(m, buf, pp->pr_size, M_EXTWR, MEXTFREE_POOL, pp);
	return (m);
}
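
/*
 * Example (sketch, not part of the original source): a caller that
 * wants a single cluster-backed packet can pass a NULL mbuf and let
 * m_clget() allocate the packet header mbuf as well:
 *
 *	struct mbuf *m;
 *
 *	m = m_clget(NULL, M_DONTWAIT, 9 * 1024);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *
 * m_clpool() rounds the request up to the smallest cluster pool that
 * fits, here the 9k pool; the result is freed as usual with m_freem().
 */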

void
m_extfree_pool(caddr_t buf, u_int size, void *pp)
{
	pool_put(pp, buf);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[m->m_type]--;
	counters_leave(&cr, mbstat);
	splx(s);

	n = m->m_next;
	if (m->m_flags & M_ZEROIZE) {
		m_zero(m);
		/* propagate M_ZEROIZE to the next mbuf in the chain */
		if (n)
			n->m_flags |= M_ZEROIZE;
	}
	if (m->m_flags & M_PKTHDR) {
		m_tag_delete_chain(m);
#if NPF > 0
		pf_mbuf_unlink_state_key(m);
		pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */
	}
	if (m->m_flags & M_EXT)
		m_extfree(m);

	pool_put(&mbpool, m);

	return (n);
}

void
m_extref(struct mbuf *o, struct mbuf *n)
{
	int refs = MCLISREFERENCED(o);

	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);

	if (refs)
		mtx_enter(&m_extref_mtx);
	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
	n->m_ext.ext_prevref = o;
	o->m_ext.ext_nextref = n;
	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
	if (refs)
		mtx_leave(&m_extref_mtx);

	MCLREFDEBUGN((n), __FILE__, __LINE__);
}

static inline u_int
m_extunref(struct mbuf *m)
{
	int refs = 0;

	if (!MCLISREFERENCED(m))
		return (0);

	mtx_enter(&m_extref_mtx);
	if (MCLISREFERENCED(m)) {
		m->m_ext.ext_nextref->m_ext.ext_prevref =
		    m->m_ext.ext_prevref;
		m->m_ext.ext_prevref->m_ext.ext_nextref =
		    m->m_ext.ext_nextref;
		refs = 1;
	}
	mtx_leave(&m_extref_mtx);

	return (refs);
}

/*
 * Returns an index for use with MEXTADD.
 * Should only be called once per free function.
 * Drivers can be assured that the index will be non-zero.
 */
u_int
mextfree_register(void (*fn)(caddr_t, u_int, void *))
{
	KASSERT(num_extfree_fns < nitems(mextfree_fns));
	mextfree_fns[num_extfree_fns] = fn;
	return num_extfree_fns++;
}
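
/*
 * Example (hypothetical driver sketch): a driver with its own buffer
 * store registers a free function once, then hands out buffers with
 * MEXTADD using the returned index.  "xd_buf_put", "xd_buf_free" and
 * "xd_softc" are invented names for illustration only.
 *
 *	void
 *	xd_buf_put(caddr_t buf, u_int size, void *arg)
 *	{
 *		struct xd_softc *sc = arg;
 *
 *		xd_buf_free(sc, buf, size);
 *	}
 *
 *	sc->sc_extfree_idx = mextfree_register(xd_buf_put);
 *
 *	MEXTADD(m, buf, size, M_EXTWR, sc->sc_extfree_idx, sc);
 */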

void
m_extfree(struct mbuf *m)
{
	if (m_extunref(m) == 0) {
		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
		    m->m_ext.ext_size, m->m_ext.ext_arg);
	}

	m->m_flags &= ~(M_EXT|M_EXTWR);
}

struct mbuf *
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return (NULL);

	n = m->m_nextpkt;

	do
		m = m_free(m);
	while (m != NULL);

	return (n);
}

void
m_purge(struct mbuf *m)
{
	while (m != NULL)
		m = m_freem(m);
}

/*
 * mbuf chain defragmenter. This function uses some evil tricks to defragment
 * an mbuf chain into a single buffer without changing the mbuf pointer.
 * It needs to know a lot of the mbuf internals to make this work.
 */
int
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0;

	if (m->m_next == NULL)
		return (0);

	KASSERT(m->m_flags & M_PKTHDR);

	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
		return (ENOBUFS);
	if (m->m_pkthdr.len > MHLEN) {
		MCLGETL(m0, how, m->m_pkthdr.len);
		if (!(m0->m_flags & M_EXT)) {
			m_free(m0);
			return (ENOBUFS);
		}
	}
	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;

	/* free chain behind and possible ext buf on the first mbuf */
	m_freem(m->m_next);
	m->m_next = NULL;
	if (m->m_flags & M_EXT)
		m_extfree(m);

	/*
	 * Bounce copy mbuf over to the original mbuf and set everything up.
	 * This needs to reset or clear all pointers that may go into the
	 * original mbuf chain.
	 */
	if (m0->m_flags & M_EXT) {
		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
		MCLINITREFERENCE(m);
		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
		m->m_data = m->m_ext.ext_buf;
	} else {
		m->m_data = m->m_pktdat;
		memcpy(m->m_data, m0->m_data, m0->m_len);
	}
	m->m_pkthdr.len = m->m_len = m0->m_len;

	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
	m_free(m0);

	return (0);
}
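
/*
 * Example (sketch): a driver whose hardware cannot chain DMA segments
 * may flatten a packet (which must have a packet header) before
 * loading it, dropping the packet when memory is tight:
 *
 *	if (m_defrag(m, M_DONTWAIT) != 0) {
 *		m_freem(m);
 *		return (ENOBUFS);
 *	}
 *
 * On success the original mbuf pointer remains valid and now carries
 * all of the data contiguously.
 */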

/*
 * Mbuffer utility routines.
 */

/*
 * Ensure len bytes of contiguous space at the beginning of the mbuf chain.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	if (m_leadingspace(m) >= len) {
		m->m_data -= len;
		m->m_len += len;
	} else {
		MGET(mn, how, m->m_type);
		if (mn == NULL) {
			m_freem(m);
			return (NULL);
		}
		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(mn, m);
		mn->m_next = m;
		m = mn;
		m_align(m, len);
		m->m_len = len;
	}
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len += len;
	return (m);
}
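
/*
 * Example (sketch): encapsulation code usually goes through the
 * M_PREPEND() wrapper and must check for NULL, since the chain is
 * freed on failure:
 *
 *	M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	eh = mtod(m, struct ether_header *);
 */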

/*
 * Make a copy of an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of the
 * mbuf chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT from
 * the caller.
 */
struct mbuf *
m_copym(struct mbuf *m0, int off, int len, int wait)
{
	struct mbuf *m, *n, **np;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m0->m_flags & M_PKTHDR)
		copyhdr = 1;
	if ((m = m_getptr(m0, off, &off)) == NULL)
		panic("m_copym: short mbuf chain");
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("m_copym: m == NULL and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (m_dup_pkthdr(n, m0, wait))
				goto nospace;
			if (len != M_COPYALL)
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			n->m_data += m->m_data -
			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
			n->m_data += off;
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    n->m_len);
		}
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym: overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
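
/*
 * Example (sketch): taking a snapshot of a whole packet, e.g. for a
 * tap consumer, without copying cluster data (clusters are shared by
 * reference via MCLADDREFERENCE):
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *
 * The copy must be treated as read-only wherever it shares clusters
 * with the original chain.
 */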

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *p)
{
	caddr_t cp = p;
	unsigned count;

	if (off < 0)
		panic("m_copydata: off %d < 0", off);
	if (len < 0)
		panic("m_copydata: len %d < 0", len);
	if ((m = m_getptr(m, off, &off)) == NULL)
		panic("m_copydata: short mbuf chain");
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf");
		count = min(m->m_len - off, len);
		memmove(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
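
/*
 * Example (sketch): pulling a header out of a chain into an aligned
 * local copy, no matter how the data is split across mbufs; "hlen" is
 * the caller's offset to the header:
 *
 *	struct tcphdr th;
 *
 *	if (m->m_pkthdr.len < hlen + sizeof(th))
 *		return (EMSGSIZE);
 *	m_copydata(m, hlen, sizeof(th), &th);
 */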

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary. The mbuf needs to be properly initialized,
 * including the setting of m_len.
 */
int
m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
{
	int mlen, totlen = 0;
	struct mbuf *m = m0, *n;
	caddr_t cp = (caddr_t)_cp;
	int error = 0;

	if (m0 == NULL)
		return (0);
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (off + len > MLEN) {
				MCLGETL(n, wait, off + len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			memset(mtod(n, caddr_t), 0, off);
			n->m_len = len + off;
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		/* extend last packet to be filled fully */
		if (m->m_next == NULL && (len > m->m_len - off))
			m->m_len += min(len - (m->m_len - off),
			    m_trailingspace(m));
		mlen = min(m->m_len - off, len);
		memmove(mtod(m, caddr_t) + off, cp, mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;
		if (len == 0)
			break;
		off = 0;

		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (len > MLEN) {
				MCLGETL(n, wait, len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			n->m_len = len;
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return (error);
}
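
/*
 * Example (sketch): writing a recomputed 16-bit checksum back into a
 * packet at offset "off"; the chain is grown as needed and the only
 * failure mode is running out of mbufs:
 *
 *	u_int16_t sum = 0;
 *
 *	if (m_copyback(m, off, sizeof(sum), &sum, M_DONTWAIT) != 0)
 *		return (ENOBUFS);
 */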

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), so the data portion of
 * n could end up in an mbuf of a different mbuf type.
 * Therefore both chains should be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > m_trailingspace(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if (mp == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		m = mp;
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_data += m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_data += len;
				m->m_len -= len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		m = mp;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len = count;
		m = mp;
		for (;;) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
			m = m->m_next;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}
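
/*
 * Example (sketch): a positive length trims from the head of the
 * chain, a negative one from the tail; both update m_pkthdr.len when
 * a packet header is present:
 *
 *	m_adj(m, ETHER_HDR_LEN);	trim the link-level header
 *	m_adj(m, -ETHER_CRC_LEN);	trim the trailing FCS
 */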

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success; on failure it frees the chain and
 * returns NULL.
 */
struct mbuf *
m_pullup(struct mbuf *m0, int len)
{
	struct mbuf *m;
	unsigned int adj;
	caddr_t head, tail;
	unsigned int space;

	/* if len is already contig in m0, then don't do any work */
	if (len <= m0->m_len)
		return (m0);

	/* look for some data */
	m = m0->m_next;
	if (m == NULL)
		goto freem0;

	head = M_DATABUF(m0);
	if (m0->m_len == 0) {
		while (m->m_len == 0) {
			m = m_free(m);
			if (m == NULL)
				goto freem0;
		}

		adj = mtod(m, unsigned long) & (sizeof(long) - 1);
	} else
		adj = mtod(m0, unsigned long) & (sizeof(long) - 1);

	tail = head + M_SIZE(m0);
	head += adj;

	if (!M_READONLY(m0) && len <= tail - head) {
		/* we can copy everything into the first mbuf */
		if (m0->m_len == 0) {
			m0->m_data = head;
		} else if (len > tail - mtod(m0, caddr_t)) {
			/* need to memmove to make space at the end */
			memmove(head, mtod(m0, caddr_t), m0->m_len);
			m0->m_data = head;
		}

		len -= m0->m_len;
	} else {
		/* the first mbuf is too small or read-only, make a new one */
		space = adj + len;

		if (space > MAXMCLBYTES)
			goto bad;

		m0->m_next = m;
		m = m0;

		MGET(m0, M_DONTWAIT, m->m_type);
		if (m0 == NULL)
			goto bad;

		if (space > MHLEN) {
			MCLGETL(m0, M_DONTWAIT, space);
			if ((m0->m_flags & M_EXT) == 0)
				goto bad;
		}

		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m0, m);

		m0->m_len = 0;
		m0->m_data += adj;
	}

	KDASSERT(m_trailingspace(m0) >= len);

	for (;;) {
		space = min(len, m->m_len);
		memcpy(mtod(m0, caddr_t) + m0->m_len, mtod(m, caddr_t), space);
		len -= space;
		m0->m_len += space;
		m->m_len -= space;

		if (m->m_len > 0)
			m->m_data += space;
		else
			m = m_free(m);

		if (len == 0)
			break;

		if (m == NULL)
			goto bad;
	}

	m0->m_next = m; /* link the chain back up */

	return (m0);

bad:
	m_freem(m);
freem0:
	m_free(m0);
	return (NULL);
}
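
/*
 * Example (sketch): the classic input-path idiom.  The head mbuf may
 * be replaced, so m is reassigned, and the chain is already freed
 * when NULL comes back:
 *
 *	if (m->m_len < sizeof(struct ip)) {
 *		m = m_pullup(m, sizeof(struct ip));
 *		if (m == NULL)
 *			return;
 *	}
 *	ip = mtod(m, struct ip *);
 */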

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else {
					return (NULL);
				}
			} else {
				m = m->m_next;
			}
		}
	}

	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		if (m_dup_pkthdr(n, m0, wait)) {
			m_freem(n);
			return (NULL);
		}
		n->m_pkthdr.len -= len0;
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (remain == 0) {
			n->m_next = m->m_next;
			m->m_next = NULL;
			n->m_len = 0;
			return (n);
		}
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			m_align(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			m_align(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		m_align(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
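
/*
 * Example (sketch): splitting a packet after an initial fragment of
 * "flen" bytes; on success "n" owns the tail and both chains have
 * consistent packet header lengths:
 *
 *	struct mbuf *n;
 *
 *	n = m_split(m, flen, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);	the original chain is restored
 */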

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	KASSERT(m0->m_flags & M_PKTHDR);
	/*
	 * Limit the size of the new header to MHLEN. In case
	 * skip = 0 and the first buffer is not a cluster this
	 * is the maximum space available in that mbuf.
	 * In other words this code never prepends an mbuf.
	 */
	KASSERT(hlen < MHLEN);

	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed.  Figure out
	 * if there's space to insert the new header.  If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;		/* data to move */
	if (skip < remain && hlen <= m_leadingspace(m)) {
		if (skip)
			memmove(m->m_data-hlen, m->m_data, skip);
		m->m_data -= hlen;
		m->m_len += hlen;
		*off = skip;
	} else if (hlen > m_trailingspace(m)) {
		struct mbuf *n;

		if (remain > 0) {
			MGET(n, M_DONTWAIT, m->m_type);
			if (n && remain > MLEN) {
				MCLGETL(n, M_DONTWAIT, remain);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n == NULL)
				return (NULL);

			memcpy(n->m_data, mtod(m, char *) + skip, remain);
			n->m_len = remain;
			m->m_len -= remain;

			n->m_next = m->m_next;
			m->m_next = n;
		}

		if (hlen <= m_trailingspace(m)) {
			m->m_len += hlen;
			*off = skip;
		} else {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				return NULL;

			n->m_len = hlen;

			n->m_next = m->m_next;
			m->m_next = n;

			*off = 0;	/* header is at front ... */
			m = n;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		if (remain > 0)
			memmove(mtod(m, caddr_t) + skip + hlen,
			      mtod(m, caddr_t) + skip, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}
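
/*
 * Example (sketch, in the style of the IPsec output path): inserting
 * an encapsulation header of "hlen" bytes at offset "skip"; "hdr" is
 * an invented source buffer:
 *
 *	int off;
 *	struct mbuf *mh;
 *
 *	mh = m_makespace(m, skip, hlen, &off);
 *	if (mh == NULL) {
 *		m_freem(m);	the chain may already be modified
 *		return (ENOBUFS);
 *	}
 *	memcpy(mtod(mh, caddr_t) + off, hdr, hlen);
 */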

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off)
{
	struct mbuf	*m;
	struct mbuf	*top, **mp;
	int		 len;

	top = NULL;
	mp = &top;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	m->m_pkthdr.len = totlen;

	len = MHLEN;

	while (totlen > 0) {
		if (top != NULL) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				/*
				 * As we might get called by pfkey, make sure
				 * we do not leak sensitive data.
				 */
				top->m_flags |= M_ZEROIZE;
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}

		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/* Place initial small packet/header at end of mbuf. */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}

		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}

		m->m_len = len = min(totlen, len);
		memcpy(mtod(m, void *), buf, (size_t)len);

		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

void
m_zero(struct mbuf *m)
{
	if (M_READONLY(m)) {
		mtx_enter(&m_extref_mtx);
		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
		}
		mtx_leave(&m_extref_mtx);
		return;
	}

	explicit_bzero(M_DATABUF(m), M_SIZE(m));
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
{
	int rval;
	unsigned int count;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);
	while (off > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		count = min(m->m_len - off, len);

		rval = f(fstate, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}
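
/*
 * Example (sketch): a callback that folds bytes into a running sum
 * without linearizing the chain; "sum_bytes" is invented, and its
 * first argument is the opaque "fstate" passed to m_apply():
 *
 *	int
 *	sum_bytes(caddr_t state, caddr_t data, unsigned int len)
 *	{
 *		u_int32_t *sum = (u_int32_t *)state;
 *		unsigned int i;
 *
 *		for (i = 0; i < len; i++)
 *			*sum += (u_int8_t)data[i];
 *		return (0);
 *	}
 *
 *	u_int32_t sum = 0;
 *	error = m_apply(m, 0, m->m_pkthdr.len, sum_bytes, (caddr_t)&sum);
 *
 * A non-zero return from the callback aborts the walk and is passed
 * through to the caller.
 */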

/*
 * Compute the amount of space available before the current start of data
 * in an mbuf. Read-only clusters never have space available.
 */
int
m_leadingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(m->m_data >= M_DATABUF(m));
	return m->m_data - M_DATABUF(m);
}

/*
 * Compute the amount of space available after the end of data in an mbuf.
 * Read-only clusters never have space available.
 */
int
m_trailingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(M_DATABUF(m) + M_SIZE(m) >= (m->m_data + m->m_len));
	return M_DATABUF(m) + M_SIZE(m) - (m->m_data + m->m_len);
}

/*
 * Set the m_data pointer of a newly-allocated mbuf to place an object of
 * the specified size at the end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	KASSERT(len >= 0 && !M_READONLY(m));
	KASSERT(m->m_data == M_DATABUF(m));	/* newly-allocated check */
	KASSERT(((len + sizeof(long) - 1) &~ (sizeof(long) - 1)) <= M_SIZE(m));

	m->m_data = M_DATABUF(m) + ((M_SIZE(m) - (len)) &~ (sizeof(long) - 1));
}
/*
 * Duplicate the mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
{
	int error;

	KASSERT(from->m_flags & M_PKTHDR);

	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
	to->m_flags |= (from->m_flags & M_COPYFLAGS);
	to->m_pkthdr = from->m_pkthdr;

#if NPF > 0
	to->m_pkthdr.pf.statekey = NULL;
	pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
	to->m_pkthdr.pf.inp = NULL;
	pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
#endif	/* NPF > 0 */

	SLIST_INIT(&to->m_pkthdr.ph_tags);

	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
		return (error);

	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;

	return (0);
}

struct mbuf *
m_dup_pkt(struct mbuf *m0, unsigned int adj, int wait)
{
	struct mbuf *m;
	int len;

	KASSERT(m0->m_flags & M_PKTHDR);

	len = m0->m_pkthdr.len + adj;
	if (len > MAXMCLBYTES) /* XXX */
		return (NULL);

	m = m_get(wait, m0->m_type);
	if (m == NULL)
		return (NULL);

	if (m_dup_pkthdr(m, m0, wait) != 0)
		goto fail;

	if (len > MHLEN) {
		MCLGETL(m, wait, len);
		if (!ISSET(m->m_flags, M_EXT))
			goto fail;
	}

	m->m_len = m->m_pkthdr.len = len;
	m_adj(m, adj);
	m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, caddr_t));

	return (m);

fail:
	m_freem(m);
	return (NULL);
}

void
m_microtime(const struct mbuf *m, struct timeval *tv)
{
	if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) {
		struct timeval btv, utv;

		NSEC_TO_TIMEVAL(m->m_pkthdr.ph_timestamp, &utv);
		microboottime(&btv);
		timeradd(&btv, &utv, tv);
	} else
		microtime(tv);
}

void *
m_pool_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	if (atomic_add_long_nv(&mbuf_mem_alloc, pp->pr_pgsize) > mbuf_mem_limit)
		goto fail;

	v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
	if (v != NULL)
		return (v);

 fail:
	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
	return (NULL);
}

void
m_pool_free(struct pool *pp, void *v)
{
	(*pool_allocator_multi.pa_free)(pp, v);

	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
}

void
m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
{
	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
	pool_set_constraints(pp, &kp_dma_contig);
}

u_int
m_pool_used(void)
{
	return ((mbuf_mem_alloc * 100) / mbuf_mem_limit);
}

#ifdef DDB
void
m_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m = v;

	(*pr)("mbuf %p\n", m);
	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
	if (m->m_flags & M_PKTHDR) {
		(*pr)("m_pkthdr.ph_ifidx: %u\tm_pkthdr.len: %i\n",
		    m->m_pkthdr.ph_ifidx, m->m_pkthdr.len);
		(*pr)("m_pkthdr.ph_tags: %p\tm_pkthdr.ph_tagsset: %b\n",
		    SLIST_FIRST(&m->m_pkthdr.ph_tags),
		    m->m_pkthdr.ph_tagsset, MTAG_BITS);
		(*pr)("m_pkthdr.ph_flowid: %u\tm_pkthdr.ph_loopcnt: %u\n",
		    m->m_pkthdr.ph_flowid, m->m_pkthdr.ph_loopcnt);
		(*pr)("m_pkthdr.csum_flags: %b\n",
		    m->m_pkthdr.csum_flags, MCS_BITS);
		(*pr)("m_pkthdr.ether_vtag: %u\tm_pkthdr.ph_rtableid: %u\n",
		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
		(*pr)("m_pkthdr.pf.flags: %b\n",
		    m->m_pkthdr.pf.flags, MPF_BITS);
		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
	}
	if (m->m_flags & M_EXT) {
		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
		    m->m_ext.ext_buf, m->m_ext.ext_size);
		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
	}
}
#endif

/*
 * mbuf lists
 */

void
ml_init(struct mbuf_list *ml)
{
	ml->ml_head = ml->ml_tail = NULL;
	ml->ml_len = 0;
}

void
ml_enqueue(struct mbuf_list *ml, struct mbuf *m)
{
	if (ml->ml_tail == NULL)
		ml->ml_head = ml->ml_tail = m;
	else {
		ml->ml_tail->m_nextpkt = m;
		ml->ml_tail = m;
	}

	m->m_nextpkt = NULL;
	ml->ml_len++;
}

void
ml_enlist(struct mbuf_list *mla, struct mbuf_list *mlb)
{
	if (!ml_empty(mlb)) {
		if (ml_empty(mla))
			mla->ml_head = mlb->ml_head;
		else
			mla->ml_tail->m_nextpkt = mlb->ml_head;
		mla->ml_tail = mlb->ml_tail;
		mla->ml_len += mlb->ml_len;

		ml_init(mlb);
	}
}

struct mbuf *
ml_dequeue(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m != NULL) {
		ml->ml_head = m->m_nextpkt;
		if (ml->ml_head == NULL)
			ml->ml_tail = NULL;

		m->m_nextpkt = NULL;
		ml->ml_len--;
	}

	return (m);
}

struct mbuf *
ml_dechain(struct mbuf_list *ml)
{
	struct mbuf *m0;

	m0 = ml->ml_head;

	ml_init(ml);

	return (m0);
}

unsigned int
ml_purge(struct mbuf_list *ml)
{
	struct mbuf *m, *n;
	unsigned int len;

	for (m = ml->ml_head; m != NULL; m = n) {
		n = m->m_nextpkt;
		m_freem(m);
	}

	len = ml->ml_len;
	ml_init(ml);

	return (len);
}

unsigned int
ml_hdatalen(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m == NULL)
		return (0);

	KASSERT(ISSET(m->m_flags, M_PKTHDR));
	return (m->m_pkthdr.len);
}
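
/*
 * Example (sketch): mbuf_lists are the unlocked building block for
 * batching packets.  A receive path can gather a batch on the stack
 * and hand it to the stack in one call; "rxeof_next" is an invented
 * driver helper:
 *
 *	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 *	struct mbuf *m;
 *
 *	while ((m = rxeof_next(sc)) != NULL)
 *		ml_enqueue(&ml, m);
 *	if_input(ifp, &ml);
 */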

/*
 * mbuf queues
 */

void
mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl)
{
	mtx_init(&mq->mq_mtx, ipl);
	ml_init(&mq->mq_list);
	mq->mq_maxlen = maxlen;
}

int
mq_push(struct mbuf_queue *mq, struct mbuf *m)
{
	struct mbuf *dropped = NULL;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) >= mq->mq_maxlen) {
		mq->mq_drops++;
		dropped = ml_dequeue(&mq->mq_list);
	}
	ml_enqueue(&mq->mq_list, m);
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(dropped);

	return (dropped != NULL);
}

int
mq_enqueue(struct mbuf_queue *mq, struct mbuf *m)
{
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enqueue(&mq->mq_list, m);
	else {
		mq->mq_drops++;
		dropped = 1;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(m);

	return (dropped);
}

struct mbuf *
mq_dequeue(struct mbuf_queue *mq)
{
	struct mbuf *m;

	mtx_enter(&mq->mq_mtx);
	m = ml_dequeue(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m);
}

int
mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	struct mbuf *m;
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enlist(&mq->mq_list, ml);
	else {
		dropped = ml_len(ml);
		mq->mq_drops += dropped;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped) {
		while ((m = ml_dequeue(ml)) != NULL)
			m_freem(m);
	}

	return (dropped);
}

void
mq_delist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	mtx_enter(&mq->mq_mtx);
	*ml = mq->mq_list;
	ml_init(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);
}

struct mbuf *
mq_dechain(struct mbuf_queue *mq)
{
	struct mbuf *m0;

	mtx_enter(&mq->mq_mtx);
	m0 = ml_dechain(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m0);
}

unsigned int
mq_purge(struct mbuf_queue *mq)
{
	struct mbuf_list ml;

	mq_delist(mq, &ml);

	return (ml_purge(&ml));
}

unsigned int
mq_hdatalen(struct mbuf_queue *mq)
{
	unsigned int hdatalen;

	mtx_enter(&mq->mq_mtx);
	hdatalen = ml_hdatalen(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (hdatalen);
}

void
mq_set_maxlen(struct mbuf_queue *mq, u_int maxlen)
{
	mtx_enter(&mq->mq_mtx);
	mq->mq_maxlen = maxlen;
	mtx_leave(&mq->mq_mtx);
}
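
/*
 * Example (sketch): an mbuf_queue wraps an mbuf_list in a mutex so a
 * producer and a consumer can run concurrently, e.g. an interrupt
 * handler feeding a task; "process_pkt" is an invented consumer:
 *
 *	mq_init(&sc->sc_mq, 256, IPL_NET);
 *
 *	producer:
 *		if (mq_enqueue(&sc->sc_mq, m) != 0)
 *			(the packet was dropped and freed)
 *
 *	consumer:
 *		struct mbuf_list ml;
 *		struct mbuf *m;
 *
 *		mq_delist(&sc->sc_mq, &ml);
 *		while ((m = ml_dequeue(&ml)) != NULL)
 *			process_pkt(m);
 */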

int
sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct mbuf_queue *mq)
{
	unsigned int maxlen;
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case IFQCTL_LEN:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
	case IFQCTL_MAXLEN:
		maxlen = mq->mq_maxlen;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &maxlen);
		if (error == 0)
			mq_set_maxlen(mq, maxlen);
		return (error);
	case IFQCTL_DROPS:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}