/*	$OpenBSD: uipc_mbuf.c,v 1.283 2022/02/22 01:15:01 guenther Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

/*
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include "pf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/pool.h>
#include <sys/percpu.h>
#include <sys/sysctl.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>


#include <uvm/uvm_extern.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

#if NPF > 0
#include <net/pfvar.h>
#endif	/* NPF > 0 */

/* mbuf stats */
COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);
struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);
/* mbuf pools */
struct	pool mbpool;
struct	pool mtagpool;

/* mbuf cluster pools */
u_int	mclsizes[MCLPOOLS] = {
	MCLBYTES,	/* must be at slot 0 */
	MCLBYTES + 2,	/* ETHER_ALIGNED 2k mbufs */
	4 * 1024,
	8 * 1024,
	9 * 1024,
	12 * 1024,
	16 * 1024,
	64 * 1024
};
static	char mclnames[MCLPOOLS][8];
struct	pool mclpools[MCLPOOLS];

struct pool *m_clpool(u_int);

int max_linkhdr;		/* largest link-level header */
int max_protohdr;		/* largest protocol header */
int max_hdr;			/* largest link+protocol header */

struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);

void	m_extfree(struct mbuf *);
void	m_zero(struct mbuf *);

unsigned long mbuf_mem_limit;	/* how much memory can be allocated */
unsigned long mbuf_mem_alloc;	/* how much memory has been allocated */

void	*m_pool_alloc(struct pool *, int, int *);
void	m_pool_free(struct pool *, void *);

struct pool_allocator m_pool_allocator = {
	m_pool_alloc,
	m_pool_free,
	0 /* will be copied from pool_allocator_multi */
};

static void (*mextfree_fns[4])(caddr_t, u_int, void *);
static u_int num_extfree_fns;

#define M_DATABUF(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \
			(m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
#define M_SIZE(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_size : \
			(m)->m_flags & M_PKTHDR ? MHLEN : MLEN)

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{
	int i, error;
	unsigned int lowbits;

	CTASSERT(MSIZE == sizeof(struct mbuf));

	m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;

	mbuf_mem_alloc = 0;

#if DIAGNOSTIC
	if (mclsizes[0] != MCLBYTES)
		panic("mbinit: the smallest cluster size != MCLBYTES");
	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
		panic("mbinit: the largest cluster size != MAXMCLBYTES");
#endif

	m_pool_init(&mbpool, MSIZE, 64, "mbufpl");

	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
	    IPL_NET, 0, "mtagpl", NULL);

	for (i = 0; i < nitems(mclsizes); i++) {
		lowbits = mclsizes[i] & ((1 << 10) - 1);
		if (lowbits) {
			snprintf(mclnames[i], sizeof(mclnames[0]),
			    "mcl%dk%u", mclsizes[i] >> 10, lowbits);
		} else {
			snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
			    mclsizes[i] >> 10);
		}

		m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
	}

	error = nmbclust_update(nmbclust);
	KASSERT(error == 0);

	(void)mextfree_register(m_extfree_pool);
	KASSERT(num_extfree_fns == 1);
}

void
mbcpuinit(void)
{
	int i;

	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT);

	pool_cache_init(&mbpool);
	pool_cache_init(&mtagpool);

	for (i = 0; i < nitems(mclsizes); i++)
		pool_cache_init(&mclpools[i]);
}

int
nmbclust_update(long newval)
{
	int i;

	if (newval < 0 || newval > LONG_MAX / MCLBYTES)
		return ERANGE;
	/* update the global mbuf memory limit */
	nmbclust = newval;
	mbuf_mem_limit = nmbclust * MCLBYTES;

	pool_wakeup(&mbpool);
	for (i = 0; i < nitems(mclsizes); i++)
		pool_wakeup(&mclpools[i]);

	return 0;
}
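
/*
 * The limit is configured in clusters but enforced in bytes: with the
 * usual 2k MCLBYTES, nmbclust = 4096 allows 4096 * 2048 = 8 MB of
 * mbuf memory.  The pool wakeups above let allocators that are
 * waiting on the old limit retry against the new one.
 */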

/*
 * Space allocation routines.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[type]++;
	counters_leave(&cr, mbstat);
	splx(s);

	m->m_type = type;
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return (m);
}

/*
 * ATTN: When changing anything here, check m_inithdr() and m_defrag();
 * they may need to change as well.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[type]++;
	counters_leave(&cr, mbstat);
	splx(s);

	m->m_type = type;

	return (m_inithdr(m));
}

struct mbuf *
m_inithdr(struct mbuf *m)
{
	/* keep in sync with m_gethdr */
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;

	return (m);
}

static inline void
m_clearhdr(struct mbuf *m)
{
	/* delete all mbuf tags to reset the state */
	m_tag_delete_chain(m);
#if NPF > 0
	pf_mbuf_unlink_state_key(m);
	pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */

	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
}

void
m_removehdr(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m_clearhdr(m);
	m->m_flags &= ~M_PKTHDR;
}

void
m_resethdr(struct mbuf *m)
{
	int len = m->m_pkthdr.len;
	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;

	KASSERT(m->m_flags & M_PKTHDR);
	m->m_flags &= (M_EXT|M_PKTHDR|M_EOR|M_EXTWR|M_ZEROIZE);
	m_clearhdr(m);
	/* like m_inithdr(), but keep any associated data and mbufs */
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
	m->m_pkthdr.len = len;
	m->m_pkthdr.ph_loopcnt = loopcnt;
}

void
m_calchdrlen(struct mbuf *m)
{
	struct mbuf *n;
	int plen = 0;

	KASSERT(m->m_flags & M_PKTHDR);
	for (n = m; n; n = n->m_next)
		plen += n->m_len;
	m->m_pkthdr.len = plen;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == NULL)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

struct pool *
m_clpool(u_int pktlen)
{
	struct pool *pp;
	int pi;

	for (pi = 0; pi < nitems(mclpools); pi++) {
		pp = &mclpools[pi];
		if (pktlen <= pp->pr_size)
			return (pp);
	}

	return (NULL);
}

struct mbuf *
m_clget(struct mbuf *m, int how, u_int pktlen)
{
	struct mbuf *m0 = NULL;
	struct pool *pp;
	caddr_t buf;

	pp = m_clpool(pktlen);
#ifdef DIAGNOSTIC
	if (pp == NULL)
		panic("m_clget: request for %u byte cluster", pktlen);
#endif

	if (m == NULL) {
		m0 = m_gethdr(how, MT_DATA);
		if (m0 == NULL)
			return (NULL);

		m = m0;
	}
	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (buf == NULL) {
		m_freem(m0);
		return (NULL);
	}

	MEXTADD(m, buf, pp->pr_size, M_EXTWR, MEXTFREE_POOL, pp);
	return (m);
}
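
/*
 * Example (illustrative sketch): a receive path that wants a packet
 * header mbuf backed by a cluster big enough for a full frame can let
 * m_clget() allocate both, via the MCLGETL() wrapper:
 *
 *	struct mbuf *m;
 *
 *	m = MCLGETL(NULL, M_DONTWAIT, MCLBYTES);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *
 * Passing NULL makes m_clget() allocate the mbuf header as well; if
 * the cluster allocation then fails, that header is freed before NULL
 * is returned.
 */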

void
m_extfree_pool(caddr_t buf, u_int size, void *pp)
{
	pool_put(pp, buf);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[m->m_type]--;
	counters_leave(&cr, mbstat);
	splx(s);

	n = m->m_next;
	if (m->m_flags & M_ZEROIZE) {
		m_zero(m);
		/* propagate M_ZEROIZE to the next mbuf in the chain */
		if (n)
			n->m_flags |= M_ZEROIZE;
	}
	if (m->m_flags & M_PKTHDR) {
		m_tag_delete_chain(m);
#if NPF > 0
		pf_mbuf_unlink_state_key(m);
		pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */
	}
	if (m->m_flags & M_EXT)
		m_extfree(m);

	pool_put(&mbpool, m);

	return (n);
}

void
m_extref(struct mbuf *o, struct mbuf *n)
{
	int refs = MCLISREFERENCED(o);

	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);

	if (refs)
		mtx_enter(&m_extref_mtx);
	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
	n->m_ext.ext_prevref = o;
	o->m_ext.ext_nextref = n;
	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
	if (refs)
		mtx_leave(&m_extref_mtx);

	MCLREFDEBUGN((n), __FILE__, __LINE__);
}

static inline u_int
m_extunref(struct mbuf *m)
{
	int refs = 0;

	if (!MCLISREFERENCED(m))
		return (0);

	mtx_enter(&m_extref_mtx);
	if (MCLISREFERENCED(m)) {
		m->m_ext.ext_nextref->m_ext.ext_prevref =
		    m->m_ext.ext_prevref;
		m->m_ext.ext_prevref->m_ext.ext_nextref =
		    m->m_ext.ext_nextref;
		refs = 1;
	}
	mtx_leave(&m_extref_mtx);

	return (refs);
}

/*
 * Returns an index for use with MEXTADD.
 * Should only be called once per free function.
 * Drivers can be assured that the index will be nonzero, because
 * index 0 is taken by m_extfree_pool() at mbinit() time.
 */
u_int
mextfree_register(void (*fn)(caddr_t, u_int, void *))
{
	KASSERT(num_extfree_fns < nitems(mextfree_fns));
	mextfree_fns[num_extfree_fns] = fn;
	return num_extfree_fns++;
}
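
/*
 * Example (illustrative sketch; the xx_* names are hypothetical): a
 * driver with externally managed receive buffers registers its free
 * routine once at attach time and passes the returned index to
 * MEXTADD() for every buffer it maps:
 *
 *	void	xx_buf_free(caddr_t, u_int, void *);
 *	u_int	xx_extfree_idx;
 *
 *	xx_extfree_idx = mextfree_register(xx_buf_free);
 *	...
 *	MEXTADD(m, buf, size, 0, xx_extfree_idx, sc);
 */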

void
m_extfree(struct mbuf *m)
{
	if (m_extunref(m) == 0) {
		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
		    m->m_ext.ext_size, m->m_ext.ext_arg);
	}

	m->m_flags &= ~(M_EXT|M_EXTWR);
}

struct mbuf *
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return (NULL);

	n = m->m_nextpkt;

	do
		m = m_free(m);
	while (m != NULL);

	return (n);
}

void
m_purge(struct mbuf *m)
{
	while (m != NULL)
		m = m_freem(m);
}

/*
 * mbuf chain defragmenter.  This function uses some evil tricks to
 * defragment an mbuf chain into a single buffer without changing the
 * mbuf pointer.  To make this work it has to know a lot about the
 * mbuf internals.
 */
int
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0;

	if (m->m_next == NULL)
		return (0);

	KASSERT(m->m_flags & M_PKTHDR);

	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
		return (ENOBUFS);
	if (m->m_pkthdr.len > MHLEN) {
		MCLGETL(m0, how, m->m_pkthdr.len);
		if (!(m0->m_flags & M_EXT)) {
			m_free(m0);
			return (ENOBUFS);
		}
	}
	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;

	/* free chain behind and possible ext buf on the first mbuf */
	m_freem(m->m_next);
	m->m_next = NULL;
	if (m->m_flags & M_EXT)
		m_extfree(m);

	/*
	 * Bounce copy mbuf over to the original mbuf and set everything up.
	 * This needs to reset or clear all pointers that may go into the
	 * original mbuf chain.
	 */
	if (m0->m_flags & M_EXT) {
		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
		MCLINITREFERENCE(m);
		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
		m->m_data = m->m_ext.ext_buf;
	} else {
		m->m_data = m->m_pktdat;
		memcpy(m->m_data, m0->m_data, m0->m_len);
	}
	m->m_pkthdr.len = m->m_len = m0->m_len;

	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
	m_free(m0);

	return (0);
}
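
/*
 * Example (illustrative sketch; sc->sc_dmat and map are hypothetical):
 * the classic consumer of m_defrag() is a transmit path whose DMA map
 * has fewer segments than the chain has mbufs, so a failed load is
 * retried once on a defragmented copy:
 *
 *	error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT);
 *	if (error == EFBIG) {
 *		if (m_defrag(m, M_DONTWAIT) == 0)
 *			error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
 *			    BUS_DMA_NOWAIT);
 *	}
 *	if (error) {
 *		m_freem(m);
 *		return;
 *	}
 *
 * Because m_defrag() leaves the mbuf pointer itself unchanged, the
 * caller does not have to re-fetch m after a successful defrag.
 */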

/*
 * Mbuffer utility routines.
 */

/*
 * Ensure len bytes of contiguous space at the beginning of the mbuf chain.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	if (m_leadingspace(m) >= len) {
		m->m_data -= len;
		m->m_len += len;
	} else {
		MGET(mn, how, m->m_type);
		if (mn == NULL) {
			m_freem(m);
			return (NULL);
		}
		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(mn, m);
		mn->m_next = m;
		m = mn;
		m_align(m, len);
		m->m_len = len;
	}
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len += len;
	return (m);
}
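
/*
 * Example (illustrative sketch): prepending a link-level header to an
 * outbound packet.  m_prepend() reuses leading space when there is
 * any and only allocates a new mbuf when it must:
 *
 *	m = m_prepend(m, ETHER_HDR_LEN, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	eh = mtod(m, struct ether_header *);
 *
 * On failure the old chain has already been freed, so the caller must
 * not touch the stale pointer.
 */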

/*
 * Make a copy of an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
 * the mbuf chain.  The wait parameter is the caller's choice of
 * M_WAIT/M_DONTWAIT.
 */
struct mbuf *
m_copym(struct mbuf *m0, int off, int len, int wait)
{
	struct mbuf *m, *n, **np;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m0->m_flags & M_PKTHDR)
		copyhdr = 1;
	if ((m = m_getptr(m0, off, &off)) == NULL)
		panic("m_copym: short mbuf chain");
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("m_copym: m == NULL and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (m_dup_pkthdr(n, m0, wait))
				goto nospace;
			if (len != M_COPYALL)
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			n->m_data += m->m_data -
			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
			n->m_data += off;
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    n->m_len);
		}
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
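
/*
 * Example (illustrative sketch): a sender that must keep its data
 * queued, e.g. on a socket buffer, transmits copies made with
 * m_copym():
 *
 *	n = m_copym(m, off, len, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *
 * Cluster-backed data is shared via MCLADDREFERENCE() rather than
 * copied, so the copy must be treated as read-only.
 */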

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *p)
{
	caddr_t cp = p;
	unsigned count;

	if (off < 0)
		panic("m_copydata: off %d < 0", off);
	if (len < 0)
		panic("m_copydata: len %d < 0", len);
	if ((m = m_getptr(m, off, &off)) == NULL)
		panic("m_copydata: short mbuf chain");
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf");
		count = min(m->m_len - off, len);
		memmove(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
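
/*
 * Example (illustrative sketch): copying a protocol header into stack
 * storage works no matter how the bytes are split across mbufs:
 *
 *	struct ip ip;
 *
 *	if (m->m_pkthdr.len < sizeof(ip))
 *		goto drop;
 *	m_copydata(m, 0, sizeof(ip), &ip);
 *
 * The caller must check that the chain is long enough first; a short
 * chain panics.
 */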

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary. The mbuf needs to be properly initialized
 * including the setting of m_len.
 */
int
m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
{
	int mlen, totlen = 0;
	struct mbuf *m = m0, *n;
	caddr_t cp = (caddr_t)_cp;
	int error = 0;

	if (m0 == NULL)
		return (0);
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (off + len > MLEN) {
				MCLGETL(n, wait, off + len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			memset(mtod(n, caddr_t), 0, off);
			n->m_len = len + off;
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		/* extend last packet to be filled fully */
		if (m->m_next == NULL && (len > m->m_len - off))
			m->m_len += min(len - (m->m_len - off),
			    m_trailingspace(m));
		mlen = min(m->m_len - off, len);
		memmove(mtod(m, caddr_t) + off, cp, mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;
		if (len == 0)
			break;
		off = 0;

		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (len > MLEN) {
				MCLGETL(n, wait, len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			n->m_len = len;
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return (error);
}

/*
 * Concatenate mbuf chain n to m.
 * n may be copied into m (when n->m_len is small), so the data of n
 * can end up in an mbuf of a different mbuf type.  Both chains should
 * therefore be of the same type (e.g. MT_DATA).
 * The m_pkthdr, if any, is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > m_trailingspace(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if (mp == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		m = mp;
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_data += m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_data += len;
				m->m_len -= len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		m = mp;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len = count;
		m = mp;
		for (;;) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
			m = m->m_next;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}
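
/*
 * Example (illustrative sketch): a positive length trims from the
 * head of the chain, a negative one from the tail, so stripping an
 * Ethernet header and a hardware-appended CRC looks like:
 *
 *	m_adj(m, ETHER_HDR_LEN);	(drop the 14 byte header)
 *	m_adj(m, -ETHER_CRC_LEN);	(drop the 4 byte trailer)
 */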

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 */
struct mbuf *
m_pullup(struct mbuf *m0, int len)
{
	struct mbuf *m;
	unsigned int adj;
	caddr_t head, tail;
	unsigned int space;

	/* if len is already contig in m0, then don't do any work */
	if (len <= m0->m_len)
		return (m0);

	/* look for some data */
	m = m0->m_next;
	if (m == NULL)
		goto freem0;

	head = M_DATABUF(m0);
	if (m0->m_len == 0) {
		while (m->m_len == 0) {
			m = m_free(m);
			if (m == NULL)
				goto freem0;
		}

		adj = mtod(m, unsigned long) & (sizeof(long) - 1);
	} else
		adj = mtod(m0, unsigned long) & (sizeof(long) - 1);

	tail = head + M_SIZE(m0);
	head += adj;

	if (!M_READONLY(m0) && len <= tail - head) {
		/* we can copy everything into the first mbuf */
		if (m0->m_len == 0) {
			m0->m_data = head;
		} else if (len > tail - mtod(m0, caddr_t)) {
			/* need to memmove to make space at the end */
			memmove(head, mtod(m0, caddr_t), m0->m_len);
			m0->m_data = head;
		}

		len -= m0->m_len;
	} else {
		/* the first mbuf is too small or read-only, make a new one */
		space = adj + len;

		if (space > MAXMCLBYTES)
			goto bad;

		m0->m_next = m;
		m = m0;

		MGET(m0, M_DONTWAIT, m->m_type);
		if (m0 == NULL)
			goto bad;

		if (space > MHLEN) {
			MCLGETL(m0, M_DONTWAIT, space);
			if ((m0->m_flags & M_EXT) == 0)
				goto bad;
		}

		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m0, m);

		m0->m_len = 0;
		m0->m_data += adj;
	}

	KDASSERT(m_trailingspace(m0) >= len);

	for (;;) {
		space = min(len, m->m_len);
		memcpy(mtod(m0, caddr_t) + m0->m_len, mtod(m, caddr_t), space);
		len -= space;
		m0->m_len += space;
		m->m_len -= space;

		if (m->m_len > 0)
			m->m_data += space;
		else
			m = m_free(m);

		if (len == 0)
			break;

		if (m == NULL)
			goto bad;
	}

	m0->m_next = m; /* link the chain back up */

	return (m0);

bad:
	m_freem(m);
freem0:
	m_free(m0);
	return (NULL);
}
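
/*
 * Example (illustrative sketch): the canonical input-path idiom pulls
 * a header contiguous before casting, and forgets the old pointer
 * because m_pullup() may replace the first mbuf:
 *
 *	if (m->m_len < sizeof(struct ip)) {
 *		m = m_pullup(m, sizeof(struct ip));
 *		if (m == NULL)
 *			return;
 *	}
 *	ip = mtod(m, struct ip *);
 */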

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else {
					return (NULL);
				}
			} else {
				m = m->m_next;
			}
		}
	}

	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		if (m_dup_pkthdr(n, m0, wait)) {
			m_freem(n);
			return (NULL);
		}
		n->m_pkthdr.len -= len0;
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (remain == 0) {
			n->m_next = m->m_next;
			m->m_next = NULL;
			n->m_len = 0;
			return (n);
		}
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			m_align(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			m_align(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		m_align(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	KASSERT(m0->m_flags & M_PKTHDR);
	/*
	 * Limit the size of the new header to MHLEN.  In case
	 * skip = 0 and the first buffer is not a cluster, this
	 * is the maximum space available in that mbuf.
	 * In other words, this code never prepends an mbuf.
	 */
	KASSERT(hlen < MHLEN);

	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed.  Figure out
	 * if there's space to insert the new header.  If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;		/* data to move */
	if (skip < remain && hlen <= m_leadingspace(m)) {
		if (skip)
			memmove(m->m_data-hlen, m->m_data, skip);
		m->m_data -= hlen;
		m->m_len += hlen;
		*off = skip;
	} else if (hlen > m_trailingspace(m)) {
		struct mbuf *n;

		if (remain > 0) {
			MGET(n, M_DONTWAIT, m->m_type);
			if (n && remain > MLEN) {
				MCLGETL(n, M_DONTWAIT, remain);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n == NULL)
				return (NULL);

			memcpy(n->m_data, mtod(m, char *) + skip, remain);
			n->m_len = remain;
			m->m_len -= remain;

			n->m_next = m->m_next;
			m->m_next = n;
		}

		if (hlen <= m_trailingspace(m)) {
			m->m_len += hlen;
			*off = skip;
		} else {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				return NULL;

			n->m_len = hlen;

			n->m_next = m->m_next;
			m->m_next = n;

			*off = 0;	/* header is at front ... */
			m = n;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		if (remain > 0)
			memmove(mtod(m, caddr_t) + skip + hlen,
			      mtod(m, caddr_t) + skip, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}


/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off)
{
	struct mbuf	*m;
	struct mbuf	*top, **mp;
	int		 len;

	top = NULL;
	mp = &top;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	m->m_pkthdr.len = totlen;

	len = MHLEN;

	while (totlen > 0) {
		if (top != NULL) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				/*
				 * As we might get called by pfkey, make sure
				 * we do not leak sensitive data.
				 */
				top->m_flags |= M_ZEROIZE;
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}

		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/* Place initial small packet/header at end of mbuf. */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}

		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}

		m->m_len = len = min(totlen, len);
		memcpy(mtod(m, void *), buf, (size_t)len);

		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

void
m_zero(struct mbuf *m)
{
	if (M_READONLY(m)) {
		mtx_enter(&m_extref_mtx);
		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
		}
		mtx_leave(&m_extref_mtx);
		return;
	}

	explicit_bzero(M_DATABUF(m), M_SIZE(m));
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
{
	int rval;
	unsigned int count;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);
	while (off > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		count = min(m->m_len - off, len);

		rval = f(fstate, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}
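
/*
 * Example (illustrative sketch; xx_sum_update() is hypothetical): a
 * checksum or digest over part of a chain can be written as a
 * callback and driven by m_apply(), with state carried via fstate:
 *
 *	int
 *	xx_sum_update(caddr_t ctx, caddr_t data, unsigned int len)
 *	{
 *		...fold len bytes at data into the context...
 *		return (0);	(nonzero aborts the walk)
 *	}
 *
 *	error = m_apply(m, off, len, xx_sum_update, (caddr_t)&ctx);
 */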

/*
 * Compute the amount of space available before the current start of data
 * in an mbuf. Read-only clusters never have space available.
 */
int
m_leadingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(m->m_data >= M_DATABUF(m));
	return m->m_data - M_DATABUF(m);
}

/*
 * Compute the amount of space available after the end of data in an mbuf.
 * Read-only clusters never have space available.
 */
int
m_trailingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(M_DATABUF(m) + M_SIZE(m) >= (m->m_data + m->m_len));
	return M_DATABUF(m) + M_SIZE(m) - (m->m_data + m->m_len);
}

/*
 * Set the m_data pointer of a newly-allocated mbuf to place an object of
 * the specified size at the end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	KASSERT(len >= 0 && !M_READONLY(m));
	KASSERT(m->m_data == M_DATABUF(m));	/* newly-allocated check */
	KASSERT(((len + sizeof(long) - 1) &~ (sizeof(long) - 1)) <= M_SIZE(m));

	m->m_data = M_DATABUF(m) + ((M_SIZE(m) - (len)) &~ (sizeof(long) - 1));
}

/*
 * Duplicate the mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
{
	int error;

	KASSERT(from->m_flags & M_PKTHDR);

	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
	to->m_flags |= (from->m_flags & M_COPYFLAGS);
	to->m_pkthdr = from->m_pkthdr;

#if NPF > 0
	to->m_pkthdr.pf.statekey = NULL;
	pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
	to->m_pkthdr.pf.inp = NULL;
	pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
#endif	/* NPF > 0 */

	SLIST_INIT(&to->m_pkthdr.ph_tags);

	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
		return (error);

	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;

	return (0);
}

struct mbuf *
m_dup_pkt(struct mbuf *m0, unsigned int adj, int wait)
{
	struct mbuf *m;
	int len;

	KASSERT(m0->m_flags & M_PKTHDR);

	len = m0->m_pkthdr.len + adj;
	if (len > MAXMCLBYTES) /* XXX */
		return (NULL);

	m = m_get(wait, m0->m_type);
	if (m == NULL)
		return (NULL);

	if (m_dup_pkthdr(m, m0, wait) != 0)
		goto fail;

	if (len > MHLEN) {
		MCLGETL(m, wait, len);
		if (!ISSET(m->m_flags, M_EXT))
			goto fail;
	}

	m->m_len = m->m_pkthdr.len = len;
	m_adj(m, adj);
	m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, caddr_t));

	return (m);

fail:
	m_freem(m);
	return (NULL);
}

void
m_microtime(const struct mbuf *m, struct timeval *tv)
{
	if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) {
		struct timeval btv, utv;

		NSEC_TO_TIMEVAL(m->m_pkthdr.ph_timestamp, &utv);
		microboottime(&btv);
		timeradd(&btv, &utv, tv);
	} else
		microtime(tv);
}

void *
m_pool_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	if (atomic_add_long_nv(&mbuf_mem_alloc, pp->pr_pgsize) > mbuf_mem_limit)
		goto fail;

	v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
	if (v != NULL)
		return (v);

 fail:
	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
	return (NULL);
}

void
m_pool_free(struct pool *pp, void *v)
{
	(*pool_allocator_multi.pa_free)(pp, v);

	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
}

void
m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
{
	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
	pool_set_constraints(pp, &kp_dma_contig);
}

u_int
m_pool_used(void)
{
	return ((mbuf_mem_alloc * 100) / mbuf_mem_limit);
}

#ifdef DDB
void
m_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m = v;

	(*pr)("mbuf %p\n", m);
	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
	if (m->m_flags & M_PKTHDR) {
		(*pr)("m_pkthdr.ph_ifidx: %u\tm_pkthdr.len: %i\n",
		    m->m_pkthdr.ph_ifidx, m->m_pkthdr.len);
		(*pr)("m_pkthdr.ph_tags: %p\tm_pkthdr.ph_tagsset: %b\n",
		    SLIST_FIRST(&m->m_pkthdr.ph_tags),
		    m->m_pkthdr.ph_tagsset, MTAG_BITS);
		(*pr)("m_pkthdr.ph_flowid: %u\tm_pkthdr.ph_loopcnt: %u\n",
		    m->m_pkthdr.ph_flowid, m->m_pkthdr.ph_loopcnt);
		(*pr)("m_pkthdr.csum_flags: %b\n",
		    m->m_pkthdr.csum_flags, MCS_BITS);
		(*pr)("m_pkthdr.ether_vtag: %u\tm_pkthdr.ph_rtableid: %u\n",
		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
		(*pr)("m_pkthdr.pf.flags: %b\n",
		    m->m_pkthdr.pf.flags, MPF_BITS);
		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
	}
	if (m->m_flags & M_EXT) {
		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
		    m->m_ext.ext_buf, m->m_ext.ext_size);
		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
	}
}
#endif

/*
 * mbuf lists
 */

void
ml_init(struct mbuf_list *ml)
{
	ml->ml_head = ml->ml_tail = NULL;
	ml->ml_len = 0;
}

void
ml_enqueue(struct mbuf_list *ml, struct mbuf *m)
{
	if (ml->ml_tail == NULL)
		ml->ml_head = ml->ml_tail = m;
	else {
		ml->ml_tail->m_nextpkt = m;
		ml->ml_tail = m;
	}

	m->m_nextpkt = NULL;
	ml->ml_len++;
}

void
ml_enlist(struct mbuf_list *mla, struct mbuf_list *mlb)
{
	if (!ml_empty(mlb)) {
		if (ml_empty(mla))
			mla->ml_head = mlb->ml_head;
		else
			mla->ml_tail->m_nextpkt = mlb->ml_head;
		mla->ml_tail = mlb->ml_tail;
		mla->ml_len += mlb->ml_len;

		ml_init(mlb);
	}
}

struct mbuf *
ml_dequeue(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m != NULL) {
		ml->ml_head = m->m_nextpkt;
		if (ml->ml_head == NULL)
			ml->ml_tail = NULL;

		m->m_nextpkt = NULL;
		ml->ml_len--;
	}

	return (m);
}

struct mbuf *
ml_dechain(struct mbuf_list *ml)
{
	struct mbuf *m0;

	m0 = ml->ml_head;

	ml_init(ml);

	return (m0);
}

unsigned int
ml_purge(struct mbuf_list *ml)
{
	struct mbuf *m, *n;
	unsigned int len;

	for (m = ml->ml_head; m != NULL; m = n) {
		n = m->m_nextpkt;
		m_freem(m);
	}

	len = ml->ml_len;
	ml_init(ml);

	return (len);
}

unsigned int
ml_hdatalen(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m == NULL)
		return (0);

	KASSERT(ISSET(m->m_flags, M_PKTHDR));
	return (m->m_pkthdr.len);
}
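
/*
 * Example (illustrative sketch; xx_input() is hypothetical): an
 * mbuf_list is an unlocked batch of packets chained through
 * m_nextpkt, typically built up on the stack and drained in a loop:
 *
 *	struct mbuf_list ml;
 *	struct mbuf *m;
 *
 *	ml_init(&ml);
 *	ml_enqueue(&ml, m);
 *	...
 *	while ((m = ml_dequeue(&ml)) != NULL)
 *		xx_input(m);
 */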

/*
 * mbuf queues
 */

void
mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl)
{
	mtx_init(&mq->mq_mtx, ipl);
	ml_init(&mq->mq_list);
	mq->mq_maxlen = maxlen;
}

int
mq_push(struct mbuf_queue *mq, struct mbuf *m)
{
	struct mbuf *dropped = NULL;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) >= mq->mq_maxlen) {
		mq->mq_drops++;
		dropped = ml_dequeue(&mq->mq_list);
	}
	ml_enqueue(&mq->mq_list, m);
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(dropped);

	return (dropped != NULL);
}

int
mq_enqueue(struct mbuf_queue *mq, struct mbuf *m)
{
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enqueue(&mq->mq_list, m);
	else {
		mq->mq_drops++;
		dropped = 1;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(m);

	return (dropped);
}

struct mbuf *
mq_dequeue(struct mbuf_queue *mq)
{
	struct mbuf *m;

	mtx_enter(&mq->mq_mtx);
	m = ml_dequeue(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m);
}

int
mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	struct mbuf *m;
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enlist(&mq->mq_list, ml);
	else {
		dropped = ml_len(ml);
		mq->mq_drops += dropped;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped) {
		while ((m = ml_dequeue(ml)) != NULL)
			m_freem(m);
	}

	return (dropped);
}

void
mq_delist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	mtx_enter(&mq->mq_mtx);
	*ml = mq->mq_list;
	ml_init(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);
}
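
/*
 * Example (illustrative sketch; sc_rxqueue and xx_input() are
 * hypothetical): an mbuf_queue is an mbuf_list plus a mutex and a
 * drop counter, so an interrupt handler can safely feed packets to a
 * thread-context consumer.  mq_delist() takes the whole backlog in
 * one lock acquisition and lets it be processed unlocked:
 *
 *	producer:	if (mq_enqueue(&sc->sc_rxqueue, m) != 0)
 *				return;		(m was dropped and freed)
 *
 *	consumer:	struct mbuf_list ml;
 *			struct mbuf *m;
 *
 *			mq_delist(&sc->sc_rxqueue, &ml);
 *			while ((m = ml_dequeue(&ml)) != NULL)
 *				xx_input(sc, m);
 */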

struct mbuf *
mq_dechain(struct mbuf_queue *mq)
{
	struct mbuf *m0;

	mtx_enter(&mq->mq_mtx);
	m0 = ml_dechain(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m0);
}

unsigned int
mq_purge(struct mbuf_queue *mq)
{
	struct mbuf_list ml;

	mq_delist(mq, &ml);

	return (ml_purge(&ml));
}

unsigned int
mq_hdatalen(struct mbuf_queue *mq)
{
	unsigned int hdatalen;

	mtx_enter(&mq->mq_mtx);
	hdatalen = ml_hdatalen(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (hdatalen);
}

int
sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct mbuf_queue *mq)
{
	unsigned int maxlen;
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case IFQCTL_LEN:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
	case IFQCTL_MAXLEN:
		maxlen = mq->mq_maxlen;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &maxlen);
		if (!error && maxlen != mq->mq_maxlen) {
			mtx_enter(&mq->mq_mtx);
			mq->mq_maxlen = maxlen;
			mtx_leave(&mq->mq_mtx);
		}
		return (error);
	case IFQCTL_DROPS:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
1813