/*	$OpenBSD: uipc_mbuf.c,v 1.279 2021/03/06 09:20:49 jsg Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

/*
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include "pf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/percpu.h>
#include <sys/sysctl.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>

#include <uvm/uvm_extern.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

#if NPF > 0
#include <net/pfvar.h>
#endif	/* NPF > 0 */

/* mbuf stats */
COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);
struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);
/* mbuf pools */
struct	pool mbpool;
struct	pool mtagpool;

/* mbuf cluster pools */
u_int	mclsizes[MCLPOOLS] = {
	MCLBYTES,	/* must be at slot 0 */
	MCLBYTES + 2,	/* ETHER_ALIGNED 2k mbufs */
	4 * 1024,
	8 * 1024,
	9 * 1024,
	12 * 1024,
	16 * 1024,
	64 * 1024
};
static	char mclnames[MCLPOOLS][8];
struct	pool mclpools[MCLPOOLS];

struct pool *m_clpool(u_int);

int max_linkhdr;		/* largest link-level header */
int max_protohdr;		/* largest protocol header */
int max_hdr;			/* largest link+protocol header */

struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);

void	m_extfree(struct mbuf *);
void	m_zero(struct mbuf *);

unsigned long mbuf_mem_limit;	/* how much memory can be allocated */
unsigned long mbuf_mem_alloc;	/* how much memory has been allocated */

void	*m_pool_alloc(struct pool *, int, int *);
void	m_pool_free(struct pool *, void *);

struct pool_allocator m_pool_allocator = {
	m_pool_alloc,
	m_pool_free,
	0 /* will be copied from pool_allocator_multi */
};

static void (*mextfree_fns[4])(caddr_t, u_int, void *);
static u_int num_extfree_fns;

#define M_DATABUF(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \
			(m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
#define M_SIZE(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_size : \
			(m)->m_flags & M_PKTHDR ? MHLEN : MLEN)

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{
	int i, error;
	unsigned int lowbits;

	CTASSERT(MSIZE == sizeof(struct mbuf));

	m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;

	mbuf_mem_alloc = 0;

#if DIAGNOSTIC
	if (mclsizes[0] != MCLBYTES)
		panic("mbinit: the smallest cluster size != MCLBYTES");
	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
		panic("mbinit: the largest cluster size != MAXMCLBYTES");
#endif

	m_pool_init(&mbpool, MSIZE, 64, "mbufpl");

	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
	    IPL_NET, 0, "mtagpl", NULL);

	for (i = 0; i < nitems(mclsizes); i++) {
		lowbits = mclsizes[i] & ((1 << 10) - 1);
		if (lowbits) {
			snprintf(mclnames[i], sizeof(mclnames[0]),
			    "mcl%dk%u", mclsizes[i] >> 10, lowbits);
		} else {
			snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
			    mclsizes[i] >> 10);
		}

		m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
	}

	error = nmbclust_update(nmbclust);
	KASSERT(error == 0);

	(void)mextfree_register(m_extfree_pool);
	KASSERT(num_extfree_fns == 1);
}

void
mbcpuinit(void)
{
	int i;

	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT);

	pool_cache_init(&mbpool);
	pool_cache_init(&mtagpool);

	for (i = 0; i < nitems(mclsizes); i++)
		pool_cache_init(&mclpools[i]);
}

int
nmbclust_update(long newval)
{
	int i;

	if (newval < 0 || newval > LONG_MAX / MCLBYTES)
		return ERANGE;
	/* update the global mbuf memory limit */
	nmbclust = newval;
	mbuf_mem_limit = nmbclust * MCLBYTES;

	pool_wakeup(&mbpool);
	for (i = 0; i < nitems(mclsizes); i++)
		pool_wakeup(&mclpools[i]);

	return 0;
}
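
/*
 * Illustrative arithmetic (a sketch, with a hypothetical value): with
 * the usual MCLBYTES of 2048, an nmbclust of 4096 gives
 *
 *	mbuf_mem_limit = nmbclust * MCLBYTES = 4096 * 2048 = 8MB
 *
 * m_pool_alloc() below refuses to grow any mbuf pool once
 * mbuf_mem_alloc would exceed this limit.
 */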

/*
 * Space allocation routines.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[type]++;
	counters_leave(&cr, mbstat);
	splx(s);

	m->m_type = type;
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return (m);
}
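
/*
 * Usage sketch (illustrative): a caller that must not sleep checks for
 * NULL, since M_DONTWAIT allocations can fail.
 *
 *	struct mbuf *m;
 *
 *	m = m_get(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	m->m_len = 0;
 *	...
 *	m_free(m);
 */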

/*
 * ATTN: When changing anything here, check m_inithdr() and m_defrag();
 * they may need to change as well.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[type]++;
	counters_leave(&cr, mbstat);
	splx(s);

	m->m_type = type;

	return (m_inithdr(m));
}

struct mbuf *
m_inithdr(struct mbuf *m)
{
	/* keep in sync with m_gethdr */
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;

	return (m);
}

static inline void
m_clearhdr(struct mbuf *m)
{
	/* delete all mbuf tags to reset the state */
	m_tag_delete_chain(m);
#if NPF > 0
	pf_mbuf_unlink_state_key(m);
	pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */

	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
}

void
m_removehdr(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m_clearhdr(m);
	m->m_flags &= ~M_PKTHDR;
}

void
m_resethdr(struct mbuf *m)
{
	int len = m->m_pkthdr.len;
	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;

	KASSERT(m->m_flags & M_PKTHDR);
	m->m_flags &= (M_EXT|M_PKTHDR|M_EOR|M_EXTWR|M_ZEROIZE);
	m_clearhdr(m);
	/* like m_inithdr(), but keep any associated data and mbufs */
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
	m->m_pkthdr.len = len;
	m->m_pkthdr.ph_loopcnt = loopcnt;
}

void
m_calchdrlen(struct mbuf *m)
{
	struct mbuf *n;
	int plen = 0;

	KASSERT(m->m_flags & M_PKTHDR);
	for (n = m; n; n = n->m_next)
		plen += n->m_len;
	m->m_pkthdr.len = plen;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == NULL)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

struct pool *
m_clpool(u_int pktlen)
{
	struct pool *pp;
	int pi;

	for (pi = 0; pi < nitems(mclpools); pi++) {
		pp = &mclpools[pi];
		if (pktlen <= pp->pr_size)
			return (pp);
	}

	return (NULL);
}

struct mbuf *
m_clget(struct mbuf *m, int how, u_int pktlen)
{
	struct mbuf *m0 = NULL;
	struct pool *pp;
	caddr_t buf;

	pp = m_clpool(pktlen);
#ifdef DIAGNOSTIC
	if (pp == NULL)
		panic("m_clget: request for %u byte cluster", pktlen);
#endif

	if (m == NULL) {
		m0 = m_gethdr(how, MT_DATA);
		if (m0 == NULL)
			return (NULL);

		m = m0;
	}
	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (buf == NULL) {
		m_freem(m0);
		return (NULL);
	}

	MEXTADD(m, buf, pp->pr_size, M_EXTWR, MEXTFREE_POOL, pp);
	return (m);
}
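
/*
 * Usage sketch (illustrative): m_clget() (via the MCLGETL() macro)
 * picks the smallest cluster pool that fits, e.g. the 9k pool for a
 * hypothetical jumbo-frame receive buffer; passing NULL also
 * allocates the header mbuf.
 *
 *	struct mbuf *m;
 *
 *	m = MCLGETL(NULL, M_DONTWAIT, 9000);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	m->m_len = m->m_pkthdr.len = 0;
 *	...
 *	m_freem(m);
 */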

void
m_extfree_pool(caddr_t buf, u_int size, void *pp)
{
	pool_put(pp, buf);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;
	struct counters_ref cr;
	uint64_t *counters;
	int s;

	if (m == NULL)
		return (NULL);

	s = splnet();
	counters = counters_enter(&cr, mbstat);
	counters[m->m_type]--;
	counters_leave(&cr, mbstat);
	splx(s);

	n = m->m_next;
	if (m->m_flags & M_ZEROIZE) {
		m_zero(m);
		/* propagate M_ZEROIZE to the next mbuf in the chain */
		if (n)
			n->m_flags |= M_ZEROIZE;
	}
	if (m->m_flags & M_PKTHDR) {
		m_tag_delete_chain(m);
#if NPF > 0
		pf_mbuf_unlink_state_key(m);
		pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */
	}
	if (m->m_flags & M_EXT)
		m_extfree(m);

	pool_put(&mbpool, m);

	return (n);
}

void
m_extref(struct mbuf *o, struct mbuf *n)
{
	int refs = MCLISREFERENCED(o);

	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);

	if (refs)
		mtx_enter(&m_extref_mtx);
	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
	n->m_ext.ext_prevref = o;
	o->m_ext.ext_nextref = n;
	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
	if (refs)
		mtx_leave(&m_extref_mtx);

	MCLREFDEBUGN((n), __FILE__, __LINE__);
}

static inline u_int
m_extunref(struct mbuf *m)
{
	int refs = 0;

	if (!MCLISREFERENCED(m))
		return (0);

	mtx_enter(&m_extref_mtx);
	if (MCLISREFERENCED(m)) {
		m->m_ext.ext_nextref->m_ext.ext_prevref =
		    m->m_ext.ext_prevref;
		m->m_ext.ext_prevref->m_ext.ext_nextref =
		    m->m_ext.ext_nextref;
		refs = 1;
	}
	mtx_leave(&m_extref_mtx);

	return (refs);
}

/*
 * Returns an index for use with MEXTADD.
 * Should only be called once per free function.
 * mbinit() registers m_extfree_pool() at index 0, so drivers can be
 * assured that the index they receive is nonzero.
 */
507 mextfree_register(void (*fn)(caddr_t, u_int, void *))
508 {
509 	KASSERT(num_extfree_fns < nitems(mextfree_fns));
510 	mextfree_fns[num_extfree_fns] = fn;
511 	return num_extfree_fns++;
512 }
513 

void
m_extfree(struct mbuf *m)
{
	if (m_extunref(m) == 0) {
		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
		    m->m_ext.ext_size, m->m_ext.ext_arg);
	}

	m->m_flags &= ~(M_EXT|M_EXTWR);
}

struct mbuf *
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return (NULL);

	n = m->m_nextpkt;

	do
		m = m_free(m);
	while (m != NULL);

	return (n);
}

void
m_purge(struct mbuf *m)
{
	while (m != NULL)
		m = m_freem(m);
}

/*
 * mbuf chain defragmenter. This function uses some evil tricks to defragment
 * an mbuf chain into a single buffer without changing the mbuf pointer.
 * It relies on knowledge of mbuf internals to make this work.
 */
int
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0;

	if (m->m_next == NULL)
		return (0);

	KASSERT(m->m_flags & M_PKTHDR);

	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
		return (ENOBUFS);
	if (m->m_pkthdr.len > MHLEN) {
		MCLGETL(m0, how, m->m_pkthdr.len);
		if (!(m0->m_flags & M_EXT)) {
			m_free(m0);
			return (ENOBUFS);
		}
	}
	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;

	/* free chain behind and possible ext buf on the first mbuf */
	m_freem(m->m_next);
	m->m_next = NULL;
	if (m->m_flags & M_EXT)
		m_extfree(m);

	/*
	 * Bounce copy mbuf over to the original mbuf and set everything up.
	 * This needs to reset or clear all pointers that may go into the
	 * original mbuf chain.
	 */
	if (m0->m_flags & M_EXT) {
		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
		MCLINITREFERENCE(m);
		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
		m->m_data = m->m_ext.ext_buf;
	} else {
		m->m_data = m->m_pktdat;
		memcpy(m->m_data, m0->m_data, m0->m_len);
	}
	m->m_pkthdr.len = m->m_len = m0->m_len;

	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
	m_free(m0);

	return (0);
}
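
/*
 * Usage sketch (illustrative): a driver whose DMA engine needs few
 * segments might fall back to m_defrag() when loading a map fails,
 * then retry.  Note that m itself is reused, so no pointer update is
 * needed on success.
 *
 *	if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
 *	    BUS_DMA_NOWAIT) == EFBIG) {
 *		if (m_defrag(m, M_DONTWAIT) != 0)
 *			goto drop;
 *		if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
 *		    BUS_DMA_NOWAIT) != 0)
 *			goto drop;
 *	}
 */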

/*
 * Mbuf utility routines.
 */

/*
 * Ensure len bytes of contiguous space at the beginning of the mbuf chain.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	if (m_leadingspace(m) >= len) {
		m->m_data -= len;
		m->m_len += len;
	} else {
		MGET(mn, how, m->m_type);
		if (mn == NULL) {
			m_freem(m);
			return (NULL);
		}
		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(mn, m);
		mn->m_next = m;
		m = mn;
		m_align(m, len);
		m->m_len = len;
	}
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len += len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
 * the mbuf chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT
 * from the caller.
 */
struct mbuf *
m_copym(struct mbuf *m0, int off, int len, int wait)
{
	struct mbuf *m, *n, **np;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m0->m_flags & M_PKTHDR)
		copyhdr = 1;
	if ((m = m_getptr(m0, off, &off)) == NULL)
		panic("m_copym: short mbuf chain");
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("m_copym: m == NULL and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (m_dup_pkthdr(n, m0, wait))
				goto nospace;
			if (len != M_COPYALL)
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			n->m_data += m->m_data -
			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
			n->m_data += off;
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    n->m_len);
		}
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *p)
{
	caddr_t cp = p;
	unsigned count;

	if (off < 0)
		panic("m_copydata: off %d < 0", off);
	if (len < 0)
		panic("m_copydata: len %d < 0", len);
	if ((m = m_getptr(m, off, &off)) == NULL)
		panic("m_copydata: short mbuf chain");
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf");
		count = min(m->m_len - off, len);
		memmove(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary. The mbuf needs to be properly initialized
 * including the setting of m_len.
 */
int
m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
{
	int mlen, totlen = 0;
	struct mbuf *m = m0, *n;
	caddr_t cp = (caddr_t)_cp;
	int error = 0;

	if (m0 == NULL)
		return (0);
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (off + len > MLEN) {
				MCLGETL(n, wait, off + len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			memset(mtod(n, caddr_t), 0, off);
			n->m_len = len + off;
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		/* extend last packet to be filled fully */
		if (m->m_next == NULL && (len > m->m_len - off))
			m->m_len += min(len - (m->m_len - off),
			    m_trailingspace(m));
		mlen = min(m->m_len - off, len);
		memmove(mtod(m, caddr_t) + off, cp, mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;
		if (len == 0)
			break;
		off = 0;

		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (len > MLEN) {
				MCLGETL(n, wait, len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			n->m_len = len;
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return (error);
}
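
/*
 * Usage sketch (illustrative): overwrite a hypothetical 2-byte field
 * 20 bytes into a packet, letting m_copyback() extend the chain if it
 * is shorter than that.
 *
 *	uint16_t field = htons(0x1234);
 *
 *	if (m_copyback(m, 20, sizeof(field), &field, M_DONTWAIT) != 0)
 *		goto drop;
 */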

/*
 * Concatenate mbuf chain n to m.
 * n may be copied into m (when n->m_len is small), so the data portion of
 * n can end up in an mbuf of a different mbuf type.  Therefore both chains
 * should be of the same type (e.g. MT_DATA).
 * The m_pkthdr, if present, is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > m_trailingspace(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if (mp == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		m = mp;
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_data += m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_data += len;
				m->m_len -= len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		m = mp;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len = count;
		m = mp;
		for (;;) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
			m = m->m_next;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 */
struct mbuf *
m_pullup(struct mbuf *m0, int len)
{
	struct mbuf *m;
	unsigned int adj;
	caddr_t head, tail;
	unsigned int space;

	/* if len is already contig in m0, then don't do any work */
	if (len <= m0->m_len)
		return (m0);

	/* look for some data */
	m = m0->m_next;
	if (m == NULL)
		goto freem0;

	head = M_DATABUF(m0);
	if (m0->m_len == 0) {
		m0->m_data = head;

		while (m->m_len == 0) {
			m = m_free(m);
			if (m == NULL)
				goto freem0;
		}

		adj = mtod(m, unsigned long) & ALIGNBYTES;
	} else
		adj = mtod(m0, unsigned long) & ALIGNBYTES;

	tail = head + M_SIZE(m0);
	head += adj;

	if (len <= tail - head) {
		/* there's enough space in the first mbuf */

		if (len > tail - mtod(m0, caddr_t)) {
			/* need to memmove to make space at the end */
			memmove(head, mtod(m0, caddr_t), m0->m_len);
			m0->m_data = head;
		}

		len -= m0->m_len;
	} else {
		/* the first mbuf is too small so make a new one */
		space = adj + len;

		if (space > MAXMCLBYTES)
			goto bad;

		m0->m_next = m;
		m = m0;

		MGET(m0, M_DONTWAIT, m->m_type);
		if (m0 == NULL)
			goto bad;

		if (space > MHLEN) {
			MCLGETL(m0, M_DONTWAIT, space);
			if ((m0->m_flags & M_EXT) == 0)
				goto bad;
		}

		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m0, m);

		m0->m_len = 0;
		m0->m_data += adj;
	}

	KDASSERT(m_trailingspace(m0) >= len);

	for (;;) {
		space = min(len, m->m_len);
		memcpy(mtod(m0, caddr_t) + m0->m_len, mtod(m, caddr_t), space);
		len -= space;
		m0->m_len += space;
		m->m_len -= space;

		if (m->m_len > 0)
			m->m_data += space;
		else
			m = m_free(m);

		if (len == 0)
			break;

		if (m == NULL)
			goto bad;
	}

	m0->m_next = m; /* link the chain back up */

	return (m0);

bad:
	m_freem(m);
freem0:
	m_free(m0);
	return (NULL);
}

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else {
					return (NULL);
				}
			} else {
				m = m->m_next;
			}
		}
	}

	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		if (m_dup_pkthdr(n, m0, wait)) {
			m_freem(n);
			return (NULL);
		}
		n->m_pkthdr.len -= len0;
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (remain == 0) {
			n->m_next = m->m_next;
			m->m_next = NULL;
			n->m_len = 0;
			return (n);
		}
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			m_align(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			m_align(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		m_align(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
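
/*
 * Usage sketch (illustrative): split a packet after a hypothetical
 * 20-byte header, leaving the header in m0 and returning the payload
 * as a separate chain (m0 is left intact on failure).
 *
 *	struct mbuf *payload;
 *
 *	payload = m_split(m0, 20, M_DONTWAIT);
 *	if (payload == NULL)
 *		goto drop;
 */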

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	KASSERT(m0->m_flags & M_PKTHDR);
	/*
	 * Limit the size of the new header to MHLEN. In case
	 * skip = 0 and the first buffer is not a cluster this
	 * is the maximum space available in that mbuf.
	 * In other words this code never prepends an mbuf.
	 */
	KASSERT(hlen < MHLEN);

	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed.  Figure out
	 * if there's space to insert the new header.  If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;		/* data to move */
	if (skip < remain && hlen <= m_leadingspace(m)) {
		if (skip)
			memmove(m->m_data-hlen, m->m_data, skip);
		m->m_data -= hlen;
		m->m_len += hlen;
		*off = skip;
	} else if (hlen > m_trailingspace(m)) {
		struct mbuf *n;

		if (remain > 0) {
			MGET(n, M_DONTWAIT, m->m_type);
			if (n && remain > MLEN) {
				MCLGETL(n, M_DONTWAIT, remain);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n == NULL)
				return (NULL);

			memcpy(n->m_data, mtod(m, char *) + skip, remain);
			n->m_len = remain;
			m->m_len -= remain;

			n->m_next = m->m_next;
			m->m_next = n;
		}

		if (hlen <= m_trailingspace(m)) {
			m->m_len += hlen;
			*off = skip;
		} else {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				return NULL;

			n->m_len = hlen;

			n->m_next = m->m_next;
			m->m_next = n;

			*off = 0;	/* header is at front ... */
			m = n;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		if (remain > 0)
			memmove(mtod(m, caddr_t) + skip + hlen,
			      mtod(m, caddr_t) + skip, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}
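
/*
 * Usage sketch (illustrative): open a gap for a hypothetical 8-byte
 * header 20 bytes into the packet, then write it at the returned
 * mbuf/offset.  On NULL the caller must still free the chain.
 *
 *	struct mbuf *mh;
 *	int off;
 *
 *	mh = m_makespace(m0, 20, 8, &off);
 *	if (mh == NULL)
 *		goto drop;
 *	memcpy(mtod(mh, caddr_t) + off, hdr, 8);
 */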

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off)
{
	struct mbuf	*m;
	struct mbuf	*top, **mp;
	int		 len;

	top = NULL;
	mp = &top;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	m->m_pkthdr.len = totlen;

	len = MHLEN;

	while (totlen > 0) {
		if (top != NULL) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				/*
				 * As we might get called by pfkey, make sure
				 * we do not leak sensitive data.
				 */
				top->m_flags |= M_ZEROIZE;
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}

		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/* Place initial small packet/header at end of mbuf. */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}

		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}

		m->m_len = len = min(totlen, len);
		memcpy(mtod(m, void *), buf, (size_t)len);

		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

void
m_zero(struct mbuf *m)
{
	if (M_READONLY(m)) {
		mtx_enter(&m_extref_mtx);
		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
		}
		mtx_leave(&m_extref_mtx);
		return;
	}

	explicit_bzero(M_DATABUF(m), M_SIZE(m));
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
{
	int rval;
	unsigned int count;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);
	while (off > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		count = min(m->m_len - off, len);

		rval = f(fstate, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}

/*
 * Compute the amount of space available before the current start of data
 * in an mbuf. Read-only clusters never have space available.
 */
int
m_leadingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(m->m_data >= M_DATABUF(m));
	return m->m_data - M_DATABUF(m);
}

/*
 * Compute the amount of space available after the end of data in an mbuf.
 * Read-only clusters never have space available.
 */
int
m_trailingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(M_DATABUF(m) + M_SIZE(m) >= (m->m_data + m->m_len));
	return M_DATABUF(m) + M_SIZE(m) - (m->m_data + m->m_len);
}

/*
 * Set the m_data pointer of a newly-allocated mbuf to place an object of
 * the specified size at the end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	KASSERT(len >= 0 && !M_READONLY(m));
	KASSERT(m->m_data == M_DATABUF(m));	/* newly-allocated check */
	KASSERT(((len + sizeof(long) - 1) &~ (sizeof(long) - 1)) <= M_SIZE(m));

	m->m_data = M_DATABUF(m) + ((M_SIZE(m) - (len)) &~ (sizeof(long) - 1));
}
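
/*
 * Worked example (illustrative): with 8-byte longs and an empty
 * header mbuf (M_SIZE(m) == MHLEN), m_align(m, 14) sets
 *
 *	m_data = M_DATABUF(m) + ((MHLEN - 14) & ~(8UL - 1))
 *
 * so the 14-byte object ends at (or just before) the end of the data
 * area while m_data itself stays longword aligned.
 */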

/*
 * Duplicate the mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
{
	int error;

	KASSERT(from->m_flags & M_PKTHDR);

	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
	to->m_flags |= (from->m_flags & M_COPYFLAGS);
	to->m_pkthdr = from->m_pkthdr;

#if NPF > 0
	to->m_pkthdr.pf.statekey = NULL;
	pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
	to->m_pkthdr.pf.inp = NULL;
	pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
#endif	/* NPF > 0 */

	SLIST_INIT(&to->m_pkthdr.ph_tags);

	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
		return (error);

	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;

	return (0);
}

struct mbuf *
m_dup_pkt(struct mbuf *m0, unsigned int adj, int wait)
{
	struct mbuf *m;
	int len;

	KASSERT(m0->m_flags & M_PKTHDR);

	len = m0->m_pkthdr.len + adj;
	if (len > MAXMCLBYTES) /* XXX */
		return (NULL);

	m = m_get(wait, m0->m_type);
	if (m == NULL)
		return (NULL);

	if (m_dup_pkthdr(m, m0, wait) != 0)
		goto fail;

	if (len > MHLEN) {
		MCLGETL(m, wait, len);
		if (!ISSET(m->m_flags, M_EXT))
			goto fail;
	}

	m->m_len = m->m_pkthdr.len = len;
	m_adj(m, adj);
	m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, caddr_t));

	return (m);

fail:
	m_freem(m);
	return (NULL);
}

void
m_microtime(const struct mbuf *m, struct timeval *tv)
{
	if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) {
		struct timeval btv, utv;

		NSEC_TO_TIMEVAL(m->m_pkthdr.ph_timestamp, &utv);
		microboottime(&btv);
		timeradd(&btv, &utv, tv);
	} else
		microtime(tv);
}

void *
m_pool_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	if (atomic_add_long_nv(&mbuf_mem_alloc, pp->pr_pgsize) > mbuf_mem_limit)
		goto fail;

	v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
	if (v != NULL)
		return (v);

 fail:
	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
	return (NULL);
}

void
m_pool_free(struct pool *pp, void *v)
{
	(*pool_allocator_multi.pa_free)(pp, v);

	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
}

void
m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
{
	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
	pool_set_constraints(pp, &kp_dma_contig);
}

#ifdef DDB
void
m_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m = v;

	(*pr)("mbuf %p\n", m);
	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
	if (m->m_flags & M_PKTHDR) {
		(*pr)("m_pkthdr.ph_ifidx: %u\tm_pkthdr.len: %i\n",
		    m->m_pkthdr.ph_ifidx, m->m_pkthdr.len);
		(*pr)("m_pkthdr.ph_tags: %p\tm_pkthdr.ph_tagsset: %b\n",
		    SLIST_FIRST(&m->m_pkthdr.ph_tags),
		    m->m_pkthdr.ph_tagsset, MTAG_BITS);
		(*pr)("m_pkthdr.ph_flowid: %u\tm_pkthdr.ph_loopcnt: %u\n",
		    m->m_pkthdr.ph_flowid, m->m_pkthdr.ph_loopcnt);
		(*pr)("m_pkthdr.csum_flags: %b\n",
		    m->m_pkthdr.csum_flags, MCS_BITS);
		(*pr)("m_pkthdr.ether_vtag: %u\tm_pkthdr.ph_rtableid: %u\n",
		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp: %p\n",
		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
		(*pr)("m_pkthdr.pf.flags: %b\n",
		    m->m_pkthdr.pf.flags, MPF_BITS);
		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
	}
	if (m->m_flags & M_EXT) {
		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
		    m->m_ext.ext_buf, m->m_ext.ext_size);
		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
	}
}
#endif

/*
 * mbuf lists
 */

void
ml_init(struct mbuf_list *ml)
{
	ml->ml_head = ml->ml_tail = NULL;
	ml->ml_len = 0;
}

void
ml_enqueue(struct mbuf_list *ml, struct mbuf *m)
{
	if (ml->ml_tail == NULL)
		ml->ml_head = ml->ml_tail = m;
	else {
		ml->ml_tail->m_nextpkt = m;
		ml->ml_tail = m;
	}

	m->m_nextpkt = NULL;
	ml->ml_len++;
}

void
ml_enlist(struct mbuf_list *mla, struct mbuf_list *mlb)
{
	if (!ml_empty(mlb)) {
		if (ml_empty(mla))
			mla->ml_head = mlb->ml_head;
		else
			mla->ml_tail->m_nextpkt = mlb->ml_head;
		mla->ml_tail = mlb->ml_tail;
		mla->ml_len += mlb->ml_len;

		ml_init(mlb);
	}
}

struct mbuf *
ml_dequeue(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m != NULL) {
		ml->ml_head = m->m_nextpkt;
		if (ml->ml_head == NULL)
			ml->ml_tail = NULL;

		m->m_nextpkt = NULL;
		ml->ml_len--;
	}

	return (m);
}

struct mbuf *
ml_dechain(struct mbuf_list *ml)
{
	struct mbuf *m0;

	m0 = ml->ml_head;

	ml_init(ml);

	return (m0);
}

unsigned int
ml_purge(struct mbuf_list *ml)
{
	struct mbuf *m, *n;
	unsigned int len;

	for (m = ml->ml_head; m != NULL; m = n) {
		n = m->m_nextpkt;
		m_freem(m);
	}

	len = ml->ml_len;
	ml_init(ml);

	return (len);
}

unsigned int
ml_hdatalen(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m == NULL)
		return (0);

	KASSERT(ISSET(m->m_flags, M_PKTHDR));
	return (m->m_pkthdr.len);
}
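
/*
 * Usage sketch (illustrative): mbuf_lists are unlocked, so they suit
 * batching packets in a single context before passing them on
 * (deliver() is a hypothetical consumer).
 *
 *	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 *	struct mbuf *m;
 *
 *	ml_enqueue(&ml, m0);
 *	ml_enqueue(&ml, m1);
 *	while ((m = ml_dequeue(&ml)) != NULL)
 *		deliver(m);
 */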

/*
 * mbuf queues
 */

void
mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl)
{
	mtx_init(&mq->mq_mtx, ipl);
	ml_init(&mq->mq_list);
	mq->mq_maxlen = maxlen;
}

int
mq_push(struct mbuf_queue *mq, struct mbuf *m)
{
	struct mbuf *dropped = NULL;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) >= mq->mq_maxlen) {
		mq->mq_drops++;
		dropped = ml_dequeue(&mq->mq_list);
	}
	ml_enqueue(&mq->mq_list, m);
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(dropped);

	return (dropped != NULL);
}

int
mq_enqueue(struct mbuf_queue *mq, struct mbuf *m)
{
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enqueue(&mq->mq_list, m);
	else {
		mq->mq_drops++;
		dropped = 1;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(m);

	return (dropped);
}
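
/*
 * Usage sketch (illustrative): mbuf_queues add a mutex and drop
 * accounting on top of mbuf_lists, giving the usual interrupt-side
 * producer and process-side consumer pattern (sc_mq and process()
 * are hypothetical).
 *
 *	producer, e.g. interrupt context; m is freed for us on drop:
 *
 *	if (mq_enqueue(&sc->sc_mq, m) != 0)
 *		return;
 *
 *	consumer:
 *
 *	struct mbuf_list ml;
 *	struct mbuf *m;
 *
 *	mq_delist(&sc->sc_mq, &ml);
 *	while ((m = ml_dequeue(&ml)) != NULL)
 *		process(m);
 */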

struct mbuf *
mq_dequeue(struct mbuf_queue *mq)
{
	struct mbuf *m;

	mtx_enter(&mq->mq_mtx);
	m = ml_dequeue(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m);
}

int
mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	struct mbuf *m;
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enlist(&mq->mq_list, ml);
	else {
		dropped = ml_len(ml);
		mq->mq_drops += dropped;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped) {
		while ((m = ml_dequeue(ml)) != NULL)
			m_freem(m);
	}

	return (dropped);
}

void
mq_delist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	mtx_enter(&mq->mq_mtx);
	*ml = mq->mq_list;
	ml_init(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);
}

struct mbuf *
mq_dechain(struct mbuf_queue *mq)
{
	struct mbuf *m0;

	mtx_enter(&mq->mq_mtx);
	m0 = ml_dechain(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m0);
}

unsigned int
mq_purge(struct mbuf_queue *mq)
{
	struct mbuf_list ml;

	mq_delist(mq, &ml);

	return (ml_purge(&ml));
}

unsigned int
mq_hdatalen(struct mbuf_queue *mq)
{
	unsigned int hdatalen;

	mtx_enter(&mq->mq_mtx);
	hdatalen = ml_hdatalen(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (hdatalen);
}

int
sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct mbuf_queue *mq)
{
	unsigned int maxlen;
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case IFQCTL_LEN:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
	case IFQCTL_MAXLEN:
		maxlen = mq->mq_maxlen;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &maxlen);
		if (!error && maxlen != mq->mq_maxlen) {
			mtx_enter(&mq->mq_mtx);
			mq->mq_maxlen = maxlen;
			mtx_leave(&mq->mq_mtx);
		}
		return (error);
	case IFQCTL_DROPS:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
1810