xref: /openbsd-src/sys/kern/uipc_mbuf.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: uipc_mbuf.c,v 1.192 2014/07/13 15:52:38 tedu Exp $	*/
2 /*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
33  */
34 
35 /*
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  * 	This product includes software developed by the University of
50  * 	California, Berkeley and its contributors.
51  * 	This product includes software developed at the Information
52  * 	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <sys/kernel.h>
81 #include <sys/syslog.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/pool.h>
85 
86 #include <sys/socket.h>
87 #include <sys/socketvar.h>
88 #include <net/if.h>
89 
90 
91 #include <uvm/uvm_extern.h>
92 
93 #ifdef DDB
94 #include <machine/db_machdep.h>
95 #include <ddb/db_interface.h>
96 #endif
97 
98 struct	mbstat mbstat;		/* mbuf stats */
99 struct	pool mbpool;		/* mbuf pool */
100 struct	pool mtagpool;
101 
102 /* mbuf cluster pools */
103 u_int	mclsizes[] = {
104 	MCLBYTES,	/* must be at slot 0 */
105 	4 * 1024,
106 	8 * 1024,
107 	9 * 1024,
108 	12 * 1024,
109 	16 * 1024,
110 	64 * 1024
111 };
112 static	char mclnames[MCLPOOLS][8];
113 struct	pool mclpools[MCLPOOLS];
114 
115 struct pool *m_clpool(u_int);
116 
117 int max_linkhdr;		/* largest link-level header */
118 int max_protohdr;		/* largest protocol header */
119 int max_hdr;			/* largest link+protocol header */
120 
121 void	m_extfree(struct mbuf *);
122 struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
123 void	nmbclust_update(void);
124 void	m_zero(struct mbuf *);
125 
126 
127 const char *mclpool_warnmsg =
128     "WARNING: mclpools limit reached; increase kern.maxclusters";
129 
130 /*
131  * Initialize the mbuf allocator.
132  */
133 void
134 mbinit(void)
135 {
136 	int i;
137 
138 #ifdef DIAGNOSTIC
139 	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
140 		panic("mbinit: the largest cluster size != MAXMCLBYTES");
141 #endif
142 
143 	pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", NULL);
144 	pool_set_constraints(&mbpool, &kp_dma_contig);
145 	pool_setlowat(&mbpool, mblowat);
146 
147 	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag),
148 	    0, 0, 0, "mtagpl", NULL);
149 	pool_setipl(&mtagpool, IPL_NET);
150 
151 	for (i = 0; i < nitems(mclsizes); i++) {
152 		snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
153 		    mclsizes[i] >> 10);
154 		pool_init(&mclpools[i], mclsizes[i], 0, 0, 0,
155 		    mclnames[i], NULL);
156 		pool_set_constraints(&mclpools[i], &kp_dma_contig);
157 		pool_setlowat(&mclpools[i], mcllowat);
158 	}
159 
160 	nmbclust_update();
161 }
162 
163 void
164 nmbclust_update(void)
165 {
166 	int i;
167 	/*
168 	 * Set the hard limit on the mclpools to the number of
169 	 * mbuf clusters the kernel is to support.  Log the
170 	 * limit-reached message at most once a minute.
171 	 */
172 	for (i = 0; i < nitems(mclsizes); i++) {
173 		(void)pool_sethardlimit(&mclpools[i], nmbclust,
174 		    mclpool_warnmsg, 60);
175 		/*
176 		 * XXX this needs to be reconsidered.
177 		 * Setting the high water mark to nmbclust is too high,
178 		 * but we need enough spare buffers around so that
179 		 * allocations in interrupt context don't fail and
180 		 * mclgeti() drivers don't end up with empty rings.
181 		 */
182 		pool_sethiwat(&mclpools[i], nmbclust);
183 	}
184 	pool_sethiwat(&mbpool, nmbclust);
185 }
186 
187 void
188 m_reclaim(void *arg, int flags)
189 {
190 	struct domain *dp;
191 	struct protosw *pr;
192 	int s = splnet();
193 
194 	for (dp = domains; dp; dp = dp->dom_next)
195 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
196 			if (pr->pr_drain)
197 				(*pr->pr_drain)();
198 	mbstat.m_drain++;
199 	splx(s);
200 }
201 
202 /*
203  * Space allocation routines.
204  */
205 struct mbuf *
206 m_get(int nowait, int type)
207 {
208 	struct mbuf *m;
209 	int s;
210 
211 	s = splnet();
212 	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
213 	if (m)
214 		mbstat.m_mtypes[type]++;
215 	splx(s);
216 	if (m) {
217 		m->m_type = type;
218 		m->m_next = NULL;
219 		m->m_nextpkt = NULL;
220 		m->m_data = m->m_dat;
221 		m->m_flags = 0;
222 	}
223 	return (m);
224 }
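
/*
 * Example (editor's illustrative sketch, not in the original source):
 * the typical allocate/use/free cycle for a plain data mbuf.  The
 * payload and error handling are illustrative only.
 */
#if 0	/* example only, not compiled */
	struct mbuf *m;

	m = m_get(M_DONTWAIT, MT_DATA);		/* may fail under pressure */
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = 4;
	memcpy(mtod(m, caddr_t), "data", 4);	/* fill the internal buffer */
	m_free(m);				/* frees one mbuf, not a chain */
#endif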
225 
226 /*
227  * ATTN: When changing anything here, check m_inithdr() and m_defrag();
228  * they may need to change as well.
229  */
230 struct mbuf *
231 m_gethdr(int nowait, int type)
232 {
233 	struct mbuf *m;
234 	int s;
235 
236 	s = splnet();
237 	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
238 	if (m)
239 		mbstat.m_mtypes[type]++;
240 	splx(s);
241 	if (m) {
242 		m->m_type = type;
243 
244 		/* keep in sync with m_inithdr */
245 		m->m_next = NULL;
246 		m->m_nextpkt = NULL;
247 		m->m_data = m->m_pktdat;
248 		m->m_flags = M_PKTHDR;
249 		memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
250 		m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
251 	}
252 	return (m);
253 }
254 
255 struct mbuf *
256 m_inithdr(struct mbuf *m)
257 {
258 	/* keep in sync with m_gethdr */
259 	m->m_next = NULL;
260 	m->m_nextpkt = NULL;
261 	m->m_data = m->m_pktdat;
262 	m->m_flags = M_PKTHDR;
263 	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
264 	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
265 
266 	return (m);
267 }
268 
269 struct mbuf *
270 m_getclr(int nowait, int type)
271 {
272 	struct mbuf *m;
273 
274 	MGET(m, nowait, type);
275 	if (m == NULL)
276 		return (NULL);
277 	memset(mtod(m, caddr_t), 0, MLEN);
278 	return (m);
279 }
280 
281 struct pool *
282 m_clpool(u_int pktlen)
283 {
284 	struct pool *pp;
285 	int pi;
286 
287 	for (pi = 0; pi < nitems(mclpools); pi++) {
288 		pp = &mclpools[pi];
289 		if (pktlen <= pp->pr_size)
290 			return (pp);
291 	}
292 
293 	return (NULL);
294 }
295 
296 struct mbuf *
297 m_clget(struct mbuf *m, int how, struct ifnet *ifp, u_int pktlen)
298 {
299 	struct mbuf *m0 = NULL;
300 	struct pool *pp;
301 	caddr_t buf;
302 	int s;
303 
304 	pp = m_clpool(pktlen);
305 #ifdef DIAGNOSTIC
306 	if (pp == NULL)
307 		panic("m_clget: request for %u byte cluster", pktlen);
308 #endif
309 
310 	s = splnet();
311 	if (m == NULL) {
312 		MGETHDR(m0, M_DONTWAIT, MT_DATA);
313 		if (m0 == NULL) {
314 			splx(s);
315 			return (NULL);
316 		}
317 		m = m0;
318 	}
319 	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
320 	if (buf == NULL) {
321 		if (m0)
322 			m_freem(m0);
323 		splx(s);
324 		return (NULL);
325 	}
326 	splx(s);
327 
328 	MEXTADD(m, buf, pp->pr_size, M_EXTWR, m_extfree_pool, pp);
329 	return (m);
330 }
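
/*
 * Example (editor's illustrative sketch, not in the original source):
 * how a driver receive path might allocate a cluster-backed packet
 * header mbuf sized for a jumbo frame.  Passing NULL makes m_clget()
 * allocate the mbuf itself; the 9k size selects the mcl9k pool above.
 */
#if 0	/* example only, not compiled */
	struct mbuf *m;

	m = MCLGETI(NULL, M_DONTWAIT, NULL, 9 * 1024);
	if (m == NULL)
		return (ENOBUFS);		/* leave the ring slot empty */
	m->m_len = m->m_pkthdr.len = 9 * 1024;	/* whole cluster is usable */
#endif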
331 
332 void
333 m_extfree_pool(caddr_t buf, u_int size, void *pp)
334 {
335 	splassert(IPL_NET);
336 	pool_put(pp, buf);
337 }
338 
339 struct mbuf *
340 m_free_unlocked(struct mbuf *m)
341 {
342 	struct mbuf *n;
343 
344 	mbstat.m_mtypes[m->m_type]--;
345 	n = m->m_next;
346 	if (m->m_flags & M_ZEROIZE) {
347 		m_zero(m);
348 		/* propagate M_ZEROIZE to the next mbuf in the chain */
349 		if (n)
350 			n->m_flags |= M_ZEROIZE;
351 	}
352 	if (m->m_flags & M_PKTHDR)
353 		m_tag_delete_chain(m);
354 	if (m->m_flags & M_EXT)
355 		m_extfree(m);
356 	pool_put(&mbpool, m);
357 
358 	return (n);
359 }
360 
361 struct mbuf *
362 m_free(struct mbuf *m)
363 {
364 	struct mbuf *n;
365 	int s;
366 
367 	s = splnet();
368 	n = m_free_unlocked(m);
369 	splx(s);
370 
371 	return (n);
372 }
373 
374 void
375 m_extfree(struct mbuf *m)
376 {
377 	if (MCLISREFERENCED(m)) {
378 		m->m_ext.ext_nextref->m_ext.ext_prevref =
379 		    m->m_ext.ext_prevref;
380 		m->m_ext.ext_prevref->m_ext.ext_nextref =
381 		    m->m_ext.ext_nextref;
382 	} else if (m->m_ext.ext_free)
383 		(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
384 		    m->m_ext.ext_size, m->m_ext.ext_arg);
385 	else
386 		panic("unknown type of extension buffer");
387 	m->m_ext.ext_size = 0;
388 	m->m_flags &= ~(M_EXT|M_EXTWR);
389 }
390 
391 void
392 m_freem(struct mbuf *m)
393 {
394 	struct mbuf *n;
395 	int s;
396 
397 	if (m == NULL)
398 		return;
399 	s = splnet();
400 	do {
401 		n = m_free_unlocked(m);
402 	} while ((m = n) != NULL);
403 	splx(s);
404 }
405 
406 /*
407  * mbuf chain defragmenter. This function uses some evil tricks to defragment
408  * an mbuf chain into a single buffer without changing the mbuf pointer.
409  * It relies on detailed knowledge of mbuf internals to work.
410  */
411 int
412 m_defrag(struct mbuf *m, int how)
413 {
414 	struct mbuf *m0;
415 
416 	if (m->m_next == NULL)
417 		return (0);
418 
419 #ifdef DIAGNOSTIC
420 	if (!(m->m_flags & M_PKTHDR))
421 		panic("m_defrag: no packet hdr or not a chain");
422 #endif
423 
424 	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
425 		return (ENOBUFS);
426 	if (m->m_pkthdr.len > MHLEN) {
427 		MCLGETI(m0, how, NULL, m->m_pkthdr.len);
428 		if (!(m0->m_flags & M_EXT)) {
429 			m_free(m0);
430 			return (ENOBUFS);
431 		}
432 	}
433 	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
434 	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;
435 
436 	/* free the rest of the chain and any ext buf on the first mbuf */
437 	m_freem(m->m_next);
438 	m->m_next = NULL;
439 
440 	if (m->m_flags & M_EXT) {
441 		int s = splnet();
442 		m_extfree(m);
443 		splx(s);
444 	}
445 
446 	/*
447 	 * Bounce copy mbuf over to the original mbuf and set everything up.
448 	 * This needs to reset or clear all pointers that may go into the
449 	 * original mbuf chain.
450 	 */
451 	if (m0->m_flags & M_EXT) {
452 		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
453 		MCLINITREFERENCE(m);
454 		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
455 		m->m_data = m->m_ext.ext_buf;
456 	} else {
457 		m->m_data = m->m_pktdat;
458 		memcpy(m->m_data, m0->m_data, m0->m_len);
459 	}
460 	m->m_pkthdr.len = m->m_len = m0->m_len;
461 
462 	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
463 	m_free(m0);
464 
465 	return (0);
466 }
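
/*
 * Example (editor's illustrative sketch, not in the original source):
 * the usual transmit-path caller.  When a DMA map cannot describe a
 * long chain (EFBIG), the chain is compacted in place and the load is
 * retried.  `sc' and `map' are hypothetical driver state.
 */
#if 0	/* example only, not compiled */
	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
	case 0:
		break;
	case EFBIG:		/* too many segments for the map */
		if (m_defrag(m, M_DONTWAIT) == 0 &&
		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
		    BUS_DMA_NOWAIT) == 0)
			break;	/* m is still the same pointer */
		/* FALLTHROUGH */
	default:
		m_freem(m);
		return;
	}
#endif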
467 
468 /*
469  * Mbuffer utility routines.
470  */
471 
472 /*
473  * Ensure len bytes of contiguous space at the beginning of the mbuf chain.
474  */
475 struct mbuf *
476 m_prepend(struct mbuf *m, int len, int how)
477 {
478 	struct mbuf *mn;
479 
480 	if (len > MHLEN)
481 		panic("mbuf prepend length too big");
482 
483 	if (M_LEADINGSPACE(m) >= len) {
484 		m->m_data -= len;
485 		m->m_len += len;
486 	} else {
487 		MGET(mn, how, m->m_type);
488 		if (mn == NULL) {
489 			m_freem(m);
490 			return (NULL);
491 		}
492 		if (m->m_flags & M_PKTHDR)
493 			M_MOVE_PKTHDR(mn, m);
494 		mn->m_next = m;
495 		m = mn;
496 		MH_ALIGN(m, len);
497 		m->m_len = len;
498 	}
499 	if (m->m_flags & M_PKTHDR)
500 		m->m_pkthdr.len += len;
501 	return (m);
502 }
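
/*
 * Example (editor's illustrative sketch, not in the original source):
 * making room for a 14-byte link-level header.  M_PREPEND is the usual
 * wrapper around m_prepend(); on failure the chain has already been
 * freed and m is NULL.  `eh' is a hypothetical prebuilt header.
 */
#if 0	/* example only, not compiled */
	M_PREPEND(m, 14, M_DONTWAIT);	/* 14 == Ethernet header size */
	if (m == NULL)
		return (ENOBUFS);
	memcpy(mtod(m, caddr_t), &eh, 14);
#endif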
503 
504 /*
505  * Make a copy of an mbuf chain starting "off" bytes from the beginning,
506  * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
507  * the mbuf chain.  The wait parameter is M_WAIT or M_DONTWAIT from the caller.
508  */
509 struct mbuf *
510 m_copym(struct mbuf *m, int off, int len, int wait)
511 {
512 	return m_copym0(m, off, len, wait, 0);	/* shallow copy on M_EXT */
513 }
514 
515 /*
516  * m_copym2() is like m_copym(), except it COPIES cluster mbufs, instead
517  * of merely bumping the reference count.
518  */
519 struct mbuf *
520 m_copym2(struct mbuf *m, int off, int len, int wait)
521 {
522 	return m_copym0(m, off, len, wait, 1);	/* deep copy */
523 }
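
/*
 * Example (editor's illustrative sketch, not in the original source):
 * the practical difference between the two.  A shallow m_copym() copy
 * shares any cluster storage with the original, so it must be treated
 * as read-only; an m_copym2() copy owns its data and may be modified.
 */
#if 0	/* example only, not compiled */
	struct mbuf *ro, *rw;

	ro = m_copym(m, 0, M_COPYALL, M_DONTWAIT);	/* shares clusters */
	rw = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);	/* private storage */
	if (rw != NULL)
		memset(mtod(rw, caddr_t), 0, rw->m_len);  /* original intact */
#endif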
524 
525 struct mbuf *
526 m_copym0(struct mbuf *m0, int off, int len, int wait, int deep)
527 {
528 	struct mbuf *m, *n, **np;
529 	struct mbuf *top;
530 	int copyhdr = 0;
531 
532 	if (off < 0 || len < 0)
533 		panic("m_copym0: off %d, len %d", off, len);
534 	if (off == 0 && m0->m_flags & M_PKTHDR)
535 		copyhdr = 1;
536 	if ((m = m_getptr(m0, off, &off)) == NULL)
537 		panic("m_copym0: short mbuf chain");
538 	np = &top;
539 	top = NULL;
540 	while (len > 0) {
541 		if (m == NULL) {
542 			if (len != M_COPYALL)
543 				panic("m_copym0: m == NULL and not COPYALL");
544 			break;
545 		}
546 		MGET(n, wait, m->m_type);
547 		*np = n;
548 		if (n == NULL)
549 			goto nospace;
550 		if (copyhdr) {
551 			if (m_dup_pkthdr(n, m0, wait))
552 				goto nospace;
553 			if (len != M_COPYALL)
554 				n->m_pkthdr.len = len;
555 			copyhdr = 0;
556 		}
557 		n->m_len = min(len, m->m_len - off);
558 		if (m->m_flags & M_EXT) {
559 			if (!deep) {
560 				n->m_data = m->m_data + off;
561 				n->m_ext = m->m_ext;
562 				MCLADDREFERENCE(m, n);
563 			} else {
564 				/*
565 				 * We are unsure how m was allocated, so
566 				 * copy into multiple MCLBYTES cluster mbufs.
567 				 */
568 				MCLGET(n, wait);
569 				n->m_len = 0;
570 				n->m_len = M_TRAILINGSPACE(n);
571 				n->m_len = min(n->m_len, len);
572 				n->m_len = min(n->m_len, m->m_len - off);
573 				memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
574 				    n->m_len);
575 			}
576 		} else
577 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
578 			    n->m_len);
579 		if (len != M_COPYALL)
580 			len -= n->m_len;
581 		off += n->m_len;
582 #ifdef DIAGNOSTIC
583 		if (off > m->m_len)
584 			panic("m_copym0 overrun");
585 #endif
586 		if (off == m->m_len) {
587 			m = m->m_next;
588 			off = 0;
589 		}
590 		np = &n->m_next;
591 	}
592 	return (top);
593 nospace:
594 	m_freem(top);
595 	return (NULL);
596 }
597 
598 /*
599  * Copy data from an mbuf chain starting "off" bytes from the beginning,
600  * continuing for "len" bytes, into the indicated buffer.
601  */
602 void
603 m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
604 {
605 	unsigned count;
606 
607 	if (off < 0)
608 		panic("m_copydata: off %d < 0", off);
609 	if (len < 0)
610 		panic("m_copydata: len %d < 0", len);
611 	if ((m = m_getptr(m, off, &off)) == NULL)
612 		panic("m_copydata: short mbuf chain");
613 	while (len > 0) {
614 		if (m == NULL)
615 			panic("m_copydata: null mbuf");
616 		count = min(m->m_len - off, len);
617 		bcopy(mtod(m, caddr_t) + off, cp, count);
618 		len -= count;
619 		cp += count;
620 		off = 0;
621 		m = m->m_next;
622 	}
623 }
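
/*
 * Example (editor's illustrative sketch, not in the original source):
 * extracting a fixed-size header that may span several mbufs into a
 * local buffer.  Unlike m_pullup(), this never reshapes the chain; the
 * 20-byte size stands in for a real protocol header.
 */
#if 0	/* example only, not compiled */
	u_char hdr[20];

	if (m->m_pkthdr.len < sizeof(hdr))
		return (EINVAL);		/* chain too short */
	m_copydata(m, 0, sizeof(hdr), (caddr_t)hdr);
#endif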
624 
625 /*
626  * Copy data from a buffer back into the indicated mbuf chain,
627  * starting "off" bytes from the beginning, extending the mbuf
628  * chain if necessary. The mbuf needs to be properly initialized
629  * including the setting of m_len.
630  */
631 int
632 m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
633 {
634 	int mlen, totlen = 0;
635 	struct mbuf *m = m0, *n;
636 	caddr_t cp = (caddr_t)_cp;
637 	int error = 0;
638 
639 	if (m0 == NULL)
640 		return (0);
641 	while (off > (mlen = m->m_len)) {
642 		off -= mlen;
643 		totlen += mlen;
644 		if (m->m_next == NULL) {
645 			if ((n = m_get(wait, m->m_type)) == NULL) {
646 				error = ENOBUFS;
647 				goto out;
648 			}
649 
650 			if (off + len > MLEN) {
651 				MCLGETI(n, wait, NULL, off + len);
652 				if (!(n->m_flags & M_EXT)) {
653 					m_free(n);
654 					error = ENOBUFS;
655 					goto out;
656 				}
657 			}
658 			memset(mtod(n, caddr_t), 0, off);
659 			n->m_len = len + off;
660 			m->m_next = n;
661 		}
662 		m = m->m_next;
663 	}
664 	while (len > 0) {
665 		/* extend last packet to be filled fully */
666 		if (m->m_next == NULL && (len > m->m_len - off))
667 			m->m_len += min(len - (m->m_len - off),
668 			    M_TRAILINGSPACE(m));
669 		mlen = min(m->m_len - off, len);
670 		bcopy(cp, mtod(m, caddr_t) + off, (size_t)mlen);
671 		cp += mlen;
672 		len -= mlen;
673 		totlen += mlen + off;
674 		if (len == 0)
675 			break;
676 		off = 0;
677 
678 		if (m->m_next == NULL) {
679 			if ((n = m_get(wait, m->m_type)) == NULL) {
680 				error = ENOBUFS;
681 				goto out;
682 			}
683 
684 			if (len > MLEN) {
685 				MCLGETI(n, wait, NULL, len);
686 				if (!(n->m_flags & M_EXT)) {
687 					m_free(n);
688 					error = ENOBUFS;
689 					goto out;
690 				}
691 			}
692 			n->m_len = len;
693 			m->m_next = n;
694 		}
695 		m = m->m_next;
696 	}
697 out:
698 	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
699 		m->m_pkthdr.len = totlen;
700 
701 	return (error);
702 }
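
/*
 * Example (editor's illustrative sketch, not in the original source):
 * rewriting two bytes at a fixed offset, the way a checksum field is
 * patched in place.  The offset 24 is arbitrary; if it lay beyond the
 * end of the chain, m_copyback() would extend the chain first.
 */
#if 0	/* example only, not compiled */
	u_int16_t sum = 0xffff;

	if (m_copyback(m, 24, sizeof(sum), &sum, M_DONTWAIT) != 0)
		return (ENOBUFS);	/* could not extend the chain */
#endif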
703 
704 /*
705  * Concatenate mbuf chain n to m.
706  * n may be copied into m (when n->m_len is small), so the data portion of
707  * n can end up in an mbuf of a different mbuf type.
708  * Both chains should therefore be of the same type (e.g. MT_DATA).
709  * The m_pkthdr, if any, is not updated.
710  */
711 void
712 m_cat(struct mbuf *m, struct mbuf *n)
713 {
714 	while (m->m_next)
715 		m = m->m_next;
716 	while (n) {
717 		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
718 			/* just join the two chains */
719 			m->m_next = n;
720 			return;
721 		}
722 		/* splat the data from one into the other */
723 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
724 		    (u_int)n->m_len);
725 		m->m_len += n->m_len;
726 		n = m_free(n);
727 	}
728 }
729 
730 void
731 m_adj(struct mbuf *mp, int req_len)
732 {
733 	int len = req_len;
734 	struct mbuf *m;
735 	int count;
736 
737 	if ((m = mp) == NULL)
738 		return;
739 	if (len >= 0) {
740 		/*
741 		 * Trim from head.
742 		 */
743 		while (m != NULL && len > 0) {
744 			if (m->m_len <= len) {
745 				len -= m->m_len;
746 				m->m_len = 0;
747 				m = m->m_next;
748 			} else {
749 				m->m_len -= len;
750 				m->m_data += len;
751 				len = 0;
752 			}
753 		}
754 		if (mp->m_flags & M_PKTHDR)
755 			mp->m_pkthdr.len -= (req_len - len);
756 	} else {
757 		/*
758 		 * Trim from tail.  Scan the mbuf chain,
759 		 * calculating its length and finding the last mbuf.
760 		 * If the adjustment only affects this mbuf, then just
761 		 * adjust and return.  Otherwise, rescan and truncate
762 		 * after the remaining size.
763 		 */
764 		len = -len;
765 		count = 0;
766 		for (;;) {
767 			count += m->m_len;
768 			if (m->m_next == NULL)
769 				break;
770 			m = m->m_next;
771 		}
772 		if (m->m_len >= len) {
773 			m->m_len -= len;
774 			if (mp->m_flags & M_PKTHDR)
775 				mp->m_pkthdr.len -= len;
776 			return;
777 		}
778 		count -= len;
779 		if (count < 0)
780 			count = 0;
781 		/*
782 		 * Correct length for chain is "count".
783 		 * Find the mbuf with last data, adjust its length,
784 		 * and toss data from remaining mbufs on chain.
785 		 */
786 		m = mp;
787 		if (m->m_flags & M_PKTHDR)
788 			m->m_pkthdr.len = count;
789 		for (; m; m = m->m_next) {
790 			if (m->m_len >= count) {
791 				m->m_len = count;
792 				break;
793 			}
794 			count -= m->m_len;
795 		}
796 		while ((m = m->m_next) != NULL)
797 			m->m_len = 0;
798 	}
799 }
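
/*
 * Example (editor's illustrative sketch, not in the original source):
 * m_adj() trims from the head for positive counts and from the tail
 * for negative ones, e.g. stripping a 14-byte link header and a
 * 4-byte hardware CRC from a received frame.
 */
#if 0	/* example only, not compiled */
	m_adj(m, 14);		/* drop the link-level header */
	m_adj(m, -4);		/* drop the trailing CRC */
#endif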
800 
801 /*
802  * Rearrange an mbuf chain so that len bytes are contiguous
803  * and in the data area of an mbuf (so that mtod will work
804  * for a structure of size len).  Returns the resulting
805  * mbuf chain on success, frees it and returns null on failure.
806  */
807 struct mbuf *
808 m_pullup(struct mbuf *n, int len)
809 {
810 	struct mbuf *m;
811 	int count;
812 
813 	/*
814 	 * If first mbuf has no cluster, and has room for len bytes
815 	 * without shifting current data, pullup into it,
816 	 * otherwise allocate a new mbuf to prepend to the chain.
817 	 */
818 	if ((n->m_flags & M_EXT) == 0 && n->m_next &&
819 	    n->m_data + len < &n->m_dat[MLEN]) {
820 		if (n->m_len >= len)
821 			return (n);
822 		m = n;
823 		n = n->m_next;
824 		len -= m->m_len;
825 	} else if ((n->m_flags & M_EXT) != 0 && len > MHLEN && n->m_next &&
826 	    n->m_data + len < &n->m_ext.ext_buf[n->m_ext.ext_size]) {
827 		if (n->m_len >= len)
828 			return (n);
829 		m = n;
830 		n = n->m_next;
831 		len -= m->m_len;
832 	} else {
833 		if (len > MAXMCLBYTES)
834 			goto bad;
835 		MGET(m, M_DONTWAIT, n->m_type);
836 		if (m == NULL)
837 			goto bad;
838 		if (len > MHLEN) {
839 			MCLGETI(m, M_DONTWAIT, NULL, len);
840 			if ((m->m_flags & M_EXT) == 0) {
841 				m_free(m);
842 				goto bad;
843 			}
844 		}
845 		m->m_len = 0;
846 		if (n->m_flags & M_PKTHDR)
847 			M_MOVE_PKTHDR(m, n);
848 	}
849 
850 	do {
851 		count = min(len, n->m_len);
852 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
853 		    (unsigned)count);
854 		len -= count;
855 		m->m_len += count;
856 		n->m_len -= count;
857 		if (n->m_len)
858 			n->m_data += count;
859 		else
860 			n = m_free(n);
861 	} while (len > 0 && n);
862 	if (len > 0) {
863 		(void)m_free(m);
864 		goto bad;
865 	}
866 	m->m_next = n;
867 
868 	return (m);
869 bad:
870 	m_freem(n);
871 	return (NULL);
872 }
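
/*
 * Example (editor's illustrative sketch, not in the original source):
 * the canonical input-path idiom.  Before casting m_data to a header
 * structure, make sure that many bytes are contiguous; `struct hdr'
 * stands in for a real protocol header.
 */
#if 0	/* example only, not compiled */
	struct hdr *h;

	if (m->m_len < sizeof(*h) &&
	    (m = m_pullup(m, sizeof(*h))) == NULL)
		return;			/* the chain was freed for us */
	h = mtod(m, struct hdr *);
#endif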
873 
874 /*
875  * Return a pointer to mbuf/offset of location in mbuf chain.
876  */
877 struct mbuf *
878 m_getptr(struct mbuf *m, int loc, int *off)
879 {
880 	while (loc >= 0) {
881 		/* Normal end of search */
882 		if (m->m_len > loc) {
883 			*off = loc;
884 			return (m);
885 		} else {
886 			loc -= m->m_len;
887 
888 			if (m->m_next == NULL) {
889 				if (loc == 0) {
890 					/* Point at the end of valid data */
891 					*off = m->m_len;
892 					return (m);
893 				} else {
894 					return (NULL);
895 				}
896 			} else {
897 				m = m->m_next;
898 			}
899 		}
900 	}
901 
902 	return (NULL);
903 }
904 
905 /*
906  * Inject a new mbuf chain of length siz into mbuf chain m0 at
907  * position len0. Returns a pointer to the first injected mbuf, or
908  * NULL on failure (m0 is left undisturbed). Note that if there is
909  * enough space for an object of size siz in the appropriate position,
910  * no memory will be allocated. Also, there will be no data movement in
911  * the first len0 bytes (pointers into that region remain valid).
912  *
913  * XXX At the moment it is assumed that siz is less than the size of an mbuf.
914  */
915 struct mbuf *
916 m_inject(struct mbuf *m0, int len0, int siz, int wait)
917 {
918 	struct mbuf *m, *n, *n2 = NULL, *n3;
919 	unsigned len = len0, remain;
920 
921 	if ((siz >= MHLEN) || (len0 <= 0))
922 		return (NULL);
923 	for (m = m0; m && len > m->m_len; m = m->m_next)
924 		len -= m->m_len;
925 	if (m == NULL)
926 		return (NULL);
927 	remain = m->m_len - len;
928 	if (remain == 0) {
929 		if ((m->m_next) && (M_LEADINGSPACE(m->m_next) >= siz)) {
930 			m->m_next->m_len += siz;
931 			if (m0->m_flags & M_PKTHDR)
932 				m0->m_pkthdr.len += siz;
933 			m->m_next->m_data -= siz;
934 			return m->m_next;
935 		}
936 	} else {
937 		n2 = m_copym2(m, len, remain, wait);
938 		if (n2 == NULL)
939 			return (NULL);
940 	}
941 
942 	MGET(n, wait, MT_DATA);
943 	if (n == NULL) {
944 		if (n2)
945 			m_freem(n2);
946 		return (NULL);
947 	}
948 
949 	n->m_len = siz;
950 	if (m0->m_flags & M_PKTHDR)
951 		m0->m_pkthdr.len += siz;
952 	m->m_len -= remain; /* Trim */
953 	if (n2)	{
954 		for (n3 = n; n3->m_next != NULL; n3 = n3->m_next)
955 			;
956 		n3->m_next = n2;
957 	} else
958 		n3 = n;
959 	for (; n3->m_next != NULL; n3 = n3->m_next)
960 		;
961 	n3->m_next = m->m_next;
962 	m->m_next = n;
963 	return n;
964 }
965 
966 /*
967  * Partition an mbuf chain in two pieces, returning the tail --
968  * all but the first len0 bytes.  In case of failure, it returns NULL and
969  * attempts to restore the chain to its original state.
970  */
971 struct mbuf *
972 m_split(struct mbuf *m0, int len0, int wait)
973 {
974 	struct mbuf *m, *n;
975 	unsigned len = len0, remain, olen;
976 
977 	for (m = m0; m && len > m->m_len; m = m->m_next)
978 		len -= m->m_len;
979 	if (m == NULL)
980 		return (NULL);
981 	remain = m->m_len - len;
982 	if (m0->m_flags & M_PKTHDR) {
983 		MGETHDR(n, wait, m0->m_type);
984 		if (n == NULL)
985 			return (NULL);
986 		if (m_dup_pkthdr(n, m0, wait)) {
987 			m_freem(n);
988 			return (NULL);
989 		}
990 		n->m_pkthdr.len -= len0;
991 		olen = m0->m_pkthdr.len;
992 		m0->m_pkthdr.len = len0;
993 		if (m->m_flags & M_EXT)
994 			goto extpacket;
995 		if (remain > MHLEN) {
996 			/* m can't be the lead packet */
997 			MH_ALIGN(n, 0);
998 			n->m_next = m_split(m, len, wait);
999 			if (n->m_next == NULL) {
1000 				(void) m_free(n);
1001 				m0->m_pkthdr.len = olen;
1002 				return (NULL);
1003 			} else
1004 				return (n);
1005 		} else
1006 			MH_ALIGN(n, remain);
1007 	} else if (remain == 0) {
1008 		n = m->m_next;
1009 		m->m_next = NULL;
1010 		return (n);
1011 	} else {
1012 		MGET(n, wait, m->m_type);
1013 		if (n == NULL)
1014 			return (NULL);
1015 		M_ALIGN(n, remain);
1016 	}
1017 extpacket:
1018 	if (m->m_flags & M_EXT) {
1019 		n->m_ext = m->m_ext;
1020 		MCLADDREFERENCE(m, n);
1021 		n->m_data = m->m_data + len;
1022 	} else {
1023 		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
1024 	}
1025 	n->m_len = remain;
1026 	m->m_len = len;
1027 	n->m_next = m->m_next;
1028 	m->m_next = NULL;
1029 	return (n);
1030 }
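
/*
 * Example (editor's illustrative sketch, not in the original source):
 * splitting an oversized packet at an MTU boundary, as a fragmentation
 * path might.  On failure the original chain is restored, so it can
 * still be freed or retried; `mtu' is a hypothetical limit.
 */
#if 0	/* example only, not compiled */
	struct mbuf *tail;

	if ((tail = m_split(m, mtu, M_DONTWAIT)) == NULL)
		goto dropit;		/* m is left undisturbed */
	/* m now holds the first mtu bytes, tail holds the rest */
#endif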
1031 
1032 /*
1033  * Routine to copy from device local memory into mbufs.
1034  */
1035 struct mbuf *
1036 m_devget(char *buf, int totlen, int off, struct ifnet *ifp)
1037 {
1038 	struct mbuf	*m;
1039 	struct mbuf	*top, **mp;
1040 	int		 len;
1041 
1042 	top = NULL;
1043 	mp = &top;
1044 
1045 	if (off < 0 || off > MHLEN)
1046 		return (NULL);
1047 
1048 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1049 	if (m == NULL)
1050 		return (NULL);
1051 
1052 	m->m_pkthdr.rcvif = ifp;
1053 	m->m_pkthdr.len = totlen;
1054 
1055 	len = MHLEN;
1056 
1057 	while (totlen > 0) {
1058 		if (top != NULL) {
1059 			MGET(m, M_DONTWAIT, MT_DATA);
1060 			if (m == NULL) {
1061 				/*
1062 				 * As we might get called by pfkey, make sure
1063 				 * we do not leak sensitive data.
1064 				 */
1065 				top->m_flags |= M_ZEROIZE;
1066 				m_freem(top);
1067 				return (NULL);
1068 			}
1069 			len = MLEN;
1070 		}
1071 
1072 		if (totlen + off >= MINCLSIZE) {
1073 			MCLGET(m, M_DONTWAIT);
1074 			if (m->m_flags & M_EXT)
1075 				len = MCLBYTES;
1076 		} else {
1077 			/* Place initial small packet/header at end of mbuf. */
1078 			if (top == NULL && totlen + off + max_linkhdr <= len) {
1079 				m->m_data += max_linkhdr;
1080 				len -= max_linkhdr;
1081 			}
1082 		}
1083 
1084 		if (off) {
1085 			m->m_data += off;
1086 			len -= off;
1087 			off = 0;
1088 		}
1089 
1090 		m->m_len = len = min(totlen, len);
1091 		memcpy(mtod(m, void *), buf, (size_t)len);
1092 
1093 		buf += len;
1094 		*mp = m;
1095 		mp = &m->m_next;
1096 		totlen -= len;
1097 	}
1098 	return (top);
1099 }
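
/*
 * Example (editor's illustrative sketch, not in the original source):
 * how a legacy driver might copy a received frame out of board memory.
 * `sc->sc_bufaddr', `framelen' and `ifp' are hypothetical driver state;
 * the offset of 2 keeps the IP header aligned after a 14-byte header.
 */
#if 0	/* example only, not compiled */
	struct mbuf *m;

	m = m_devget(sc->sc_bufaddr, framelen, 2, ifp);
	if (m == NULL) {
		ifp->if_ierrors++;
		return;
	}
#endif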
1100 
1101 void
1102 m_zero(struct mbuf *m)
1103 {
1104 #ifdef DIAGNOSTIC
1105 	if (M_READONLY(m))
1106 		panic("m_zero: M_READONLY");
1107 #endif /* DIAGNOSTIC */
1108 
1109 	if (m->m_flags & M_EXT)
1110 		explicit_bzero(m->m_ext.ext_buf, m->m_ext.ext_size);
1111 	else {
1112 		if (m->m_flags & M_PKTHDR)
1113 			explicit_bzero(m->m_pktdat, MHLEN);
1114 		else
1115 			explicit_bzero(m->m_dat, MLEN);
1116 	}
1117 }
1118 
1119 /*
1120  * Apply function f to the data in an mbuf chain starting "off" bytes from the
1121  * beginning, continuing for "len" bytes.
1122  */
1123 int
1124 m_apply(struct mbuf *m, int off, int len,
1125     int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
1126 {
1127 	int rval;
1128 	unsigned int count;
1129 
1130 	if (len < 0)
1131 		panic("m_apply: len %d < 0", len);
1132 	if (off < 0)
1133 		panic("m_apply: off %d < 0", off);
1134 	while (off > 0) {
1135 		if (m == NULL)
1136 			panic("m_apply: null mbuf in skip");
1137 		if (off < m->m_len)
1138 			break;
1139 		off -= m->m_len;
1140 		m = m->m_next;
1141 	}
1142 	while (len > 0) {
1143 		if (m == NULL)
1144 			panic("m_apply: null mbuf");
1145 		count = min(m->m_len - off, len);
1146 
1147 		rval = f(fstate, mtod(m, caddr_t) + off, count);
1148 		if (rval)
1149 			return (rval);
1150 
1151 		len -= count;
1152 		off = 0;
1153 		m = m->m_next;
1154 	}
1155 
1156 	return (0);
1157 }
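
/*
 * Example (editor's illustrative sketch, not in the original source):
 * walking a chain with a callback instead of linearizing it.  `sum_cb'
 * is a hypothetical helper that folds each contiguous region into a
 * running sum passed through fstate.
 */
#if 0	/* example only, not compiled */
	static int
	sum_cb(caddr_t fstate, caddr_t data, unsigned int len)
	{
		u_int32_t *sum = (u_int32_t *)fstate;

		while (len-- > 0)
			*sum += (u_char)*data++;
		return (0);		/* nonzero would abort the walk */
	}

	/* ... in the caller ... */
	u_int32_t sum = 0;
	m_apply(m, 0, m->m_pkthdr.len, sum_cb, (caddr_t)&sum);
#endif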
1158 
1159 int
1160 m_leadingspace(struct mbuf *m)
1161 {
1162 	if (M_READONLY(m))
1163 		return 0;
1164 	return (m->m_flags & M_EXT ? m->m_data - m->m_ext.ext_buf :
1165 	    m->m_flags & M_PKTHDR ? m->m_data - m->m_pktdat :
1166 	    m->m_data - m->m_dat);
1167 }
1168 
1169 int
1170 m_trailingspace(struct mbuf *m)
1171 {
1172 	if (M_READONLY(m))
1173 		return 0;
1174 	return (m->m_flags & M_EXT ? m->m_ext.ext_buf +
1175 	    m->m_ext.ext_size - (m->m_data + m->m_len) :
1176 	    &m->m_dat[MLEN] - (m->m_data + m->m_len));
1177 }
1178 
1179 
1180 /*
1181  * Duplicate the mbuf pkthdr from "from" to "to".
1182  * "from" must have M_PKTHDR set, and "to" must be empty.
1183  */
1184 int
1185 m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
1186 {
1187 	int error;
1188 
1189 	KASSERT(from->m_flags & M_PKTHDR);
1190 
1191 	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
1192 	to->m_flags |= (from->m_flags & M_COPYFLAGS);
1193 	to->m_pkthdr = from->m_pkthdr;
1194 
1195 	SLIST_INIT(&to->m_pkthdr.tags);
1196 
1197 	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
1198 		return (error);
1199 
1200 	if ((to->m_flags & M_EXT) == 0)
1201 		to->m_data = to->m_pktdat;
1202 
1203 	return (0);
1204 }
1205 
1206 #ifdef DDB
1207 void
1208 m_print(void *v,
1209     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1210 {
1211 	struct mbuf *m = v;
1212 
1213 	(*pr)("mbuf %p\n", m);
1214 	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
1215 	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
1216 	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
1217 	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
1218 	if (m->m_flags & M_PKTHDR) {
1219 		(*pr)("m_pkthdr.rcvif: %p\tm_pkthdr.len: %i\n",
1220 		    m->m_pkthdr.rcvif, m->m_pkthdr.len);
1221 		(*pr)("m_pkthdr.tags: %p\tm_pkthdr.tagsset: %b\n",
1222 		    SLIST_FIRST(&m->m_pkthdr.tags),
1223 		    m->m_pkthdr.tagsset, MTAG_BITS);
1224 		(*pr)("m_pkthdr.csum_flags: %b\n",
1225 		    m->m_pkthdr.csum_flags, MCS_BITS);
1226 		(*pr)("m_pkthdr.ether_vtag: %u\tm_pkthdr.ph_rtableid: %u\n",
1227 		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
1228 		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
1229 		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
1230 		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
1231 		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
1232 		(*pr)("m_pkthdr.pf.flags: %b\n",
1233 		    m->m_pkthdr.pf.flags, MPF_BITS);
1234 		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
1235 		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
1236 	}
1237 	if (m->m_flags & M_EXT) {
1238 		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
1239 		    m->m_ext.ext_buf, m->m_ext.ext_size);
1240 		(*pr)("m_ext.ext_free: %p\tm_ext.ext_arg: %p\n",
1241 		    m->m_ext.ext_free, m->m_ext.ext_arg);
1242 		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
1243 		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
1244 	}
1245 }
1246 #endif
1247