/*	$OpenBSD: uipc_mbuf.c,v 1.119 2009/03/02 23:52:18 dlg Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

/*
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 * 	This product includes software developed by the University of
 * 	California, Berkeley and its contributors.
 * 	This product includes software developed at the Information
 * 	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>

#include <machine/cpu.h>

#include <uvm/uvm_extern.h>

struct	mbstat mbstat;		/* mbuf stats */
struct	pool mbpool;		/* mbuf pool */

/* mbuf cluster pools */
u_int	mclsizes[] = {
	MCLBYTES,	/* must be at slot 0 */
	4 * 1024,
	8 * 1024,
	9 * 1024,
	12 * 1024,
	16 * 1024,
	64 * 1024
};
static	char mclnames[MCLPOOLS][8];
struct	pool mclpools[MCLPOOLS];

int	m_clpool(u_int);

int max_linkhdr;		/* largest link-level header */
int max_protohdr;		/* largest protocol header */
int max_hdr;			/* largest link+protocol header */
int max_datalen;		/* MHLEN - max_hdr */

void	m_extfree(struct mbuf *);
struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
void	nmbclust_update(void);

const char *mclpool_warnmsg =
    "WARNING: mclpools limit reached; increase kern.maxclusters";

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{
	int i;

	pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", NULL);
	pool_setlowat(&mbpool, mblowat);

	for (i = 0; i < nitems(mclsizes); i++) {
		snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
		    mclsizes[i] >> 10);
		pool_init(&mclpools[i], mclsizes[i], 0, 0, 0, mclnames[i],
		    NULL);
		pool_setlowat(&mclpools[i], mcllowat);
	}

	nmbclust_update();
}

void
nmbclust_update(void)
{
	int i;
	/*
	 * Set the hard limit on the mclpools to the number of
	 * mbuf clusters the kernel is to support.  Log the limit-reached
	 * message at most once a minute.
	 */
	for (i = 0; i < nitems(mclsizes); i++) {
		(void)pool_sethardlimit(&mclpools[i], nmbclust,
		    mclpool_warnmsg, 60);
	}
	pool_sethiwat(&mbpool, nmbclust);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	struct protosw *pr;
	int s = splnet();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
	splx(s);
}

/*
 * Space allocation routines.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;
	int s;

	s = splnet();
	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : 0);
	if (m)
		mbstat.m_mtypes[type]++;
	splx(s);
	if (m) {
		m->m_type = type;
		m->m_next = (struct mbuf *)NULL;
		m->m_nextpkt = (struct mbuf *)NULL;
		m->m_data = m->m_dat;
		m->m_flags = 0;
	}
	return (m);
}

/*
 * ATTN: When changing anything here, check m_inithdr() and m_defrag();
 * they may need to change as well.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;
	int s;

	s = splnet();
	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : 0);
	if (m)
		mbstat.m_mtypes[type]++;
	splx(s);
	if (m) {
		m->m_type = type;

		/* keep in sync with m_inithdr */
		m->m_next = (struct mbuf *)NULL;
		m->m_nextpkt = (struct mbuf *)NULL;
		m->m_data = m->m_pktdat;
		m->m_flags = M_PKTHDR;
		m->m_pkthdr.rcvif = NULL;
		SLIST_INIT(&m->m_pkthdr.tags);
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.ether_vtag = 0;
		m->m_pkthdr.pf.hdr = NULL;
		m->m_pkthdr.pf.statekey = NULL;
		m->m_pkthdr.pf.rtableid = 0;
		m->m_pkthdr.pf.qid = 0;
		m->m_pkthdr.pf.tag = 0;
		m->m_pkthdr.pf.flags = 0;
		m->m_pkthdr.pf.routed = 0;
	}
	return (m);
}
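
/*
 * Usage sketch (illustrative only, not from the original sources): a
 * typical caller allocates a packet header mbuf and bails out on
 * failure before filling in the lengths:
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	m->m_pkthdr.len = m->m_len = 0;
 */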

struct mbuf *
m_inithdr(struct mbuf *m)
{
	/* keep in sync with m_gethdr */
	m->m_next = (struct mbuf *)NULL;
	m->m_nextpkt = (struct mbuf *)NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	m->m_pkthdr.rcvif = NULL;
	SLIST_INIT(&m->m_pkthdr.tags);
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.ether_vtag = 0;
	m->m_pkthdr.pf.hdr = NULL;
	m->m_pkthdr.pf.statekey = NULL;
	m->m_pkthdr.pf.rtableid = 0;
	m->m_pkthdr.pf.qid = 0;
	m->m_pkthdr.pf.tag = 0;
	m->m_pkthdr.pf.flags = 0;
	m->m_pkthdr.pf.routed = 0;

	return (m);
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == NULL)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

int
m_clpool(u_int pktlen)
{
	int pi;

	for (pi = 0; pi < MCLPOOLS; pi++) {
		if (pktlen <= mclsizes[pi])
			return (pi);
	}

	return (-1);
}
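
/*
 * Sketch of how m_clpool() maps a requested length onto mclsizes[]
 * above (hedged example; MCLBYTES is typically 2k):
 *
 *	m_clpool(1500)  -> 0, the MCLBYTES pool
 *	m_clpool(9000)  -> the 9k pool
 *	m_clpool(65536) -> the 64k pool
 *	m_clpool(65537) -> -1, no pool is big enough
 */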

void
m_clinitifp(struct ifnet *ifp)
{
	struct mclpool *mclp;
	int i;

	/* Initialize high water marks for use of cluster pools */
	for (i = 0; i < MCLPOOLS; i++) {
		mclp = &ifp->if_data.ifi_mclpool[i];

		if (mclp->mcl_lwm == 0)
			mclp->mcl_lwm = 2;
		if (mclp->mcl_hwm == 0)
			mclp->mcl_hwm = 32768;

		mclp->mcl_cwm = MAX(4, mclp->mcl_lwm);
	}
}

void
m_clsetwms(struct ifnet *ifp, u_int pktlen, u_int lwm, u_int hwm)
{
	int pi;

	pi = m_clpool(pktlen);
	if (pi == -1)
		return;

	ifp->if_data.ifi_mclpool[pi].mcl_lwm = lwm;
	ifp->if_data.ifi_mclpool[pi].mcl_hwm = hwm;
}

extern int m_clticks;
int m_livelock;

int
m_cldrop(struct ifnet *ifp, int pi)
{
	static int liveticks;
	struct mclpool *mclp;
	extern int ticks;
	int i;

	if (m_livelock == 0 && ticks - m_clticks > 2) {
		struct ifnet *aifp;

		/*
		 * The timeout did not run, so we are in some kind of
		 * livelock.  Decrease the cluster allocation high water
		 * marks on all interfaces and keep them from growing for
		 * the very near future.
		 */
		m_livelock = 1;
		ifp->if_data.ifi_livelocks++;
		liveticks = ticks;
		TAILQ_FOREACH(aifp, &ifnet, if_list) {
			mclp = aifp->if_data.ifi_mclpool;
			for (i = 0; i < MCLPOOLS; i++) {
				mclp[i].mcl_cwm =
				    max(mclp[i].mcl_cwm / 2, mclp[i].mcl_lwm);
			}
		}
	} else if (m_livelock && ticks - liveticks > 5)
		m_livelock = 0;	/* Let the high water marks grow again */

	mclp = &ifp->if_data.ifi_mclpool[pi];
	if (m_livelock == 0 && ISSET(ifp->if_flags, IFF_RUNNING) &&
	    mclp->mcl_alive <= 2 && mclp->mcl_cwm < mclp->mcl_hwm) {
		/* About to run out, so increase the current watermark */
		mclp->mcl_cwm++;
	} else if (mclp->mcl_alive >= mclp->mcl_cwm)
		return (1);		/* No more packets given */

	return (0);
}

void
m_clcount(struct ifnet *ifp, int pi)
{
	ifp->if_data.ifi_mclpool[pi].mcl_alive++;
}

void
m_cluncount(struct mbuf *m, int all)
{
	struct mbuf_ext *me;

	do {
		me = &m->m_ext;
		if (((m->m_flags & (M_EXT|M_CLUSTER)) != (M_EXT|M_CLUSTER)) ||
		    (me->ext_ifp == NULL))
			continue;

		me->ext_ifp->if_data.ifi_mclpool[me->ext_backend].mcl_alive--;
		me->ext_ifp = NULL;
	} while (all && (m = m->m_next));
}

void
m_clget(struct mbuf *m, int how, struct ifnet *ifp, u_int pktlen)
{
	int pi;
	int s;

	pi = m_clpool(pktlen);
#ifdef DIAGNOSTIC
	if (pi == -1)
		panic("m_clget: request for %u byte cluster", pktlen);
#endif

	if (ifp != NULL && m_cldrop(ifp, pi))
		return;

	s = splnet();
	m->m_ext.ext_buf = pool_get(&mclpools[pi],
	    how == M_WAIT ? PR_WAITOK : 0);
	splx(s);
	if (m->m_ext.ext_buf != NULL) {
		m->m_data = m->m_ext.ext_buf;
		m->m_flags |= M_EXT|M_CLUSTER;
		m->m_ext.ext_size = mclpools[pi].pr_size;
		m->m_ext.ext_free = NULL;
		m->m_ext.ext_arg = NULL;

		m->m_ext.ext_backend = pi;
		m->m_ext.ext_ifp = ifp;
		if (ifp != NULL)
			m_clcount(ifp, pi);

		MCLINITREFERENCE(m);
	}
}
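
/*
 * Example (sketch of hypothetical driver code): receive paths pair
 * m_gethdr() with MCLGETI, which ends up here and lets the per-ifp
 * accounting above throttle cluster use during a livelock:
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (NULL);
 *	MCLGETI(m, M_DONTWAIT, ifp, MCLBYTES);
 *	if (!(m->m_flags & M_EXT)) {
 *		m_freem(m);
 *		return (NULL);
 *	}
 */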

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	s = splnet();
	mbstat.m_mtypes[m->m_type]--;
	if (m->m_flags & M_PKTHDR)
		m_tag_delete_chain(m);
	if (m->m_flags & M_EXT)
		m_extfree(m);
	m->m_flags = 0;
	n = m->m_next;
	pool_put(&mbpool, m);
	splx(s);

	return (n);
}

void
m_extfree(struct mbuf *m)
{
	if (MCLISREFERENCED(m)) {
		m->m_ext.ext_nextref->m_ext.ext_prevref =
		    m->m_ext.ext_prevref;
		m->m_ext.ext_prevref->m_ext.ext_nextref =
		    m->m_ext.ext_nextref;
	} else if (m->m_flags & M_CLUSTER) {
		m_cluncount(m, 0);
		pool_put(&mclpools[m->m_ext.ext_backend],
		    m->m_ext.ext_buf);
	} else if (m->m_ext.ext_free)
		(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
		    m->m_ext.ext_size, m->m_ext.ext_arg);
	else
		panic("unknown type of extension buffer");
	m->m_ext.ext_size = 0;
	m->m_flags &= ~(M_EXT|M_CLUSTER);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
	} while ((m = n) != NULL);
}

/*
 * mbuf chain defragmenter.  This function uses some evil tricks to
 * defragment an mbuf chain into a single buffer without changing the mbuf
 * pointer.  It needs to know a lot about mbuf internals to make this work.
 */
int
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0;

	if (m->m_next == NULL)
		return 0;

#ifdef DIAGNOSTIC
	if (!(m->m_flags & M_PKTHDR))
		panic("m_defrag: no packet hdr or not a chain");
#endif

	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
		return -1;
	if (m->m_pkthdr.len > MHLEN) {
		MCLGETI(m0, how, NULL, m->m_pkthdr.len);
		if (!(m0->m_flags & M_EXT)) {
			m_free(m0);
			return -1;
		}
	}
	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;

	/* free chain behind and possible ext buf on the first mbuf */
	m_freem(m->m_next);
	m->m_next = NULL;

	if (m->m_flags & M_EXT) {
		int s = splnet();
		m_extfree(m);
		splx(s);
	}

	/*
	 * Bounce copy mbuf over to the original mbuf and set everything up.
	 * This needs to reset or clear all pointers that may go into the
	 * original mbuf chain.
	 */
	if (m0->m_flags & M_EXT) {
		bcopy(&m0->m_ext, &m->m_ext, sizeof(struct mbuf_ext));
		MCLINITREFERENCE(m);
		m->m_flags |= M_EXT|M_CLUSTER;
		m->m_data = m->m_ext.ext_buf;
	} else {
		m->m_data = m->m_pktdat;
		bcopy(m0->m_data, m->m_data, m0->m_len);
	}
	m->m_pkthdr.len = m->m_len = m0->m_len;
	m->m_pkthdr.pf.hdr = NULL;	/* altq will cope */

	m0->m_flags &= ~(M_EXT|M_CLUSTER);	/* cluster is gone */
	m_free(m0);

	return 0;
}
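
/*
 * Usage sketch (illustrative only): m_defrag() is useful just before
 * handing a chain to hardware that needs one contiguous buffer; the
 * head mbuf pointer stays valid even when defragmentation fails:
 *
 *	if (m_defrag(m, M_DONTWAIT) != 0) {
 *		m_freem(m);
 *		return (ENOBUFS);
 *	}
 */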

/*
 * Mbuf utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	MGET(mn, how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
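
/*
 * Example (sketch): callers normally use the M_PREPEND macro, which
 * only falls back to m_prepend() when the first mbuf lacks leading
 * space:
 *
 *	M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */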

/*
 * Make a copy of an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
 * the mbuf chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT
 * from the caller.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off, int len, int wait)
{
	return m_copym0(m, off, len, wait, 0);	/* shallow copy on M_EXT */
}

/*
 * m_copym2() is like m_copym(), except it COPIES cluster mbufs, instead
 * of merely bumping the reference count.
 */
struct mbuf *
m_copym2(struct mbuf *m, int off, int len, int wait)
{
	return m_copym0(m, off, len, wait, 1);	/* deep copy */
}

struct mbuf *
m_copym0(struct mbuf *m, int off, int len, int wait, int deep)
{
	struct mbuf *n, **np;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym0: off %d, len %d", off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == NULL)
			panic("m_copym0: null mbuf");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("m_copym0: m == NULL and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			M_DUP_PKTHDR(n, m);
			if (len != M_COPYALL)
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				n->m_ext = m->m_ext;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * we are unsure about the way m was allocated.
				 * copy into multiple MCLBYTES cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	if (top == NULL)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	if (off < 0)
		panic("m_copydata: off %d < 0", off);
	if (len < 0)
		panic("m_copydata: len %d < 0", len);
	while (off > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf");
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
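
/*
 * Example (sketch): m_copym() and m_copydata() let a protocol peek at
 * data without linearizing the chain; "off" and the use made of the
 * shallow copy are hypothetical here:
 *
 *	struct tcphdr th;
 *	struct mbuf *n;
 *
 *	m_copydata(m, off, sizeof(th), (caddr_t)&th);
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 */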

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.  The mbuf needs to be properly initialized
 * including the setting of m_len.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *_cp)
{
	int mlen, totlen = 0;
	struct mbuf *m = m0, *n;
	caddr_t cp = (caddr_t)_cp;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			if ((n = m_get(M_DONTWAIT, m->m_type)) == NULL)
				goto out;

			if (off + len > MLEN) {
				MCLGETI(n, M_DONTWAIT, NULL, off + len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					goto out;
				}
			}
			bzero(mtod(n, caddr_t), off);
			n->m_len = len + off;
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		/* extend last packet to be filled fully */
		if (m->m_next == NULL && (len > m->m_len - off))
			m->m_len += min(len - (m->m_len - off),
			    M_TRAILINGSPACE(m));
		mlen = min(m->m_len - off, len);
		bcopy(cp, mtod(m, caddr_t) + off, (size_t)mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;
		if (len == 0)
			break;
		off = 0;

		if (m->m_next == NULL) {
			if ((n = m_get(M_DONTWAIT, m->m_type)) == NULL)
				goto out;

			if (len > MLEN) {
				MCLGETI(n, M_DONTWAIT, NULL, len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					goto out;
				}
			}
			n->m_len = len;
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
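
/*
 * Example (sketch): m_copyback() is commonly used to patch a field in
 * place, e.g. storing a freshly computed checksum; "off" is a
 * hypothetical offset of that field:
 *
 *	u_int16_t sum;
 *
 *	sum = in_cksum(m, m->m_pkthdr.len);
 *	m_copyback(m, off, sizeof(sum), &sum);
 */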

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), so the data portion
 * of n could end up in an mbuf of a different mbuf type.  Both chains
 * should therefore be of the same type (e.g. MT_DATA).
 * The m_pkthdr, if present, is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}
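
/*
 * Example (sketch): positive lengths trim from the head, negative
 * lengths from the tail.  A driver might strip the link header and an
 * appended CRC like this:
 *
 *	m_adj(m, ETHER_HDR_LEN);	trim header from the front
 *	m_adj(m, -ETHER_CRC_LEN);	trim CRC from the end
 */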

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns NULL on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void)m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (NULL);
}
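
/*
 * Example (sketch): the classic input-path idiom, making the IP header
 * contiguous before casting; the chain is already freed when m_pullup()
 * fails:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */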

/*
 * m_pullup2() works like m_pullup(), save that len can be <= MCLBYTES.
 * m_pullup2() only works on values of len such that MHLEN < len <= MCLBYTES;
 * it calls m_pullup() for values <= MHLEN.  It also only coalesces the
 * requested number of bytes.  (For those of us who expect unwieldy option
 * headers.)
 *
 * KEBE SAYS:  Remember that dtom() calls with data in clusters do not work!
 */
struct mbuf *
m_pullup2(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;

	if (len <= MHLEN)
		return m_pullup(n, len);
	if ((n->m_flags & M_EXT) != 0 &&
	    n->m_data + len < &n->m_data[MCLBYTES] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MCLBYTES)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			goto bad;
		}
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			/* Too many adverse side effects. */
			/* M_MOVE_PKTHDR(m, n); */
			m->m_flags = (n->m_flags & M_COPYFLAGS) |
			    M_EXT | M_CLUSTER;
			M_MOVE_HDR(m, n);
			/* n->m_data is cool. */
		}
	}

	do {
		count = min(len, n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void)m_free(m);
		goto bad;
	}
	m->m_next = n;

	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (NULL);
}

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else
					return (NULL);
			} else
				m = m->m_next;
		}
	}

	return (NULL);
}
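
/*
 * Example (hedged sketch): locate the mbuf that holds byte "loc" of a
 * chain and the offset of that byte within it:
 *
 *	int off;
 *	struct mbuf *n;
 *
 *	n = m_getptr(m, loc, &off);
 *	if (n == NULL)
 *		return (EINVAL);	loc is past the end of the chain
 */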

/*
 * Inject a new mbuf chain of length siz in mbuf chain m0 at
 * position len0.  Returns a pointer to the first injected mbuf, or
 * NULL on failure (m0 is left undisturbed).  Note that if there is
 * enough space for an object of size siz in the appropriate position,
 * no memory will be allocated.  Also, there will be no data movement in
 * the first len0 bytes (pointers to that will remain valid).
 *
 * XXX It is assumed that siz is less than the size of an mbuf at the moment.
 */
struct mbuf *
m_inject(struct mbuf *m0, int len0, int siz, int wait)
{
	struct mbuf *m, *n, *n2 = NULL, *n3;
	unsigned len = len0, remain;

	if ((siz >= MHLEN) || (len0 <= 0))
		return (NULL);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (remain == 0) {
		if ((m->m_next) && (M_LEADINGSPACE(m->m_next) >= siz)) {
			m->m_next->m_len += siz;
			if (m0->m_flags & M_PKTHDR)
				m0->m_pkthdr.len += siz;
			m->m_next->m_data -= siz;
			return m->m_next;
		}
	} else {
		n2 = m_copym2(m, len, remain, wait);
		if (n2 == NULL)
			return (NULL);
	}

	MGET(n, wait, MT_DATA);
	if (n == NULL) {
		if (n2)
			m_freem(n2);
		return (NULL);
	}

	n->m_len = siz;
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += siz;
	m->m_len -= remain; /* Trim */
	if (n2) {
		for (n3 = n; n3->m_next != NULL; n3 = n3->m_next)
			;
		n3->m_next = n2;
	} else
		n3 = n;
	for (; n3->m_next != NULL; n3 = n3->m_next)
		;
	n3->m_next = m->m_next;
	m->m_next = n;
	return n;
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		M_DUP_PKTHDR(n, m0);
		n->m_pkthdr.len -= len0;
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
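
/*
 * Example (sketch): cutting a chain after a hypothetical "mtu" bytes,
 * e.g. for fragmentation; m keeps the first part and n gets the rest:
 *
 *	n = m_split(m, mtu, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);	m is left intact on failure
 */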

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(const void *, void *, size_t))
{
	struct mbuf	*m;
	struct mbuf	*top, **mp;
	int		 len;

	top = NULL;
	mp = &top;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;

	len = MHLEN;

	while (totlen > 0) {
		if (top != NULL) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}

		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/* Place initial small packet/header at end of mbuf. */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}

		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}

		m->m_len = len = min(totlen, len);

		if (copy)
			copy(buf, mtod(m, caddr_t), (size_t)len);
		else
			bcopy(buf, mtod(m, caddr_t), (size_t)len);

		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}
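
/*
 * Example (sketch, hypothetical softc fields): a driver copies a frame
 * out of device memory into a fresh chain tagged with its interface:
 *
 *	m = m_devget(sc->sc_rxbuf, framelen, 0, &sc->sc_arpcom.ac_if,
 *	    NULL);
 *	if (m == NULL) {
 *		ifp->if_ierrors++;
 *		return;
 *	}
 */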

void
m_zero(struct mbuf *m)
{
	while (m) {
#ifdef DIAGNOSTIC
		if (M_READONLY(m))
			panic("m_zero: M_READONLY");
#endif /* DIAGNOSTIC */
		if (m->m_flags & M_EXT)
			memset(m->m_ext.ext_buf, 0, m->m_ext.ext_size);
		else {
			if (m->m_flags & M_PKTHDR)
				memset(m->m_pktdat, 0, MHLEN);
			else
				memset(m->m_dat, 0, MLEN);
		}
		m = m->m_next;
	}
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
{
	int rval;
	unsigned int count;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);
	while (off > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		count = min(m->m_len - off, len);

		rval = f(fstate, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}
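
/*
 * Example (sketch): m_apply() walks the chain without copying, which
 * suits incremental work such as checksumming; "sum_cb" is a
 * hypothetical callback matching the f() prototype above:
 *
 *	int
 *	sum_cb(caddr_t state, caddr_t data, unsigned int len)
 *	{
 *		fold "len" bytes at "data" into the state here...
 *		return (0);
 *	}
 *
 *	error = m_apply(m, 0, m->m_pkthdr.len, sum_cb, (caddr_t)&sum);
 */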

int
m_leadingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	return (m->m_flags & M_EXT ? m->m_data - m->m_ext.ext_buf :
	    m->m_flags & M_PKTHDR ? m->m_data - m->m_pktdat :
	    m->m_data - m->m_dat);
}

int
m_trailingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	return (m->m_flags & M_EXT ? m->m_ext.ext_buf +
	    m->m_ext.ext_size - (m->m_data + m->m_len) :
	    &m->m_dat[MLEN] - (m->m_data + m->m_len));
}