xref: /netbsd-src/sys/kern/uipc_mbuf.c (revision ae1bfcddc410612bc8c58b807e1830becb69a24c)
/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 *	$Id: uipc_mbuf.c,v 1.9 1994/05/13 06:01:32 mycroft Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/map.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>

#include <vm/vm.h>

extern	vm_map_t mb_map;
struct	mbuf *mbutl;
char	*mclrefcnt;

void
mbinit()
{
	int s;

#if CLBYTES < 4096
#define NCL_INIT	(4096/CLBYTES)
#else
#define NCL_INIT	1
#endif
	s = splimp();
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	splx(s);
	return;
bad:
	panic("mbinit");
}

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 * Must be called at splimp.
 */
/* ARGSUSED */
m_clalloc(ncl, nowait)
	register int ncl;
	int nowait;
{
	static int logged;
	register caddr_t p;
	register int i;
	int npg;

	npg = ncl * CLSIZE;
	p = (caddr_t)kmem_malloc(mb_map, ctob(npg), !nowait);
	if (p == NULL) {
		if (logged == 0) {
			logged++;
			log(LOG_ERR, "mb_map full\n");
		}
		return (0);
	}
	ncl = ncl * CLBYTES / MCLBYTES;
	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree;
		mclfree = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * When MGET fails, ask protocols to free space when short of memory,
 * then re-attempt to allocate an mbuf.
 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	register struct mbuf *m;

	m_reclaim();
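	/*
	 * MGET's failure path expands to a call of m_retry() itself, so
	 * if the MGET below were also to fail it would recurse right back
	 * in here.  Temporarily redefining m_retry (and m_retryhdr, below)
	 * to a null mbuf pointer makes that inner retry give up instead.
	 */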
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	return (m);
}

/*
 * As above; retry an MGETHDR.
 */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	register struct mbuf *m;

	m_reclaim();
#define m_retryhdr(i, t) (struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
	return (m);
}

m_reclaim()
{
	register struct domain *dp;
	register struct protosw *pr;
	int s = splimp();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	register struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}

struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	register struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}

struct mbuf *
m_getclr(nowait, type)
	int nowait, type;
{
	register struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (0);
	bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *n;

	MFREE(m, n);
	return (n);
}
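
#ifdef notdef
/*
 * Sketch, not compiled: the function and macro forms above are
 * interchangeable; the macros trade code size for speed on critical
 * paths.  The function name below is illustrative only.
 */
static void
example_get_free()
{
	register struct mbuf *m;

	m = m_get(M_DONTWAIT, MT_DATA);		/* function form */
	if (m == 0)
		return;
	m = m_free(m);				/* returns the (null) successor */

	MGET(m, M_DONTWAIT, MT_DATA);		/* macro form, same semantics */
	if (m)
		(void) m_free(m);
}
#endif /* notdef */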

void
m_freem(m)
	register struct mbuf *m;
{
	register struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
	} while ((m = n) != NULL);
}

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	register struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
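
#ifdef notdef
/*
 * Sketch of the common caller pattern; not compiled.  Protocol code
 * uses the M_PREPEND macro, which falls back on m_prepend() only when
 * the first mbuf has no leading space.  "struct hdr" is a stand-in for
 * a real protocol header.
 */
static void
example_prepend(m)
	struct mbuf *m;
{
	M_PREPEND(m, sizeof(struct hdr), M_DONTWAIT);
	if (m == 0)
		return;			/* chain was freed on failure */
	/* mtod(m, struct hdr *) now points at writable header space */
}
#endif /* notdef */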

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
 * the mbuf chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT
 * from the caller.
 */
int MCFail;

struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym");
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (0);
}
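
#ifdef notdef
/*
 * Sketch, not compiled: duplicating an entire packet, as a protocol
 * might before handing one copy elsewhere.  Cluster data is shared by
 * bumping mclrefcnt rather than copied, so this is cheap for M_EXT
 * mbufs.
 */
static struct mbuf *
example_dup(m)
	struct mbuf *m;
{
	return (m_copym(m, 0, M_COPYALL, M_DONTWAIT));	/* null on failure */
}
#endif /* notdef */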

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
m_copydata(m, off, len, cp)
	register struct mbuf *m;
	register int off;
	register int len;
	caddr_t cp;
{
	register unsigned count;

	if (off < 0 || len < 0)
		panic("m_copydata");
	while (off > 0) {
		if (m == 0)
			panic("m_copydata");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == 0)
			panic("m_copydata");
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
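
#ifdef notdef
/*
 * Sketch, not compiled: copying a header that may span mbufs into
 * aligned local storage; an alternative to m_pullup() when only a
 * private copy is needed.  "struct hdr" is illustrative.
 */
static void
example_peek_header(m)
	struct mbuf *m;
{
	struct hdr h;

	m_copydata(m, 0, sizeof(h), (caddr_t)&h);
	/* examine h ... */
}
#endif /* notdef */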

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * The m_pkthdr of m, if any, is not updated; the caller must adjust
 * lengths itself.
 */
m_cat(m, n)
	register struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
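
#ifdef notdef
/*
 * Sketch, not compiled: because m_cat() leaves any packet header alone,
 * a caller appending chain n to packet m must fix the length itself.
 */
static void
example_append(m, n)
	struct mbuf *m, *n;
{
	register struct mbuf *o;
	register int nlen = 0;

	for (o = n; o; o = o->m_next)	/* total n's data before it merges */
		nlen += o->m_len;
	m_cat(m, n);
	m->m_pkthdr.len += nlen;
}
#endif /* notdef */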

/*
 * Trim req_len bytes of data from the mbuf chain:  from the head if
 * req_len is positive, from the tail if it is negative.
 */
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	register int len = req_len;
	register struct mbuf *m;
	register int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}
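
#ifdef notdef
/*
 * Sketch, not compiled: trimming a link-level header from the front of
 * a received packet and "pad" garbage bytes from the end.  "hdrlen"
 * and "pad" are illustrative.
 */
static void
example_trim(m, hdrlen, pad)
	struct mbuf *m;
	int hdrlen, pad;
{
	m_adj(m, hdrlen);	/* positive count: trim from head */
	m_adj(m, -pad);		/* negative count: trim from tail */
}
#endif /* notdef */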

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
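
#ifdef notdef
/*
 * Sketch, not compiled: the classic caller idiom, making an IP header
 * contiguous before dereferencing it.  On failure m_pullup() has
 * already freed the chain, so the caller just gives up.
 */
static void
example_input(m)
	register struct mbuf *m;
{
	register struct ip *ip;

	if (m->m_len < sizeof(struct ip) &&
	    (m = m_pullup(m, sizeof(struct ip))) == 0)
		return;
	ip = mtod(m, struct ip *);
	/* ... */
}
#endif /* notdef */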

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
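
#ifdef notdef
/*
 * Sketch, not compiled: splitting a record off the front of a chain,
 * e.g. to queue the first "reclen" bytes separately.  "reclen" is
 * illustrative.
 */
static struct mbuf *
example_split(m, reclen)
	struct mbuf *m;
	int reclen;
{
	struct mbuf *tail;

	tail = m_split(m, reclen, M_DONTWAIT);	/* m keeps first reclen bytes */
	return (tail);				/* null on failure */
}
#endif /* notdef */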

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy)();
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
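
#ifdef notdef
/*
 * Sketch, not compiled: a network driver handing a received frame from
 * board memory to the protocols.  "rxbuf", "framelen" and "ifp" are
 * illustrative; a card needing special access to its memory would pass
 * its own copy routine instead of 0.
 */
static struct mbuf *
example_rxframe(rxbuf, framelen, ifp)
	char *rxbuf;
	int framelen;
	struct ifnet *ifp;
{
	return (m_devget(rxbuf, framelen, 0, ifp, 0));
}
#endif /* notdef */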