xref: /netbsd-src/share/man/man9/mbuf.9 (revision f1e042b8d2af97db97f5dd4497e7e3b416d950d3)
1.\"	$NetBSD: mbuf.9,v 1.67 2022/06/28 20:12:52 rillig Exp $
2.\"
3.\" Copyright (c) 1997 The NetBSD Foundation, Inc.
4.\" All rights reserved.
5.\"
6.\" This documentation is derived from text contributed to The NetBSD Foundation
7.\" by S.P.Zeidler (aka stargazer).
8.\"
9.\" Redistribution and use in source and binary forms, with or without
10.\" modification, are permitted provided that the following conditions
11.\" are met:
12.\" 1. Redistributions of source code must retain the above copyright
13.\"    notice, this list of conditions and the following disclaimer.
14.\" 2. Redistributions in binary form must reproduce the above copyright
15.\"    notice, this list of conditions and the following disclaimer in the
16.\"    documentation and/or other materials provided with the distribution.
17.\"
18.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28.\" POSSIBILITY OF SUCH DAMAGE.
29.\"
30.Dd June 28, 2022
31.Dt MBUF 9
32.Os
33.Sh NAME
34.Nm mbuf ,
35.Nm m_get ,
36.Nm m_gethdr ,
37.Nm m_devget ,
38.Nm m_copym ,
39.Nm m_copypacket ,
40.Nm m_copydata ,
41.Nm m_copyback ,
42.Nm m_copyback_cow ,
43.Nm m_cat ,
44.Nm m_dup ,
45.Nm m_makewritable ,
46.Nm m_pulldown ,
47.Nm m_pullup ,
48.Nm m_copyup ,
49.Nm m_split ,
50.Nm m_adj ,
51.Nm m_apply ,
52.Nm m_free ,
53.Nm m_freem ,
54.Nm mtod ,
55.Nm MGET ,
56.Nm MGETHDR ,
57.Nm MEXTMALLOC ,
58.Nm MEXTADD ,
59.Nm MCLGET ,
60.Nm m_copy_pkthdr ,
61.Nm m_move_pkthdr ,
62.Nm m_remove_pkthdr ,
63.Nm m_align ,
64.Nm M_LEADINGSPACE ,
65.Nm M_TRAILINGSPACE ,
66.Nm M_PREPEND ,
67.Nm MCHTYPE
68.Nd "functions and macros for managing memory used by networking code"
69.Sh SYNOPSIS
70.In sys/mbuf.h
71.Ft struct mbuf *
72.Fn m_get "int how" "int type"
73.Ft struct mbuf *
74.Fn m_gethdr "int how" "int type"
75.Ft struct mbuf *
76.Fn m_devget "char *buf" "int totlen" "int off" "struct ifnet *ifp"
77.Ft struct mbuf *
78.Fn m_copym "struct mbuf *m" "int off" "int len" "int wait"
79.Ft struct mbuf *
80.Fn m_copypacket "struct mbuf *m" "int how"
81.Ft void
82.Fn m_copydata "struct mbuf *m" "int off" "int len" "void *cp"
83.Ft void
84.Fn m_copyback "struct mbuf *m0" "int off" "int len" "void *cp"
85.Ft struct mbuf *
86.Fn m_copyback_cow "struct mbuf *m0" "int off" "int len" "void *cp" "int how"
87.Ft int
88.Fn m_makewritable "struct mbuf **mp" "int off" "int len" "int how"
89.Ft void
90.Fn m_cat "struct mbuf *m" "struct mbuf *n"
91.Ft struct mbuf *
92.Fn m_dup "struct mbuf *m" "int off" "int len" "int wait"
93.Ft struct mbuf *
94.Fn m_pulldown "struct mbuf *m" "int off" "int len" "int *offp"
95.Ft struct mbuf *
96.Fn m_pullup "struct mbuf *n" "int len"
97.Ft struct mbuf *
98.Fn m_copyup "struct mbuf *m" "int len" "int dstoff"
99.Ft struct mbuf *
100.Fn m_split "struct mbuf *m0" "int len" "int wait"
101.Ft void
102.Fn m_adj "struct mbuf *mp" "int req_len"
103.Ft int
104.Fn m_apply "struct mbuf *m" "int off" "int len" "int (*f)(void *, void *, unsigned int)" "void *arg"
105.Ft struct mbuf *
106.Fn m_free "struct mbuf *m"
107.Ft void
108.Fn m_freem "struct mbuf *m"
109.Ft datatype
110.Fn mtod "struct mbuf *m" "datatype"
111.Ft void
112.Fn MGET "struct mbuf *m" "int how" "int type"
113.Ft void
114.Fn MGETHDR "struct mbuf *m" "int how" "int type"
115.Ft void
116.Fn MEXTMALLOC "struct mbuf *m" "int len" "int how"
117.Ft void
118.Fn MEXTADD "struct mbuf *m" "void *buf" "int size" "int type" "void (*free)(struct mbuf *, void *, size_t, void *)" "void *arg"
119.Ft void
120.Fn MCLGET "struct mbuf *m" "int how"
121.Ft void
122.Fn m_copy_pkthdr "struct mbuf *to" "struct mbuf *from"
123.Ft void
124.Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
125.Ft void
126.Fn m_remove_pkthdr "struct mbuf *m"
127.Ft void
128.Fn m_align "struct mbuf *m" "int len"
129.Ft int
130.Fn M_LEADINGSPACE "struct mbuf *m"
131.Ft int
132.Fn M_TRAILINGSPACE "struct mbuf *m"
133.Ft void
134.Fn M_PREPEND "struct mbuf *m" "int plen" "int how"
135.Ft void
136.Fn MCHTYPE "struct mbuf *m" "int type"
137.Sh DESCRIPTION
138The
139.Nm
140functions and macros provide an easy and consistent way to handle
141a networking stack's memory management needs.
142An
143.Nm
144consists of a header and a data area.
145It is of a fixed size,
146.Dv MSIZE
147.Pq defined in Aq Pa machine/param.h ,
148which includes the size of the header.
149The header contains a pointer to the next
150.Nm
151in the
152.Sq "mbuf chain" ,
153a pointer to the next
154.Sq "mbuf chain" ,
155a pointer to the data area, the amount of data in this mbuf, its type
156and a
157.Dv flags
158field.
159.Pp
160The
161.Dv type
162variable can signify:
163.Bl -tag -compact -offset indent -width "XXXXXXXXXXX"
164.It Dv MT_FREE
165the mbuf should be on the ``free'' list
166.It Dv MT_DATA
167data was dynamically allocated
168.It Dv MT_HEADER
169data is a packet header
170.It Dv MT_SONAME
171data is a socket name
172.It Dv MT_SOOPTS
173data is socket options
174.It Dv MT_FTABLE
175data is the fragment reassembly header
176.It Dv MT_CONTROL
177mbuf contains ancillary \&(protocol control\&) data
178.It Dv MT_OOBDATA
179mbuf contains out-of-band data
180.El
181.Pp
182The
183.Dv flags
184variable contains information describing the
185.Nm ,
186notably:
187.Bl -tag -compact -offset indent -width "XXXXXXXXXXX"
188.It Dv M_EXT
189has external storage
190.It Dv M_PKTHDR
191is start of record
192.It Dv M_EOR
193is end of record
194.El
195.Pp
196If an
197.Nm
198designates the start of a record
199.Pq Dv M_PKTHDR ,
200its
201.Dv flags
202field may contain additional information describing the content of
203the record:
204.Bl -tag -compact -offset indent -width "XXXXXXXXXXX"
205.It Dv M_BCAST
206sent/received as link-level broadcast
207.It Dv M_MCAST
208sent/received as link-level multicast
209.El
210.Pp
211An
212.Nm
213may add a single
214.Sq "mbuf cluster"
215of
216.Dv MCLBYTES
217bytes
218.Pq defined in Aq Pa machine/param.h ,
219which has no additional overhead
220and is used instead of the internal data area; this is done when at least
221.Dv MINCLSIZE
222bytes of data must be stored.
223.Pp
224When the
225.Dv M_EXT
226flag is set on an mbuf,
227the external storage area could be shared among multiple mbufs.
228Therefore, care must be taken when overwriting the data content of an
229mbuf, because its external storage could be considered as read-only.
230.Bl -tag -width compact
231.It Fn m_get "int how" "int type"
232Allocates an mbuf and initializes it to contain internal data.
233The
234.Fa how
235parameter is a choice of
236.Dv M_WAIT / M_DONTWAIT
237from caller.
238.Dv M_WAIT
239means the call cannot fail, but may take forever.
240The
241.Fa type
242parameter is an mbuf type.
243.It Fn m_gethdr "int how" "int type"
244Allocates an mbuf and initializes it to contain a packet header and internal
245data.
246The
247.Fa how
248parameter is a choice of
249.Dv M_WAIT / M_DONTWAIT
250from caller.
251The
252.Fa type
253parameter is an mbuf type.
254.It Fn m_devget "char *buf" "int totlen" "int off" "struct ifnet *ifp"
255Copies
256.Fa totlen
257bytes from device local memory into mbufs.
258If parameter
259.Fa off
260is non-zero, the packet is supposed to be trailer-encapsulated and
261.Fa off
262bytes plus the type and length fields will be skipped before copying.
263Returns the top of the mbuf chain it created.
264.It Fn m_copym "struct mbuf *m" "int off" "int len" "int wait"
265Creates a copy of an mbuf chain starting
266.Fa off
267bytes from the beginning, continuing for
268.Fa len
269bytes.
270If the
271.Fa len
272requested is
273.Dv M_COPYALL ,
274the complete mbuf chain will be copied.
275The
276.Fa wait
277parameter is a choice of
278.Dv M_WAIT / M_DONTWAIT
279from caller.
280.It Fn m_copypacket "struct mbuf *m" "int how"
281Copies an entire packet, including header.
282This function is an optimization of the common case
283.Li m_copym ( m , 0 , Dv M_COPYALL , Fa how ) .
284However, contrary to
285.Fn m_copym ,
286a header must be present.
287It is incorrect to use
288.Fn m_copypacket
289with an mbuf that does not have a header.
290.It Fn m_copydata "struct mbuf *m" "int off" "int len" "void *cp"
291Copies
292.Fa len
293bytes of data from mbuf chain
294.Fa m
295into the buffer
296.Fa cp ,
297starting
298.Fa off
299bytes from the beginning.
300.It Fn m_copyback "struct mbuf *m0" "int off" "int len" "void *cp"
301Copies
302.Fa len
303bytes of data from buffer
304.Fa cp
305back into the mbuf chain
306.Fa m0 ,
307starting
308.Fa off
309bytes from the beginning of the chain, extending the mbuf chain if necessary.
310.Fn m_copyback
311can only fail when extending the chain.
312The caller should check for this kind of failure
313by checking the resulting length of the chain in that case.
314It is an error to use
315.Fn m_copyback
316on read-only mbufs.
317.It Fn m_copyback_cow "struct mbuf *m0" "int off" "int len" "void *cp" \
318"int how"
319Copies
320.Fa len
321bytes of data from buffer
322.Fa cp
323back into the mbuf chain
324.Fa m0
325as
326.Fn m_copyback
327does.
328Unlike
329.Fn m_copyback ,
330it is safe to use
331.Fn m_copyback_cow
332on read-only mbufs.
333If needed,
334.Fn m_copyback_cow
335automatically allocates new mbufs and adjusts the chain.
336On success, it returns a pointer to the resulting mbuf chain,
337and frees the original mbuf
338.Fa m0 .
339Otherwise, it returns
340.Dv NULL .
341The
342.Fa how
343parameter is a choice of
344.Dv M_WAIT / M_DONTWAIT
345from the caller.
346Unlike
347.Fn m_copyback ,
348extending the mbuf chain isn't supported.
349It is an error to attempt to extend the mbuf chain using
350.Fn m_copyback_cow .
351.It Fn m_makewritable "struct mbuf **mp" "int off" "int len" "int how"
352Rearranges an mbuf chain so that
353.Fa len
354bytes from offset
355.Fa off
356are writable.
357When it meets read-only mbufs, it allocates new mbufs, adjusts the chain as
358.Fn m_copyback_cow
359does, and copies the original content into them.
360.Fn m_makewritable
361does
362.Em not
363guarantee that all
364.Fa len
365bytes at
366.Fa off
367are consecutive.
368The
369.Fa how
370parameter is a choice of
371.Dv M_WAIT / M_DONTWAIT
372from the caller.
373.Fn m_makewritable
374preserves the contents of the mbuf chain even in the case of failure.
375It updates a pointer to the mbuf chain pointed to by
376.Fa mp .
377It returns 0 on success.
378Otherwise, it returns an error code, typically
379.Er ENOBUFS .
380.It Fn m_cat "struct mbuf *m" "struct mbuf *n"
381Concatenates mbuf chain
382.Fa n
383to
384.Fa m .
385Both chains must be of the same type; packet headers will
386.Em not
387be updated if present.
388.It Fn m_dup "struct mbuf *m" "int off" "int len" "int wait"
389Similarly to
390.Fn m_copym ,
391the function creates a copy of an mbuf chain starting
392.Fa off
393bytes from the beginning, continuing for
394.Fa len
395bytes.
396While
397.Fn m_copym
398tries to share external storage for mbufs with
399.Dv M_EXT
400flag,
401.Fn m_dup
402will deep-copy the whole data content into a new mbuf chain
403and avoids shared external storage.
404.It Fn m_pulldown "struct mbuf *m" "int off" "int len" "int *offp"
405Rearranges an mbuf chain so that
406.Fa len
407bytes from offset
408.Fa off
409are contiguous and in the data area of an mbuf.
410The return value points to an mbuf in the middle of the mbuf chain
411.Fa m .
412If we call the return value
413.Fa n ,
414the contiguous data region is available at
415.Li "mtod(n, void *) + *offp" ,
416or
417.Li "mtod(n, void *)"
418if
419.Fa offp
420is
421.Dv NULL .
422The top of the mbuf chain
423.Fa m ,
424and mbufs up to
425.Fa off ,
426will not be modified.
427On successful return, it is guaranteed that the mbuf pointed to by
428.Fa n
429does not have a shared external storage,
430therefore it is safe to update the contiguous region.
431Returns
432.Dv NULL
433and frees the mbuf chain on failure.
434.Fa len
435must be smaller than or equal to
436.Dv MCLBYTES .
437.It Fn m_pullup "struct mbuf *n" "int len"
438Rearranges an mbuf chain so that
439.Fa len
440bytes are contiguous
441and in the data area of an mbuf (so that
442.Fn mtod
443will work for a structure of size
444.Fa len ) .
445Returns the resulting
446mbuf chain on success, frees it and returns
447.Dv NULL
448on failure.
449If there is room, it will add up to
450.Dv max_protohdr
451-
452.Fa len
453extra bytes to the
454contiguous region to possibly avoid being called again.
455.Fa len
456must be smaller than or equal to
457.Dv MHLEN .
458.It Fn m_copyup "struct mbuf *m" "int len" "int dstoff"
459Similar to
460.Fn m_pullup
461but copies
462.Fa len
463bytes of data into a new mbuf at
464.Fa dstoff
465bytes into the mbuf.
466The
467.Fa dstoff
468argument aligns the data and leaves room for a link layer header.
469Returns the new
470mbuf chain on success, and frees the mbuf chain and returns
471.Dv NULL
472on failure.
473Note that
474the function does not allocate mbuf clusters, so
475.Fa len + dstoff
476must be less than
477.Dv MHLEN .
478.It Fn m_split "struct mbuf *m0" "int len" "int wait"
479Partitions an mbuf chain in two pieces, returning the tail,
480which is all but the first
481.Fa len
482bytes.
483In case of failure, it returns
484.Dv NULL
485and restores the chain to its original state.
486.It Fn m_adj "struct mbuf *mp" "int req_len"
487Shaves off
488.Fa req_len
489bytes from head or tail of the (valid) data area.
490If
491.Fa req_len
492is greater than zero, front bytes are being shaved off, if it's smaller,
493from the back (and if it is zero, the mbuf will stay bearded).
494This function does not move data in any way, but is used to manipulate the
495data area pointer and data length variable of the mbuf in a non-clobbering
496way.
497.It Fn m_apply "struct mbuf *m" "int off" "int len" "int (*f)(void *, void *, unsigned int)" "void *arg"
498Apply function
499.Fa f
500to the data in an mbuf chain starting
501.Fa off
502bytes from the beginning, continuing for
503.Fa len
504bytes.
505Neither
506.Fa off
507nor
508.Fa len
509may be negative.
510.Fa arg
511will be supplied as first argument for
512.Fa f ,
513the second argument will be the pointer to the data buffer of a
514packet (starting after
515.Fa off
516bytes in the stream), and the third argument is the amount
517of data in bytes in this call.
518If
519.Fa f
520returns something not equal to zero,
521.Fn m_apply
522will bail out, returning the return code of
523.Fa f .
524Upon successful completion it will return zero.
525.It Fn m_free "struct mbuf *m"
526Frees mbuf
527.Fa m .
528.It Fn m_freem "struct mbuf *m"
529Frees the mbuf chain beginning with
530.Fa m .
531This function contains the elementary sanity check for a
532.Dv NULL
533pointer.
534.It Fn mtod "struct mbuf *m" "datatype"
535Returns a pointer to the data contained in the specified mbuf
536.Fa m ,
537type-casted to the specified data type
538.Fa datatype .
539.It Fn MGET "struct mbuf *m" "int how" "int type"
540Allocates mbuf
541.Fa m
542and initializes it to contain internal data.
543See
544.Fn m_get .
545.It Fn MGETHDR "struct mbuf *m" "int how" "int type"
546Allocates mbuf
547.Fa m
548and initializes it to contain a packet header.
549See
550.Fn m_gethdr .
551.It Fn MEXTMALLOC "struct mbuf *m" "int len" "int how"
552Allocates external storage of size
553.Fa len
554for mbuf
555.Fa m .
556The
557.Fa how
558parameter is a choice of
559.Dv M_WAIT / M_DONTWAIT
560from caller.
561The flag
562.Dv M_EXT
563is set upon success.
564.It Fn MEXTADD "struct mbuf *m" "void *buf" "int size" "int type" "void (*free)(struct mbuf *, void *, size_t, void *)" "void *arg"
565Adds pre-allocated external storage
566.Fa buf
567to a normal mbuf
568.Fa m ;
569the parameters
570.Fa size ,
571.Fa type ,
572.Fa free
573and
574.Fa arg
575describe the external storage.
576.Fa size
577is the size of the storage,
578.Fa type
579describes its
580.Xr malloc 9
581type,
582.Fa free
583is a free routine (if not the usual one), and
584.Fa arg
585is a possible argument to the free routine.
586The flag
587.Dv M_EXT
588is set upon success.
589If a free routine is specified, it will be called when the mbuf is freed.
590In the former case, the first argument to the free routine is the mbuf
591.Fa m
592and the routine is expected to free it in addition to the external storage
593pointed to by the second argument.
594In the latter case, the first argument to the routine is NULL.
595.It Fn MCLGET "struct mbuf *m" "int how"
596Allocates and adds an mbuf cluster to a normal mbuf
597.Fa m .
598The
599.Fa how
600parameter is a choice of
601.Dv M_WAIT / M_DONTWAIT
602from caller.
603The flag
604.Dv M_EXT
605is set upon success.
606.It Fn m_copy_pkthdr "struct mbuf *to" "struct mbuf *from"
607Copies the mbuf pkthdr from mbuf
608.Fa from
609to mbuf
610.Fa to .
611.Fa from
612must have the type flag
613.Dv M_PKTHDR
614set, and
615.Fa to
616must be empty.
617.It Fn m_move_pkthdr "struct mbuf *to" "struct mbuf *from"
618Moves the mbuf pkthdr from mbuf
619.Fa from
620to mbuf
621.Fa to .
622.Fa from
623must have the type flag
624.Dv M_PKTHDR
625set, and
626.Fa to
627must be empty.
628The flag
629.Dv M_PKTHDR
630in mbuf
631.Fa from
632will be cleared.
633.It Fn m_remove_pkthdr "struct mbuf *m"
634Removes the mbuf pkthdr from mbuf
635.Fa m .
636.Fa m
637must have the flag
638.Dv M_PKTHDR
639set.
640This flag will be cleared.
641.It Fn m_align "struct mbuf *m" "int len"
642Sets the data pointer of a newly allocated mbuf
643.Fa m
644to
645.Fa len
646bytes from the end of the mbuf data area, so that
647.Fa len
648bytes of data written to the mbuf
649.Fa m ,
650starting at the data pointer, will be aligned to the end of the data area.
651.It Fn M_LEADINGSPACE "struct mbuf *m"
652Returns the amount of space available before the current start of valid
653data in mbuf
654.Fa m .
655Returns 0 if the mbuf data part is shared across multiple mbufs
656.Pq i.e. not writable .
657.It Fn M_TRAILINGSPACE "struct mbuf *m"
658Returns the amount of space available after the current end of valid
659data in mbuf
660.Fa m .
661Returns 0 if the mbuf data part is shared across multiple mbufs
662.Pq i.e. not writable .
663.It Fn M_PREPEND "struct mbuf *m" "int plen" "int how"
664Prepends space of size
665.Fa plen
666to mbuf
667.Fa m .
668If a new mbuf must be allocated,
669.Fa how
670specifies whether to wait.
671If
672.Fa how
673is
674.Dv M_DONTWAIT
675and allocation fails, the original mbuf chain is freed and
676.Fa m
677is set to
678.Dv NULL .
679It is illegal for the
680.Fa plen
681parameter to be greater than
682.Dv MHLEN .
683.It Fn MCHTYPE "struct mbuf *m" "int type"
684Change mbuf
685.Fa m
686to new type
687.Fa type .
688.El
689.Sh CODE REFERENCES
690The
691.Nm
692management functions are implemented within the file
693.Pa sys/kern/uipc_mbuf.c .
694Function prototypes, and the functions implemented as macros
695are located in
696.Pa sys/sys/mbuf.h .
697.Sh SEE ALSO
698.Pa /usr/share/doc/reference/ref9/net ,
699.Xr netstat 1 ,
700.Xr m_tag 9 ,
701.Xr malloc 9
702.Rs
703.%A Jun-ichiro Hagino
704.%T "Mbuf issues in 4.4BSD IPv6/IPsec support (experiences from KAME IPv6/IPsec implementation)"
705.%B "Proceedings of the freenix track: 2000 USENIX annual technical conference"
706.%D June 2000
707.Re
708.Sh AUTHORS
709.An -nosplit
710The original mbuf data structures were designed by Rob Gurwitz
711when he did the initial TCP/IP implementation at BBN.
712.Pp
713Further extensions and enhancements were made by Bill Joy, Sam Leffler,
714and Mike Karels at CSRG.
715.Pp
716Current implementation of external storage by
717.An Matt Thomas
718.Aq matt@3am-software.com
719and
720.An Jason R. Thorpe
721.Aq thorpej@NetBSD.org .
722