xref: /netbsd-src/sys/netipsec/ipsec_mbuf.c (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1 /*	$NetBSD: ipsec_mbuf.c,v 1.11 2008/04/23 06:09:05 thorpej Exp $	*/
2 /*-
3  * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.11 2008/04/23 06:09:05 thorpej Exp $");
32 
33 /*
34  * IPsec-specific mbuf routines.
35  */
36 
37 #ifdef __FreeBSD__
38 #include "opt_param.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 
46 #include <net/route.h>
47 #include <netinet/in.h>
48 
49 #include <netipsec/ipsec.h>
50 #include <netipsec/ipsec_var.h>
51 #include <netipsec/ipsec_private.h>
52 
53 #include <netipsec/ipsec_osdep.h>
54 #include <net/net_osdep.h>
55 
56 extern	struct mbuf *m_getptr(struct mbuf *, int, int *);
57 
58 /*
59  * Create a writable copy of the mbuf chain.  While doing this
60  * we compact the chain with a goal of producing a chain with
61  * at most two mbufs.  The second mbuf in this chain is likely
62  * to be a cluster.  The primary purpose of this work is to create
63  * a writable packet for encryption, compression, etc.  The
64  * secondary goal is to linearize the data so the data can be
65  * passed to crypto hardware in the most efficient manner possible.
66  */
67 struct mbuf *
68 m_clone(struct mbuf *m0)
69 {
70 	struct mbuf *m, *mprev;
71 	struct mbuf *n, *mfirst, *mlast;
72 	int len, off;
73 
74 	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));
75 
76 	mprev = NULL;
77 	for (m = m0; m != NULL; m = mprev->m_next) {
78 		/*
79 		 * Regular mbufs are ignored unless there's a cluster
80 		 * in front of it that we can use to coalesce.  We do
81 		 * the latter mainly so later clusters can be coalesced
82 		 * also w/o having to handle them specially (i.e. convert
83 		 * mbuf+cluster -> cluster).  This optimization is heavily
84 		 * influenced by the assumption that we're running over
85 		 * Ethernet where MCLBYTES is large enough that the max
86 		 * packet size will permit lots of coalescing into a
87 		 * single cluster.  This in turn permits efficient
88 		 * crypto operations, especially when using hardware.
89 		 */
90 		if ((m->m_flags & M_EXT) == 0) {
91 			if (mprev && (mprev->m_flags & M_EXT) &&
92 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
93 				/* XXX: this ignores mbuf types */
94 				memcpy(mtod(mprev, char *) + mprev->m_len,
95 				       mtod(m, char *), m->m_len);
96 				mprev->m_len += m->m_len;
97 				mprev->m_next = m->m_next;	/* unlink from chain */
98 				m_free(m);			/* reclaim mbuf */
99 				IPSEC_STATINC(IPSEC_STAT_MBCOALESCED);
100 			} else {
101 				mprev = m;
102 			}
103 			continue;
104 		}
105 		/*
106 		 * Writable mbufs are left alone (for now).  Note
107 		 * that for 4.x systems it's not possible to identify
108 		 * whether or not mbufs with external buffers are
109 		 * writable unless they use clusters.
110 		 */
111 		if (M_EXT_WRITABLE(m)) {
112 			mprev = m;
113 			continue;
114 		}
115 
116 		/*
117 		 * Not writable, replace with a copy or coalesce with
118 		 * the previous mbuf if possible (since we have to copy
119 		 * it anyway, we try to reduce the number of mbufs and
120 		 * clusters so that future work is easier).
121 		 */
122 		IPSEC_ASSERT(m->m_flags & M_EXT,
123 			("m_clone: m_flags 0x%x", m->m_flags));
124 		/* NB: we only coalesce into a cluster or larger */
125 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
126 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
127 			/* XXX: this ignores mbuf types */
128 			memcpy(mtod(mprev, char *) + mprev->m_len,
129 			       mtod(m, char *), m->m_len);
130 			mprev->m_len += m->m_len;
131 			mprev->m_next = m->m_next;	/* unlink from chain */
132 			m_free(m);			/* reclaim mbuf */
133 			IPSEC_STATINC(IPSEC_STAT_CLCOALESCED);
134 			continue;
135 		}
136 
137 		/*
138 		 * Allocate new space to hold the copy...
139 		 */
140 		/* XXX why can M_PKTHDR be set past the first mbuf? */
141 		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
142 			/*
143 			 * NB: if a packet header is present we must
144 			 * allocate the mbuf separately from any cluster
145 			 * because M_MOVE_PKTHDR will smash the data
146 			 * pointer and drop the M_EXT marker.
147 			 */
148 			MGETHDR(n, M_DONTWAIT, m->m_type);
149 			if (n == NULL) {
150 				m_freem(m0);
151 				return (NULL);
152 			}
153 			M_MOVE_PKTHDR(n, m);
154 			MCLGET(n, M_DONTWAIT);
155 			if ((n->m_flags & M_EXT) == 0) {
156 				m_free(n);
157 				m_freem(m0);
158 				return (NULL);
159 			}
160 		} else {
161 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
162 			if (n == NULL) {
163 				m_freem(m0);
164 				return (NULL);
165 			}
166 		}
167 		/*
168 		 * ... and copy the data.  We deal with jumbo mbufs
169 		 * (i.e. m_len > MCLBYTES) by splitting them into
170 		 * clusters.  We could just malloc a buffer and make
171 		 * it external but too many device drivers don't know
172 		 * how to break up the non-contiguous memory when
173 		 * doing DMA.
174 		 */
175 		len = m->m_len;
176 		off = 0;
177 		mfirst = n;
178 		mlast = NULL;
179 		for (;;) {
180 			int cc = min(len, MCLBYTES);
181 			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
182 			n->m_len = cc;
183 			if (mlast != NULL)
184 				mlast->m_next = n;
185 			mlast = n;
186 			IPSEC_STATINC(IPSEC_STAT_CLCOPIED);
187 
188 			len -= cc;
189 			if (len <= 0)
190 				break;
191 			off += cc;
192 
193 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
194 			if (n == NULL) {
195 				m_freem(mfirst);
196 				m_freem(m0);
197 				return (NULL);
198 			}
199 		}
200 		n->m_next = m->m_next;
201 		if (mprev == NULL)
202 			m0 = mfirst;		/* new head of chain */
203 		else
204 			mprev->m_next = mfirst;	/* replace old mbuf */
205 		m_free(m);			/* release old mbuf */
206 		mprev = mfirst;
207 	}
208 	return (m0);
209 }
210 
211 /*
212  * Make space for a new header of length hlen at skip bytes
213  * into the packet.  When doing this we allocate new mbufs only
214  * when absolutely necessary.  The mbuf where the new header
215  * is to go is returned together with an offset into the mbuf.
216  * If NULL is returned then the mbuf chain may have been modified;
217  * the caller is assumed to always free the chain.
218  */
219 struct mbuf *
220 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
221 {
222 	struct mbuf *m;
223 	unsigned remain;
224 
225 	IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
226 	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
227 
228 	for (m = m0; m && skip > m->m_len; m = m->m_next)
229 		skip -= m->m_len;
230 	if (m == NULL)
231 		return (NULL);
232 	/*
233 	 * At this point skip is the offset into the mbuf m
234 	 * where the new header should be placed.  Figure out
235 	 * if there's space to insert the new header.  If so,
236 	 * and copying the remainder makese sense then do so.
237 	 * Otherwise insert a new mbuf in the chain, splitting
238 	 * the contents of m as needed.
239 	 */
240 	remain = m->m_len - skip;		/* data to move */
241 	if (hlen > M_TRAILINGSPACE(m)) {
242 		struct mbuf *n0, *n, **np;
243 		int todo, len, done, alloc;
244 
245 		n0 = NULL;
246 		np = &n0;
247 		alloc = 0;
248 		done = 0;
249 		todo = remain;
250 		while (todo > 0) {
251 			if (todo > MHLEN) {
252 				n = m_getcl(M_DONTWAIT, m->m_type, 0);
253 				len = MCLBYTES;
254 			}
255 			else {
256 				n = m_get(M_DONTWAIT, m->m_type);
257 				len = MHLEN;
258 			}
259 			if (n == NULL) {
260 				m_freem(n0);
261 				return NULL;
262 			}
263 			*np = n;
264 			np = &n->m_next;
265 			alloc++;
266 			len = min(todo, len);
267 			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
268 			n->m_len = len;
269 			done += len;
270 			todo -= len;
271 		}
272 
273 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
274 			m->m_len = skip + hlen;
275 			*off = skip;
276 			if (n0 != NULL) {
277 				*np = m->m_next;
278 				m->m_next = n0;
279 			}
280 		}
281 		else {
282 			n = m_get(M_DONTWAIT, m->m_type);
283 			if (n == NULL) {
284 				m_freem(n0);
285 				return NULL;
286 			}
287 			alloc++;
288 
289 			if ((n->m_next = n0) == NULL)
290 				np = &n->m_next;
291 			n0 = n;
292 
293 			*np = m->m_next;
294 			m->m_next = n0;
295 
296 			n->m_len = hlen;
297 			m->m_len = skip;
298 
299 			m = n;			/* header is at front ... */
300 			*off = 0;		/* ... of new mbuf */
301 		}
302 
303 		IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc);
304 	} else {
305 		/*
306 		 * Copy the remainder to the back of the mbuf
307 		 * so there's space to write the new header.
308 		 */
309 		/* XXX can this be memcpy? does it handle overlap? */
310 		ovbcopy(mtod(m, char *) + skip,
311 			mtod(m, char *) + skip + hlen, remain);
312 		m->m_len += hlen;
313 		*off = skip;
314 	}
315 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
316 	return m;
317 }
318 
319 /*
320  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
321  * length is updated, and a pointer to the first byte of the padding
322  * (which is guaranteed to be all in one mbuf) is returned.
323  */
324 void *
325 m_pad(struct mbuf *m, int n)
326 {
327 	register struct mbuf *m0, *m1;
328 	register int len, pad;
329 	void *retval;
330 
331 	if (n <= 0) {  /* No stupid arguments. */
332 		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
333 		m_freem(m);
334 		return NULL;
335 	}
336 
337 	len = m->m_pkthdr.len;
338 	pad = n;
339 	m0 = m;
340 
341 	while (m0->m_len < len) {
342 IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
343 		len -= m0->m_len;
344 		m0 = m0->m_next;
345 	}
346 
347 	if (m0->m_len != len) {
348 		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
349 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
350 
351 		m_freem(m);
352 		return NULL;
353 	}
354 
355 	/* Check for zero-length trailing mbufs, and find the last one. */
356 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
357 		if (m1->m_next->m_len != 0) {
358 			DPRINTF(("m_pad: length mismatch (should be %d "
359 			    "instead of %d)\n",
360 			    m->m_pkthdr.len,
361 			    m->m_pkthdr.len + m1->m_next->m_len));
362 
363 			m_freem(m);
364 			return NULL;
365 		}
366 
367 		m0 = m1->m_next;
368 	}
369 
370 	if (pad > M_TRAILINGSPACE(m0)) {
371 		/* Add an mbuf to the chain. */
372 		MGET(m1, M_DONTWAIT, MT_DATA);
373 		if (m1 == 0) {
374 			m_freem(m0);
375 			DPRINTF(("m_pad: unable to get extra mbuf\n"));
376 			return NULL;
377 		}
378 
379 		m0->m_next = m1;
380 		m0 = m1;
381 		m0->m_len = 0;
382 	}
383 
384 	retval = m0->m_data + m0->m_len;
385 	m0->m_len += pad;
386 	m->m_pkthdr.len += pad;
387 
388 	return retval;
389 }
390 
391 /*
392  * Remove hlen data at offset skip in the packet.  This is used by
393  * the protocols strip protocol headers and associated data (e.g. IV,
394  * authenticator) on input.
395  */
396 int
397 m_striphdr(struct mbuf *m, int skip, int hlen)
398 {
399 	struct mbuf *m1;
400 	int roff;
401 
402 	/* Find beginning of header */
403 	m1 = m_getptr(m, skip, &roff);
404 	if (m1 == NULL)
405 		return (EINVAL);
406 
407 	/* Remove the header and associated data from the mbuf. */
408 	if (roff == 0) {
409 		/* The header was at the beginning of the mbuf */
410 		IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT);
411 		m_adj(m1, hlen);
412 		if ((m1->m_flags & M_PKTHDR) == 0)
413 			m->m_pkthdr.len -= hlen;
414 	} else if (roff + hlen >= m1->m_len) {
415 		struct mbuf *mo;
416 
417 		/*
418 		 * Part or all of the header is at the end of this mbuf,
419 		 * so first let's remove the remainder of the header from
420 		 * the beginning of the remainder of the mbuf chain, if any.
421 		 */
422 		IPSEC_STATINC(IPSEC_STAT_INPUT_END);
423 		if (roff + hlen > m1->m_len) {
424 			/* Adjust the next mbuf by the remainder */
425 			m_adj(m1->m_next, roff + hlen - m1->m_len);
426 
427 			/* The second mbuf is guaranteed not to have a pkthdr... */
428 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
429 		}
430 
431 		/* Now, let's unlink the mbuf chain for a second...*/
432 		mo = m1->m_next;
433 		m1->m_next = NULL;
434 
435 		/* ...and trim the end of the first part of the chain...sick */
436 		m_adj(m1, -(m1->m_len - roff));
437 		if ((m1->m_flags & M_PKTHDR) == 0)
438 			m->m_pkthdr.len -= (m1->m_len - roff);
439 
440 		/* Finally, let's relink */
441 		m1->m_next = mo;
442 	} else {
443 		/*
444 		 * The header lies in the "middle" of the mbuf; copy
445 		 * the remainder of the mbuf down over the header.
446 		 */
447 		IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE);
448 		ovbcopy(mtod(m1, u_char *) + roff + hlen,
449 		      mtod(m1, u_char *) + roff,
450 		      m1->m_len - (roff + hlen));
451 		m1->m_len -= hlen;
452 		m->m_pkthdr.len -= hlen;
453 	}
454 	return (0);
455 }
456 
457 /*
458  * Diagnostic routine to check mbuf alignment as required by the
459  * crypto device drivers (that use DMA).
460  */
461 void
462 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
463 {
464 	int roff;
465 	struct mbuf *m = m_getptr(m0, off, &roff);
466 	void *addr;
467 
468 	if (m == NULL)
469 		return;
470 	printf("%s (off %u len %u): ", where, off, len);
471 	addr = mtod(m, char *) + roff;
472 	do {
473 		int mlen;
474 
475 		if (((uintptr_t) addr) & 3) {
476 			printf("addr misaligned %p,", addr);
477 			break;
478 		}
479 		mlen = m->m_len;
480 		if (mlen > len)
481 			mlen = len;
482 		len -= mlen;
483 		if (len && (mlen & 3)) {
484 			printf("len mismatch %u,", mlen);
485 			break;
486 		}
487 		m = m->m_next;
488 		addr = m ? mtod(m, void *) : NULL;
489 	} while (m && len > 0);
490 	for (m = m0; m; m = m->m_next)
491 		printf(" [%p:%u]", mtod(m, void *), m->m_len);
492 	printf("\n");
493 }
494