xref: /netbsd-src/sys/netipsec/ipsec_mbuf.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /*	$NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $	*/
2 /*-
3  * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $");
32 
33 /*
34  * IPsec-specific mbuf routines.
35  */
36 
37 #ifdef __FreeBSD__
38 #include "opt_param.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 
46 #include <net/route.h>
47 #include <netinet/in.h>
48 
49 #include <netipsec/ipsec.h>
50 #include <netipsec/ipsec_var.h>
51 #include <netipsec/ipsec_private.h>
52 
53 #include <netipsec/ipsec_osdep.h>
54 #include <net/net_osdep.h>
55 
56 /*
57  * Create a writable copy of the mbuf chain.  While doing this
58  * we compact the chain with a goal of producing a chain with
59  * at most two mbufs.  The second mbuf in this chain is likely
60  * to be a cluster.  The primary purpose of this work is to create
61  * a writable packet for encryption, compression, etc.  The
62  * secondary goal is to linearize the data so the data can be
63  * passed to crypto hardware in the most efficient manner possible.
64  */
65 struct mbuf *
66 m_clone(struct mbuf *m0)
67 {
68 	struct mbuf *m, *mprev;
69 	struct mbuf *n, *mfirst, *mlast;
70 	int len, off;
71 
72 	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));
73 
74 	mprev = NULL;
75 	for (m = m0; m != NULL; m = mprev->m_next) {
76 		/*
77 		 * Regular mbufs are ignored unless there's a cluster
78 		 * in front of it that we can use to coalesce.  We do
79 		 * the latter mainly so later clusters can be coalesced
80 		 * also w/o having to handle them specially (i.e. convert
81 		 * mbuf+cluster -> cluster).  This optimization is heavily
82 		 * influenced by the assumption that we're running over
83 		 * Ethernet where MCLBYTES is large enough that the max
84 		 * packet size will permit lots of coalescing into a
85 		 * single cluster.  This in turn permits efficient
86 		 * crypto operations, especially when using hardware.
87 		 */
88 		if ((m->m_flags & M_EXT) == 0) {
89 			if (mprev && (mprev->m_flags & M_EXT) &&
90 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
91 				/* XXX: this ignores mbuf types */
92 				memcpy(mtod(mprev, char *) + mprev->m_len,
93 				       mtod(m, char *), m->m_len);
94 				mprev->m_len += m->m_len;
95 				mprev->m_next = m->m_next;	/* unlink from chain */
96 				m_free(m);			/* reclaim mbuf */
97 				IPSEC_STATINC(IPSEC_STAT_MBCOALESCED);
98 			} else {
99 				mprev = m;
100 			}
101 			continue;
102 		}
103 		/*
104 		 * Writable mbufs are left alone (for now).  Note
105 		 * that for 4.x systems it's not possible to identify
106 		 * whether or not mbufs with external buffers are
107 		 * writable unless they use clusters.
108 		 */
109 		if (M_EXT_WRITABLE(m)) {
110 			mprev = m;
111 			continue;
112 		}
113 
114 		/*
115 		 * Not writable, replace with a copy or coalesce with
116 		 * the previous mbuf if possible (since we have to copy
117 		 * it anyway, we try to reduce the number of mbufs and
118 		 * clusters so that future work is easier).
119 		 */
120 		IPSEC_ASSERT(m->m_flags & M_EXT,
121 			("m_clone: m_flags 0x%x", m->m_flags));
122 		/* NB: we only coalesce into a cluster or larger */
123 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
124 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
125 			/* XXX: this ignores mbuf types */
126 			memcpy(mtod(mprev, char *) + mprev->m_len,
127 			       mtod(m, char *), m->m_len);
128 			mprev->m_len += m->m_len;
129 			mprev->m_next = m->m_next;	/* unlink from chain */
130 			m_free(m);			/* reclaim mbuf */
131 			IPSEC_STATINC(IPSEC_STAT_CLCOALESCED);
132 			continue;
133 		}
134 
135 		/*
136 		 * Allocate new space to hold the copy...
137 		 */
138 		/* XXX why can M_PKTHDR be set past the first mbuf? */
139 		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
140 			/*
141 			 * NB: if a packet header is present we must
142 			 * allocate the mbuf separately from any cluster
143 			 * because M_MOVE_PKTHDR will smash the data
144 			 * pointer and drop the M_EXT marker.
145 			 */
146 			MGETHDR(n, M_DONTWAIT, m->m_type);
147 			if (n == NULL) {
148 				m_freem(m0);
149 				return (NULL);
150 			}
151 			M_MOVE_PKTHDR(n, m);
152 			MCLGET(n, M_DONTWAIT);
153 			if ((n->m_flags & M_EXT) == 0) {
154 				m_free(n);
155 				m_freem(m0);
156 				return (NULL);
157 			}
158 		} else {
159 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
160 			if (n == NULL) {
161 				m_freem(m0);
162 				return (NULL);
163 			}
164 		}
165 		/*
166 		 * ... and copy the data.  We deal with jumbo mbufs
167 		 * (i.e. m_len > MCLBYTES) by splitting them into
168 		 * clusters.  We could just malloc a buffer and make
169 		 * it external but too many device drivers don't know
170 		 * how to break up the non-contiguous memory when
171 		 * doing DMA.
172 		 */
173 		len = m->m_len;
174 		off = 0;
175 		mfirst = n;
176 		mlast = NULL;
177 		for (;;) {
178 			int cc = min(len, MCLBYTES);
179 			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
180 			n->m_len = cc;
181 			if (mlast != NULL)
182 				mlast->m_next = n;
183 			mlast = n;
184 			IPSEC_STATINC(IPSEC_STAT_CLCOPIED);
185 
186 			len -= cc;
187 			if (len <= 0)
188 				break;
189 			off += cc;
190 
191 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
192 			if (n == NULL) {
193 				m_freem(mfirst);
194 				m_freem(m0);
195 				return (NULL);
196 			}
197 		}
198 		n->m_next = m->m_next;
199 		if (mprev == NULL)
200 			m0 = mfirst;		/* new head of chain */
201 		else
202 			mprev->m_next = mfirst;	/* replace old mbuf */
203 		m_free(m);			/* release old mbuf */
204 		mprev = mfirst;
205 	}
206 	return (m0);
207 }
208 
209 /*
210  * Make space for a new header of length hlen at skip bytes
211  * into the packet.  When doing this we allocate new mbufs only
212  * when absolutely necessary.  The mbuf where the new header
213  * is to go is returned together with an offset into the mbuf.
214  * If NULL is returned then the mbuf chain may have been modified;
215  * the caller is assumed to always free the chain.
216  */
217 struct mbuf *
218 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
219 {
220 	struct mbuf *m;
221 	unsigned remain;
222 
223 	IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
224 	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
225 
226 	for (m = m0; m && skip > m->m_len; m = m->m_next)
227 		skip -= m->m_len;
228 	if (m == NULL)
229 		return (NULL);
230 	/*
231 	 * At this point skip is the offset into the mbuf m
232 	 * where the new header should be placed.  Figure out
233 	 * if there's space to insert the new header.  If so,
234 	 * and copying the remainder makese sense then do so.
235 	 * Otherwise insert a new mbuf in the chain, splitting
236 	 * the contents of m as needed.
237 	 */
238 	remain = m->m_len - skip;		/* data to move */
239 	if (hlen > M_TRAILINGSPACE(m)) {
240 		struct mbuf *n0, *n, **np;
241 		int todo, len, done, alloc;
242 
243 		n0 = NULL;
244 		np = &n0;
245 		alloc = 0;
246 		done = 0;
247 		todo = remain;
248 		while (todo > 0) {
249 			if (todo > MHLEN) {
250 				n = m_getcl(M_DONTWAIT, m->m_type, 0);
251 				len = MCLBYTES;
252 			}
253 			else {
254 				n = m_get(M_DONTWAIT, m->m_type);
255 				len = MHLEN;
256 			}
257 			if (n == NULL) {
258 				m_freem(n0);
259 				return NULL;
260 			}
261 			*np = n;
262 			np = &n->m_next;
263 			alloc++;
264 			len = min(todo, len);
265 			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
266 			n->m_len = len;
267 			done += len;
268 			todo -= len;
269 		}
270 
271 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
272 			m->m_len = skip + hlen;
273 			*off = skip;
274 			if (n0 != NULL) {
275 				*np = m->m_next;
276 				m->m_next = n0;
277 			}
278 		}
279 		else {
280 			n = m_get(M_DONTWAIT, m->m_type);
281 			if (n == NULL) {
282 				m_freem(n0);
283 				return NULL;
284 			}
285 			alloc++;
286 
287 			if ((n->m_next = n0) == NULL)
288 				np = &n->m_next;
289 			n0 = n;
290 
291 			*np = m->m_next;
292 			m->m_next = n0;
293 
294 			n->m_len = hlen;
295 			m->m_len = skip;
296 
297 			m = n;			/* header is at front ... */
298 			*off = 0;		/* ... of new mbuf */
299 		}
300 
301 		IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc);
302 	} else {
303 		/*
304 		 * Copy the remainder to the back of the mbuf
305 		 * so there's space to write the new header.
306 		 */
307 		/* XXX can this be memcpy? does it handle overlap? */
308 		ovbcopy(mtod(m, char *) + skip,
309 			mtod(m, char *) + skip + hlen, remain);
310 		m->m_len += hlen;
311 		*off = skip;
312 	}
313 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
314 	return m;
315 }
316 
317 /*
318  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
319  * length is updated, and a pointer to the first byte of the padding
320  * (which is guaranteed to be all in one mbuf) is returned.
321  */
322 void *
323 m_pad(struct mbuf *m, int n)
324 {
325 	register struct mbuf *m0, *m1;
326 	register int len, pad;
327 	void *retval;
328 
329 	if (n <= 0) {  /* No stupid arguments. */
330 		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
331 		m_freem(m);
332 		return NULL;
333 	}
334 
335 	len = m->m_pkthdr.len;
336 	pad = n;
337 	m0 = m;
338 
339 	while (m0->m_len < len) {
340 IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
341 		len -= m0->m_len;
342 		m0 = m0->m_next;
343 	}
344 
345 	if (m0->m_len != len) {
346 		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
347 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
348 
349 		m_freem(m);
350 		return NULL;
351 	}
352 
353 	/* Check for zero-length trailing mbufs, and find the last one. */
354 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
355 		if (m1->m_next->m_len != 0) {
356 			DPRINTF(("m_pad: length mismatch (should be %d "
357 			    "instead of %d)\n",
358 			    m->m_pkthdr.len,
359 			    m->m_pkthdr.len + m1->m_next->m_len));
360 
361 			m_freem(m);
362 			return NULL;
363 		}
364 
365 		m0 = m1->m_next;
366 	}
367 
368 	if (pad > M_TRAILINGSPACE(m0)) {
369 		/* Add an mbuf to the chain. */
370 		MGET(m1, M_DONTWAIT, MT_DATA);
371 		if (m1 == 0) {
372 			m_freem(m0);
373 			DPRINTF(("m_pad: unable to get extra mbuf\n"));
374 			return NULL;
375 		}
376 
377 		m0->m_next = m1;
378 		m0 = m1;
379 		m0->m_len = 0;
380 	}
381 
382 	retval = m0->m_data + m0->m_len;
383 	m0->m_len += pad;
384 	m->m_pkthdr.len += pad;
385 
386 	return retval;
387 }
388 
389 /*
390  * Remove hlen data at offset skip in the packet.  This is used by
391  * the protocols strip protocol headers and associated data (e.g. IV,
392  * authenticator) on input.
393  */
394 int
395 m_striphdr(struct mbuf *m, int skip, int hlen)
396 {
397 	struct mbuf *m1;
398 	int roff;
399 
400 	/* Find beginning of header */
401 	m1 = m_getptr(m, skip, &roff);
402 	if (m1 == NULL)
403 		return (EINVAL);
404 
405 	/* Remove the header and associated data from the mbuf. */
406 	if (roff == 0) {
407 		/* The header was at the beginning of the mbuf */
408 		IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT);
409 		m_adj(m1, hlen);
410 		if ((m1->m_flags & M_PKTHDR) == 0)
411 			m->m_pkthdr.len -= hlen;
412 	} else if (roff + hlen >= m1->m_len) {
413 		struct mbuf *mo;
414 
415 		/*
416 		 * Part or all of the header is at the end of this mbuf,
417 		 * so first let's remove the remainder of the header from
418 		 * the beginning of the remainder of the mbuf chain, if any.
419 		 */
420 		IPSEC_STATINC(IPSEC_STAT_INPUT_END);
421 		if (roff + hlen > m1->m_len) {
422 			/* Adjust the next mbuf by the remainder */
423 			m_adj(m1->m_next, roff + hlen - m1->m_len);
424 
425 			/* The second mbuf is guaranteed not to have a pkthdr... */
426 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
427 		}
428 
429 		/* Now, let's unlink the mbuf chain for a second...*/
430 		mo = m1->m_next;
431 		m1->m_next = NULL;
432 
433 		/* ...and trim the end of the first part of the chain...sick */
434 		m_adj(m1, -(m1->m_len - roff));
435 		if ((m1->m_flags & M_PKTHDR) == 0)
436 			m->m_pkthdr.len -= (m1->m_len - roff);
437 
438 		/* Finally, let's relink */
439 		m1->m_next = mo;
440 	} else {
441 		/*
442 		 * The header lies in the "middle" of the mbuf; copy
443 		 * the remainder of the mbuf down over the header.
444 		 */
445 		IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE);
446 		ovbcopy(mtod(m1, u_char *) + roff + hlen,
447 		      mtod(m1, u_char *) + roff,
448 		      m1->m_len - (roff + hlen));
449 		m1->m_len -= hlen;
450 		m->m_pkthdr.len -= hlen;
451 	}
452 	return (0);
453 }
454 
455 /*
456  * Diagnostic routine to check mbuf alignment as required by the
457  * crypto device drivers (that use DMA).
458  */
459 void
460 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
461 {
462 	int roff;
463 	struct mbuf *m = m_getptr(m0, off, &roff);
464 	void *addr;
465 
466 	if (m == NULL)
467 		return;
468 	printf("%s (off %u len %u): ", where, off, len);
469 	addr = mtod(m, char *) + roff;
470 	do {
471 		int mlen;
472 
473 		if (((uintptr_t) addr) & 3) {
474 			printf("addr misaligned %p,", addr);
475 			break;
476 		}
477 		mlen = m->m_len;
478 		if (mlen > len)
479 			mlen = len;
480 		len -= mlen;
481 		if (len && (mlen & 3)) {
482 			printf("len mismatch %u,", mlen);
483 			break;
484 		}
485 		m = m->m_next;
486 		addr = m ? mtod(m, void *) : NULL;
487 	} while (m && len > 0);
488 	for (m = m0; m; m = m->m_next)
489 		printf(" [%p:%u]", mtod(m, void *), m->m_len);
490 	printf("\n");
491 }
492