xref: /netbsd-src/sys/netipsec/ipsec_mbuf.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: ipsec_mbuf.c,v 1.10 2007/12/14 20:55:22 seanb Exp $	*/
2 /*-
3  * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.10 2007/12/14 20:55:22 seanb Exp $");
32 
33 /*
34  * IPsec-specific mbuf routines.
35  */
36 
37 #ifdef __FreeBSD__
38 #include "opt_param.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 
46 #include <net/route.h>
47 #include <netinet/in.h>
48 
49 #include <netipsec/ipsec.h>
50 #include <netipsec/ipsec_var.h>
51 
52 #include <netipsec/ipsec_osdep.h>
53 #include <net/net_osdep.h>
54 
55 extern	struct mbuf *m_getptr(struct mbuf *, int, int *);
56 
57 /*
58  * Create a writable copy of the mbuf chain.  While doing this
59  * we compact the chain with a goal of producing a chain with
60  * at most two mbufs.  The second mbuf in this chain is likely
61  * to be a cluster.  The primary purpose of this work is to create
62  * a writable packet for encryption, compression, etc.  The
63  * secondary goal is to linearize the data so the data can be
64  * passed to crypto hardware in the most efficient manner possible.
65  */
66 struct mbuf *
67 m_clone(struct mbuf *m0)
68 {
69 	struct mbuf *m, *mprev;
70 	struct mbuf *n, *mfirst, *mlast;
71 	int len, off;
72 
73 	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));
74 
75 	mprev = NULL;
76 	for (m = m0; m != NULL; m = mprev->m_next) {
77 		/*
78 		 * Regular mbufs are ignored unless there's a cluster
79 		 * in front of it that we can use to coalesce.  We do
80 		 * the latter mainly so later clusters can be coalesced
81 		 * also w/o having to handle them specially (i.e. convert
82 		 * mbuf+cluster -> cluster).  This optimization is heavily
83 		 * influenced by the assumption that we're running over
84 		 * Ethernet where MCLBYTES is large enough that the max
85 		 * packet size will permit lots of coalescing into a
86 		 * single cluster.  This in turn permits efficient
87 		 * crypto operations, especially when using hardware.
88 		 */
89 		if ((m->m_flags & M_EXT) == 0) {
90 			if (mprev && (mprev->m_flags & M_EXT) &&
91 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
92 				/* XXX: this ignores mbuf types */
93 				memcpy(mtod(mprev, char *) + mprev->m_len,
94 				       mtod(m, char *), m->m_len);
95 				mprev->m_len += m->m_len;
96 				mprev->m_next = m->m_next;	/* unlink from chain */
97 				m_free(m);			/* reclaim mbuf */
98 				newipsecstat.ips_mbcoalesced++;
99 			} else {
100 				mprev = m;
101 			}
102 			continue;
103 		}
104 		/*
105 		 * Writable mbufs are left alone (for now).  Note
106 		 * that for 4.x systems it's not possible to identify
107 		 * whether or not mbufs with external buffers are
108 		 * writable unless they use clusters.
109 		 */
110 		if (M_EXT_WRITABLE(m)) {
111 			mprev = m;
112 			continue;
113 		}
114 
115 		/*
116 		 * Not writable, replace with a copy or coalesce with
117 		 * the previous mbuf if possible (since we have to copy
118 		 * it anyway, we try to reduce the number of mbufs and
119 		 * clusters so that future work is easier).
120 		 */
121 		IPSEC_ASSERT(m->m_flags & M_EXT,
122 			("m_clone: m_flags 0x%x", m->m_flags));
123 		/* NB: we only coalesce into a cluster or larger */
124 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
125 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
126 			/* XXX: this ignores mbuf types */
127 			memcpy(mtod(mprev, char *) + mprev->m_len,
128 			       mtod(m, char *), m->m_len);
129 			mprev->m_len += m->m_len;
130 			mprev->m_next = m->m_next;	/* unlink from chain */
131 			m_free(m);			/* reclaim mbuf */
132 			newipsecstat.ips_clcoalesced++;
133 			continue;
134 		}
135 
136 		/*
137 		 * Allocate new space to hold the copy...
138 		 */
139 		/* XXX why can M_PKTHDR be set past the first mbuf? */
140 		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
141 			/*
142 			 * NB: if a packet header is present we must
143 			 * allocate the mbuf separately from any cluster
144 			 * because M_MOVE_PKTHDR will smash the data
145 			 * pointer and drop the M_EXT marker.
146 			 */
147 			MGETHDR(n, M_DONTWAIT, m->m_type);
148 			if (n == NULL) {
149 				m_freem(m0);
150 				return (NULL);
151 			}
152 			M_MOVE_PKTHDR(n, m);
153 			MCLGET(n, M_DONTWAIT);
154 			if ((n->m_flags & M_EXT) == 0) {
155 				m_free(n);
156 				m_freem(m0);
157 				return (NULL);
158 			}
159 		} else {
160 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
161 			if (n == NULL) {
162 				m_freem(m0);
163 				return (NULL);
164 			}
165 		}
166 		/*
167 		 * ... and copy the data.  We deal with jumbo mbufs
168 		 * (i.e. m_len > MCLBYTES) by splitting them into
169 		 * clusters.  We could just malloc a buffer and make
170 		 * it external but too many device drivers don't know
171 		 * how to break up the non-contiguous memory when
172 		 * doing DMA.
173 		 */
174 		len = m->m_len;
175 		off = 0;
176 		mfirst = n;
177 		mlast = NULL;
178 		for (;;) {
179 			int cc = min(len, MCLBYTES);
180 			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
181 			n->m_len = cc;
182 			if (mlast != NULL)
183 				mlast->m_next = n;
184 			mlast = n;
185 			newipsecstat.ips_clcopied++;
186 
187 			len -= cc;
188 			if (len <= 0)
189 				break;
190 			off += cc;
191 
192 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
193 			if (n == NULL) {
194 				m_freem(mfirst);
195 				m_freem(m0);
196 				return (NULL);
197 			}
198 		}
199 		n->m_next = m->m_next;
200 		if (mprev == NULL)
201 			m0 = mfirst;		/* new head of chain */
202 		else
203 			mprev->m_next = mfirst;	/* replace old mbuf */
204 		m_free(m);			/* release old mbuf */
205 		mprev = mfirst;
206 	}
207 	return (m0);
208 }
209 
210 /*
211  * Make space for a new header of length hlen at skip bytes
212  * into the packet.  When doing this we allocate new mbufs only
213  * when absolutely necessary.  The mbuf where the new header
214  * is to go is returned together with an offset into the mbuf.
215  * If NULL is returned then the mbuf chain may have been modified;
216  * the caller is assumed to always free the chain.
217  */
218 struct mbuf *
219 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
220 {
221 	struct mbuf *m;
222 	unsigned remain;
223 
224 	IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
225 	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
226 
227 	for (m = m0; m && skip > m->m_len; m = m->m_next)
228 		skip -= m->m_len;
229 	if (m == NULL)
230 		return (NULL);
231 	/*
232 	 * At this point skip is the offset into the mbuf m
233 	 * where the new header should be placed.  Figure out
234 	 * if there's space to insert the new header.  If so,
235 	 * and copying the remainder makese sense then do so.
236 	 * Otherwise insert a new mbuf in the chain, splitting
237 	 * the contents of m as needed.
238 	 */
239 	remain = m->m_len - skip;		/* data to move */
240 	if (hlen > M_TRAILINGSPACE(m)) {
241 		struct mbuf *n0, *n, **np;
242 		int todo, len, done, alloc;
243 
244 		n0 = NULL;
245 		np = &n0;
246 		alloc = 0;
247 		done = 0;
248 		todo = remain;
249 		while (todo > 0) {
250 			if (todo > MHLEN) {
251 				n = m_getcl(M_DONTWAIT, m->m_type, 0);
252 				len = MCLBYTES;
253 			}
254 			else {
255 				n = m_get(M_DONTWAIT, m->m_type);
256 				len = MHLEN;
257 			}
258 			if (n == NULL) {
259 				m_freem(n0);
260 				return NULL;
261 			}
262 			*np = n;
263 			np = &n->m_next;
264 			alloc++;
265 			len = min(todo, len);
266 			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
267 			n->m_len = len;
268 			done += len;
269 			todo -= len;
270 		}
271 
272 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
273 			m->m_len = skip + hlen;
274 			*off = skip;
275 			if (n0 != NULL) {
276 				*np = m->m_next;
277 				m->m_next = n0;
278 			}
279 		}
280 		else {
281 			n = m_get(M_DONTWAIT, m->m_type);
282 			if (n == NULL) {
283 				m_freem(n0);
284 				return NULL;
285 			}
286 			alloc++;
287 
288 			if ((n->m_next = n0) == NULL)
289 				np = &n->m_next;
290 			n0 = n;
291 
292 			*np = m->m_next;
293 			m->m_next = n0;
294 
295 			n->m_len = hlen;
296 			m->m_len = skip;
297 
298 			m = n;			/* header is at front ... */
299 			*off = 0;		/* ... of new mbuf */
300 		}
301 
302 		newipsecstat.ips_mbinserted += alloc;
303 	} else {
304 		/*
305 		 * Copy the remainder to the back of the mbuf
306 		 * so there's space to write the new header.
307 		 */
308 		/* XXX can this be memcpy? does it handle overlap? */
309 		ovbcopy(mtod(m, char *) + skip,
310 			mtod(m, char *) + skip + hlen, remain);
311 		m->m_len += hlen;
312 		*off = skip;
313 	}
314 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
315 	return m;
316 }
317 
318 /*
319  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
320  * length is updated, and a pointer to the first byte of the padding
321  * (which is guaranteed to be all in one mbuf) is returned.
322  */
323 void *
324 m_pad(struct mbuf *m, int n)
325 {
326 	register struct mbuf *m0, *m1;
327 	register int len, pad;
328 	void *retval;
329 
330 	if (n <= 0) {  /* No stupid arguments. */
331 		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
332 		m_freem(m);
333 		return NULL;
334 	}
335 
336 	len = m->m_pkthdr.len;
337 	pad = n;
338 	m0 = m;
339 
340 	while (m0->m_len < len) {
341 IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
342 		len -= m0->m_len;
343 		m0 = m0->m_next;
344 	}
345 
346 	if (m0->m_len != len) {
347 		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
348 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
349 
350 		m_freem(m);
351 		return NULL;
352 	}
353 
354 	/* Check for zero-length trailing mbufs, and find the last one. */
355 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
356 		if (m1->m_next->m_len != 0) {
357 			DPRINTF(("m_pad: length mismatch (should be %d "
358 			    "instead of %d)\n",
359 			    m->m_pkthdr.len,
360 			    m->m_pkthdr.len + m1->m_next->m_len));
361 
362 			m_freem(m);
363 			return NULL;
364 		}
365 
366 		m0 = m1->m_next;
367 	}
368 
369 	if (pad > M_TRAILINGSPACE(m0)) {
370 		/* Add an mbuf to the chain. */
371 		MGET(m1, M_DONTWAIT, MT_DATA);
372 		if (m1 == 0) {
373 			m_freem(m0);
374 			DPRINTF(("m_pad: unable to get extra mbuf\n"));
375 			return NULL;
376 		}
377 
378 		m0->m_next = m1;
379 		m0 = m1;
380 		m0->m_len = 0;
381 	}
382 
383 	retval = m0->m_data + m0->m_len;
384 	m0->m_len += pad;
385 	m->m_pkthdr.len += pad;
386 
387 	return retval;
388 }
389 
390 /*
391  * Remove hlen data at offset skip in the packet.  This is used by
392  * the protocols strip protocol headers and associated data (e.g. IV,
393  * authenticator) on input.
394  */
395 int
396 m_striphdr(struct mbuf *m, int skip, int hlen)
397 {
398 	struct mbuf *m1;
399 	int roff;
400 
401 	/* Find beginning of header */
402 	m1 = m_getptr(m, skip, &roff);
403 	if (m1 == NULL)
404 		return (EINVAL);
405 
406 	/* Remove the header and associated data from the mbuf. */
407 	if (roff == 0) {
408 		/* The header was at the beginning of the mbuf */
409 		newipsecstat.ips_input_front++;
410 		m_adj(m1, hlen);
411 		if ((m1->m_flags & M_PKTHDR) == 0)
412 			m->m_pkthdr.len -= hlen;
413 	} else if (roff + hlen >= m1->m_len) {
414 		struct mbuf *mo;
415 
416 		/*
417 		 * Part or all of the header is at the end of this mbuf,
418 		 * so first let's remove the remainder of the header from
419 		 * the beginning of the remainder of the mbuf chain, if any.
420 		 */
421 		newipsecstat.ips_input_end++;
422 		if (roff + hlen > m1->m_len) {
423 			/* Adjust the next mbuf by the remainder */
424 			m_adj(m1->m_next, roff + hlen - m1->m_len);
425 
426 			/* The second mbuf is guaranteed not to have a pkthdr... */
427 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
428 		}
429 
430 		/* Now, let's unlink the mbuf chain for a second...*/
431 		mo = m1->m_next;
432 		m1->m_next = NULL;
433 
434 		/* ...and trim the end of the first part of the chain...sick */
435 		m_adj(m1, -(m1->m_len - roff));
436 		if ((m1->m_flags & M_PKTHDR) == 0)
437 			m->m_pkthdr.len -= (m1->m_len - roff);
438 
439 		/* Finally, let's relink */
440 		m1->m_next = mo;
441 	} else {
442 		/*
443 		 * The header lies in the "middle" of the mbuf; copy
444 		 * the remainder of the mbuf down over the header.
445 		 */
446 		newipsecstat.ips_input_middle++;
447 		ovbcopy(mtod(m1, u_char *) + roff + hlen,
448 		      mtod(m1, u_char *) + roff,
449 		      m1->m_len - (roff + hlen));
450 		m1->m_len -= hlen;
451 		m->m_pkthdr.len -= hlen;
452 	}
453 	return (0);
454 }
455 
456 /*
457  * Diagnostic routine to check mbuf alignment as required by the
458  * crypto device drivers (that use DMA).
459  */
460 void
461 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
462 {
463 	int roff;
464 	struct mbuf *m = m_getptr(m0, off, &roff);
465 	void *addr;
466 
467 	if (m == NULL)
468 		return;
469 	printf("%s (off %u len %u): ", where, off, len);
470 	addr = mtod(m, char *) + roff;
471 	do {
472 		int mlen;
473 
474 		if (((uintptr_t) addr) & 3) {
475 			printf("addr misaligned %p,", addr);
476 			break;
477 		}
478 		mlen = m->m_len;
479 		if (mlen > len)
480 			mlen = len;
481 		len -= mlen;
482 		if (len && (mlen & 3)) {
483 			printf("len mismatch %u,", mlen);
484 			break;
485 		}
486 		m = m->m_next;
487 		addr = m ? mtod(m, void *) : NULL;
488 	} while (m && len > 0);
489 	for (m = m0; m; m = m->m_next)
490 		printf(" [%p:%u]", mtod(m, void *), m->m_len);
491 	printf("\n");
492 }
493