xref: /netbsd-src/sys/netipsec/ipsec_mbuf.c (revision 796c32c94f6e154afc9de0f63da35c91bb739b45)
1 /*	$NetBSD: ipsec_mbuf.c,v 1.16 2017/05/19 04:34:09 ozaki-r Exp $	*/
2 /*-
3  * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.16 2017/05/19 04:34:09 ozaki-r Exp $");
32 
33 /*
34  * IPsec-specific mbuf routines.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/mbuf.h>
40 #include <sys/socket.h>
41 
42 #include <net/route.h>
43 #include <netinet/in.h>
44 
45 #include <netipsec/ipsec.h>
46 #include <netipsec/ipsec_var.h>
47 #include <netipsec/ipsec_private.h>
48 
49 #include <net/net_osdep.h>
50 
51 /*
52  * Create a writable copy of the mbuf chain.  While doing this
53  * we compact the chain with a goal of producing a chain with
54  * at most two mbufs.  The second mbuf in this chain is likely
55  * to be a cluster.  The primary purpose of this work is to create
56  * a writable packet for encryption, compression, etc.  The
57  * secondary goal is to linearize the data so the data can be
58  * passed to crypto hardware in the most efficient manner possible.
59  */
60 struct mbuf *
61 m_clone(struct mbuf *m0)
62 {
63 	struct mbuf *m, *mprev;
64 	struct mbuf *n, *mfirst, *mlast;
65 	int len, off;
66 
67 	KASSERT(m0 != NULL);
68 
69 	mprev = NULL;
70 	for (m = m0; m != NULL; m = mprev->m_next) {
71 		/*
72 		 * Regular mbufs are ignored unless there's a cluster
73 		 * in front of it that we can use to coalesce.  We do
74 		 * the latter mainly so later clusters can be coalesced
75 		 * also w/o having to handle them specially (i.e. convert
76 		 * mbuf+cluster -> cluster).  This optimization is heavily
77 		 * influenced by the assumption that we're running over
78 		 * Ethernet where MCLBYTES is large enough that the max
79 		 * packet size will permit lots of coalescing into a
80 		 * single cluster.  This in turn permits efficient
81 		 * crypto operations, especially when using hardware.
82 		 */
83 		if ((m->m_flags & M_EXT) == 0) {
84 			if (mprev && (mprev->m_flags & M_EXT) &&
85 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
86 				/* XXX: this ignores mbuf types */
87 				memcpy(mtod(mprev, char *) + mprev->m_len,
88 				       mtod(m, char *), m->m_len);
89 				mprev->m_len += m->m_len;
90 				mprev->m_next = m->m_next;	/* unlink from chain */
91 				m_free(m);			/* reclaim mbuf */
92 				IPSEC_STATINC(IPSEC_STAT_MBCOALESCED);
93 			} else {
94 				mprev = m;
95 			}
96 			continue;
97 		}
98 		/*
99 		 * Writable mbufs are left alone (for now).  Note
100 		 * that for 4.x systems it's not possible to identify
101 		 * whether or not mbufs with external buffers are
102 		 * writable unless they use clusters.
103 		 */
104 		if (M_EXT_WRITABLE(m)) {
105 			mprev = m;
106 			continue;
107 		}
108 
109 		/*
110 		 * Not writable, replace with a copy or coalesce with
111 		 * the previous mbuf if possible (since we have to copy
112 		 * it anyway, we try to reduce the number of mbufs and
113 		 * clusters so that future work is easier).
114 		 */
115 		KASSERTMSG(m->m_flags & M_EXT, "m_flags 0x%x", m->m_flags);
116 		/* NB: we only coalesce into a cluster or larger */
117 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
118 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
119 			/* XXX: this ignores mbuf types */
120 			memcpy(mtod(mprev, char *) + mprev->m_len,
121 			       mtod(m, char *), m->m_len);
122 			mprev->m_len += m->m_len;
123 			mprev->m_next = m->m_next;	/* unlink from chain */
124 			m_free(m);			/* reclaim mbuf */
125 			IPSEC_STATINC(IPSEC_STAT_CLCOALESCED);
126 			continue;
127 		}
128 
129 		/*
130 		 * Allocate new space to hold the copy...
131 		 */
132 		/* XXX why can M_PKTHDR be set past the first mbuf? */
133 		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
134 			/*
135 			 * NB: if a packet header is present we must
136 			 * allocate the mbuf separately from any cluster
137 			 * because M_MOVE_PKTHDR will smash the data
138 			 * pointer and drop the M_EXT marker.
139 			 */
140 			MGETHDR(n, M_DONTWAIT, m->m_type);
141 			if (n == NULL) {
142 				m_freem(m0);
143 				return (NULL);
144 			}
145 			M_MOVE_PKTHDR(n, m);
146 			MCLGET(n, M_DONTWAIT);
147 			if ((n->m_flags & M_EXT) == 0) {
148 				m_free(n);
149 				m_freem(m0);
150 				return (NULL);
151 			}
152 		} else {
153 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
154 			if (n == NULL) {
155 				m_freem(m0);
156 				return (NULL);
157 			}
158 		}
159 		/*
160 		 * ... and copy the data.  We deal with jumbo mbufs
161 		 * (i.e. m_len > MCLBYTES) by splitting them into
162 		 * clusters.  We could just malloc a buffer and make
163 		 * it external but too many device drivers don't know
164 		 * how to break up the non-contiguous memory when
165 		 * doing DMA.
166 		 */
167 		len = m->m_len;
168 		off = 0;
169 		mfirst = n;
170 		mlast = NULL;
171 		for (;;) {
172 			int cc = min(len, MCLBYTES);
173 			memcpy(mtod(n, char *), mtod(m, char *) + off, cc);
174 			n->m_len = cc;
175 			if (mlast != NULL)
176 				mlast->m_next = n;
177 			mlast = n;
178 			IPSEC_STATINC(IPSEC_STAT_CLCOPIED);
179 
180 			len -= cc;
181 			if (len <= 0)
182 				break;
183 			off += cc;
184 
185 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
186 			if (n == NULL) {
187 				m_freem(mfirst);
188 				m_freem(m0);
189 				return (NULL);
190 			}
191 		}
192 		n->m_next = m->m_next;
193 		if (mprev == NULL)
194 			m0 = mfirst;		/* new head of chain */
195 		else
196 			mprev->m_next = mfirst;	/* replace old mbuf */
197 		m_free(m);			/* release old mbuf */
198 		mprev = mfirst;
199 	}
200 	return (m0);
201 }
202 
203 /*
204  * Make space for a new header of length hlen at skip bytes
205  * into the packet.  When doing this we allocate new mbufs only
206  * when absolutely necessary.  The mbuf where the new header
207  * is to go is returned together with an offset into the mbuf.
208  * If NULL is returned then the mbuf chain may have been modified;
209  * the caller is assumed to always free the chain.
210  */
211 struct mbuf *
212 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
213 {
214 	struct mbuf *m;
215 	unsigned remain;
216 
217 	KASSERT(m0 != NULL);
218 	KASSERTMSG(hlen < MHLEN, "hlen too big: %u", hlen);
219 
220 	for (m = m0; m && skip > m->m_len; m = m->m_next)
221 		skip -= m->m_len;
222 	if (m == NULL)
223 		return (NULL);
224 	/*
225 	 * At this point skip is the offset into the mbuf m
226 	 * where the new header should be placed.  Figure out
227 	 * if there's space to insert the new header.  If so,
228 	 * and copying the remainder makese sense then do so.
229 	 * Otherwise insert a new mbuf in the chain, splitting
230 	 * the contents of m as needed.
231 	 */
232 	remain = m->m_len - skip;		/* data to move */
233 	if (hlen > M_TRAILINGSPACE(m)) {
234 		struct mbuf *n0, *n, **np;
235 		int todo, len, done, alloc;
236 
237 		n0 = NULL;
238 		np = &n0;
239 		alloc = 0;
240 		done = 0;
241 		todo = remain;
242 		while (todo > 0) {
243 			if (todo > MHLEN) {
244 				n = m_getcl(M_DONTWAIT, m->m_type, 0);
245 				len = MCLBYTES;
246 			}
247 			else {
248 				n = m_get(M_DONTWAIT, m->m_type);
249 				len = MHLEN;
250 			}
251 			if (n == NULL) {
252 				m_freem(n0);
253 				return NULL;
254 			}
255 			*np = n;
256 			np = &n->m_next;
257 			alloc++;
258 			len = min(todo, len);
259 			memcpy(n->m_data, mtod(m, char *) + skip + done, len);
260 			n->m_len = len;
261 			done += len;
262 			todo -= len;
263 		}
264 
265 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
266 			m->m_len = skip + hlen;
267 			*off = skip;
268 			if (n0 != NULL) {
269 				*np = m->m_next;
270 				m->m_next = n0;
271 			}
272 		}
273 		else {
274 			n = m_get(M_DONTWAIT, m->m_type);
275 			if (n == NULL) {
276 				m_freem(n0);
277 				return NULL;
278 			}
279 			alloc++;
280 
281 			if ((n->m_next = n0) == NULL)
282 				np = &n->m_next;
283 			n0 = n;
284 
285 			*np = m->m_next;
286 			m->m_next = n0;
287 
288 			n->m_len = hlen;
289 			m->m_len = skip;
290 
291 			m = n;			/* header is at front ... */
292 			*off = 0;		/* ... of new mbuf */
293 		}
294 
295 		IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc);
296 	} else {
297 		/*
298 		 * Copy the remainder to the back of the mbuf
299 		 * so there's space to write the new header.
300 		 */
301 		/* XXX can this be memcpy? does it handle overlap? */
302 		ovbcopy(mtod(m, char *) + skip,
303 			mtod(m, char *) + skip + hlen, remain);
304 		m->m_len += hlen;
305 		*off = skip;
306 	}
307 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
308 	return m;
309 }
310 
311 /*
312  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
313  * length is updated, and a pointer to the first byte of the padding
314  * (which is guaranteed to be all in one mbuf) is returned.
315  */
316 void *
317 m_pad(struct mbuf *m, int n)
318 {
319 	register struct mbuf *m0, *m1;
320 	register int len, pad;
321 	void *retval;
322 
323 	if (n <= 0) {  /* No stupid arguments. */
324 		IPSECLOG(LOG_DEBUG, "pad length invalid (%d)\n", n);
325 		m_freem(m);
326 		return NULL;
327 	}
328 
329 	len = m->m_pkthdr.len;
330 	pad = n;
331 	m0 = m;
332 
333 	while (m0->m_len < len) {
334 		KASSERTMSG(m0->m_next != NULL,
335 		    "m0 null, len %u m_len %u", len, m0->m_len);/*XXX*/
336 		len -= m0->m_len;
337 		m0 = m0->m_next;
338 	}
339 
340 	if (m0->m_len != len) {
341 		IPSECLOG(LOG_DEBUG,
342 		    "length mismatch (should be %d instead of %d)\n",
343 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len);
344 
345 		m_freem(m);
346 		return NULL;
347 	}
348 
349 	/* Check for zero-length trailing mbufs, and find the last one. */
350 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
351 		if (m1->m_next->m_len != 0) {
352 			IPSECLOG(LOG_DEBUG,
353 			    "length mismatch (should be %d instead of %d)\n",
354 			    m->m_pkthdr.len,
355 			    m->m_pkthdr.len + m1->m_next->m_len);
356 
357 			m_freem(m);
358 			return NULL;
359 		}
360 
361 		m0 = m1->m_next;
362 	}
363 
364 	if (pad > M_TRAILINGSPACE(m0)) {
365 		/* Add an mbuf to the chain. */
366 		MGET(m1, M_DONTWAIT, MT_DATA);
367 		if (m1 == 0) {
368 			m_freem(m0);
369 			IPSECLOG(LOG_DEBUG, "unable to get extra mbuf\n");
370 			return NULL;
371 		}
372 
373 		m0->m_next = m1;
374 		m0 = m1;
375 		m0->m_len = 0;
376 	}
377 
378 	retval = m0->m_data + m0->m_len;
379 	m0->m_len += pad;
380 	m->m_pkthdr.len += pad;
381 
382 	return retval;
383 }
384 
385 /*
386  * Remove hlen data at offset skip in the packet.  This is used by
387  * the protocols strip protocol headers and associated data (e.g. IV,
388  * authenticator) on input.
389  */
390 int
391 m_striphdr(struct mbuf *m, int skip, int hlen)
392 {
393 	struct mbuf *m1;
394 	int roff;
395 
396 	/* Find beginning of header */
397 	m1 = m_getptr(m, skip, &roff);
398 	if (m1 == NULL)
399 		return (EINVAL);
400 
401 	/* Remove the header and associated data from the mbuf. */
402 	if (roff == 0) {
403 		/* The header was at the beginning of the mbuf */
404 		IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT);
405 		m_adj(m1, hlen);
406 		if ((m1->m_flags & M_PKTHDR) == 0)
407 			m->m_pkthdr.len -= hlen;
408 	} else if (roff + hlen >= m1->m_len) {
409 		struct mbuf *mo;
410 
411 		/*
412 		 * Part or all of the header is at the end of this mbuf,
413 		 * so first let's remove the remainder of the header from
414 		 * the beginning of the remainder of the mbuf chain, if any.
415 		 */
416 		IPSEC_STATINC(IPSEC_STAT_INPUT_END);
417 		if (roff + hlen > m1->m_len) {
418 			/* Adjust the next mbuf by the remainder */
419 			m_adj(m1->m_next, roff + hlen - m1->m_len);
420 
421 			/* The second mbuf is guaranteed not to have a pkthdr... */
422 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
423 		}
424 
425 		/* Now, let's unlink the mbuf chain for a second...*/
426 		mo = m1->m_next;
427 		m1->m_next = NULL;
428 
429 		/* ...and trim the end of the first part of the chain...sick */
430 		m_adj(m1, -(m1->m_len - roff));
431 		if ((m1->m_flags & M_PKTHDR) == 0)
432 			m->m_pkthdr.len -= (m1->m_len - roff);
433 
434 		/* Finally, let's relink */
435 		m1->m_next = mo;
436 	} else {
437 		/*
438 		 * The header lies in the "middle" of the mbuf; copy
439 		 * the remainder of the mbuf down over the header.
440 		 */
441 		IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE);
442 		ovbcopy(mtod(m1, u_char *) + roff + hlen,
443 		      mtod(m1, u_char *) + roff,
444 		      m1->m_len - (roff + hlen));
445 		m1->m_len -= hlen;
446 		m->m_pkthdr.len -= hlen;
447 	}
448 	return (0);
449 }
450 
451 /*
452  * Diagnostic routine to check mbuf alignment as required by the
453  * crypto device drivers (that use DMA).
454  */
455 void
456 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
457 {
458 	int roff;
459 	struct mbuf *m = m_getptr(m0, off, &roff);
460 	void *addr;
461 
462 	if (m == NULL)
463 		return;
464 	printf("%s (off %u len %u): ", where, off, len);
465 	addr = mtod(m, char *) + roff;
466 	do {
467 		int mlen;
468 
469 		if (((uintptr_t) addr) & 3) {
470 			printf("addr misaligned %p,", addr);
471 			break;
472 		}
473 		mlen = m->m_len;
474 		if (mlen > len)
475 			mlen = len;
476 		len -= mlen;
477 		if (len && (mlen & 3)) {
478 			printf("len mismatch %u,", mlen);
479 			break;
480 		}
481 		m = m->m_next;
482 		addr = m ? mtod(m, void *) : NULL;
483 	} while (m && len > 0);
484 	for (m = m0; m; m = m->m_next)
485 		printf(" [%p:%u]", mtod(m, void *), m->m_len);
486 	printf("\n");
487 }
488