xref: /netbsd-src/sys/netipsec/ipsec_mbuf.c (revision da5f4674a3fc214be3572d358b66af40ab9401e7)
1 /*	$NetBSD: ipsec_mbuf.c,v 1.3 2003/08/20 22:33:40 jonathan Exp $	*/
2 /*	$FreeBSD: src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.1 2003/01/24 05:11:35 sam Exp $	*/
3 
4 #include <sys/cdefs.h>
5 __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.3 2003/08/20 22:33:40 jonathan Exp $");
6 
7 /*
8  * IPsec-specific mbuf routines.
9  */
10 
11 #ifdef __FreeBSD__
12 #include "opt_param.h"
13 #endif
14 
15 #include <sys/param.h>
16 #include <sys/systm.h>
17 #include <sys/mbuf.h>
18 #include <sys/socket.h>
19 
20 #include <net/route.h>
21 #include <netinet/in.h>
22 
23 #include <netipsec/ipsec.h>
24 
25 #include <netipsec/ipsec_osdep.h>
26 #include <net/net_osdep.h>
27 
28 extern	struct mbuf *m_getptr(struct mbuf *, int, int *);
29 
30 /*
31  * Create a writable copy of the mbuf chain.  While doing this
32  * we compact the chain with a goal of producing a chain with
33  * at most two mbufs.  The second mbuf in this chain is likely
34  * to be a cluster.  The primary purpose of this work is to create
35  * a writable packet for encryption, compression, etc.  The
36  * secondary goal is to linearize the data so the data can be
37  * passed to crypto hardware in the most efficient manner possible.
38  */
/*
 * Returns the (possibly new) head of the writable chain, or NULL on
 * allocation failure, in which case the entire input chain has been
 * freed and the caller must not touch it again.
 */
struct mbuf *
m_clone(struct mbuf *m0)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));

	/*
	 * Walk the chain with mprev tracking the last mbuf already known
	 * writable (or coalesced into).  NB: the loop-update expression
	 * dereferences mprev; this is safe because every path through an
	 * iteration either sets mprev or replaces the chain head and
	 * sets mprev to the replacement.
	 */
	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of it that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				       mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
				newipsecstat.ips_mbcoalesced++;
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).  Note
		 * that for 4.x systems it's not possible to identify
		 * whether or not mbufs with external buffers are
		 * writable unless they use clusters.
		 */
		if (M_EXT_WRITABLE(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		IPSEC_ASSERT(m->m_flags & M_EXT,
			("m_clone: m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			       mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
			newipsecstat.ips_clcoalesced++;
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, M_DONTWAIT);
			if ((n->m_flags & M_EXT) == 0) {
				/* no cluster; n holds the pkthdr now, but
				 * m0 still links m, so free both pieces */
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			/* copy at most one cluster's worth per iteration */
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;
			newipsecstat.ips_clcopied++;

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				/* free the partial copy and the original */
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		/* splice the copy into the chain in place of m */
		n->m_next = m->m_next;
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}
182 
183 /*
184  * Make space for a new header of length hlen at skip bytes
185  * into the packet.  When doing this we allocate new mbufs only
186  * when absolutely necessary.  The mbuf where the new header
187  * is to go is returned together with an offset into the mbuf.
188  * If NULL is returned then the mbuf chain may have been modified;
189  * the caller is assumed to always free the chain.
190  */
191 struct mbuf *
192 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
193 {
194 	struct mbuf *m;
195 	unsigned remain;
196 
197 	IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
198 	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
199 
200 	for (m = m0; m && skip > m->m_len; m = m->m_next)
201 		skip -= m->m_len;
202 	if (m == NULL)
203 		return (NULL);
204 	/*
205 	 * At this point skip is the offset into the mbuf m
206 	 * where the new header should be placed.  Figure out
207 	 * if there's space to insert the new header.  If so,
208 	 * and copying the remainder makese sense then do so.
209 	 * Otherwise insert a new mbuf in the chain, splitting
210 	 * the contents of m as needed.
211 	 */
212 	remain = m->m_len - skip;		/* data to move */
213 	if (hlen > M_TRAILINGSPACE(m)) {
214 		struct mbuf *n;
215 
216 		/* XXX code doesn't handle clusters XXX */
217 		IPSEC_ASSERT(remain < MLEN,
218 			("m_makespace: remainder too big: %u", remain));
219 		/*
220 		 * Not enough space in m, split the contents
221 		 * of m, inserting new mbufs as required.
222 		 *
223 		 * NB: this ignores mbuf types.
224 		 */
225 		MGET(n, M_DONTWAIT, MT_DATA);
226 		if (n == NULL)
227 			return (NULL);
228 		n->m_next = m->m_next;		/* splice new mbuf */
229 		m->m_next = n;
230 		newipsecstat.ips_mbinserted++;
231 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
232 			/*
233 			 * New header fits in the old mbuf if we copy
234 			 * the remainder; just do the copy to the new
235 			 * mbuf and we're good to go.
236 			 */
237 			memcpy(mtod(n, caddr_t),
238 			       mtod(m, caddr_t) + skip, remain);
239 			n->m_len = remain;
240 			m->m_len = skip + hlen;
241 			*off = skip;
242 		} else {
243 			/*
244 			 * No space in the old mbuf for the new header.
245 			 * Make space in the new mbuf and check the
246 			 * remainder'd data fits too.  If not then we
247 			 * must allocate an additional mbuf (yech).
248 			 */
249 			n->m_len = 0;
250 			if (remain + hlen > M_TRAILINGSPACE(n)) {
251 				struct mbuf *n2;
252 
253 				MGET(n2, M_DONTWAIT, MT_DATA);
254 				/* NB: new mbuf is on chain, let caller free */
255 				if (n2 == NULL)
256 					return (NULL);
257 				n2->m_len = 0;
258 				memcpy(mtod(n2, caddr_t),
259 				       mtod(m, caddr_t) + skip, remain);
260 				n2->m_len = remain;
261 				/* splice in second mbuf */
262 				n2->m_next = n->m_next;
263 				n->m_next = n2;
264 				newipsecstat.ips_mbinserted++;
265 			} else {
266 				memcpy(mtod(n, caddr_t) + hlen,
267 				       mtod(m, caddr_t) + skip, remain);
268 				n->m_len += remain;
269 			}
270 			m->m_len -= remain;
271 			n->m_len += hlen;
272 			m = n;			/* header is at front ... */
273 			*off = 0;		/* ... of new mbuf */
274 		}
275 	} else {
276 		/*
277 		 * Copy the remainder to the back of the mbuf
278 		 * so there's space to write the new header.
279 		 */
280 		/* XXX can this be memcpy? does it handle overlap? */
281 		ovbcopy(mtod(m, caddr_t) + skip,
282 			mtod(m, caddr_t) + skip + hlen, remain);
283 		m->m_len += hlen;
284 		*off = skip;
285 	}
286 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
287 	return m;
288 }
289 
290 /*
291  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
292  * length is updated, and a pointer to the first byte of the padding
293  * (which is guaranteed to be all in one mbuf) is returned.
294  */
295 caddr_t
296 m_pad(struct mbuf *m, int n)
297 {
298 	register struct mbuf *m0, *m1;
299 	register int len, pad;
300 	caddr_t retval;
301 
302 	if (n <= 0) {  /* No stupid arguments. */
303 		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
304 		m_freem(m);
305 		return NULL;
306 	}
307 
308 	len = m->m_pkthdr.len;
309 	pad = n;
310 	m0 = m;
311 
312 	while (m0->m_len < len) {
313 IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
314 		len -= m0->m_len;
315 		m0 = m0->m_next;
316 	}
317 
318 	if (m0->m_len != len) {
319 		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
320 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
321 
322 		m_freem(m);
323 		return NULL;
324 	}
325 
326 	/* Check for zero-length trailing mbufs, and find the last one. */
327 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
328 		if (m1->m_next->m_len != 0) {
329 			DPRINTF(("m_pad: length mismatch (should be %d "
330 			    "instead of %d)\n",
331 			    m->m_pkthdr.len,
332 			    m->m_pkthdr.len + m1->m_next->m_len));
333 
334 			m_freem(m);
335 			return NULL;
336 		}
337 
338 		m0 = m1->m_next;
339 	}
340 
341 	if (pad > M_TRAILINGSPACE(m0)) {
342 		/* Add an mbuf to the chain. */
343 		MGET(m1, M_DONTWAIT, MT_DATA);
344 		if (m1 == 0) {
345 			m_freem(m0);
346 			DPRINTF(("m_pad: unable to get extra mbuf\n"));
347 			return NULL;
348 		}
349 
350 		m0->m_next = m1;
351 		m0 = m1;
352 		m0->m_len = 0;
353 	}
354 
355 	retval = m0->m_data + m0->m_len;
356 	m0->m_len += pad;
357 	m->m_pkthdr.len += pad;
358 
359 	return retval;
360 }
361 
362 /*
363  * Remove hlen data at offset skip in the packet.  This is used by
364  * the protocols strip protocol headers and associated data (e.g. IV,
365  * authenticator) on input.
366  */
/*
 * Returns 0 on success or EINVAL if offset skip lies beyond the end
 * of the chain.  On success m->m_pkthdr.len has been reduced by hlen.
 */
int
m_striphdr(struct mbuf *m, int skip, int hlen)
{
	struct mbuf *m1;
	int roff;

	/* Find beginning of header */
	m1 = m_getptr(m, skip, &roff);
	if (m1 == NULL)
		return (EINVAL);

	/* Remove the header and associated data from the mbuf. */
	if (roff == 0) {
		/* The header was at the beginning of the mbuf */
		newipsecstat.ips_input_front++;
		m_adj(m1, hlen);
		/*
		 * m_adj only updates the pkthdr length when handed the
		 * head of the packet; fix it up by hand otherwise.
		 */
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= hlen;
	} else if (roff + hlen >= m1->m_len) {
		struct mbuf *mo;

		/*
		 * Part or all of the header is at the end of this mbuf,
		 * so first let's remove the remainder of the header from
		 * the beginning of the remainder of the mbuf chain, if any.
		 */
		newipsecstat.ips_input_end++;
		if (roff + hlen > m1->m_len) {
			/* Adjust the next mbuf by the remainder */
			m_adj(m1->m_next, roff + hlen - m1->m_len);

			/* The second mbuf is guaranteed not to have a pkthdr... */
			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
		}

		/* Now, let's unlink the mbuf chain for a second...*/
		mo = m1->m_next;
		m1->m_next = NULL;

		/* ...and trim the end of the first part of the chain...sick */
		/* NB: negative length trims from the tail of m1 */
		m_adj(m1, -(m1->m_len - roff));
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= (m1->m_len - roff);

		/* Finally, let's relink */
		m1->m_next = mo;
	} else {
		/*
		 * The header lies in the "middle" of the mbuf; copy
		 * the remainder of the mbuf down over the header.
		 * ovbcopy because source and destination overlap.
		 */
		newipsecstat.ips_input_middle++;
		ovbcopy(mtod(m1, u_char *) + roff + hlen,
		      mtod(m1, u_char *) + roff,
		      m1->m_len - (roff + hlen));
		m1->m_len -= hlen;
		m->m_pkthdr.len -= hlen;
	}
	return (0);
}
427 
428 /*
429  * Diagnostic routine to check mbuf alignment as required by the
430  * crypto device drivers (that use DMA).
431  */
432 void
433 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
434 {
435 	int roff;
436 	struct mbuf *m = m_getptr(m0, off, &roff);
437 	caddr_t addr;
438 
439 	if (m == NULL)
440 		return;
441 	printf("%s (off %u len %u): ", where, off, len);
442 	addr = mtod(m, caddr_t) + roff;
443 	do {
444 		int mlen;
445 
446 		if (((uintptr_t) addr) & 3) {
447 			printf("addr misaligned %p,", addr);
448 			break;
449 		}
450 		mlen = m->m_len;
451 		if (mlen > len)
452 			mlen = len;
453 		len -= mlen;
454 		if (len && (mlen & 3)) {
455 			printf("len mismatch %u,", mlen);
456 			break;
457 		}
458 		m = m->m_next;
459 		addr = m ? mtod(m, caddr_t) : NULL;
460 	} while (m && len > 0);
461 	for (m = m0; m; m = m->m_next)
462 		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
463 	printf("\n");
464 }
465