/*	$NetBSD: ipsec_mbuf.c,v 1.3 2003/08/20 22:33:40 jonathan Exp $	*/
/*	$FreeBSD: src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.1 2003/01/24 05:11:35 sam Exp $	*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.3 2003/08/20 22:33:40 jonathan Exp $");

/*
 * IPsec-specific mbuf routines.
 */

#ifdef __FreeBSD__
#include "opt_param.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/route.h>
#include <netinet/in.h>

#include <netipsec/ipsec.h>

#include <netipsec/ipsec_osdep.h>
#include <net/net_osdep.h>

/* Locate the mbuf holding packet offset `off'; residual offset via 3rd arg. */
extern struct mbuf *m_getptr(struct mbuf *, int, int *);

/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 *
 * Returns the (possibly new) head of the chain, or NULL on
 * allocation failure, in which case the entire original chain
 * has been freed.
 */
struct mbuf *
m_clone(struct mbuf *m0)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));

	/*
	 * NB: mprev tracks the last mbuf kept/produced so far; it is
	 * guaranteed non-NULL after the first iteration, so the loop
	 * advance through mprev->m_next is safe even when the current
	 * mbuf was replaced or coalesced away.
	 */
	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of it that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				    mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
				newipsecstat.ips_mbcoalesced++;
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).  Note
		 * that for 4.x systems it's not possible to identify
		 * whether or not mbufs with external buffers are
		 * writable unless they use clusters.
		 */
		if (M_EXT_WRITABLE(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		IPSEC_ASSERT(m->m_flags & M_EXT,
		    ("m_clone: m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			    mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
			newipsecstat.ips_clcoalesced++;
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, M_DONTWAIT);
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;
			newipsecstat.ips_clcopied++;

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			/* NB: new cluster not yet linked into m0; on
			 * failure free both the partial copy and the
			 * original chain. */
			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		/* Splice the copy in place of the original mbuf. */
		n->m_next = m->m_next;
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
190 */ 191 struct mbuf * 192 m_makespace(struct mbuf *m0, int skip, int hlen, int *off) 193 { 194 struct mbuf *m; 195 unsigned remain; 196 197 IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf")); 198 IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen)); 199 200 for (m = m0; m && skip > m->m_len; m = m->m_next) 201 skip -= m->m_len; 202 if (m == NULL) 203 return (NULL); 204 /* 205 * At this point skip is the offset into the mbuf m 206 * where the new header should be placed. Figure out 207 * if there's space to insert the new header. If so, 208 * and copying the remainder makese sense then do so. 209 * Otherwise insert a new mbuf in the chain, splitting 210 * the contents of m as needed. 211 */ 212 remain = m->m_len - skip; /* data to move */ 213 if (hlen > M_TRAILINGSPACE(m)) { 214 struct mbuf *n; 215 216 /* XXX code doesn't handle clusters XXX */ 217 IPSEC_ASSERT(remain < MLEN, 218 ("m_makespace: remainder too big: %u", remain)); 219 /* 220 * Not enough space in m, split the contents 221 * of m, inserting new mbufs as required. 222 * 223 * NB: this ignores mbuf types. 224 */ 225 MGET(n, M_DONTWAIT, MT_DATA); 226 if (n == NULL) 227 return (NULL); 228 n->m_next = m->m_next; /* splice new mbuf */ 229 m->m_next = n; 230 newipsecstat.ips_mbinserted++; 231 if (hlen <= M_TRAILINGSPACE(m) + remain) { 232 /* 233 * New header fits in the old mbuf if we copy 234 * the remainder; just do the copy to the new 235 * mbuf and we're good to go. 236 */ 237 memcpy(mtod(n, caddr_t), 238 mtod(m, caddr_t) + skip, remain); 239 n->m_len = remain; 240 m->m_len = skip + hlen; 241 *off = skip; 242 } else { 243 /* 244 * No space in the old mbuf for the new header. 245 * Make space in the new mbuf and check the 246 * remainder'd data fits too. If not then we 247 * must allocate an additional mbuf (yech). 
248 */ 249 n->m_len = 0; 250 if (remain + hlen > M_TRAILINGSPACE(n)) { 251 struct mbuf *n2; 252 253 MGET(n2, M_DONTWAIT, MT_DATA); 254 /* NB: new mbuf is on chain, let caller free */ 255 if (n2 == NULL) 256 return (NULL); 257 n2->m_len = 0; 258 memcpy(mtod(n2, caddr_t), 259 mtod(m, caddr_t) + skip, remain); 260 n2->m_len = remain; 261 /* splice in second mbuf */ 262 n2->m_next = n->m_next; 263 n->m_next = n2; 264 newipsecstat.ips_mbinserted++; 265 } else { 266 memcpy(mtod(n, caddr_t) + hlen, 267 mtod(m, caddr_t) + skip, remain); 268 n->m_len += remain; 269 } 270 m->m_len -= remain; 271 n->m_len += hlen; 272 m = n; /* header is at front ... */ 273 *off = 0; /* ... of new mbuf */ 274 } 275 } else { 276 /* 277 * Copy the remainder to the back of the mbuf 278 * so there's space to write the new header. 279 */ 280 /* XXX can this be memcpy? does it handle overlap? */ 281 ovbcopy(mtod(m, caddr_t) + skip, 282 mtod(m, caddr_t) + skip + hlen, remain); 283 m->m_len += hlen; 284 *off = skip; 285 } 286 m0->m_pkthdr.len += hlen; /* adjust packet length */ 287 return m; 288 } 289 290 /* 291 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header 292 * length is updated, and a pointer to the first byte of the padding 293 * (which is guaranteed to be all in one mbuf) is returned. 294 */ 295 caddr_t 296 m_pad(struct mbuf *m, int n) 297 { 298 register struct mbuf *m0, *m1; 299 register int len, pad; 300 caddr_t retval; 301 302 if (n <= 0) { /* No stupid arguments. 
*/ 303 DPRINTF(("m_pad: pad length invalid (%d)\n", n)); 304 m_freem(m); 305 return NULL; 306 } 307 308 len = m->m_pkthdr.len; 309 pad = n; 310 m0 = m; 311 312 while (m0->m_len < len) { 313 IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/ 314 len -= m0->m_len; 315 m0 = m0->m_next; 316 } 317 318 if (m0->m_len != len) { 319 DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n", 320 m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); 321 322 m_freem(m); 323 return NULL; 324 } 325 326 /* Check for zero-length trailing mbufs, and find the last one. */ 327 for (m1 = m0; m1->m_next; m1 = m1->m_next) { 328 if (m1->m_next->m_len != 0) { 329 DPRINTF(("m_pad: length mismatch (should be %d " 330 "instead of %d)\n", 331 m->m_pkthdr.len, 332 m->m_pkthdr.len + m1->m_next->m_len)); 333 334 m_freem(m); 335 return NULL; 336 } 337 338 m0 = m1->m_next; 339 } 340 341 if (pad > M_TRAILINGSPACE(m0)) { 342 /* Add an mbuf to the chain. */ 343 MGET(m1, M_DONTWAIT, MT_DATA); 344 if (m1 == 0) { 345 m_freem(m0); 346 DPRINTF(("m_pad: unable to get extra mbuf\n")); 347 return NULL; 348 } 349 350 m0->m_next = m1; 351 m0 = m1; 352 m0->m_len = 0; 353 } 354 355 retval = m0->m_data + m0->m_len; 356 m0->m_len += pad; 357 m->m_pkthdr.len += pad; 358 359 return retval; 360 } 361 362 /* 363 * Remove hlen data at offset skip in the packet. This is used by 364 * the protocols strip protocol headers and associated data (e.g. IV, 365 * authenticator) on input. 366 */ 367 int 368 m_striphdr(struct mbuf *m, int skip, int hlen) 369 { 370 struct mbuf *m1; 371 int roff; 372 373 /* Find beginning of header */ 374 m1 = m_getptr(m, skip, &roff); 375 if (m1 == NULL) 376 return (EINVAL); 377 378 /* Remove the header and associated data from the mbuf. 
*/ 379 if (roff == 0) { 380 /* The header was at the beginning of the mbuf */ 381 newipsecstat.ips_input_front++; 382 m_adj(m1, hlen); 383 if ((m1->m_flags & M_PKTHDR) == 0) 384 m->m_pkthdr.len -= hlen; 385 } else if (roff + hlen >= m1->m_len) { 386 struct mbuf *mo; 387 388 /* 389 * Part or all of the header is at the end of this mbuf, 390 * so first let's remove the remainder of the header from 391 * the beginning of the remainder of the mbuf chain, if any. 392 */ 393 newipsecstat.ips_input_end++; 394 if (roff + hlen > m1->m_len) { 395 /* Adjust the next mbuf by the remainder */ 396 m_adj(m1->m_next, roff + hlen - m1->m_len); 397 398 /* The second mbuf is guaranteed not to have a pkthdr... */ 399 m->m_pkthdr.len -= (roff + hlen - m1->m_len); 400 } 401 402 /* Now, let's unlink the mbuf chain for a second...*/ 403 mo = m1->m_next; 404 m1->m_next = NULL; 405 406 /* ...and trim the end of the first part of the chain...sick */ 407 m_adj(m1, -(m1->m_len - roff)); 408 if ((m1->m_flags & M_PKTHDR) == 0) 409 m->m_pkthdr.len -= (m1->m_len - roff); 410 411 /* Finally, let's relink */ 412 m1->m_next = mo; 413 } else { 414 /* 415 * The header lies in the "middle" of the mbuf; copy 416 * the remainder of the mbuf down over the header. 417 */ 418 newipsecstat.ips_input_middle++; 419 ovbcopy(mtod(m1, u_char *) + roff + hlen, 420 mtod(m1, u_char *) + roff, 421 m1->m_len - (roff + hlen)); 422 m1->m_len -= hlen; 423 m->m_pkthdr.len -= hlen; 424 } 425 return (0); 426 } 427 428 /* 429 * Diagnostic routine to check mbuf alignment as required by the 430 * crypto device drivers (that use DMA). 
431 */ 432 void 433 m_checkalignment(const char* where, struct mbuf *m0, int off, int len) 434 { 435 int roff; 436 struct mbuf *m = m_getptr(m0, off, &roff); 437 caddr_t addr; 438 439 if (m == NULL) 440 return; 441 printf("%s (off %u len %u): ", where, off, len); 442 addr = mtod(m, caddr_t) + roff; 443 do { 444 int mlen; 445 446 if (((uintptr_t) addr) & 3) { 447 printf("addr misaligned %p,", addr); 448 break; 449 } 450 mlen = m->m_len; 451 if (mlen > len) 452 mlen = len; 453 len -= mlen; 454 if (len && (mlen & 3)) { 455 printf("len mismatch %u,", mlen); 456 break; 457 } 458 m = m->m_next; 459 addr = m ? mtod(m, caddr_t) : NULL; 460 } while (m && len > 0); 461 for (m = m0; m; m = m->m_next) 462 printf(" [%p:%u]", mtod(m, caddr_t), m->m_len); 463 printf("\n"); 464 } 465