/*	$NetBSD: uipc_mbuf.c,v 1.104 2005/12/26 18:45:27 perry Exp $	*/

/*-
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.104 2005/12/26 18:45:27 perry Exp $");

#include "opt_mbuftrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

#include <uvm/uvm.h>

struct pool mbpool;		/* mbuf pool */
struct pool mclpool;		/* mbuf cluster pool */

struct pool_cache mbpool_cache;
struct pool_cache mclpool_cache;

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static int mb_ctor(void *, void *, int);

static void *mclpool_alloc(struct pool *, int);
static void mclpool_release(struct pool *, void *);

static struct pool_allocator mclpool_allocator = {
	mclpool_alloc, mclpool_release, 0,
};

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */

static const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase NMBCLUSTERS";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	{ "unknown", "free" },
	{ "unknown", "data" },
	{ "unknown", "header" },
	{ "unknown", "soname" },
	{ "unknown", "soopts" },
	{ "unknown", "ftable" },
	{ "unknown", "control" },
	{ "unknown", "oobdata" },
};
struct mowner revoked_mowner = { "revoked", "" };
#endif

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

	KASSERT(sizeof(struct _m_ext) <= MHLEN);
	KASSERT(sizeof(struct mbuf) == MSIZE);

	pool_init(&mbpool, msize, 0, 0, 0, "mbpl", NULL);
	pool_init(&mclpool, mclbytes, 0, 0, 0, "mclpl", &mclpool_allocator);

	pool_set_drain_hook(&mbpool, m_reclaim, NULL);
	pool_set_drain_hook(&mclpool, m_reclaim, NULL);

	pool_cache_init(&mbpool_cache, &mbpool, mb_ctor, NULL, NULL);
	pool_cache_init(&mclpool_cache, &mclpool, NULL, NULL, NULL);

	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the limit
	 * reached message at most once a minute.
	 */
	pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60);

	/*
	 * Set a low water mark for both mbufs and clusters.  This should
	 * help ensure that they can be allocated in a memory starvation
	 * situation.  This is important for e.g. diskless systems which
	 * must allocate mbufs in order for the pagedaemon to clean pages.
	 */
	pool_setlowat(&mbpool, mblowat);
	pool_setlowat(&mclpool, mcllowat);

#ifdef MBUFTRACE
	{
		/*
		 * Attach the unknown mowners.
		 */
		int i;
		MOWNER_ATTACH(&revoked_mowner);
		for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
		     i-- > 0; )
			MOWNER_ATTACH(&unknown_mowners[i]);
	}
#endif
}

/*
 * sysctl helper routine for the kern.mbuf subtree.  nmbclusters may
 * or may not be writable, and mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
	int error, newval;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	switch (rnode->sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (mb_map != NULL) {
			node.sysctl_flags &= ~CTLFLAG_READWRITE;
			node.sysctl_flags |= CTLFLAG_READONLY;
		}
		/* FALLTHROUGH */
	case MBUF_MBLOWAT:
	case MBUF_MCLLOWAT:
		newval = *(int*)rnode->sysctl_data;
		break;
	default:
		return (EOPNOTSUPP);
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);
	if (newval < 0)
		return (EINVAL);

	switch (node.sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (newval < nmbclusters)
			return (EINVAL);
		nmbclusters = newval;
		pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60);
		break;
	case MBUF_MBLOWAT:
		mblowat = newval;
		pool_setlowat(&mbpool, mblowat);
		break;
	case MBUF_MCLLOWAT:
		mcllowat = newval;
		pool_setlowat(&mclpool, mcllowat);
		break;
	}

	return (0);
}

#ifdef MBUFTRACE
static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *mo;
	size_t len = 0;
	int error = 0;

	if (namelen != 0)
		return (EINVAL);
	if (newp != NULL)
		return (EPERM);

	LIST_FOREACH(mo, &mowners, mo_link) {
		if (oldp != NULL) {
			if (*oldlenp - len < sizeof(*mo)) {
				error = ENOMEM;
				break;
			}
			error = copyout(mo, (caddr_t)oldp + len,
			    sizeof(*mo));
			if (error)
				break;
		}
		len += sizeof(*mo);
	}

	if (error == 0)
		*oldlenp = len;

	return (error);
}
#endif /* MBUFTRACE */
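/*
 * Illustrative sketch (not compiled here): a userland program could read
 * the cluster limit through the MIB names wired up by the setup routine
 * below, using the standard sysctl(3) interface.  Error handling is
 * trimmed for brevity; the program itself is hypothetical.
 */
#if 0
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <stdio.h>

int
main(void)
{
	int mib[3] = { CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS };
	int nmb;
	size_t len = sizeof(nmb);

	if (sysctl(mib, 3, &nmb, &len, NULL, 0) == -1)
		return 1;
	printf("kern.mbuf.nmbclusters = %d\n", nmb);
	return 0;
}
#endif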
SYSCTL_SETUP(sysctl_kern_mbuf_setup, "sysctl kern.mbuf subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "kern", NULL,
		       NULL, 0, NULL, 0,
		       CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "mbuf",
		       SYSCTL_DESCR("mbuf control variables"),
		       NULL, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "msize",
		       SYSCTL_DESCR("mbuf base size"),
		       NULL, msize, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "mclbytes",
		       SYSCTL_DESCR("mbuf cluster size"),
		       NULL, mclbytes, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "nmbclusters",
		       SYSCTL_DESCR("Limit on the number of mbuf clusters"),
		       sysctl_kern_mbuf, 0, &nmbclusters, 0,
		       CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mblowat",
		       SYSCTL_DESCR("mbuf low water mark"),
		       sysctl_kern_mbuf, 0, &mblowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mcllowat",
		       SYSCTL_DESCR("mbuf cluster low water mark"),
		       sysctl_kern_mbuf, 0, &mcllowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "stats",
		       SYSCTL_DESCR("mbuf allocation statistics"),
		       NULL, 0, &mbstat, sizeof(mbstat),
		       CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "mowners",
		       SYSCTL_DESCR("Information about mbuf owners"),
		       sysctl_kern_mbuf_mowners, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}

static void *
mclpool_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage(mb_map, waitok));
}

static void
mclpool_release(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(mb_map, (vaddr_t)v);
}

/*ARGSUSED*/
static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return (0);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	const struct protosw *pr;
	struct ifnet *ifp;
	int s = splvm();

	DOMAIN_FOREACH(dp) {
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	}
	IFNET_FOREACH(ifp) {
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	}
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}

struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

void
m_clget(struct mbuf *m, int nowait)
{

	MCLGET(m, nowait);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}

#ifdef MBUFTRACE
/*
 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain.
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	for (; m != NULL; m = m->m_next)
		MCLAIM(m, mo);
}
#endif
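/*
 * Illustrative sketch (not compiled here): a typical pattern a caller might
 * follow with the allocation routines above -- grab a packet header mbuf,
 * attach a cluster when the payload will not fit in MHLEN, copy the data
 * in, and release the chain on error.  The function and its arguments are
 * hypothetical, and paylen is assumed to be at most MCLBYTES.
 */
#if 0
static struct mbuf *
example_pkt_alloc(const void *payload, int paylen)
{
	struct mbuf *m;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	if (paylen > MHLEN) {
		m_clget(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_freem(m);		/* no cluster available */
			return (NULL);
		}
	}
	memcpy(mtod(m, caddr_t), payload, paylen);
	m->m_len = m->m_pkthdr.len = paylen;
	return (m);
}
#endif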
/*
 * Mbuf utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(mn, m);
	} else {
		MCLAIM(mn, m->m_owner);
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 0);	/* shallow copy on M_EXT */
}

struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 1);	/* deep copy */
}

static struct mbuf *
m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym: m == 0, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym: m == 0, len %d [!COPYALL]",
				    len);
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		MCLAIM(n, m->m_owner);
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				n->m_ext = m->m_ext;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * we are unsure how m was allocated.
				 * copy into multiple MCLBYTES cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (NULL);
}
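/*
 * Illustrative sketch (not compiled here): the practical difference between
 * the two entry points above.  m_copym() shares M_EXT storage by reference,
 * so the copy must be treated as read-only; m_dup() pays for a private,
 * writable copy.  The function is hypothetical; m_freem(NULL) is safe.
 */
#if 0
static void
example_copies(struct mbuf *m0)
{
	struct mbuf *ro, *rw;

	ro = m_copym(m0, 0, M_COPYALL, M_DONTWAIT);	/* shares clusters */
	rw = m_dup(m0, 0, M_COPYALL, M_DONTWAIT);	/* private copy */
	m_freem(ro);
	m_freem(rw);
}
#endif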
/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	MCLAIM(n, m->m_owner);
	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		MCLAIM(o, m->m_owner);
		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return NULL;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *vp)
{
	unsigned count;
	caddr_t cp = vp;

	if (off < 0 || len < 0)
		panic("m_copydata: off %d, len %d", off, len);
	while (off > 0) {
		if (m == NULL)
			panic("m_copydata: m == NULL, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: m == NULL, len %d", len);
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Concatenate mbuf chain n to m.
 * n may be copied into m (when n->m_len is small), so the data portion
 * of n can end up in an mbuf of a different mbuf type.
 * The m_pkthdr of m, if any, is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{

	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
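/*
 * Illustrative sketch (not compiled here): m_copydata() is the safe way to
 * read a header that may be scattered across several mbufs, without
 * reshaping the chain the way m_pullup() does.  Assumes <netinet/ip.h> for
 * struct ip; the function is hypothetical.
 */
#if 0
static int
example_read_ip(struct mbuf *m, struct ip *dst)
{

	if (m->m_pkthdr.len < (int)sizeof(*dst))
		return (EINVAL);
	m_copydata(m, 0, sizeof(*dst), dst);	/* private contiguous copy */
	return (0);
}
#endif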
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		MCLAIM(m, n->m_owner);
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_MOVE_PKTHDR(m, n);
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (NULL);
}

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(m, n);
	}
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}
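/*
 * Illustrative sketch (not compiled here): the classic m_pullup() idiom
 * used before casting mtod() to a header structure.  m_pullup() frees the
 * chain on failure, so the caller must not touch the old pointer
 * afterwards.  Assumes <netinet/ip.h> for struct ip; the function is
 * hypothetical.
 */
#if 0
static struct ip *
example_ip_hdr(struct mbuf **mp)
{
	struct mbuf *m = *mp;

	if (m->m_len < (int)sizeof(struct ip) &&
	    (m = m_pullup(m, sizeof(struct ip))) == NULL) {
		*mp = NULL;	/* chain already freed by m_pullup() */
		return (NULL);
	}
	*mp = m;
	return (mtod(m, struct ip *));
}
#endif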
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{

	return m_split0(m0, len0, wait, 1);
}

static struct mbuf *
m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (NULL);
	remain = m->m_len - len;
	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m0->m_owner);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m->m_owner);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(const void *from, void *to, size_t len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(uint16_t);
		totlen -= 2 * sizeof(uint16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (NULL);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (size_t)len);
		else
			memcpy(mtod(m, caddr_t), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
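/*
 * Illustrative sketch (not compiled here): carving a packet in two, e.g.
 * to process a header and its payload separately.  On failure m_split()
 * leaves m0 restored and still owned by the caller.  The function and
 * "hdrlen" are hypothetical.
 */
#if 0
static struct mbuf *
example_detach_payload(struct mbuf *m0, int hdrlen)
{
	struct mbuf *tail;

	tail = m_split(m0, hdrlen, M_DONTWAIT);
	/* m0 now holds the first hdrlen bytes; tail holds the rest */
	return (tail);		/* NULL if no mbufs were available */
}
#endif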
/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif /* defined(DEBUG) */

	if (m0 == NULL)
		return;

#if defined(DEBUG)
	error =
#endif /* defined(DEBUG) */
	m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif /* defined(DEBUG) */
}

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}

/*
 * m_makewritable: ensure the specified range is writable.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	struct mbuf *n;
	int origlen, reslen;

	origlen = m_length(*mp);
#endif /* defined(DEBUG) */

#if 0 /* M_COPYALL is large enough */
	if (len == M_COPYALL)
		len = m_length(*mp) - off; /* XXX */
#endif

	error = m_copyback0(mp, off, len, NULL,
	    M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);

#if defined(DEBUG)
	reslen = 0;
	for (n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsistent");
#endif /* defined(DEBUG) */

	return error;
}
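/*
 * Illustrative sketch (not compiled here): a caller that wants to rewrite
 * a header in place (NAT-style) would first force the range writable,
 * since the chain may reference shared, read-only clusters.  Assumes
 * <netinet/ip.h> for struct ip, and that the first mbuf already holds the
 * whole header contiguously (see m_pullup() above); the function is
 * hypothetical.
 */
#if 0
static int
example_decrement_ttl(struct mbuf **mp)
{
	struct ip *ip;
	int error;

	error = m_makewritable(mp, 0, sizeof(struct ip), M_DONTWAIT);
	if (error)
		return (error);
	ip = mtod(*mp, struct ip *);
	ip->ip_ttl--;
	return (0);
}
#endif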
int
m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
    int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);

	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;
			n = m_getclr(how, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			char *datap;
			int eatlen;

			/*
			 * this mbuf is read-only.
			 * allocate a new writable mbuf and try again.
			 */

#if defined(DIAGNOSTIC)
			if ((flags & M_COPYBACK0_COW) == 0)
				panic("m_copyback0: read-only");
#endif /* defined(DIAGNOSTIC) */

			/*
			 * if we're going to write into the middle of
			 * a mbuf, split it first.
			 */
			if (off > 0 && len < mlen) {
				n = m_split0(m, off, how, 0);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf.  copy packet header if needed.
			 */
			MGET(n, how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				M_MOVE_PKTHDR(n, m);
				n->m_len = MHLEN;
			} else {
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * free the region which has been overwritten,
			 * copying data from the old mbufs if requested.
			 */
			if (flags & M_COPYBACK0_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			KDASSERT(off == 0 || eatlen >= mlen);
			if (off > 0) {
				KDASSERT(len >= mlen);
				m->m_len = off;
				m->m_next = n;
				if (datap) {
					m_copydata(m, off, mlen, datap);
					datap += mlen;
				}
				eatlen -= mlen;
				mp = &m->m_next;
				m = m->m_next;
			}
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = min(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = min(mlen, len);
		if (flags & M_COPYBACK0_COPYBACK) {
			memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;
			n = m_get(how, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
out:	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return 0;

enobufs:
	return ENOBUFS;
}

void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;

	from->m_flags &= ~M_PKTHDR;
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, caddr_t, unsigned int), void *arg)
{
	unsigned int count;
	int rval;

	KASSERT(len >= 0);
	KASSERT(off >= 0);

	while (off > 0) {
		KASSERT(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL);
		count = min(m->m_len - off, len);

		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}
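/*
 * Illustrative sketch (not compiled here): m_apply() presents each
 * contiguous segment to the callback in turn, which suits incremental work
 * such as checksumming without forcing the data contiguous first.  Both
 * functions are hypothetical.
 */
#if 0
static int
example_sum_cb(void *arg, caddr_t data, unsigned int len)
{
	uint32_t *sum = arg;
	unsigned int i;

	for (i = 0; i < len; i++)
		*sum += (unsigned char)data[i];
	return (0);		/* returning non-zero aborts the walk */
}

static uint32_t
example_chain_sum(struct mbuf *m)
{
	uint32_t sum = 0;

	(void)m_apply(m, 0, m->m_pkthdr.len, example_sum_cb, &sum);
	return (sum);
}
#endif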
/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else
					return (NULL);
			} else
				m = m->m_next;
		}
	}

	return (NULL);
}
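/*
 * Illustrative sketch (not compiled here): translating a chain-relative
 * byte position into an (mbuf, offset) pair, as IPsec-style code does when
 * it must operate at a given location in a packet.  The function is
 * hypothetical.
 */
#if 0
static u_char *
example_byte_at(struct mbuf *m, int loc)
{
	struct mbuf *n;
	int off;

	n = m_getptr(m, loc, &off);
	if (n == NULL)
		return (NULL);	/* loc lies beyond the chain */
	return (mtod(n, u_char *) + off);
}
#endif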