/*	uipc_mbuf.c,v 1.84 2004/07/21 12:06:46 yamt Exp	*/

/*-
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "uipc_mbuf.c,v 1.84 2004/07/21 12:06:46 yamt Exp");

#include "opt_mbuftrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

#include <uvm/uvm.h>

struct pool mbpool;		/* mbuf pool */
struct pool mclpool;		/* mbuf cluster pool */

struct pool_cache mbpool_cache;
struct pool_cache mclpool_cache;

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static int mb_ctor(void *, void *, int);

void *mclpool_alloc(struct pool *, int);
void mclpool_release(struct pool *, void *);

struct pool_allocator mclpool_allocator = {
	mclpool_alloc, mclpool_release, 0,
};

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */

const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase NMBCLUSTERS";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	{ "unknown", "free" },
	{ "unknown", "data" },
	{ "unknown", "header" },
	{ "unknown", "soname" },
	{ "unknown", "soopts" },
	{ "unknown", "ftable" },
	{ "unknown", "control" },
	{ "unknown", "oobdata" },
};
struct mowner revoked_mowner = { "revoked", "" };
#endif

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

	KASSERT(sizeof(struct _m_ext) <= MHLEN);
	KASSERT(sizeof(struct mbuf) == MSIZE);

	pool_init(&mbpool, msize, 0, 0, 0, "mbpl", NULL);
	pool_init(&mclpool, mclbytes, 0, 0, 0, "mclpl", &mclpool_allocator);

	pool_set_drain_hook(&mbpool, m_reclaim, NULL);
	pool_set_drain_hook(&mclpool, m_reclaim, NULL);

	pool_cache_init(&mbpool_cache, &mbpool, mb_ctor, NULL, NULL);
	pool_cache_init(&mclpool_cache, &mclpool, NULL, NULL, NULL);
	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the limit
	 * reached message at most once a minute.
	 */
	pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60);

	/*
	 * Set a low water mark for both mbufs and clusters.  This should
	 * help ensure that they can be allocated in a memory starvation
	 * situation.  This is important for e.g. diskless systems which
	 * must allocate mbufs in order for the pagedaemon to clean pages.
	 */
	pool_setlowat(&mbpool, mblowat);
	pool_setlowat(&mclpool, mcllowat);

#ifdef MBUFTRACE
	{
		/*
		 * Attach the unknown mowners.
		 */
		int i;
		MOWNER_ATTACH(&revoked_mowner);
		for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
		     i-- > 0; )
			MOWNER_ATTACH(&unknown_mowners[i]);
	}
#endif
}

/*
 * sysctl helper routine for the kern.mbuf subtree.  nmbclusters may
 * or may not be writable, and mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
	int error, newval;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	switch (rnode->sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (mb_map != NULL) {
			node.sysctl_flags &= ~CTLFLAG_READWRITE;
			node.sysctl_flags |= CTLFLAG_READONLY;
		}
		/* FALLTHROUGH */
	case MBUF_MBLOWAT:
	case MBUF_MCLLOWAT:
		newval = *(int*)rnode->sysctl_data;
		break;
	default:
		return (EOPNOTSUPP);
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);
	if (newval < 0)
		return (EINVAL);

	switch (node.sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (newval < nmbclusters)
			return (EINVAL);
		nmbclusters = newval;
		pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60);
		break;
	case MBUF_MBLOWAT:
		mblowat = newval;
		pool_setlowat(&mbpool, mblowat);
		break;
	case MBUF_MCLLOWAT:
		mcllowat = newval;
		pool_setlowat(&mclpool, mcllowat);
		break;
	}

	return (0);
}

#ifdef MBUFTRACE
static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *mo;
	size_t len = 0;
	int error = 0;

	if (namelen != 0)
		return (EINVAL);
	if (newp != NULL)
		return (EPERM);

	LIST_FOREACH(mo, &mowners, mo_link) {
		if (oldp != NULL) {
			if (*oldlenp - len < sizeof(*mo)) {
				error = ENOMEM;
				break;
			}
			error = copyout(mo, (caddr_t) oldp + len,
			    sizeof(*mo));
			if (error)
				break;
		}
		len += sizeof(*mo);
	}

	if (error == 0)
		*oldlenp = len;

	return (error);
}
#endif /* MBUFTRACE */
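/*
 * Illustrative sketch (ours, not part of the original source): how a
 * userland program might read the kern.mbuf.stats node registered in
 * sysctl_kern_mbuf_setup() below.  Error handling is minimal and the
 * snippet lives in userland, not in this file.
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/mbuf.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *
 *	struct mbstat mbs;
 *	size_t len = sizeof(mbs);
 *	int mib[3] = { CTL_KERN, KERN_MBUF, MBUF_STATS };
 *
 *	if (sysctl(mib, 3, &mbs, &len, NULL, 0) == -1)
 *		err(1, "sysctl kern.mbuf.stats");
 *	printf("drains: %lu\n", (unsigned long)mbs.m_drain);
 */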
SYSCTL_SETUP(sysctl_kern_mbuf_setup, "sysctl kern.mbuf subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "kern", NULL,
		       NULL, 0, NULL, 0,
		       CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "mbuf",
		       SYSCTL_DESCR("mbuf control variables"),
		       NULL, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "msize",
		       SYSCTL_DESCR("mbuf base size"),
		       NULL, msize, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "mclbytes",
		       SYSCTL_DESCR("mbuf cluster size"),
		       NULL, mclbytes, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "nmbclusters",
		       SYSCTL_DESCR("Limit on the number of mbuf clusters"),
		       sysctl_kern_mbuf, 0, &nmbclusters, 0,
		       CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mblowat",
		       SYSCTL_DESCR("mbuf low water mark"),
		       sysctl_kern_mbuf, 0, &mblowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mcllowat",
		       SYSCTL_DESCR("mbuf cluster low water mark"),
		       sysctl_kern_mbuf, 0, &mcllowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "stats",
		       SYSCTL_DESCR("mbuf allocation statistics"),
		       NULL, 0, &mbstat, sizeof(mbstat),
		       CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "mowners",
		       SYSCTL_DESCR("Information about mbuf owners"),
		       sysctl_kern_mbuf_mowners, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}

void *
mclpool_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage1(mb_map, NULL, waitok));
}

void
mclpool_release(struct pool *pp, void *v)
{

	uvm_km_free_poolpage1(mb_map, (vaddr_t)v);
}

/*ARGSUSED*/
static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return (0);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	const struct protosw *pr;
	struct ifnet *ifp;
	int s = splvm();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}

struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

void
m_clget(struct mbuf *m, int nowait)
{

	MCLGET(m, nowait);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}

#ifdef MBUFTRACE
/*
 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain.
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	for (; m != NULL; m = m->m_next)
		MCLAIM(m, mo);
}
#endif
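/*
 * Illustrative sketch (ours, not part of the original source): typical
 * use of the allocation routines above to build a packet, attaching a
 * cluster when the payload will not fit in MHLEN.  Compiled out;
 * "example_alloc_pkt" is a hypothetical name, and paylen is assumed to
 * be at most MCLBYTES.
 */
#if 0
static struct mbuf *
example_alloc_pkt(int paylen)
{
	struct mbuf *m;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	if (paylen > MHLEN) {
		m_clget(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_freem(m);	/* cluster allocation failed */
			return (NULL);
		}
	}
	m->m_len = m->m_pkthdr.len = paylen;
	return (m);
}
#endif /* 0 */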
/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m_tag_delete_chain(m, NULL);
		m->m_flags &= ~M_PKTHDR;
	} else {
		MCLAIM(mn, m->m_owner);
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 0);	/* shallow copy on M_EXT */
}

struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 1);	/* deep copy */
}

static struct mbuf *
m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym: m == 0, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym: m == 0, len %d [!COPYALL]",
				    len);
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		MCLAIM(n, m->m_owner);
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				n->m_ext = m->m_ext;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * we are unsure about the way m was allocated.
				 * copy into multiple MCLBYTES cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (NULL);
}
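/*
 * Illustrative sketch (ours, not part of the original source): the
 * difference between m_copym() and m_dup() on a cluster-backed chain.
 * m_copym() shares external storage via MCLADDREFERENCE(), so the copy
 * is cheap but must be treated as read-only; m_dup() copies the payload
 * into fresh clusters.  Compiled out; names are hypothetical.
 */
#if 0
static void
example_copy_vs_dup(struct mbuf *m)
{
	struct mbuf *shallow, *deep;

	shallow = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
	deep = m_dup(m, 0, M_COPYALL, M_DONTWAIT);
	/*
	 * "shallow" references the same external storage as "m"; a
	 * write through it would be visible in "m" as well, which is
	 * why writers go through m_makewritable() below.  "deep" owns
	 * private storage and may be modified freely.
	 */
	m_freem(shallow);	/* m_freem(NULL) is a no-op on failure */
	m_freem(deep);
}
#endif /* 0 */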
/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	MCLAIM(n, m->m_owner);
	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		MCLAIM(o, m->m_owner);
		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return NULL;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *vp)
{
	unsigned count;
	char *cp = vp;

	if (off < 0 || len < 0)
		panic("m_copydata: off %d, len %d", off, len);
	while (off > 0) {
		if (m == 0)
			panic("m_copydata: m == 0, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == 0)
			panic("m_copydata: m == 0, len %d", len);
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Concatenate mbuf chain n to m.
 * n may be copied into m (when n->m_len is small), so the data portion
 * of n can end up in an mbuf of a different mbuf type.
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{

	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

/*
 * Trim req_len bytes from the mbuf chain: from the head if req_len is
 * positive, from the tail if it is negative.
 */
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}
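/*
 * Illustrative sketch (ours, not part of the original source): m_adj()
 * trims from the head for positive lengths and from the tail for
 * negative lengths, e.g. stripping a link-level header and a trailing
 * checksum.  Compiled out; the 14- and 4-byte sizes are hypothetical.
 */
#if 0
static void
example_strip(struct mbuf *m)
{

	m_adj(m, 14);	/* drop a 14-byte header from the front */
	m_adj(m, -4);	/* drop a 4-byte trailer from the end */
}
#endif /* 0 */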
/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		MCLAIM(m, n->m_owner);
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			m_tag_delete_chain(n, NULL);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (NULL);
}

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(m, n);
		m_tag_delete_chain(n, NULL);
		n->m_flags &= ~M_PKTHDR;
	}
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}
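/*
 * Illustrative sketch (ours, not part of the original source): the
 * classic m_pullup() idiom before dereferencing a protocol header, as
 * protocol input routines do.  "struct example_hdr" and the function
 * name are hypothetical.  Compiled out.
 */
#if 0
struct example_hdr {
	u_int16_t eh_type;
	u_int16_t eh_len;
};

static int
example_input(struct mbuf *m)
{
	struct example_hdr *eh;

	if (m->m_len < sizeof(*eh) &&
	    (m = m_pullup(m, sizeof(*eh))) == NULL)
		return (ENOBUFS);	/* m_pullup() freed the chain */
	eh = mtod(m, struct example_hdr *);
	/* ... the fields of *eh are now contiguous ... */
	m_freem(m);
	return (0);
}
#endif /* 0 */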
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{

	return m_split0(m0, len0, wait, 1);
}

static struct mbuf *
m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (NULL);
	remain = m->m_len - len;
	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m0->m_owner);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m->m_owner);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
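/*
 * Illustrative sketch (ours, not part of the original source): splitting
 * a packet after "hdrlen" bytes, e.g. to separate a header from its
 * payload.  On success the original chain keeps the first hdrlen bytes
 * (with an adjusted packet header, if present); on failure the chain is
 * left intact.  Compiled out; names are hypothetical.
 */
#if 0
static int
example_split(struct mbuf *m, int hdrlen, struct mbuf **tailp)
{

	*tailp = m_split(m, hdrlen, M_DONTWAIT);
	if (*tailp == NULL)
		return (ENOBUFS);	/* "m" is still whole */
	return (0);
}
#endif /* 0 */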
/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(const void *from, void *to, size_t len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(u_int16_t);
		totlen -= 2 * sizeof(u_int16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (NULL);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (size_t)len);
		else
			memcpy(mtod(m, caddr_t), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif /* defined(DEBUG) */

	if (m0 == NULL)
		return;

#if defined(DEBUG)
	error =
#endif /* defined(DEBUG) */
	m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif /* defined(DEBUG) */
}

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}

/*
 * m_makewritable: ensure the specified range is writable.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	struct mbuf *n;
	int origlen, reslen;

	origlen = m_length(*mp);
#endif /* defined(DEBUG) */

#if 0 /* M_COPYALL is large enough */
	if (len == M_COPYALL)
		len = m_length(*mp) - off; /* XXX */
#endif

	error = m_copyback0(mp, off, len, NULL,
	    M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);

#if defined(DEBUG)
	reslen = 0;
	for (n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsist");
#endif /* defined(DEBUG) */

	return error;
}
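/*
 * Illustrative sketch (ours, not part of the original source): ensuring
 * a region is writable before patching it in place, e.g. before updating
 * a checksum field.  m_makewritable() may replace mbufs in the chain
 * (copy-on-write), which is why the chain is passed by reference.
 * Compiled out; the offset and field are hypothetical.
 */
#if 0
static int
example_patch(struct mbuf **mp)
{
	u_int16_t cksum = 0;

	if (m_makewritable(mp, 0, sizeof(cksum), M_DONTWAIT))
		return (ENOBUFS);
	/* the first sizeof(cksum) bytes may now be overwritten */
	m_copyback(*mp, 0, sizeof(cksum), &cksum);
	return (0);
}
#endif /* 0 */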
int
m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
    int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);

	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;
			n = m_getclr(how, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			char *datap;
			int eatlen;

			/*
			 * this mbuf is read-only.
			 * allocate a new writable mbuf and try again.
			 */

#if defined(DIAGNOSTIC)
			if ((flags & M_COPYBACK0_COW) == 0)
				panic("m_copyback0: read-only");
#endif /* defined(DIAGNOSTIC) */

			/*
			 * if we're going to write into the middle of
			 * a mbuf, split it first.
			 */
			if (off > 0 && len < mlen) {
				n = m_split0(m, off, how, 0);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf.  copy packet header if needed.
			 */
			MGET(n, how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				/* XXX M_MOVE_PKTHDR */
				M_COPY_PKTHDR(n, m);
				n->m_len = MHLEN;
			} else {
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * free the region which has been overwritten.
			 * copying data from old mbufs if requested.
			 */
			if (flags & M_COPYBACK0_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			KDASSERT(off == 0 || eatlen >= mlen);
			if (off > 0) {
				KDASSERT(len >= mlen);
				m->m_len = off;
				m->m_next = n;
				if (datap) {
					m_copydata(m, off, mlen, datap);
					datap += mlen;
				}
				eatlen -= mlen;
				mp = &m->m_next;
				m = m->m_next;
			}
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = min(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = min(mlen, len);
		if (flags & M_COPYBACK0_COPYBACK) {
			memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;
			n = m_get(how, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
out:	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return 0;

enobufs:
	return ENOBUFS;
}
/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, caddr_t, unsigned int), void *arg)
{
	unsigned int count;
	int rval;

	KASSERT(len >= 0);
	KASSERT(off >= 0);

	while (off > 0) {
		KASSERT(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL);
		count = min(m->m_len - off, len);

		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else
					return (NULL);
			} else
				m = m->m_next;
		}
	}

	return (NULL);
}
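/*
 * Illustrative sketch (ours, not part of the original source): using
 * m_apply() to walk packet data without forcing it contiguous, here
 * summing bytes as a stand-in for a real checksum.  Compiled out;
 * names are hypothetical.
 */
#if 0
static int
example_sum_cb(void *arg, caddr_t data, unsigned int len)
{
	u_int32_t *sump = arg;

	while (len-- > 0)
		*sump += (u_char)*data++;
	return (0);	/* nonzero would abort the walk */
}

static u_int32_t
example_sum(struct mbuf *m, int off, int len)
{
	u_int32_t sum = 0;

	(void)m_apply(m, off, len, example_sum_cb, &sum);
	return (sum);
}
#endif /* 0 */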