/*	uipc_mbuf.c,v 1.84 2004/07/21 12:06:46 yamt Exp	*/

/*-
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "uipc_mbuf.c,v 1.84 2004/07/21 12:06:46 yamt Exp");

#include "opt_mbuftrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

#include <uvm/uvm.h>

struct pool mbpool;		/* mbuf pool */
struct pool mclpool;		/* mbuf cluster pool */

struct pool_cache mbpool_cache;
struct pool_cache mclpool_cache;

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static int mb_ctor(void *, void *, int);

void *mclpool_alloc(struct pool *, int);
void mclpool_release(struct pool *, void *);

struct pool_allocator mclpool_allocator = {
	mclpool_alloc, mclpool_release, 0,
};

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */

const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase NMBCLUSTERS";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	{ "unknown", "free" },
	{ "unknown", "data" },
	{ "unknown", "header" },
	{ "unknown", "soname" },
	{ "unknown", "soopts" },
	{ "unknown", "ftable" },
	{ "unknown", "control" },
	{ "unknown", "oobdata" },
};
struct mowner revoked_mowner = { "revoked", "" };
#endif

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

	KASSERT(sizeof(struct _m_ext) <= MHLEN);
	KASSERT(sizeof(struct mbuf) == MSIZE);

	pool_init(&mbpool, msize, 0, 0, 0, "mbpl", NULL);
	pool_init(&mclpool, mclbytes, 0, 0, 0, "mclpl", &mclpool_allocator);

	pool_set_drain_hook(&mbpool, m_reclaim, NULL);
	pool_set_drain_hook(&mclpool, m_reclaim, NULL);

	pool_cache_init(&mbpool_cache, &mbpool, mb_ctor, NULL, NULL);
	pool_cache_init(&mclpool_cache, &mclpool, NULL, NULL, NULL);

	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the limit
	 * reached message at most once a minute.
	 */
	pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60);

	/*
	 * Set a low water mark for both mbufs and clusters.  This should
	 * help ensure that they can be allocated in a memory starvation
	 * situation.  This is important for e.g. diskless systems which
	 * must allocate mbufs in order for the pagedaemon to clean pages.
	 */
	pool_setlowat(&mbpool, mblowat);
	pool_setlowat(&mclpool, mcllowat);

#ifdef MBUFTRACE
	{
		/*
		 * Attach the unknown mowners.
		 */
		int i;
		MOWNER_ATTACH(&revoked_mowner);
		for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
		     i-- > 0; )
			MOWNER_ATTACH(&unknown_mowners[i]);
	}
#endif
}
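
/*
 * Usage sketch (illustrative, not part of the original code): the limits
 * initialized above can be inspected and tuned at run time through the
 * kern.mbuf sysctl subtree created below, e.g. from userland:
 *
 *	sysctl kern.mbuf.mblowat		(read the mbuf low water mark)
 *	sysctl -w kern.mbuf.nmbclusters=4096	(raise the cluster limit)
 *
 * nmbclusters may only be raised, never lowered, and becomes read-only
 * once mb_map exists; mblowat and mcllowat are range-checked and pushed
 * back into the pools.  See sysctl_kern_mbuf() below.
 */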

/*
 * sysctl helper routine for the kern.mbuf subtree.  nmbclusters may
 * or may not be writable, and mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
	int error, newval;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	switch (rnode->sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (mb_map != NULL) {
			node.sysctl_flags &= ~CTLFLAG_READWRITE;
			node.sysctl_flags |= CTLFLAG_READONLY;
		}
		/* FALLTHROUGH */
	case MBUF_MBLOWAT:
	case MBUF_MCLLOWAT:
		newval = *(int *)rnode->sysctl_data;
		break;
	default:
		return (EOPNOTSUPP);
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);
	if (newval < 0)
		return (EINVAL);

	switch (node.sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (newval < nmbclusters)
			return (EINVAL);
		nmbclusters = newval;
		pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60);
		break;
	case MBUF_MBLOWAT:
		mblowat = newval;
		pool_setlowat(&mbpool, mblowat);
		break;
	case MBUF_MCLLOWAT:
		mcllowat = newval;
		pool_setlowat(&mclpool, mcllowat);
		break;
	}

	return (0);
}

#ifdef MBUFTRACE
static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *mo;
	size_t len = 0;
	int error = 0;

	if (namelen != 0)
		return (EINVAL);
	if (newp != NULL)
		return (EPERM);

	LIST_FOREACH(mo, &mowners, mo_link) {
		if (oldp != NULL) {
			if (*oldlenp - len < sizeof(*mo)) {
				error = ENOMEM;
				break;
			}
			error = copyout(mo, (caddr_t)oldp + len,
			    sizeof(*mo));
			if (error)
				break;
		}
		len += sizeof(*mo);
	}

	if (error == 0)
		*oldlenp = len;

	return (error);
}
#endif /* MBUFTRACE */

SYSCTL_SETUP(sysctl_kern_mbuf_setup, "sysctl kern.mbuf subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "kern", NULL,
		       NULL, 0, NULL, 0,
		       CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "mbuf",
		       SYSCTL_DESCR("mbuf control variables"),
		       NULL, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "msize",
		       SYSCTL_DESCR("mbuf base size"),
		       NULL, msize, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "mclbytes",
		       SYSCTL_DESCR("mbuf cluster size"),
		       NULL, mclbytes, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "nmbclusters",
		       SYSCTL_DESCR("Limit on the number of mbuf clusters"),
		       sysctl_kern_mbuf, 0, &nmbclusters, 0,
		       CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mblowat",
		       SYSCTL_DESCR("mbuf low water mark"),
		       sysctl_kern_mbuf, 0, &mblowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mcllowat",
		       SYSCTL_DESCR("mbuf cluster low water mark"),
		       sysctl_kern_mbuf, 0, &mcllowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "stats",
		       SYSCTL_DESCR("mbuf allocation statistics"),
		       NULL, 0, &mbstat, sizeof(mbstat),
		       CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "mowners",
		       SYSCTL_DESCR("Information about mbuf owners"),
		       sysctl_kern_mbuf_mowners, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}
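
/*
 * Usage sketch for the MBUFTRACE facility (illustrative; the "foo"
 * driver below is hypothetical): a subsystem declares a struct mowner,
 * attaches it once at initialization time, and claims the mbufs it
 * allocates:
 *
 *	static struct mowner foo_mowner = { "foo", "rx" };
 *
 *	MOWNER_ATTACH(&foo_mowner);	(at attach time)
 *	MCLAIM(m, &foo_mowner);		(per allocated mbuf)
 *
 * The per-owner counters are then visible through the kern.mbuf.mowners
 * node handled by sysctl_kern_mbuf_mowners() above.
 */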

void *
mclpool_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage1(mb_map, NULL, waitok));
}

void
mclpool_release(struct pool *pp, void *v)
{

	uvm_km_free_poolpage1(mb_map, (vaddr_t)v);
}

/*ARGSUSED*/
static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return (0);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	const struct protosw *pr;
	struct ifnet *ifp;
	int s = splvm();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}

struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

void
m_clget(struct mbuf *m, int nowait)
{

	MCLGET(m, nowait);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}

#ifdef MBUFTRACE
/*
 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain.
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	for (; m != NULL; m = m->m_next)
		MCLAIM(m, mo);
}
#endif
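
/*
 * Usage sketch (illustrative): the classic idiom for obtaining a packet
 * header mbuf with external cluster storage from the routines above:
 *
 *	struct mbuf *m;
 *
 *	MGETHDR(m, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	MCLGET(m, M_DONTWAIT);
 *	if ((m->m_flags & M_EXT) == 0) {
 *		m_freem(m);
 *		return (ENOBUFS);
 *	}
 *
 * Note that MCLGET() does not report failure directly; callers must
 * check M_EXT afterwards, as m_devget() below does.
 */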

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m_tag_delete_chain(m, NULL);
		m->m_flags &= ~M_PKTHDR;
	} else {
		MCLAIM(mn, m->m_owner);
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
 * the mbuf chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT
 * from caller.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 0);	/* shallow copy on M_EXT */
}

struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 1);	/* deep copy */
}

static struct mbuf *
m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym: m == 0");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym: m == 0 and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		MCLAIM(n, m->m_owner);
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				n->m_ext = m->m_ext;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * we are unsure about the way m was allocated.
				 * copy into multiple MCLBYTES cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (NULL);
}
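
/*
 * Usage sketch (illustrative): m_copym() is the cheap, reference-counted
 * copy, e.g. for keeping data queued for retransmission while a copy is
 * handed to a driver:
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *
 * Cluster-backed data is then shared read-only between m and n via
 * MCLADDREFERENCE(); use m_dup() instead when the copy must be
 * independently writable.
 */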

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	MCLAIM(n, m->m_owner);
	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		MCLAIM(o, m->m_owner);
		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return NULL;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *vp)
{
	unsigned count;
	char *cp = vp;

	if (off < 0 || len < 0)
		panic("m_copydata");
	while (off > 0) {
		if (m == 0)
			panic("m_copydata");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == 0)
			panic("m_copydata");
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
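
/*
 * Usage sketch for m_copydata() (illustrative): copy a protocol header
 * that may be scattered across the chain into a contiguous local buffer,
 * assuming at least sizeof(struct ip) bytes are present:
 *
 *	struct ip ip;
 *
 *	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
 *
 * The chain must contain at least off + len bytes; m_copydata() panics
 * otherwise.
 */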

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), in which case data
 * from n could end up in an mbuf of a different type.
 * Both chains should therefore be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{

	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		KASSERT(n->m_len == 0 || m->m_type == n->m_type);
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		MCLAIM(m, n->m_owner);
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			m_tag_delete_chain(n, NULL);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (NULL);
}
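
/*
 * Usage sketch (illustrative): the canonical m_pullup() idiom from the
 * protocol input path, making the IP header contiguous before casting
 * mtod(), much as ip_input() does:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 *
 * On failure the chain has already been freed, so the caller must not
 * touch the old pointer.
 */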

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(m, n);
		m_tag_delete_chain(n, NULL);
		n->m_flags &= ~M_PKTHDR;
	}
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{

	return m_split0(m0, len0, wait, 1);
}

static struct mbuf *
m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (NULL);
	remain = m->m_len - len;
	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m0->m_owner);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m->m_owner);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
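
/*
 * Usage sketch for m_split() (illustrative; "reclen" is a hypothetical
 * record length): peel a fixed-size record off the front of a chain:
 *
 *	n = m_split(m, reclen, M_DONTWAIT);
 *	if (n == NULL)
 *		...	(m is left as it was; see the comment above)
 *
 * On success m holds the first reclen bytes and n the remainder; the
 * packet header length of m, if any, has been adjusted to reclen.
 */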

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(const void *from, void *to, size_t len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(u_int16_t);
		totlen -= 2 * sizeof(u_int16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (NULL);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (size_t)len);
		else
			memcpy(mtod(m, caddr_t), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif /* defined(DEBUG) */

	if (m0 == NULL)
		return;

#if defined(DEBUG)
	error =
#endif /* defined(DEBUG) */
	m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif /* defined(DEBUG) */
}

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}
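
/*
 * Usage sketch (illustrative; "sum" is a hypothetical checksum field):
 * m_copyback() writes into the existing chain, extending it if needed,
 * and expects it to be writable; m_copyback_cow() unshares read-only
 * mbufs first and never extends:
 *
 *	m = m_copyback_cow(m, off, sizeof(sum), (caddr_t)&sum, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);	(original chain already freed)
 */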

/*
 * m_makewritable: ensure the specified range is writable.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	struct mbuf *n;
	int origlen, reslen;

	origlen = m_length(*mp);
#endif /* defined(DEBUG) */

#if 0 /* M_COPYALL is large enough */
	if (len == M_COPYALL)
		len = m_length(*mp) - off; /* XXX */
#endif

	error = m_copyback0(mp, off, len, NULL,
	    M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);

#if defined(DEBUG)
	reslen = 0;
	for (n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsist");
#endif /* defined(DEBUG) */

	return error;
}
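
/*
 * Usage sketch (illustrative): ensure a header about to be modified is
 * not shared before writing through mtod():
 *
 *	if (m_makewritable(&m, 0, sizeof(struct ip), M_DONTWAIT))
 *		goto drop;
 *	ip = mtod(m, struct ip *);
 *	ip->ip_ttl--;
 *
 * Note that *mp may be replaced; always reload pointers into the chain
 * afterwards.
 */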

int
m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
    int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);

	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;
			n = m_getclr(how, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			char *datap;
			int eatlen;

			/*
			 * this mbuf is read-only.
			 * allocate a new writable mbuf and try again.
			 */

#if defined(DIAGNOSTIC)
			if ((flags & M_COPYBACK0_COW) == 0)
				panic("m_copyback0: read-only");
#endif /* defined(DIAGNOSTIC) */

			/*
			 * if we're going to write into the middle of
			 * a mbuf, split it first.
			 */
			if (off > 0 && len < mlen) {
				n = m_split0(m, off, how, 0);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf.  copy packet header if needed.
			 */
			MGET(n, how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				/* XXX M_MOVE_PKTHDR */
				M_COPY_PKTHDR(n, m);
				n->m_len = MHLEN;
			} else {
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * free the region which has been overwritten,
			 * copying data from the old mbufs if requested.
			 */
			if (flags & M_COPYBACK0_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			KDASSERT(off == 0 || eatlen >= mlen);
			if (off > 0) {
				KDASSERT(len >= mlen);
				m->m_len = off;
				m->m_next = n;
				if (datap) {
					m_copydata(m, off, mlen, datap);
					datap += mlen;
				}
				eatlen -= mlen;
				mp = &m->m_next;
				m = m->m_next;
			}
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = min(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = min(mlen, len);
		if (flags & M_COPYBACK0_COPYBACK) {
			memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;
			n = m_get(how, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
out:	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return 0;

enobufs:
	return ENOBUFS;
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, caddr_t, unsigned int), void *arg)
{
	unsigned int count;
	int rval;

	KASSERT(len >= 0);
	KASSERT(off >= 0);

	while (off > 0) {
		KASSERT(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL);
		count = min(m->m_len - off, len);

		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else
					return (NULL);
			} else
				m = m->m_next;
		}
	}

	return (NULL);
}
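
/*
 * Usage sketch for m_apply() above (illustrative; sumbytes() is a
 * hypothetical callback): run a function over a region of the chain
 * without forcing it contiguous, e.g. summing bytes:
 *
 *	static int
 *	sumbytes(void *arg, caddr_t data, unsigned int len)
 *	{
 *		u_int32_t *sump = arg;
 *
 *		while (len-- > 0)
 *			*sump += (u_char)*data++;
 *		return (0);
 *	}
 *
 *	u_int32_t sum = 0;
 *	(void)m_apply(m, off, len, sumbytes, &sum);
 *
 * A non-zero return from the callback aborts the walk and is passed
 * back to the caller.
 */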