1 /* $NetBSD: uipc_mbuf.c,v 1.148 2013/01/19 00:51:52 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.148 2013/01/19 00:51:52 rmind Exp $"); 66 67 #include "opt_mbuftrace.h" 68 #include "opt_nmbclusters.h" 69 #include "opt_ddb.h" 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/atomic.h> 74 #include <sys/cpu.h> 75 #include <sys/proc.h> 76 #include <sys/mbuf.h> 77 #include <sys/kernel.h> 78 #include <sys/syslog.h> 79 #include <sys/domain.h> 80 #include <sys/protosw.h> 81 #include <sys/percpu.h> 82 #include <sys/pool.h> 83 #include <sys/socket.h> 84 #include <sys/sysctl.h> 85 86 #include <net/if.h> 87 88 pool_cache_t mb_cache; /* mbuf cache */ 89 pool_cache_t mcl_cache; /* mbuf cluster cache */ 90 91 struct mbstat mbstat; 92 int max_linkhdr; 93 int max_protohdr; 94 int max_hdr; 95 int max_datalen; 96 97 static int mb_ctor(void *, void *, int); 98 99 static void sysctl_kern_mbuf_setup(void); 100 101 static struct sysctllog *mbuf_sysctllog; 102 103 static struct mbuf *m_copym0(struct mbuf *, int, int, int, int); 104 static struct mbuf *m_split0(struct mbuf *, int, int, int); 105 static int m_copyback0(struct mbuf **, int, int, const void *, int, int); 106 107 /* flags for m_copyback0 */ 108 #define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */ 109 #define M_COPYBACK0_PRESERVE 0x0002 /* preserve original data */ 110 #define M_COPYBACK0_COW 0x0004 /* do copy-on-write */ 111 #define M_COPYBACK0_EXTEND 0x0008 /* extend chain */ 112 113 static const char mclpool_warnmsg[] = 114 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; 115 116 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 117 118 static percpu_t *mbstat_percpu; 119 120 #ifdef MBUFTRACE 121 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 122 struct mowner unknown_mowners[] = { 123 MOWNER_INIT("unknown", "free"), 124 MOWNER_INIT("unknown", "data"), 125 MOWNER_INIT("unknown", "header"), 126 MOWNER_INIT("unknown", "soname"), 127 MOWNER_INIT("unknown", "soopts"), 128 MOWNER_INIT("unknown", "ftable"), 129 MOWNER_INIT("unknown", "control"), 130 MOWNER_INIT("unknown", "oobdata"), 131 }; 132 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 133 #endif 134 135 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 136 137 #define MCLADDREFERENCE(o, n) \ 138 do { \ 139 KASSERT(((o)->m_flags & M_EXT) != 0); \ 140 KASSERT(((n)->m_flags & M_EXT) == 0); \ 141 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 142 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 143 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 144 (n)->m_ext_ref = (o)->m_ext_ref; \ 145 mowner_ref((n), (n)->m_flags); \ 146 MCLREFDEBUGN((n), __FILE__, __LINE__); \ 147 } while (/* CONSTCOND */ 0) 148 149 static int 150 nmbclusters_limit(void) 151 { 152 #if defined(PMAP_MAP_POOLPAGE) 153 /* direct mapping, doesn't use space in kmem_arena */ 154 vsize_t max_size = physmem / 4; 155 #else 156 vsize_t max_size = MIN(physmem / 4, nkmempages / 4); 157 #endif 158 159 max_size = max_size * PAGE_SIZE / MCLBYTES; 160 #ifdef NMBCLUSTERS_MAX 161 max_size = MIN(max_size, NMBCLUSTERS_MAX); 162 #endif 163 164 #ifdef NMBCLUSTERS 165 return MIN(max_size, NMBCLUSTERS); 166 #else 167 return max_size; 168 #endif 169 } 170 171 /* 172 * Initialize the mbuf allocator. 173 */ 174 void 175 mbinit(void) 176 { 177 178 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 179 CTASSERT(sizeof(struct mbuf) == MSIZE); 180 181 sysctl_kern_mbuf_setup(); 182 183 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 184 NULL, IPL_VM, mb_ctor, NULL, NULL); 185 KASSERT(mb_cache != NULL); 186 187 mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL, 188 IPL_VM, NULL, NULL, NULL); 189 KASSERT(mcl_cache != NULL); 190 191 pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL); 192 pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL); 193 194 /* 195 * Set an arbitrary default limit on the number of mbuf clusters. 196 */ 197 #ifdef NMBCLUSTERS 198 nmbclusters = nmbclusters_limit(); 199 #else 200 nmbclusters = MAX(1024, 201 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 202 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 203 #endif 204 205 /* 206 * Set the hard limit on the mclpool to the number of 207 * mbuf clusters the kernel is to support. Log the limit 208 * reached message max once a minute. 209 */ 210 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 211 212 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 213 214 /* 215 * Set a low water mark for both mbufs and clusters. This should 216 * help ensure that they can be allocated in a memory starvation 217 * situation. This is important for e.g. diskless systems which 218 * must allocate mbufs in order for the pagedaemon to clean pages. 219 */ 220 pool_cache_setlowat(mb_cache, mblowat); 221 pool_cache_setlowat(mcl_cache, mcllowat); 222 223 #ifdef MBUFTRACE 224 { 225 /* 226 * Attach the unknown mowners. 227 */ 228 int i; 229 MOWNER_ATTACH(&revoked_mowner); 230 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 231 i-- > 0; ) 232 MOWNER_ATTACH(&unknown_mowners[i]); 233 } 234 #endif 235 } 236 237 /* 238 * sysctl helper routine for the kern.mbuf subtree. 239 * nmbclusters, mblowat and mcllowat need range 240 * checking and pool tweaking after being reset. 241 */ 242 static int 243 sysctl_kern_mbuf(SYSCTLFN_ARGS) 244 { 245 int error, newval; 246 struct sysctlnode node; 247 248 node = *rnode; 249 node.sysctl_data = &newval; 250 switch (rnode->sysctl_num) { 251 case MBUF_NMBCLUSTERS: 252 case MBUF_MBLOWAT: 253 case MBUF_MCLLOWAT: 254 newval = *(int*)rnode->sysctl_data; 255 break; 256 default: 257 return (EOPNOTSUPP); 258 } 259 260 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 261 if (error || newp == NULL) 262 return (error); 263 if (newval < 0) 264 return (EINVAL); 265 266 switch (node.sysctl_num) { 267 case MBUF_NMBCLUSTERS: 268 if (newval < nmbclusters) 269 return (EINVAL); 270 if (newval > nmbclusters_limit()) 271 return (EINVAL); 272 nmbclusters = newval; 273 pool_cache_sethardlimit(mcl_cache, nmbclusters, 274 mclpool_warnmsg, 60); 275 break; 276 case MBUF_MBLOWAT: 277 mblowat = newval; 278 pool_cache_setlowat(mb_cache, mblowat); 279 break; 280 case MBUF_MCLLOWAT: 281 mcllowat = newval; 282 pool_cache_setlowat(mcl_cache, mcllowat); 283 break; 284 } 285 286 return (0); 287 } 288 289 #ifdef MBUFTRACE 290 static void 291 mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 292 { 293 struct mowner_counter *mc = v1; 294 struct mowner_user *mo_user = v2; 295 int i; 296 297 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 298 mo_user->mo_counter[i] += mc->mc_counter[i]; 299 } 300 } 301 302 static void 303 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 304 { 305 306 memset(mo_user, 0, sizeof(*mo_user)); 307 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 308 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 309 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 310 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 311 percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user); 312 } 313 314 static int 315 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 316 { 317 struct mowner *mo; 318 size_t len = 0; 319 int error = 0; 320 321 if (namelen != 0) 322 return (EINVAL); 323 if (newp != NULL) 324 return (EPERM); 325 326 LIST_FOREACH(mo, &mowners, mo_link) { 327 struct mowner_user mo_user; 328 329 mowner_convert_to_user(mo, &mo_user); 330 331 if (oldp != NULL) { 332 if (*oldlenp - len < sizeof(mo_user)) { 333 error = ENOMEM; 334 break; 335 } 336 error = copyout(&mo_user, (char *)oldp + len, 337 sizeof(mo_user)); 338 if (error) 339 break; 340 } 341 len += sizeof(mo_user); 342 } 343 344 if (error == 0) 345 *oldlenp = len; 346 347 return (error); 348 } 349 #endif /* MBUFTRACE */ 350 351 static void 352 mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 353 { 354 struct mbstat_cpu *mbsc = v1; 355 struct mbstat *mbs = v2; 356 int i; 357 358 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 359 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 360 } 361 } 362 363 static void 364 mbstat_convert_to_user(struct mbstat *mbs) 365 { 366 367 memset(mbs, 0, sizeof(*mbs)); 368 mbs->m_drain = mbstat.m_drain; 369 percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs); 370 } 371 372 static int 373 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 374 { 375 struct sysctlnode node; 376 struct mbstat mbs; 377 378 mbstat_convert_to_user(&mbs); 379 node = *rnode; 380 node.sysctl_data = &mbs; 381 node.sysctl_size = sizeof(mbs); 382 return sysctl_lookup(SYSCTLFN_CALL(&node)); 383 } 384 385 static void 386 sysctl_kern_mbuf_setup(void) 387 { 388 389 KASSERT(mbuf_sysctllog == NULL); 390 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 391 CTLFLAG_PERMANENT, 392 CTLTYPE_NODE, "kern", NULL, 393 NULL, 0, NULL, 0, 394 CTL_KERN, CTL_EOL); 395 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 396 CTLFLAG_PERMANENT, 397 CTLTYPE_NODE, "mbuf", 398 SYSCTL_DESCR("mbuf control variables"), 399 NULL, 0, NULL, 0, 400 CTL_KERN, KERN_MBUF, CTL_EOL); 401 402 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 403 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 404 CTLTYPE_INT, "msize", 405 SYSCTL_DESCR("mbuf base size"), 406 NULL, msize, NULL, 0, 407 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 408 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 409 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 410 CTLTYPE_INT, "mclbytes", 411 SYSCTL_DESCR("mbuf cluster size"), 412 NULL, mclbytes, NULL, 0, 413 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 414 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 415 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 416 CTLTYPE_INT, "nmbclusters", 417 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 418 sysctl_kern_mbuf, 0, &nmbclusters, 0, 419 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 420 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 421 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 422 CTLTYPE_INT, "mblowat", 423 SYSCTL_DESCR("mbuf low water mark"), 424 sysctl_kern_mbuf, 0, &mblowat, 0, 425 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 426 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 427 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 428 CTLTYPE_INT, "mcllowat", 429 SYSCTL_DESCR("mbuf cluster low water mark"), 430 sysctl_kern_mbuf, 0, &mcllowat, 0, 431 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 432 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 433 CTLFLAG_PERMANENT, 434 CTLTYPE_STRUCT, "stats", 435 SYSCTL_DESCR("mbuf allocation statistics"), 436 sysctl_kern_mbuf_stats, 0, NULL, 0, 437 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 438 #ifdef MBUFTRACE 439 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 440 CTLFLAG_PERMANENT, 441 CTLTYPE_STRUCT, "mowners", 442 SYSCTL_DESCR("Information about mbuf owners"), 443 sysctl_kern_mbuf_mowners, 0, NULL, 0, 444 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 445 #endif /* MBUFTRACE */ 446 } 447 448 static int 449 mb_ctor(void *arg, void *object, int flags) 450 { 451 struct mbuf *m = object; 452 453 #ifdef POOL_VTOPHYS 454 m->m_paddr = POOL_VTOPHYS(m); 455 #else 456 m->m_paddr = M_PADDR_INVALID; 457 #endif 458 return (0); 459 } 460 461 void 462 m_reclaim(void *arg, int flags) 463 { 464 struct domain *dp; 465 const struct protosw *pr; 466 struct ifnet *ifp; 467 int s; 468 469 KERNEL_LOCK(1, NULL); 470 s = splvm(); 471 DOMAIN_FOREACH(dp) { 472 for (pr = dp->dom_protosw; 473 pr < dp->dom_protoswNPROTOSW; pr++) 474 if (pr->pr_drain) 475 (*pr->pr_drain)(); 476 } 477 IFNET_FOREACH(ifp) { 478 if (ifp->if_drain) 479 (*ifp->if_drain)(ifp); 480 } 481 splx(s); 482 mbstat.m_drain++; 483 KERNEL_UNLOCK_ONE(NULL); 484 } 485 486 /* 487 * Space allocation routines. 488 * These are also available as macros 489 * for critical paths. 490 */ 491 struct mbuf * 492 m_get(int nowait, int type) 493 { 494 struct mbuf *m; 495 496 KASSERT(type != MT_FREE); 497 498 m = pool_cache_get(mb_cache, 499 nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0); 500 if (m == NULL) 501 return NULL; 502 503 mbstat_type_add(type, 1); 504 mowner_init(m, type); 505 m->m_ext_ref = m; 506 m->m_type = type; 507 m->m_next = NULL; 508 m->m_nextpkt = NULL; 509 m->m_data = m->m_dat; 510 m->m_flags = 0; 511 512 return m; 513 } 514 515 struct mbuf * 516 m_gethdr(int nowait, int type) 517 { 518 struct mbuf *m; 519 520 m = m_get(nowait, type); 521 if (m == NULL) 522 return NULL; 523 524 m->m_data = m->m_pktdat; 525 m->m_flags = M_PKTHDR; 526 m->m_pkthdr.rcvif = NULL; 527 m->m_pkthdr.csum_flags = 0; 528 m->m_pkthdr.csum_data = 0; 529 SLIST_INIT(&m->m_pkthdr.tags); 530 531 return m; 532 } 533 534 struct mbuf * 535 m_getclr(int nowait, int type) 536 { 537 struct mbuf *m; 538 539 MGET(m, nowait, type); 540 if (m == 0) 541 return (NULL); 542 memset(mtod(m, void *), 0, MLEN); 543 return (m); 544 } 545 546 void 547 m_clget(struct mbuf *m, int nowait) 548 { 549 550 MCLGET(m, nowait); 551 } 552 553 struct mbuf * 554 m_free(struct mbuf *m) 555 { 556 struct mbuf *n; 557 558 MFREE(m, n); 559 return (n); 560 } 561 562 void 563 m_freem(struct mbuf *m) 564 { 565 struct mbuf *n; 566 567 if (m == NULL) 568 return; 569 do { 570 MFREE(m, n); 571 m = n; 572 } while (m); 573 } 574 575 #ifdef MBUFTRACE 576 /* 577 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain. 578 */ 579 void 580 m_claimm(struct mbuf *m, struct mowner *mo) 581 { 582 583 for (; m != NULL; m = m->m_next) 584 MCLAIM(m, mo); 585 } 586 #endif 587 588 /* 589 * Mbuffer utility routines. 590 */ 591 592 /* 593 * Lesser-used path for M_PREPEND: 594 * allocate new mbuf to prepend to chain, 595 * copy junk along. 596 */ 597 struct mbuf * 598 m_prepend(struct mbuf *m, int len, int how) 599 { 600 struct mbuf *mn; 601 602 MGET(mn, how, m->m_type); 603 if (mn == NULL) { 604 m_freem(m); 605 return (NULL); 606 } 607 if (m->m_flags & M_PKTHDR) { 608 M_MOVE_PKTHDR(mn, m); 609 } else { 610 MCLAIM(mn, m->m_owner); 611 } 612 mn->m_next = m; 613 m = mn; 614 if (len < MHLEN) 615 MH_ALIGN(m, len); 616 m->m_len = len; 617 return (m); 618 } 619 620 /* 621 * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 622 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 623 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. 624 */ 625 int MCFail; 626 627 struct mbuf * 628 m_copym(struct mbuf *m, int off0, int len, int wait) 629 { 630 631 return m_copym0(m, off0, len, wait, 0); /* shallow copy on M_EXT */ 632 } 633 634 struct mbuf * 635 m_dup(struct mbuf *m, int off0, int len, int wait) 636 { 637 638 return m_copym0(m, off0, len, wait, 1); /* deep copy */ 639 } 640 641 static struct mbuf * 642 m_copym0(struct mbuf *m, int off0, int len, int wait, int deep) 643 { 644 struct mbuf *n, **np; 645 int off = off0; 646 struct mbuf *top; 647 int copyhdr = 0; 648 649 if (off < 0 || len < 0) 650 panic("m_copym: off %d, len %d", off, len); 651 if (off == 0 && m->m_flags & M_PKTHDR) 652 copyhdr = 1; 653 while (off > 0) { 654 if (m == 0) 655 panic("m_copym: m == 0, off %d", off); 656 if (off < m->m_len) 657 break; 658 off -= m->m_len; 659 m = m->m_next; 660 } 661 np = ⊤ 662 top = 0; 663 while (len > 0) { 664 if (m == 0) { 665 if (len != M_COPYALL) 666 panic("m_copym: m == 0, len %d [!COPYALL]", 667 len); 668 break; 669 } 670 MGET(n, wait, m->m_type); 671 *np = n; 672 if (n == 0) 673 goto nospace; 674 MCLAIM(n, m->m_owner); 675 if (copyhdr) { 676 M_COPY_PKTHDR(n, m); 677 if (len == M_COPYALL) 678 n->m_pkthdr.len -= off0; 679 else 680 n->m_pkthdr.len = len; 681 copyhdr = 0; 682 } 683 n->m_len = min(len, m->m_len - off); 684 if (m->m_flags & M_EXT) { 685 if (!deep) { 686 n->m_data = m->m_data + off; 687 MCLADDREFERENCE(m, n); 688 } else { 689 /* 690 * we are unsure about the way m was allocated. 691 * copy into multiple MCLBYTES cluster mbufs. 692 */ 693 MCLGET(n, wait); 694 n->m_len = 0; 695 n->m_len = M_TRAILINGSPACE(n); 696 n->m_len = min(n->m_len, len); 697 n->m_len = min(n->m_len, m->m_len - off); 698 memcpy(mtod(n, void *), mtod(m, char *) + off, 699 (unsigned)n->m_len); 700 } 701 } else 702 memcpy(mtod(n, void *), mtod(m, char *) + off, 703 (unsigned)n->m_len); 704 if (len != M_COPYALL) 705 len -= n->m_len; 706 off += n->m_len; 707 #ifdef DIAGNOSTIC 708 if (off > m->m_len) 709 panic("m_copym0 overrun"); 710 #endif 711 if (off == m->m_len) { 712 m = m->m_next; 713 off = 0; 714 } 715 np = &n->m_next; 716 } 717 if (top == 0) 718 MCFail++; 719 return (top); 720 nospace: 721 m_freem(top); 722 MCFail++; 723 return (NULL); 724 } 725 726 /* 727 * Copy an entire packet, including header (which must be present). 728 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 729 */ 730 struct mbuf * 731 m_copypacket(struct mbuf *m, int how) 732 { 733 struct mbuf *top, *n, *o; 734 735 MGET(n, how, m->m_type); 736 top = n; 737 if (!n) 738 goto nospace; 739 740 MCLAIM(n, m->m_owner); 741 M_COPY_PKTHDR(n, m); 742 n->m_len = m->m_len; 743 if (m->m_flags & M_EXT) { 744 n->m_data = m->m_data; 745 MCLADDREFERENCE(m, n); 746 } else { 747 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 748 } 749 750 m = m->m_next; 751 while (m) { 752 MGET(o, how, m->m_type); 753 if (!o) 754 goto nospace; 755 756 MCLAIM(o, m->m_owner); 757 n->m_next = o; 758 n = n->m_next; 759 760 n->m_len = m->m_len; 761 if (m->m_flags & M_EXT) { 762 n->m_data = m->m_data; 763 MCLADDREFERENCE(m, n); 764 } else { 765 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 766 } 767 768 m = m->m_next; 769 } 770 return top; 771 nospace: 772 m_freem(top); 773 MCFail++; 774 return NULL; 775 } 776 777 /* 778 * Copy data from an mbuf chain starting "off" bytes from the beginning, 779 * continuing for "len" bytes, into the indicated buffer. 780 */ 781 void 782 m_copydata(struct mbuf *m, int off, int len, void *vp) 783 { 784 unsigned count; 785 void * cp = vp; 786 787 if (off < 0 || len < 0) 788 panic("m_copydata: off %d, len %d", off, len); 789 while (off > 0) { 790 if (m == NULL) 791 panic("m_copydata: m == NULL, off %d", off); 792 if (off < m->m_len) 793 break; 794 off -= m->m_len; 795 m = m->m_next; 796 } 797 while (len > 0) { 798 if (m == NULL) 799 panic("m_copydata: m == NULL, len %d", len); 800 count = min(m->m_len - off, len); 801 memcpy(cp, mtod(m, char *) + off, count); 802 len -= count; 803 cp = (char *)cp + count; 804 off = 0; 805 m = m->m_next; 806 } 807 } 808 809 /* 810 * Concatenate mbuf chain n to m. 811 * n might be copied into m (when n->m_len is small), therefore data portion of 812 * n could be copied into an mbuf of different mbuf type. 813 * Any m_pkthdr is not updated. 814 */ 815 void 816 m_cat(struct mbuf *m, struct mbuf *n) 817 { 818 819 while (m->m_next) 820 m = m->m_next; 821 while (n) { 822 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 823 /* just join the two chains */ 824 m->m_next = n; 825 return; 826 } 827 /* splat the data from one into the other */ 828 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 829 (u_int)n->m_len); 830 m->m_len += n->m_len; 831 n = m_free(n); 832 } 833 } 834 835 void 836 m_adj(struct mbuf *mp, int req_len) 837 { 838 int len = req_len; 839 struct mbuf *m; 840 int count; 841 842 if ((m = mp) == NULL) 843 return; 844 if (len >= 0) { 845 /* 846 * Trim from head. 847 */ 848 while (m != NULL && len > 0) { 849 if (m->m_len <= len) { 850 len -= m->m_len; 851 m->m_len = 0; 852 m = m->m_next; 853 } else { 854 m->m_len -= len; 855 m->m_data += len; 856 len = 0; 857 } 858 } 859 m = mp; 860 if (mp->m_flags & M_PKTHDR) 861 m->m_pkthdr.len -= (req_len - len); 862 } else { 863 /* 864 * Trim from tail. Scan the mbuf chain, 865 * calculating its length and finding the last mbuf. 866 * If the adjustment only affects this mbuf, then just 867 * adjust and return. Otherwise, rescan and truncate 868 * after the remaining size. 869 */ 870 len = -len; 871 count = 0; 872 for (;;) { 873 count += m->m_len; 874 if (m->m_next == (struct mbuf *)0) 875 break; 876 m = m->m_next; 877 } 878 if (m->m_len >= len) { 879 m->m_len -= len; 880 if (mp->m_flags & M_PKTHDR) 881 mp->m_pkthdr.len -= len; 882 return; 883 } 884 count -= len; 885 if (count < 0) 886 count = 0; 887 /* 888 * Correct length for chain is "count". 889 * Find the mbuf with last data, adjust its length, 890 * and toss data from remaining mbufs on chain. 891 */ 892 m = mp; 893 if (m->m_flags & M_PKTHDR) 894 m->m_pkthdr.len = count; 895 for (; m; m = m->m_next) { 896 if (m->m_len >= count) { 897 m->m_len = count; 898 break; 899 } 900 count -= m->m_len; 901 } 902 if (m) 903 while (m->m_next) 904 (m = m->m_next)->m_len = 0; 905 } 906 } 907 908 /* 909 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 910 * would be contiguous and in the data area of an mbuf (therefore, mtod() 911 * would work for a structure of given length). 912 * 913 * => On success, returns true and the resulting mbuf chain; false otherwise. 914 * => The mbuf chain may change, but is always preserved valid. 915 */ 916 bool 917 m_ensure_contig(struct mbuf **m0, int len) 918 { 919 struct mbuf *n = *m0, *m; 920 size_t count, space; 921 922 /* 923 * If first mbuf has no cluster, and has room for len bytes 924 * without shifting current data, pullup into it, 925 * otherwise allocate a new mbuf to prepend to the chain. 926 */ 927 if ((n->m_flags & M_EXT) == 0 && 928 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 929 if (n->m_len >= len) { 930 return true; 931 } 932 m = n; 933 n = n->m_next; 934 len -= m->m_len; 935 } else { 936 if (len > MHLEN) { 937 return false; 938 } 939 MGET(m, M_DONTWAIT, n->m_type); 940 if (m == NULL) { 941 return false; 942 } 943 MCLAIM(m, n->m_owner); 944 m->m_len = 0; 945 if (n->m_flags & M_PKTHDR) { 946 M_MOVE_PKTHDR(m, n); 947 } 948 } 949 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 950 do { 951 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 952 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 953 (unsigned)count); 954 len -= count; 955 m->m_len += count; 956 n->m_len -= count; 957 space -= count; 958 if (n->m_len) 959 n->m_data += count; 960 else 961 n = m_free(n); 962 } while (len > 0 && n); 963 964 m->m_next = n; 965 *m0 = m; 966 967 return len <= 0; 968 } 969 970 /* 971 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 972 */ 973 int MPFail; 974 975 struct mbuf * 976 m_pullup(struct mbuf *n, int len) 977 { 978 struct mbuf *m = n; 979 980 if (!m_ensure_contig(&m, len)) { 981 KASSERT(m != NULL); 982 m_freem(m); 983 MPFail++; 984 m = NULL; 985 } 986 return m; 987 } 988 989 /* 990 * Like m_pullup(), except a new mbuf is always allocated, and we allow 991 * the amount of empty space before the data in the new mbuf to be specified 992 * (in the event that the caller expects to prepend later). 993 */ 994 int MSFail; 995 996 struct mbuf * 997 m_copyup(struct mbuf *n, int len, int dstoff) 998 { 999 struct mbuf *m; 1000 int count, space; 1001 1002 if (len > (MHLEN - dstoff)) 1003 goto bad; 1004 MGET(m, M_DONTWAIT, n->m_type); 1005 if (m == NULL) 1006 goto bad; 1007 MCLAIM(m, n->m_owner); 1008 m->m_len = 0; 1009 if (n->m_flags & M_PKTHDR) { 1010 M_MOVE_PKTHDR(m, n); 1011 } 1012 m->m_data += dstoff; 1013 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1014 do { 1015 count = min(min(max(len, max_protohdr), space), n->m_len); 1016 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1017 (unsigned)count); 1018 len -= count; 1019 m->m_len += count; 1020 n->m_len -= count; 1021 space -= count; 1022 if (n->m_len) 1023 n->m_data += count; 1024 else 1025 n = m_free(n); 1026 } while (len > 0 && n); 1027 if (len > 0) { 1028 (void) m_free(m); 1029 goto bad; 1030 } 1031 m->m_next = n; 1032 return (m); 1033 bad: 1034 m_freem(n); 1035 MSFail++; 1036 return (NULL); 1037 } 1038 1039 /* 1040 * Partition an mbuf chain in two pieces, returning the tail -- 1041 * all but the first len0 bytes. In case of failure, it returns NULL and 1042 * attempts to restore the chain to its original state. 1043 */ 1044 struct mbuf * 1045 m_split(struct mbuf *m0, int len0, int wait) 1046 { 1047 1048 return m_split0(m0, len0, wait, 1); 1049 } 1050 1051 static struct mbuf * 1052 m_split0(struct mbuf *m0, int len0, int wait, int copyhdr) 1053 { 1054 struct mbuf *m, *n; 1055 unsigned len = len0, remain, len_save; 1056 1057 for (m = m0; m && len > m->m_len; m = m->m_next) 1058 len -= m->m_len; 1059 if (m == 0) 1060 return (NULL); 1061 remain = m->m_len - len; 1062 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1063 MGETHDR(n, wait, m0->m_type); 1064 if (n == 0) 1065 return (NULL); 1066 MCLAIM(n, m0->m_owner); 1067 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 1068 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1069 len_save = m0->m_pkthdr.len; 1070 m0->m_pkthdr.len = len0; 1071 if (m->m_flags & M_EXT) 1072 goto extpacket; 1073 if (remain > MHLEN) { 1074 /* m can't be the lead packet */ 1075 MH_ALIGN(n, 0); 1076 n->m_len = 0; 1077 n->m_next = m_split(m, len, wait); 1078 if (n->m_next == 0) { 1079 (void) m_free(n); 1080 m0->m_pkthdr.len = len_save; 1081 return (NULL); 1082 } else 1083 return (n); 1084 } else 1085 MH_ALIGN(n, remain); 1086 } else if (remain == 0) { 1087 n = m->m_next; 1088 m->m_next = 0; 1089 return (n); 1090 } else { 1091 MGET(n, wait, m->m_type); 1092 if (n == 0) 1093 return (NULL); 1094 MCLAIM(n, m->m_owner); 1095 M_ALIGN(n, remain); 1096 } 1097 extpacket: 1098 if (m->m_flags & M_EXT) { 1099 n->m_data = m->m_data + len; 1100 MCLADDREFERENCE(m, n); 1101 } else { 1102 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1103 } 1104 n->m_len = remain; 1105 m->m_len = len; 1106 n->m_next = m->m_next; 1107 m->m_next = 0; 1108 return (n); 1109 } 1110 /* 1111 * Routine to copy from device local memory into mbufs. 1112 */ 1113 struct mbuf * 1114 m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 1115 void (*copy)(const void *from, void *to, size_t len)) 1116 { 1117 struct mbuf *m; 1118 struct mbuf *top = 0, **mp = ⊤ 1119 int off = off0, len; 1120 char *cp; 1121 char *epkt; 1122 1123 cp = buf; 1124 epkt = cp + totlen; 1125 if (off) { 1126 /* 1127 * If 'off' is non-zero, packet is trailer-encapsulated, 1128 * so we have to skip the type and length fields. 1129 */ 1130 cp += off + 2 * sizeof(uint16_t); 1131 totlen -= 2 * sizeof(uint16_t); 1132 } 1133 MGETHDR(m, M_DONTWAIT, MT_DATA); 1134 if (m == 0) 1135 return (NULL); 1136 m->m_pkthdr.rcvif = ifp; 1137 m->m_pkthdr.len = totlen; 1138 m->m_len = MHLEN; 1139 1140 while (totlen > 0) { 1141 if (top) { 1142 MGET(m, M_DONTWAIT, MT_DATA); 1143 if (m == 0) { 1144 m_freem(top); 1145 return (NULL); 1146 } 1147 m->m_len = MLEN; 1148 } 1149 len = min(totlen, epkt - cp); 1150 if (len >= MINCLSIZE) { 1151 MCLGET(m, M_DONTWAIT); 1152 if ((m->m_flags & M_EXT) == 0) { 1153 m_free(m); 1154 m_freem(top); 1155 return (NULL); 1156 } 1157 m->m_len = len = min(len, MCLBYTES); 1158 } else { 1159 /* 1160 * Place initial small packet/header at end of mbuf. 1161 */ 1162 if (len < m->m_len) { 1163 if (top == 0 && len + max_linkhdr <= m->m_len) 1164 m->m_data += max_linkhdr; 1165 m->m_len = len; 1166 } else 1167 len = m->m_len; 1168 } 1169 if (copy) 1170 copy(cp, mtod(m, void *), (size_t)len); 1171 else 1172 memcpy(mtod(m, void *), cp, (size_t)len); 1173 cp += len; 1174 *mp = m; 1175 mp = &m->m_next; 1176 totlen -= len; 1177 if (cp == epkt) 1178 cp = buf; 1179 } 1180 return (top); 1181 } 1182 1183 /* 1184 * Copy data from a buffer back into the indicated mbuf chain, 1185 * starting "off" bytes from the beginning, extending the mbuf 1186 * chain if necessary. 1187 */ 1188 void 1189 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1190 { 1191 #if defined(DEBUG) 1192 struct mbuf *origm = m0; 1193 int error; 1194 #endif /* defined(DEBUG) */ 1195 1196 if (m0 == NULL) 1197 return; 1198 1199 #if defined(DEBUG) 1200 error = 1201 #endif /* defined(DEBUG) */ 1202 m_copyback0(&m0, off, len, cp, 1203 M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT); 1204 1205 #if defined(DEBUG) 1206 if (error != 0 || (m0 != NULL && origm != m0)) 1207 panic("m_copyback"); 1208 #endif /* defined(DEBUG) */ 1209 } 1210 1211 struct mbuf * 1212 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1213 { 1214 int error; 1215 1216 /* don't support chain expansion */ 1217 KDASSERT(off + len <= m_length(m0)); 1218 1219 error = m_copyback0(&m0, off, len, cp, 1220 M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how); 1221 if (error) { 1222 /* 1223 * no way to recover from partial success. 1224 * just free the chain. 1225 */ 1226 m_freem(m0); 1227 return NULL; 1228 } 1229 return m0; 1230 } 1231 1232 /* 1233 * m_makewritable: ensure the specified range writable. 1234 */ 1235 int 1236 m_makewritable(struct mbuf **mp, int off, int len, int how) 1237 { 1238 int error; 1239 #if defined(DEBUG) 1240 struct mbuf *n; 1241 int origlen, reslen; 1242 1243 origlen = m_length(*mp); 1244 #endif /* defined(DEBUG) */ 1245 1246 #if 0 /* M_COPYALL is large enough */ 1247 if (len == M_COPYALL) 1248 len = m_length(*mp) - off; /* XXX */ 1249 #endif 1250 1251 error = m_copyback0(mp, off, len, NULL, 1252 M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how); 1253 1254 #if defined(DEBUG) 1255 reslen = 0; 1256 for (n = *mp; n; n = n->m_next) 1257 reslen += n->m_len; 1258 if (origlen != reslen) 1259 panic("m_makewritable: length changed"); 1260 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1261 panic("m_makewritable: inconsist"); 1262 #endif /* defined(DEBUG) */ 1263 1264 return error; 1265 } 1266 1267 /* 1268 * Copy the mbuf chain to a new mbuf chain that is as short as possible. 1269 * Return the new mbuf chain on success, NULL on failure. On success, 1270 * free the old mbuf chain. 1271 */ 1272 struct mbuf * 1273 m_defrag(struct mbuf *mold, int flags) 1274 { 1275 struct mbuf *m0, *mn, *n; 1276 size_t sz = mold->m_pkthdr.len; 1277 1278 #ifdef DIAGNOSTIC 1279 if ((mold->m_flags & M_PKTHDR) == 0) 1280 panic("m_defrag: not a mbuf chain header"); 1281 #endif 1282 1283 MGETHDR(m0, flags, MT_DATA); 1284 if (m0 == NULL) 1285 return NULL; 1286 M_COPY_PKTHDR(m0, mold); 1287 mn = m0; 1288 1289 do { 1290 if (sz > MHLEN) { 1291 MCLGET(mn, M_DONTWAIT); 1292 if ((mn->m_flags & M_EXT) == 0) { 1293 m_freem(m0); 1294 return NULL; 1295 } 1296 } 1297 1298 mn->m_len = MIN(sz, MCLBYTES); 1299 1300 m_copydata(mold, mold->m_pkthdr.len - sz, mn->m_len, 1301 mtod(mn, void *)); 1302 1303 sz -= mn->m_len; 1304 1305 if (sz > 0) { 1306 /* need more mbufs */ 1307 MGET(n, M_NOWAIT, MT_DATA); 1308 if (n == NULL) { 1309 m_freem(m0); 1310 return NULL; 1311 } 1312 1313 mn->m_next = n; 1314 mn = n; 1315 } 1316 } while (sz > 0); 1317 1318 m_freem(mold); 1319 1320 return m0; 1321 } 1322 1323 int 1324 m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags, 1325 int how) 1326 { 1327 int mlen; 1328 struct mbuf *m, *n; 1329 struct mbuf **mp; 1330 int totlen = 0; 1331 const char *cp = vp; 1332 1333 KASSERT(mp0 != NULL); 1334 KASSERT(*mp0 != NULL); 1335 KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL); 1336 KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL); 1337 1338 /* 1339 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW, 1340 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive. 1341 */ 1342 1343 KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0); 1344 1345 mp = mp0; 1346 m = *mp; 1347 while (off > (mlen = m->m_len)) { 1348 off -= mlen; 1349 totlen += mlen; 1350 if (m->m_next == NULL) { 1351 int tspace; 1352 extend: 1353 if ((flags & M_COPYBACK0_EXTEND) == 0) 1354 goto out; 1355 1356 /* 1357 * try to make some space at the end of "m". 1358 */ 1359 1360 mlen = m->m_len; 1361 if (off + len >= MINCLSIZE && 1362 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1363 MCLGET(m, how); 1364 } 1365 tspace = M_TRAILINGSPACE(m); 1366 if (tspace > 0) { 1367 tspace = min(tspace, off + len); 1368 KASSERT(tspace > 0); 1369 memset(mtod(m, char *) + m->m_len, 0, 1370 min(off, tspace)); 1371 m->m_len += tspace; 1372 off += mlen; 1373 totlen -= mlen; 1374 continue; 1375 } 1376 1377 /* 1378 * need to allocate an mbuf. 1379 */ 1380 1381 if (off + len >= MINCLSIZE) { 1382 n = m_getcl(how, m->m_type, 0); 1383 } else { 1384 n = m_get(how, m->m_type); 1385 } 1386 if (n == NULL) { 1387 goto out; 1388 } 1389 n->m_len = 0; 1390 n->m_len = min(M_TRAILINGSPACE(n), off + len); 1391 memset(mtod(n, char *), 0, min(n->m_len, off)); 1392 m->m_next = n; 1393 } 1394 mp = &m->m_next; 1395 m = m->m_next; 1396 } 1397 while (len > 0) { 1398 mlen = m->m_len - off; 1399 if (mlen != 0 && M_READONLY(m)) { 1400 char *datap; 1401 int eatlen; 1402 1403 /* 1404 * this mbuf is read-only. 1405 * allocate a new writable mbuf and try again. 1406 */ 1407 1408 #if defined(DIAGNOSTIC) 1409 if ((flags & M_COPYBACK0_COW) == 0) 1410 panic("m_copyback0: read-only"); 1411 #endif /* defined(DIAGNOSTIC) */ 1412 1413 /* 1414 * if we're going to write into the middle of 1415 * a mbuf, split it first. 1416 */ 1417 if (off > 0) { 1418 n = m_split0(m, off, how, 0); 1419 if (n == NULL) 1420 goto enobufs; 1421 m->m_next = n; 1422 mp = &m->m_next; 1423 m = n; 1424 off = 0; 1425 continue; 1426 } 1427 1428 /* 1429 * XXX TODO coalesce into the trailingspace of 1430 * the previous mbuf when possible. 1431 */ 1432 1433 /* 1434 * allocate a new mbuf. copy packet header if needed. 1435 */ 1436 MGET(n, how, m->m_type); 1437 if (n == NULL) 1438 goto enobufs; 1439 MCLAIM(n, m->m_owner); 1440 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1441 M_MOVE_PKTHDR(n, m); 1442 n->m_len = MHLEN; 1443 } else { 1444 if (len >= MINCLSIZE) 1445 MCLGET(n, M_DONTWAIT); 1446 n->m_len = 1447 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1448 } 1449 if (n->m_len > len) 1450 n->m_len = len; 1451 1452 /* 1453 * free the region which has been overwritten. 1454 * copying data from old mbufs if requested. 1455 */ 1456 if (flags & M_COPYBACK0_PRESERVE) 1457 datap = mtod(n, char *); 1458 else 1459 datap = NULL; 1460 eatlen = n->m_len; 1461 while (m != NULL && M_READONLY(m) && 1462 n->m_type == m->m_type && eatlen > 0) { 1463 mlen = min(eatlen, m->m_len); 1464 if (datap) { 1465 m_copydata(m, 0, mlen, datap); 1466 datap += mlen; 1467 } 1468 m->m_data += mlen; 1469 m->m_len -= mlen; 1470 eatlen -= mlen; 1471 if (m->m_len == 0) 1472 *mp = m = m_free(m); 1473 } 1474 if (eatlen > 0) 1475 n->m_len -= eatlen; 1476 n->m_next = m; 1477 *mp = m = n; 1478 continue; 1479 } 1480 mlen = min(mlen, len); 1481 if (flags & M_COPYBACK0_COPYBACK) { 1482 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1483 cp += mlen; 1484 } 1485 len -= mlen; 1486 mlen += off; 1487 off = 0; 1488 totlen += mlen; 1489 if (len == 0) 1490 break; 1491 if (m->m_next == NULL) { 1492 goto extend; 1493 } 1494 mp = &m->m_next; 1495 m = m->m_next; 1496 } 1497 out: if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1498 KASSERT((flags & M_COPYBACK0_EXTEND) != 0); 1499 m->m_pkthdr.len = totlen; 1500 } 1501 1502 return 0; 1503 1504 enobufs: 1505 return ENOBUFS; 1506 } 1507 1508 void 1509 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1510 { 1511 1512 KASSERT((to->m_flags & M_EXT) == 0); 1513 KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL); 1514 KASSERT((from->m_flags & M_PKTHDR) != 0); 1515 1516 to->m_pkthdr = from->m_pkthdr; 1517 to->m_flags = from->m_flags & M_COPYFLAGS; 1518 to->m_data = to->m_pktdat; 1519 1520 from->m_flags &= ~M_PKTHDR; 1521 } 1522 1523 /* 1524 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1525 * beginning, continuing for "len" bytes. 1526 */ 1527 int 1528 m_apply(struct mbuf *m, int off, int len, 1529 int (*f)(void *, void *, unsigned int), void *arg) 1530 { 1531 unsigned int count; 1532 int rval; 1533 1534 KASSERT(len >= 0); 1535 KASSERT(off >= 0); 1536 1537 while (off > 0) { 1538 KASSERT(m != NULL); 1539 if (off < m->m_len) 1540 break; 1541 off -= m->m_len; 1542 m = m->m_next; 1543 } 1544 while (len > 0) { 1545 KASSERT(m != NULL); 1546 count = min(m->m_len - off, len); 1547 1548 rval = (*f)(arg, mtod(m, char *) + off, count); 1549 if (rval) 1550 return (rval); 1551 1552 len -= count; 1553 off = 0; 1554 m = m->m_next; 1555 } 1556 1557 return (0); 1558 } 1559 1560 /* 1561 * Return a pointer to mbuf/offset of location in mbuf chain. 1562 */ 1563 struct mbuf * 1564 m_getptr(struct mbuf *m, int loc, int *off) 1565 { 1566 1567 while (loc >= 0) { 1568 /* Normal end of search */ 1569 if (m->m_len > loc) { 1570 *off = loc; 1571 return (m); 1572 } else { 1573 loc -= m->m_len; 1574 1575 if (m->m_next == NULL) { 1576 if (loc == 0) { 1577 /* Point at the end of valid data */ 1578 *off = m->m_len; 1579 return (m); 1580 } else 1581 return (NULL); 1582 } else 1583 m = m->m_next; 1584 } 1585 } 1586 1587 return (NULL); 1588 } 1589 1590 /* 1591 * m_ext_free: release a reference to the mbuf external storage. 1592 * 1593 * => free the mbuf m itsself as well. 1594 */ 1595 1596 void 1597 m_ext_free(struct mbuf *m) 1598 { 1599 bool embedded = MEXT_ISEMBEDDED(m); 1600 bool dofree = true; 1601 u_int refcnt; 1602 1603 KASSERT((m->m_flags & M_EXT) != 0); 1604 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1605 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1606 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1607 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1608 1609 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1610 refcnt = m->m_ext.ext_refcnt = 0; 1611 } else { 1612 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1613 } 1614 if (refcnt > 0) { 1615 if (embedded) { 1616 /* 1617 * other mbuf's m_ext_ref still points to us. 1618 */ 1619 dofree = false; 1620 } else { 1621 m->m_ext_ref = m; 1622 } 1623 } else { 1624 /* 1625 * dropping the last reference 1626 */ 1627 if (!embedded) { 1628 m->m_ext.ext_refcnt++; /* XXX */ 1629 m_ext_free(m->m_ext_ref); 1630 m->m_ext_ref = m; 1631 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 1632 pool_cache_put_paddr((struct pool_cache *) 1633 m->m_ext.ext_arg, 1634 m->m_ext.ext_buf, m->m_ext.ext_paddr); 1635 } else if (m->m_ext.ext_free) { 1636 (*m->m_ext.ext_free)(m, 1637 m->m_ext.ext_buf, m->m_ext.ext_size, 1638 m->m_ext.ext_arg); 1639 /* 1640 * 'm' is already freed by the ext_free callback. 1641 */ 1642 dofree = false; 1643 } else { 1644 free(m->m_ext.ext_buf, m->m_ext.ext_type); 1645 } 1646 } 1647 if (dofree) { 1648 pool_cache_put(mb_cache, m); 1649 } 1650 } 1651 1652 #if defined(DDB) 1653 void 1654 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 1655 { 1656 char ch; 1657 bool opt_c = false; 1658 char buf[512]; 1659 1660 while ((ch = *(modif++)) != '\0') { 1661 switch (ch) { 1662 case 'c': 1663 opt_c = true; 1664 break; 1665 } 1666 } 1667 1668 nextchain: 1669 (*pr)("MBUF %p\n", m); 1670 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 1671 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 1672 m->m_data, m->m_len, m->m_type, buf); 1673 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 1674 m->m_nextpkt); 1675 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 1676 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 1677 (int)M_READONLY(m)); 1678 if ((m->m_flags & M_PKTHDR) != 0) { 1679 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 1680 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%" 1681 PRIx32 ", segsz=%u\n", 1682 m->m_pkthdr.len, m->m_pkthdr.rcvif, 1683 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 1684 } 1685 if ((m->m_flags & M_EXT)) { 1686 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 1687 "ext_free=%p, ext_arg=%p\n", 1688 m->m_ext.ext_refcnt, 1689 m->m_ext.ext_buf, m->m_ext.ext_size, 1690 m->m_ext.ext_free, m->m_ext.ext_arg); 1691 } 1692 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 1693 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 1694 vaddr_t eva = sva + m->m_ext.ext_size; 1695 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 1696 int i; 1697 1698 (*pr)(" pages:"); 1699 for (i = 0; i < n; i ++) { 1700 (*pr)(" %p", m->m_ext.ext_pgs[i]); 1701 } 1702 (*pr)("\n"); 1703 } 1704 1705 if (opt_c) { 1706 m = m->m_next; 1707 if (m != NULL) { 1708 goto nextchain; 1709 } 1710 } 1711 } 1712 #endif /* defined(DDB) */ 1713 1714 void 1715 mbstat_type_add(int type, int diff) 1716 { 1717 struct mbstat_cpu *mb; 1718 int s; 1719 1720 s = splvm(); 1721 mb = percpu_getref(mbstat_percpu); 1722 mb->m_mtypes[type] += diff; 1723 percpu_putref(mbstat_percpu); 1724 splx(s); 1725 } 1726 1727 #if defined(MBUFTRACE) 1728 void 1729 mowner_attach(struct mowner *mo) 1730 { 1731 1732 KASSERT(mo->mo_counters == NULL); 1733 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 1734 1735 /* XXX lock */ 1736 LIST_INSERT_HEAD(&mowners, mo, mo_link); 1737 } 1738 1739 void 1740 mowner_detach(struct mowner *mo) 1741 { 1742 1743 KASSERT(mo->mo_counters != NULL); 1744 1745 /* XXX lock */ 1746 LIST_REMOVE(mo, mo_link); 1747 1748 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 1749 mo->mo_counters = NULL; 1750 } 1751 1752 void 1753 mowner_init(struct mbuf *m, int type) 1754 { 1755 struct mowner_counter *mc; 1756 struct mowner *mo; 1757 int s; 1758 1759 m->m_owner = mo = &unknown_mowners[type]; 1760 s = splvm(); 1761 mc = percpu_getref(mo->mo_counters); 1762 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1763 percpu_putref(mo->mo_counters); 1764 splx(s); 1765 } 1766 1767 void 1768 mowner_ref(struct mbuf *m, int flags) 1769 { 1770 struct mowner *mo = m->m_owner; 1771 struct mowner_counter *mc; 1772 int s; 1773 1774 s = splvm(); 1775 mc = percpu_getref(mo->mo_counters); 1776 if ((flags & M_EXT) != 0) 1777 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1778 if ((flags & M_CLUSTER) != 0) 1779 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1780 percpu_putref(mo->mo_counters); 1781 splx(s); 1782 } 1783 1784 void 1785 mowner_revoke(struct mbuf *m, bool all, int flags) 1786 { 1787 struct mowner *mo = m->m_owner; 1788 struct mowner_counter *mc; 1789 int s; 1790 1791 s = splvm(); 1792 mc = percpu_getref(mo->mo_counters); 1793 if ((flags & M_EXT) != 0) 1794 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 1795 if ((flags & M_CLUSTER) != 0) 1796 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 1797 if (all) 1798 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 1799 percpu_putref(mo->mo_counters); 1800 splx(s); 1801 if (all) 1802 m->m_owner = &revoked_mowner; 1803 } 1804 1805 static void 1806 mowner_claim(struct mbuf *m, struct mowner *mo) 1807 { 1808 struct mowner_counter *mc; 1809 int flags = m->m_flags; 1810 int s; 1811 1812 s = splvm(); 1813 mc = percpu_getref(mo->mo_counters); 1814 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1815 if ((flags & M_EXT) != 0) 1816 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1817 if ((flags & M_CLUSTER) != 0) 1818 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1819 percpu_putref(mo->mo_counters); 1820 splx(s); 1821 m->m_owner = mo; 1822 } 1823 1824 void 1825 m_claim(struct mbuf *m, struct mowner *mo) 1826 { 1827 1828 if (m->m_owner == mo || mo == NULL) 1829 return; 1830 1831 mowner_revoke(m, true, m->m_flags); 1832 mowner_claim(m, mo); 1833 } 1834 #endif /* defined(MBUFTRACE) */ 1835