/*	$NetBSD: uipc_mbuf.c,v 1.136 2010/05/11 20:21:56 pooka Exp $	*/

/*-
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.136 2010/05/11 20:21:56 pooka Exp $");

#include "opt_mbuftrace.h"
#include "opt_nmbclusters.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

#include <uvm/uvm.h>

pool_cache_t mb_cache;	/* mbuf cache */
pool_cache_t mcl_cache;	/* mbuf cluster cache */

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static int mb_ctor(void *, void *, int);

static void sysctl_kern_mbuf_setup(void);

static struct sysctllog *mbuf_sysctllog;

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */

static const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

static percpu_t *mbstat_percpu;

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	MOWNER_INIT("unknown", "free"),
	MOWNER_INIT("unknown", "data"),
	MOWNER_INIT("unknown", "header"),
	MOWNER_INIT("unknown", "soname"),
	MOWNER_INIT("unknown", "soopts"),
	MOWNER_INIT("unknown", "ftable"),
	MOWNER_INIT("unknown", "control"),
	MOWNER_INIT("unknown", "oobdata"),
};
struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
#endif

#define	MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m))

#define	MCLADDREFERENCE(o, n)	\
do {	\
	KASSERT(((o)->m_flags & M_EXT) != 0);	\
	KASSERT(((n)->m_flags & M_EXT) == 0);	\
	KASSERT((o)->m_ext.ext_refcnt >= 1);	\
	(n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS);	\
	atomic_inc_uint(&(o)->m_ext.ext_refcnt);	\
	(n)->m_ext_ref = (o)->m_ext_ref;	\
	mowner_ref((n), (n)->m_flags);	\
	MCLREFDEBUGN((n), __FILE__, __LINE__);	\
} while (/* CONSTCOND */ 0)

static int
nmbclusters_limit(void)
{
#if defined(PMAP_MAP_POOLPAGE)
	/* direct mapping, doesn't use space in kmem_map */
	vsize_t max_size = physmem / 4;
#else
	vsize_t max_size = MIN(physmem / 4, nkmempages / 2);
#endif

	max_size = max_size * PAGE_SIZE / MCLBYTES;
#ifdef NMBCLUSTERS_MAX
	max_size = MIN(max_size, NMBCLUSTERS_MAX);
#endif

#ifdef NMBCLUSTERS
	return MIN(max_size, NMBCLUSTERS);
#else
	return max_size;
#endif
}
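
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * rough arithmetic behind nmbclusters_limit() on a hypothetical direct-map
 * machine with 1 GiB of RAM, 4 KiB pages and 2 KiB clusters.  All numbers
 * are assumptions for the example only.
 */
#if 0
	physmem  = 262144;		/* 1 GiB / 4 KiB pages */
	max_size = physmem / 4;		/* 65536 pages reserved for clusters */
	max_size = max_size * PAGE_SIZE / MCLBYTES;
					/* 65536 * 4096 / 2048 = 131072 clusters */
#endif
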
/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

	CTASSERT(sizeof(struct _m_ext) <= MHLEN);
	CTASSERT(sizeof(struct mbuf) == MSIZE);

	sysctl_kern_mbuf_setup();

	mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl",
	    NULL, IPL_VM, mb_ctor, NULL, NULL);
	KASSERT(mb_cache != NULL);

	mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL,
	    IPL_VM, NULL, NULL, NULL);
	KASSERT(mcl_cache != NULL);

	pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL);
	pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL);

	/*
	 * Set an arbitrary default limit on the number of mbuf clusters.
	 */
#ifdef NMBCLUSTERS
	nmbclusters = nmbclusters_limit();
#else
	nmbclusters = MAX(1024,
	    (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16);
	nmbclusters = MIN(nmbclusters, nmbclusters_limit());
#endif

	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the limit-reached
	 * message at most once a minute.
	 */
	pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);

	mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));

	/*
	 * Set a low water mark for both mbufs and clusters.  This should
	 * help ensure that they can be allocated in a memory starvation
	 * situation.  This is important for e.g. diskless systems which
	 * must allocate mbufs in order for the pagedaemon to clean pages.
	 */
	pool_cache_setlowat(mb_cache, mblowat);
	pool_cache_setlowat(mcl_cache, mcllowat);

#ifdef MBUFTRACE
	{
		/*
		 * Attach the unknown mowners.
		 */
		int i;
		MOWNER_ATTACH(&revoked_mowner);
		for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
		     i-- > 0; )
			MOWNER_ATTACH(&unknown_mowners[i]);
	}
#endif
}

/*
 * sysctl helper routine for the kern.mbuf subtree.
 * nmbclusters, mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
	int error, newval;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	switch (rnode->sysctl_num) {
	case MBUF_NMBCLUSTERS:
	case MBUF_MBLOWAT:
	case MBUF_MCLLOWAT:
		newval = *(int*)rnode->sysctl_data;
		break;
	default:
		return (EOPNOTSUPP);
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);
	if (newval < 0)
		return (EINVAL);

	switch (node.sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (newval < nmbclusters)
			return (EINVAL);
		if (newval > nmbclusters_limit())
			return (EINVAL);
		nmbclusters = newval;
		pool_cache_sethardlimit(mcl_cache, nmbclusters,
		    mclpool_warnmsg, 60);
		break;
	case MBUF_MBLOWAT:
		mblowat = newval;
		pool_cache_setlowat(mb_cache, mblowat);
		break;
	case MBUF_MCLLOWAT:
		mcllowat = newval;
		pool_cache_setlowat(mcl_cache, mcllowat);
		break;
	}

	return (0);
}

#ifdef MBUFTRACE
static void
mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mowner_counter *mc = v1;
	struct mowner_user *mo_user = v2;
	int i;

	for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
		mo_user->mo_counter[i] += mc->mc_counter[i];
	}
}

static void
mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
{

	memset(mo_user, 0, sizeof(*mo_user));
	CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
	CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
	memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
	memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
	percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user);
}

static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *mo;
	size_t len = 0;
	int error = 0;

	if (namelen != 0)
		return (EINVAL);
	if (newp != NULL)
		return (EPERM);

	LIST_FOREACH(mo, &mowners, mo_link) {
		struct mowner_user mo_user;

		mowner_convert_to_user(mo, &mo_user);

		if (oldp != NULL) {
			if (*oldlenp - len < sizeof(mo_user)) {
				error = ENOMEM;
				break;
			}
			error = copyout(&mo_user, (char *)oldp + len,
			    sizeof(mo_user));
			if (error)
				break;
		}
		len += sizeof(mo_user);
	}

	if (error == 0)
		*oldlenp = len;

	return (error);
}
#endif /* MBUFTRACE */

static void
mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mbstat_cpu *mbsc = v1;
	struct mbstat *mbs = v2;
	int i;

	for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
		mbs->m_mtypes[i] += mbsc->m_mtypes[i];
	}
}

static void
mbstat_convert_to_user(struct mbstat *mbs)
{

	memset(mbs, 0, sizeof(*mbs));
	mbs->m_drain = mbstat.m_drain;
	percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs);
}

static int
sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct mbstat mbs;

	mbstat_convert_to_user(&mbs);
	node = *rnode;
	node.sysctl_data = &mbs;
	node.sysctl_size = sizeof(mbs);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}
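
/*
 * Illustrative sketch (editor's addition): how a userland program might read
 * the kern.mbuf.stats node exported above.  This is hypothetical example
 * code, not part of the kernel; it assumes the usual userland sysctl(3) API
 * and that struct mbstat is visible via <sys/mbuf.h>.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <stdio.h>

int
main(void)
{
	struct mbstat mbs;
	size_t len = sizeof(mbs);

	if (sysctlbyname("kern.mbuf.stats", &mbs, &len, NULL, 0) == -1)
		return 1;
	printf("drains: %lu\n", (unsigned long)mbs.m_drain);
	return 0;
}
#endif
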
static void
sysctl_kern_mbuf_setup(void)
{

	KASSERT(mbuf_sysctllog == NULL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "kern", NULL,
		       NULL, 0, NULL, 0,
		       CTL_KERN, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "mbuf",
		       SYSCTL_DESCR("mbuf control variables"),
		       NULL, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "msize",
		       SYSCTL_DESCR("mbuf base size"),
		       NULL, msize, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "mclbytes",
		       SYSCTL_DESCR("mbuf cluster size"),
		       NULL, mclbytes, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "nmbclusters",
		       SYSCTL_DESCR("Limit on the number of mbuf clusters"),
		       sysctl_kern_mbuf, 0, &nmbclusters, 0,
		       CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mblowat",
		       SYSCTL_DESCR("mbuf low water mark"),
		       sysctl_kern_mbuf, 0, &mblowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mcllowat",
		       SYSCTL_DESCR("mbuf cluster low water mark"),
		       sysctl_kern_mbuf, 0, &mcllowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "stats",
		       SYSCTL_DESCR("mbuf allocation statistics"),
		       sysctl_kern_mbuf_stats, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "mowners",
		       SYSCTL_DESCR("Information about mbuf owners"),
		       sysctl_kern_mbuf_mowners, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}

static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return (0);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	const struct protosw *pr;
	struct ifnet *ifp;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splvm();
	DOMAIN_FOREACH(dp) {
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	}
	IFNET_FOREACH(ifp) {
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	}
	splx(s);
	mbstat.m_drain++;
	KERNEL_UNLOCK_ONE(NULL);
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;

	m = pool_cache_get(mb_cache,
	    nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0);
	if (m == NULL)
		return NULL;

	mbstat_type_add(type, 1);
	mowner_init(m, type);
	m->m_ext_ref = m;
	m->m_type = type;
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return m;
}

struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;

	m = m_get(nowait, type);
	if (m == NULL)
		return NULL;

	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	SLIST_INIT(&m->m_pkthdr.tags);

	return m;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (NULL);
	memset(mtod(m, void *), 0, MLEN);
	return (m);
}

void
m_clget(struct mbuf *m, int nowait)
{

	MCLGET(m, nowait);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}

#ifdef MBUFTRACE
/*
 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain.
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	for (; m != NULL; m = m->m_next)
		MCLAIM(m, mo);
}
#endif

/*
 * Mbuf utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate a new mbuf to prepend to the chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(mn, m);
	} else {
		MCLAIM(mn, m->m_owner);
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
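
/*
 * Illustrative sketch (editor's addition): the usual allocate/fill/free
 * pattern for the routines above.  Hypothetical caller fragment, shown only
 * to make the API shape concrete; it is not compiled.
 */
#if 0
	struct mbuf *m;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return ENOBUFS;
	MCLGET(m, M_DONTWAIT);			/* attach a cluster */
	if ((m->m_flags & M_EXT) == 0) {
		m_freem(m);
		return ENOBUFS;
	}
	m->m_len = m->m_pkthdr.len = 0;
	/* ... fill mtod(m, void *), bump m_len and m_pkthdr.len ... */
	m_freem(m);				/* release the whole chain */
#endif
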
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 0);	/* shallow copy on M_EXT */
}

struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 1);	/* deep copy */
}

static struct mbuf *
m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym: m == 0, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym: m == 0, len %d [!COPYALL]",
				    len);
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		MCLAIM(n, m->m_owner);
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * we are unsure about the way m was allocated.
				 * copy into multiple MCLBYTES cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, void *), mtod(m, char *) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, void *), mtod(m, char *) + off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	MCLAIM(n, m->m_owner);
	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		MCLAIM(o, m->m_owner);
		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return NULL;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *vp)
{
	unsigned count;
	void *cp = vp;

	if (off < 0 || len < 0)
		panic("m_copydata: off %d, len %d", off, len);
	while (off > 0) {
		if (m == NULL)
			panic("m_copydata: m == NULL, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: m == NULL, len %d", len);
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, char *) + off, count);
		len -= count;
		cp = (char *)cp + count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Concatenate mbuf chain n to m.
 * n may be copied into m (when n->m_len is small), so data from n can end
 * up in an mbuf of a different mbuf type.
 * The m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{

	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		if (m)
			while (m->m_next)
				(m = m->m_next)->m_len = 0;
	}
}
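
/*
 * Illustrative sketch (editor's addition): trimming a fixed-size header from
 * the front of a packet and a trailer from the end with m_adj().  The 14- and
 * 4-byte sizes are made-up values for the example only.
 */
#if 0
	m_adj(m, 14);			/* drop a 14-byte link-level header */
	m_adj(m, -4);			/* drop a 4-byte trailing checksum */
#endif
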
/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		MCLAIM(m, n->m_owner);
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_MOVE_PKTHDR(m, n);
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (NULL);
}

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(m, n);
	}
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}
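
/*
 * Illustrative sketch (editor's addition): the classic m_pullup() idiom used
 * before dereferencing a protocol header with mtod().  "struct ip" here just
 * stands in for whatever header the caller needs to be contiguous.
 */
#if 0
	struct ip *ip;

	if (m->m_len < sizeof(struct ip) &&
	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
		return;			/* chain already freed by m_pullup */
	ip = mtod(m, struct ip *);
#endif
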
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{

	return m_split0(m0, len0, wait, 1);
}

static struct mbuf *
m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (NULL);
	remain = m->m_len - len;
	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m0->m_owner);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_len = 0;
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m->m_owner);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, void *), mtod(m, char *) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(const void *from, void *to, size_t len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(uint16_t);
		totlen -= 2 * sizeof(uint16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (NULL);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, void *), (size_t)len);
		else
			memcpy(mtod(m, void *), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif /* defined(DEBUG) */

	if (m0 == NULL)
		return;

#if defined(DEBUG)
	error =
#endif /* defined(DEBUG) */
	    m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif /* defined(DEBUG) */
}

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}

/*
 * m_makewritable: ensure the specified range is writable.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	struct mbuf *n;
	int origlen, reslen;

	origlen = m_length(*mp);
#endif /* defined(DEBUG) */

#if 0 /* M_COPYALL is large enough */
	if (len == M_COPYALL)
		len = m_length(*mp) - off;	/* XXX */
#endif

	error = m_copyback0(mp, off, len, NULL,
	    M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);

#if defined(DEBUG)
	reslen = 0;
	for (n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsist");
#endif /* defined(DEBUG) */

	return error;
}
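
/*
 * Illustrative sketch (editor's addition): making a header writable before
 * modifying it in place, e.g. to update a checksum field.  The offset and
 * length are assumptions for the example, and freeing the chain on failure
 * is one common caller policy, not a requirement imposed by this file.
 */
#if 0
	if (m_makewritable(&m, 0, sizeof(struct ip), M_DONTWAIT) != 0) {
		m_freem(m);
		return ENOBUFS;
	}
	/* mtod(m, struct ip *) may now be modified safely */
#endif
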
int
m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
    int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);

	/*
	 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW,
	 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive.
	 */

	KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0);

	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			int tspace;
extend:
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;

			/*
			 * try to make some space at the end of "m".
			 */

			mlen = m->m_len;
			if (off + len >= MINCLSIZE &&
			    (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
				MCLGET(m, how);
			}
			tspace = M_TRAILINGSPACE(m);
			if (tspace > 0) {
				tspace = min(tspace, off + len);
				KASSERT(tspace > 0);
				memset(mtod(m, char *) + m->m_len, 0,
				    min(off, tspace));
				m->m_len += tspace;
				off += mlen;
				totlen -= mlen;
				continue;
			}

			/*
			 * need to allocate an mbuf.
			 */

			if (off + len >= MINCLSIZE) {
				n = m_getcl(how, m->m_type, 0);
			} else {
				n = m_get(how, m->m_type);
			}
			if (n == NULL) {
				goto out;
			}
			n->m_len = 0;
			n->m_len = min(M_TRAILINGSPACE(n), off + len);
			memset(mtod(n, char *), 0, min(n->m_len, off));
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			char *datap;
			int eatlen;

			/*
			 * this mbuf is read-only.
			 * allocate a new writable mbuf and try again.
			 */

#if defined(DIAGNOSTIC)
			if ((flags & M_COPYBACK0_COW) == 0)
				panic("m_copyback0: read-only");
#endif /* defined(DIAGNOSTIC) */

			/*
			 * if we're going to write into the middle of
			 * an mbuf, split it first.
			 */
			if (off > 0 && len < mlen) {
				n = m_split0(m, off, how, 0);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf.  copy packet header if needed.
			 */
			MGET(n, how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				M_MOVE_PKTHDR(n, m);
				n->m_len = MHLEN;
			} else {
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * free the region which has been overwritten,
			 * copying data from old mbufs if requested.
			 */
			if (flags & M_COPYBACK0_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			KDASSERT(off == 0 || eatlen >= mlen);
			if (off > 0) {
				KDASSERT(len >= mlen);
				m->m_len = off;
				m->m_next = n;
				if (datap) {
					m_copydata(m, off, mlen, datap);
					datap += mlen;
				}
				eatlen -= mlen;
				mp = &m->m_next;
				m = m->m_next;
			}
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = min(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = min(mlen, len);
		if (flags & M_COPYBACK0_COPYBACK) {
			memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			goto extend;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
out:	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
		KASSERT((flags & M_COPYBACK0_EXTEND) != 0);
		m->m_pkthdr.len = totlen;
	}

	return 0;

enobufs:
	return ENOBUFS;
}

void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;
	from->m_flags &= ~M_PKTHDR;
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, unsigned int), void *arg)
{
	unsigned int count;
	int rval;

	KASSERT(len >= 0);
	KASSERT(off >= 0);

	while (off > 0) {
		KASSERT(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL);
		count = min(m->m_len - off, len);

		rval = (*f)(arg, mtod(m, char *) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else
					return (NULL);
			} else
				m = m->m_next;
		}
	}

	return (NULL);
}

/*
 * m_ext_free: release a reference to the mbuf external storage.
 *
 * => free the mbuf m itself as well.
 */

void
m_ext_free(struct mbuf *m)
{
	bool embedded = MEXT_ISEMBEDDED(m);
	bool dofree = true;
	u_int refcnt;

	KASSERT((m->m_flags & M_EXT) != 0);
	KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
	KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
	KASSERT((m->m_flags & M_EXT_CLUSTER) ==
	    (m->m_ext_ref->m_flags & M_EXT_CLUSTER));

	if (__predict_true(m->m_ext.ext_refcnt == 1)) {
		refcnt = m->m_ext.ext_refcnt = 0;
	} else {
		refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
	}
	if (refcnt > 0) {
		if (embedded) {
			/*
			 * other mbuf's m_ext_ref still points to us.
			 */
			dofree = false;
		} else {
			m->m_ext_ref = m;
		}
	} else {
		/*
		 * dropping the last reference
		 */
		if (!embedded) {
			m->m_ext.ext_refcnt++; /* XXX */
			m_ext_free(m->m_ext_ref);
			m->m_ext_ref = m;
		} else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
			pool_cache_put_paddr((struct pool_cache *)
			    m->m_ext.ext_arg,
			    m->m_ext.ext_buf, m->m_ext.ext_paddr);
		} else if (m->m_ext.ext_free) {
			(*m->m_ext.ext_free)(m,
			    m->m_ext.ext_buf, m->m_ext.ext_size,
			    m->m_ext.ext_arg);
			/*
			 * 'm' is already freed by the ext_free callback.
			 */
			dofree = false;
		} else {
			free(m->m_ext.ext_buf, m->m_ext.ext_type);
		}
	}
	if (dofree) {
		pool_cache_put(mb_cache, m);
	}
}

#if defined(DDB)
void
m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
{
	char ch;
	bool opt_c = false;
	char buf[512];

	while ((ch = *(modif++)) != '\0') {
		switch (ch) {
		case 'c':
			opt_c = true;
			break;
		}
	}

nextchain:
	(*pr)("MBUF %p\n", m);
	snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags);
	(*pr)("  data=%p, len=%d, type=%d, flags=0x%s\n",
	    m->m_data, m->m_len, m->m_type, buf);
	(*pr)("  owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next,
	    m->m_nextpkt);
	(*pr)("  leadingspace=%u, trailingspace=%u, readonly=%u\n",
	    (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m),
	    (int)M_READONLY(m));
	if ((m->m_flags & M_PKTHDR) != 0) {
		snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags);
		(*pr)("  pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%"
		    PRIx32 ", segsz=%u\n",
		    m->m_pkthdr.len, m->m_pkthdr.rcvif,
		    buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
	}
	if ((m->m_flags & M_EXT)) {
		(*pr)("  ext_refcnt=%u, ext_buf=%p, ext_size=%zd, "
		    "ext_free=%p, ext_arg=%p\n",
		    m->m_ext.ext_refcnt,
		    m->m_ext.ext_buf, m->m_ext.ext_size,
		    m->m_ext.ext_free, m->m_ext.ext_arg);
	}
	if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
		vaddr_t sva = (vaddr_t)m->m_ext.ext_buf;
		vaddr_t eva = sva + m->m_ext.ext_size;
		int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT;
		int i;

		(*pr)("  pages:");
		for (i = 0; i < n; i ++) {
			(*pr)(" %p", m->m_ext.ext_pgs[i]);
		}
		(*pr)("\n");
	}

	if (opt_c) {
		m = m->m_next;
		if (m != NULL) {
			goto nextchain;
		}
	}
}
#endif /* defined(DDB) */

void
mbstat_type_add(int type, int diff)
{
	struct mbstat_cpu *mb;
	int s;

	s = splvm();
	mb = percpu_getref(mbstat_percpu);
	mb->m_mtypes[type] += diff;
	percpu_putref(mbstat_percpu);
	splx(s);
}

#if defined(MBUFTRACE)
void
mowner_attach(struct mowner *mo)
{

	KASSERT(mo->mo_counters == NULL);
	mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));

	/* XXX lock */
	LIST_INSERT_HEAD(&mowners, mo, mo_link);
}

void
mowner_detach(struct mowner *mo)
{

	KASSERT(mo->mo_counters != NULL);

	/* XXX lock */
	LIST_REMOVE(mo, mo_link);

	percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
	mo->mo_counters = NULL;
}

void
mowner_init(struct mbuf *m, int type)
{
	struct mowner_counter *mc;
	struct mowner *mo;
	int s;

	m->m_owner = mo = &unknown_mowners[type];
	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
}

void
mowner_ref(struct mbuf *m, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
}
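
/*
 * Illustrative sketch (editor's addition): how a subsystem typically declares
 * and uses an mbuf owner when MBUFTRACE is enabled.  The owner name "example"
 * is hypothetical; only the macros themselves come from <sys/mbuf.h>.
 */
#if 0
static struct mowner example_mowner = MOWNER_INIT("example", "rx");

	/* at attach time */
	MOWNER_ATTACH(&example_mowner);

	/* when taking ownership of an mbuf */
	MCLAIM(m, &example_mowner);

	/* at detach time */
	MOWNER_DETACH(&example_mowner);
#endif
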
void
mowner_revoke(struct mbuf *m, bool all, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
	if (all)
		mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	if (all)
		m->m_owner = &revoked_mowner;
}

static void
mowner_claim(struct mbuf *m, struct mowner *mo)
{
	struct mowner_counter *mc;
	int flags = m->m_flags;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	m->m_owner = mo;
}

void
m_claim(struct mbuf *m, struct mowner *mo)
{

	if (m->m_owner == mo || mo == NULL)
		return;

	mowner_revoke(m, true, m->m_flags);
	mowner_claim(m, mo);
}
#endif /* defined(MBUFTRACE) */