1 /* $NetBSD: uipc_mbuf.c,v 1.130 2008/12/16 22:35:37 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.130 2008/12/16 22:35:37 christos Exp $"); 66 67 #include "opt_mbuftrace.h" 68 #include "opt_ddb.h" 69 70 #include <sys/param.h> 71 #include <sys/systm.h> 72 #include <sys/atomic.h> 73 #include <sys/cpu.h> 74 #include <sys/proc.h> 75 #include <sys/malloc.h> 76 #define MBTYPES 77 #include <sys/mbuf.h> 78 #include <sys/kernel.h> 79 #include <sys/syslog.h> 80 #include <sys/domain.h> 81 #include <sys/protosw.h> 82 #include <sys/percpu.h> 83 #include <sys/pool.h> 84 #include <sys/socket.h> 85 #include <sys/sysctl.h> 86 87 #include <net/if.h> 88 89 #include <uvm/uvm.h> 90 91 pool_cache_t mb_cache; /* mbuf cache */ 92 pool_cache_t mcl_cache; /* mbuf cluster cache */ 93 94 struct mbstat mbstat; 95 int max_linkhdr; 96 int max_protohdr; 97 int max_hdr; 98 int max_datalen; 99 100 static int mb_ctor(void *, void *, int); 101 102 static void *mclpool_alloc(struct pool *, int); 103 static void mclpool_release(struct pool *, void *); 104 105 static void sysctl_kern_mbuf_setup(void); 106 107 static struct sysctllog *mbuf_sysctllog; 108 109 static struct pool_allocator mclpool_allocator = { 110 .pa_alloc = mclpool_alloc, 111 .pa_free = mclpool_release, 112 }; 113 114 static struct mbuf *m_copym0(struct mbuf *, int, int, int, int); 115 static struct mbuf *m_split0(struct mbuf *, int, int, int); 116 static int m_copyback0(struct mbuf **, int, int, const void *, int, int); 117 118 /* flags for m_copyback0 */ 119 #define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */ 120 #define M_COPYBACK0_PRESERVE 0x0002 /* preserve original data */ 121 #define M_COPYBACK0_COW 0x0004 /* do copy-on-write */ 122 #define M_COPYBACK0_EXTEND 0x0008 /* extend chain */ 123 124 static const char mclpool_warnmsg[] = 125 "WARNING: mclpool limit reached; increase NMBCLUSTERS"; 126 127 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 128 129 static percpu_t *mbstat_percpu; 130 131 #ifdef MBUFTRACE 132 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 133 struct mowner unknown_mowners[] = { 134 MOWNER_INIT("unknown", "free"), 135 MOWNER_INIT("unknown", "data"), 136 MOWNER_INIT("unknown", "header"), 137 MOWNER_INIT("unknown", "soname"), 138 MOWNER_INIT("unknown", "soopts"), 139 MOWNER_INIT("unknown", "ftable"), 140 MOWNER_INIT("unknown", "control"), 141 MOWNER_INIT("unknown", "oobdata"), 142 }; 143 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 144 #endif 145 146 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 147 148 #define MCLADDREFERENCE(o, n) \ 149 do { \ 150 KASSERT(((o)->m_flags & M_EXT) != 0); \ 151 KASSERT(((n)->m_flags & M_EXT) == 0); \ 152 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 153 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 154 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 155 (n)->m_ext_ref = (o)->m_ext_ref; \ 156 mowner_ref((n), (n)->m_flags); \ 157 MCLREFDEBUGN((n), __FILE__, __LINE__); \ 158 } while (/* CONSTCOND */ 0) 159 160 /* 161 * Initialize the mbuf allocator. 162 */ 163 void 164 mbinit(void) 165 { 166 167 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 168 CTASSERT(sizeof(struct mbuf) == MSIZE); 169 170 sysctl_kern_mbuf_setup(); 171 172 mclpool_allocator.pa_backingmap = mb_map; 173 174 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 175 NULL, IPL_VM, mb_ctor, NULL, NULL); 176 KASSERT(mb_cache != NULL); 177 178 mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", 179 &mclpool_allocator, IPL_VM, NULL, NULL, NULL); 180 KASSERT(mcl_cache != NULL); 181 182 pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL); 183 pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL); 184 185 /* 186 * Set the hard limit on the mclpool to the number of 187 * mbuf clusters the kernel is to support. Log the limit 188 * reached message max once a minute. 189 */ 190 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 191 192 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 193 194 /* 195 * Set a low water mark for both mbufs and clusters. This should 196 * help ensure that they can be allocated in a memory starvation 197 * situation. This is important for e.g. diskless systems which 198 * must allocate mbufs in order for the pagedaemon to clean pages. 199 */ 200 pool_cache_setlowat(mb_cache, mblowat); 201 pool_cache_setlowat(mcl_cache, mcllowat); 202 203 #ifdef MBUFTRACE 204 { 205 /* 206 * Attach the unknown mowners. 207 */ 208 int i; 209 MOWNER_ATTACH(&revoked_mowner); 210 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 211 i-- > 0; ) 212 MOWNER_ATTACH(&unknown_mowners[i]); 213 } 214 #endif 215 } 216 217 /* 218 * sysctl helper routine for the kern.mbuf subtree. nmbclusters may 219 * or may not be writable, and mblowat and mcllowat need range 220 * checking and pool tweaking after being reset. 221 */ 222 static int 223 sysctl_kern_mbuf(SYSCTLFN_ARGS) 224 { 225 int error, newval; 226 struct sysctlnode node; 227 228 node = *rnode; 229 node.sysctl_data = &newval; 230 switch (rnode->sysctl_num) { 231 case MBUF_NMBCLUSTERS: 232 if (mb_map != NULL) { 233 node.sysctl_flags &= ~CTLFLAG_READWRITE; 234 node.sysctl_flags |= CTLFLAG_READONLY; 235 } 236 /* FALLTHROUGH */ 237 case MBUF_MBLOWAT: 238 case MBUF_MCLLOWAT: 239 newval = *(int*)rnode->sysctl_data; 240 break; 241 default: 242 return (EOPNOTSUPP); 243 } 244 245 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 246 if (error || newp == NULL) 247 return (error); 248 if (newval < 0) 249 return (EINVAL); 250 251 switch (node.sysctl_num) { 252 case MBUF_NMBCLUSTERS: 253 if (newval < nmbclusters) 254 return (EINVAL); 255 nmbclusters = newval; 256 pool_cache_sethardlimit(mcl_cache, nmbclusters, 257 mclpool_warnmsg, 60); 258 break; 259 case MBUF_MBLOWAT: 260 mblowat = newval; 261 pool_cache_setlowat(mb_cache, mblowat); 262 break; 263 case MBUF_MCLLOWAT: 264 mcllowat = newval; 265 pool_cache_setlowat(mcl_cache, mcllowat); 266 break; 267 } 268 269 return (0); 270 } 271 272 #ifdef MBUFTRACE 273 static void 274 mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 275 { 276 struct mowner_counter *mc = v1; 277 struct mowner_user *mo_user = v2; 278 int i; 279 280 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 281 mo_user->mo_counter[i] += mc->mc_counter[i]; 282 } 283 } 284 285 static void 286 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 287 { 288 289 memset(mo_user, 0, sizeof(*mo_user)); 290 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 291 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 292 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 293 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 294 percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user); 295 } 296 297 static int 298 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 299 { 300 struct mowner *mo; 301 size_t len = 0; 302 int error = 0; 303 304 if (namelen != 0) 305 return (EINVAL); 306 if (newp != NULL) 307 return (EPERM); 308 309 LIST_FOREACH(mo, &mowners, mo_link) { 310 struct mowner_user mo_user; 311 312 mowner_convert_to_user(mo, &mo_user); 313 314 if (oldp != NULL) { 315 if (*oldlenp - len < sizeof(mo_user)) { 316 error = ENOMEM; 317 break; 318 } 319 error = copyout(&mo_user, (char *)oldp + len, 320 sizeof(mo_user)); 321 if (error) 322 break; 323 } 324 len += sizeof(mo_user); 325 } 326 327 if (error == 0) 328 *oldlenp = len; 329 330 return (error); 331 } 332 #endif /* MBUFTRACE */ 333 334 static void 335 mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 336 { 337 struct mbstat_cpu *mbsc = v1; 338 struct mbstat *mbs = v2; 339 int i; 340 341 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 342 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 343 } 344 } 345 346 static void 347 mbstat_convert_to_user(struct mbstat *mbs) 348 { 349 350 memset(mbs, 0, sizeof(*mbs)); 351 mbs->m_drain = mbstat.m_drain; 352 percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs); 353 } 354 355 static int 356 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 357 { 358 struct sysctlnode node; 359 struct mbstat mbs; 360 361 mbstat_convert_to_user(&mbs); 362 node = *rnode; 363 node.sysctl_data = &mbs; 364 node.sysctl_size = sizeof(mbs); 365 return sysctl_lookup(SYSCTLFN_CALL(&node)); 366 } 367 368 static void 369 sysctl_kern_mbuf_setup() 370 { 371 372 KASSERT(mbuf_sysctllog == NULL); 373 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 374 CTLFLAG_PERMANENT, 375 CTLTYPE_NODE, "kern", NULL, 376 NULL, 0, NULL, 0, 377 CTL_KERN, CTL_EOL); 378 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 379 CTLFLAG_PERMANENT, 380 CTLTYPE_NODE, "mbuf", 381 SYSCTL_DESCR("mbuf control variables"), 382 NULL, 0, NULL, 0, 383 CTL_KERN, KERN_MBUF, CTL_EOL); 384 385 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 386 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 387 CTLTYPE_INT, "msize", 388 SYSCTL_DESCR("mbuf base size"), 389 NULL, msize, NULL, 0, 390 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 391 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 392 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 393 CTLTYPE_INT, "mclbytes", 394 SYSCTL_DESCR("mbuf cluster size"), 395 NULL, mclbytes, NULL, 0, 396 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 397 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 398 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 399 CTLTYPE_INT, "nmbclusters", 400 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 401 sysctl_kern_mbuf, 0, &nmbclusters, 0, 402 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 403 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 404 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 405 CTLTYPE_INT, "mblowat", 406 SYSCTL_DESCR("mbuf low water mark"), 407 sysctl_kern_mbuf, 0, &mblowat, 0, 408 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 409 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 410 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 411 CTLTYPE_INT, "mcllowat", 412 SYSCTL_DESCR("mbuf cluster low water mark"), 413 sysctl_kern_mbuf, 0, &mcllowat, 0, 414 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 415 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 416 CTLFLAG_PERMANENT, 417 CTLTYPE_STRUCT, "stats", 418 SYSCTL_DESCR("mbuf allocation statistics"), 419 sysctl_kern_mbuf_stats, 0, NULL, 0, 420 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 421 #ifdef MBUFTRACE 422 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 423 CTLFLAG_PERMANENT, 424 CTLTYPE_STRUCT, "mowners", 425 SYSCTL_DESCR("Information about mbuf owners"), 426 sysctl_kern_mbuf_mowners, 0, NULL, 0, 427 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 428 #endif /* MBUFTRACE */ 429 } 430 431 static void * 432 mclpool_alloc(struct pool *pp, int flags) 433 { 434 bool waitok = (flags & PR_WAITOK) ? true : false; 435 436 return ((void *)uvm_km_alloc_poolpage(mb_map, waitok)); 437 } 438 439 static void 440 mclpool_release(struct pool *pp, void *v) 441 { 442 443 uvm_km_free_poolpage(mb_map, (vaddr_t)v); 444 } 445 446 /*ARGSUSED*/ 447 static int 448 mb_ctor(void *arg, void *object, int flags) 449 { 450 struct mbuf *m = object; 451 452 #ifdef POOL_VTOPHYS 453 m->m_paddr = POOL_VTOPHYS(m); 454 #else 455 m->m_paddr = M_PADDR_INVALID; 456 #endif 457 return (0); 458 } 459 460 void 461 m_reclaim(void *arg, int flags) 462 { 463 struct domain *dp; 464 const struct protosw *pr; 465 struct ifnet *ifp; 466 int s; 467 468 KERNEL_LOCK(1, NULL); 469 s = splvm(); 470 DOMAIN_FOREACH(dp) { 471 for (pr = dp->dom_protosw; 472 pr < dp->dom_protoswNPROTOSW; pr++) 473 if (pr->pr_drain) 474 (*pr->pr_drain)(); 475 } 476 IFNET_FOREACH(ifp) { 477 if (ifp->if_drain) 478 (*ifp->if_drain)(ifp); 479 } 480 splx(s); 481 mbstat.m_drain++; 482 KERNEL_UNLOCK_ONE(NULL); 483 } 484 485 /* 486 * Space allocation routines. 487 * These are also available as macros 488 * for critical paths. 489 */ 490 struct mbuf * 491 m_get(int nowait, int type) 492 { 493 struct mbuf *m; 494 495 m = pool_cache_get(mb_cache, 496 nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0); 497 if (m == NULL) 498 return NULL; 499 500 mbstat_type_add(type, 1); 501 mowner_init(m, type); 502 m->m_ext_ref = m; 503 m->m_type = type; 504 m->m_next = NULL; 505 m->m_nextpkt = NULL; 506 m->m_data = m->m_dat; 507 m->m_flags = 0; 508 509 return m; 510 } 511 512 struct mbuf * 513 m_gethdr(int nowait, int type) 514 { 515 struct mbuf *m; 516 517 m = m_get(nowait, type); 518 if (m == NULL) 519 return NULL; 520 521 m->m_data = m->m_pktdat; 522 m->m_flags = M_PKTHDR; 523 m->m_pkthdr.rcvif = NULL; 524 m->m_pkthdr.csum_flags = 0; 525 m->m_pkthdr.csum_data = 0; 526 SLIST_INIT(&m->m_pkthdr.tags); 527 528 return m; 529 } 530 531 struct mbuf * 532 m_getclr(int nowait, int type) 533 { 534 struct mbuf *m; 535 536 MGET(m, nowait, type); 537 if (m == 0) 538 return (NULL); 539 memset(mtod(m, void *), 0, MLEN); 540 return (m); 541 } 542 543 void 544 m_clget(struct mbuf *m, int nowait) 545 { 546 547 MCLGET(m, nowait); 548 } 549 550 struct mbuf * 551 m_free(struct mbuf *m) 552 { 553 struct mbuf *n; 554 555 MFREE(m, n); 556 return (n); 557 } 558 559 void 560 m_freem(struct mbuf *m) 561 { 562 struct mbuf *n; 563 564 if (m == NULL) 565 return; 566 do { 567 MFREE(m, n); 568 m = n; 569 } while (m); 570 } 571 572 #ifdef MBUFTRACE 573 /* 574 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain. 575 */ 576 void 577 m_claimm(struct mbuf *m, struct mowner *mo) 578 { 579 580 for (; m != NULL; m = m->m_next) 581 MCLAIM(m, mo); 582 } 583 #endif 584 585 /* 586 * Mbuffer utility routines. 587 */ 588 589 /* 590 * Lesser-used path for M_PREPEND: 591 * allocate new mbuf to prepend to chain, 592 * copy junk along. 593 */ 594 struct mbuf * 595 m_prepend(struct mbuf *m, int len, int how) 596 { 597 struct mbuf *mn; 598 599 MGET(mn, how, m->m_type); 600 if (mn == (struct mbuf *)NULL) { 601 m_freem(m); 602 return ((struct mbuf *)NULL); 603 } 604 if (m->m_flags & M_PKTHDR) { 605 M_MOVE_PKTHDR(mn, m); 606 } else { 607 MCLAIM(mn, m->m_owner); 608 } 609 mn->m_next = m; 610 m = mn; 611 if (len < MHLEN) 612 MH_ALIGN(m, len); 613 m->m_len = len; 614 return (m); 615 } 616 617 /* 618 * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 619 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 620 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. 621 */ 622 int MCFail; 623 624 struct mbuf * 625 m_copym(struct mbuf *m, int off0, int len, int wait) 626 { 627 628 return m_copym0(m, off0, len, wait, 0); /* shallow copy on M_EXT */ 629 } 630 631 struct mbuf * 632 m_dup(struct mbuf *m, int off0, int len, int wait) 633 { 634 635 return m_copym0(m, off0, len, wait, 1); /* deep copy */ 636 } 637 638 static struct mbuf * 639 m_copym0(struct mbuf *m, int off0, int len, int wait, int deep) 640 { 641 struct mbuf *n, **np; 642 int off = off0; 643 struct mbuf *top; 644 int copyhdr = 0; 645 646 if (off < 0 || len < 0) 647 panic("m_copym: off %d, len %d", off, len); 648 if (off == 0 && m->m_flags & M_PKTHDR) 649 copyhdr = 1; 650 while (off > 0) { 651 if (m == 0) 652 panic("m_copym: m == 0, off %d", off); 653 if (off < m->m_len) 654 break; 655 off -= m->m_len; 656 m = m->m_next; 657 } 658 np = ⊤ 659 top = 0; 660 while (len > 0) { 661 if (m == 0) { 662 if (len != M_COPYALL) 663 panic("m_copym: m == 0, len %d [!COPYALL]", 664 len); 665 break; 666 } 667 MGET(n, wait, m->m_type); 668 *np = n; 669 if (n == 0) 670 goto nospace; 671 MCLAIM(n, m->m_owner); 672 if (copyhdr) { 673 M_COPY_PKTHDR(n, m); 674 if (len == M_COPYALL) 675 n->m_pkthdr.len -= off0; 676 else 677 n->m_pkthdr.len = len; 678 copyhdr = 0; 679 } 680 n->m_len = min(len, m->m_len - off); 681 if (m->m_flags & M_EXT) { 682 if (!deep) { 683 n->m_data = m->m_data + off; 684 MCLADDREFERENCE(m, n); 685 } else { 686 /* 687 * we are unsure about the way m was allocated. 688 * copy into multiple MCLBYTES cluster mbufs. 689 */ 690 MCLGET(n, wait); 691 n->m_len = 0; 692 n->m_len = M_TRAILINGSPACE(n); 693 n->m_len = min(n->m_len, len); 694 n->m_len = min(n->m_len, m->m_len - off); 695 memcpy(mtod(n, void *), mtod(m, char *) + off, 696 (unsigned)n->m_len); 697 } 698 } else 699 memcpy(mtod(n, void *), mtod(m, char *) + off, 700 (unsigned)n->m_len); 701 if (len != M_COPYALL) 702 len -= n->m_len; 703 off += n->m_len; 704 #ifdef DIAGNOSTIC 705 if (off > m->m_len) 706 panic("m_copym0 overrun"); 707 #endif 708 if (off == m->m_len) { 709 m = m->m_next; 710 off = 0; 711 } 712 np = &n->m_next; 713 } 714 if (top == 0) 715 MCFail++; 716 return (top); 717 nospace: 718 m_freem(top); 719 MCFail++; 720 return (NULL); 721 } 722 723 /* 724 * Copy an entire packet, including header (which must be present). 725 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 726 */ 727 struct mbuf * 728 m_copypacket(struct mbuf *m, int how) 729 { 730 struct mbuf *top, *n, *o; 731 732 MGET(n, how, m->m_type); 733 top = n; 734 if (!n) 735 goto nospace; 736 737 MCLAIM(n, m->m_owner); 738 M_COPY_PKTHDR(n, m); 739 n->m_len = m->m_len; 740 if (m->m_flags & M_EXT) { 741 n->m_data = m->m_data; 742 MCLADDREFERENCE(m, n); 743 } else { 744 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 745 } 746 747 m = m->m_next; 748 while (m) { 749 MGET(o, how, m->m_type); 750 if (!o) 751 goto nospace; 752 753 MCLAIM(o, m->m_owner); 754 n->m_next = o; 755 n = n->m_next; 756 757 n->m_len = m->m_len; 758 if (m->m_flags & M_EXT) { 759 n->m_data = m->m_data; 760 MCLADDREFERENCE(m, n); 761 } else { 762 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 763 } 764 765 m = m->m_next; 766 } 767 return top; 768 nospace: 769 m_freem(top); 770 MCFail++; 771 return NULL; 772 } 773 774 /* 775 * Copy data from an mbuf chain starting "off" bytes from the beginning, 776 * continuing for "len" bytes, into the indicated buffer. 777 */ 778 void 779 m_copydata(struct mbuf *m, int off, int len, void *vp) 780 { 781 unsigned count; 782 void * cp = vp; 783 784 if (off < 0 || len < 0) 785 panic("m_copydata: off %d, len %d", off, len); 786 while (off > 0) { 787 if (m == NULL) 788 panic("m_copydata: m == NULL, off %d", off); 789 if (off < m->m_len) 790 break; 791 off -= m->m_len; 792 m = m->m_next; 793 } 794 while (len > 0) { 795 if (m == NULL) 796 panic("m_copydata: m == NULL, len %d", len); 797 count = min(m->m_len - off, len); 798 memcpy(cp, mtod(m, char *) + off, count); 799 len -= count; 800 cp = (char *)cp + count; 801 off = 0; 802 m = m->m_next; 803 } 804 } 805 806 /* 807 * Concatenate mbuf chain n to m. 808 * n might be copied into m (when n->m_len is small), therefore data portion of 809 * n could be copied into an mbuf of different mbuf type. 810 * Any m_pkthdr is not updated. 811 */ 812 void 813 m_cat(struct mbuf *m, struct mbuf *n) 814 { 815 816 while (m->m_next) 817 m = m->m_next; 818 while (n) { 819 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 820 /* just join the two chains */ 821 m->m_next = n; 822 return; 823 } 824 /* splat the data from one into the other */ 825 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 826 (u_int)n->m_len); 827 m->m_len += n->m_len; 828 n = m_free(n); 829 } 830 } 831 832 void 833 m_adj(struct mbuf *mp, int req_len) 834 { 835 int len = req_len; 836 struct mbuf *m; 837 int count; 838 839 if ((m = mp) == NULL) 840 return; 841 if (len >= 0) { 842 /* 843 * Trim from head. 844 */ 845 while (m != NULL && len > 0) { 846 if (m->m_len <= len) { 847 len -= m->m_len; 848 m->m_len = 0; 849 m = m->m_next; 850 } else { 851 m->m_len -= len; 852 m->m_data += len; 853 len = 0; 854 } 855 } 856 m = mp; 857 if (mp->m_flags & M_PKTHDR) 858 m->m_pkthdr.len -= (req_len - len); 859 } else { 860 /* 861 * Trim from tail. Scan the mbuf chain, 862 * calculating its length and finding the last mbuf. 863 * If the adjustment only affects this mbuf, then just 864 * adjust and return. Otherwise, rescan and truncate 865 * after the remaining size. 866 */ 867 len = -len; 868 count = 0; 869 for (;;) { 870 count += m->m_len; 871 if (m->m_next == (struct mbuf *)0) 872 break; 873 m = m->m_next; 874 } 875 if (m->m_len >= len) { 876 m->m_len -= len; 877 if (mp->m_flags & M_PKTHDR) 878 mp->m_pkthdr.len -= len; 879 return; 880 } 881 count -= len; 882 if (count < 0) 883 count = 0; 884 /* 885 * Correct length for chain is "count". 886 * Find the mbuf with last data, adjust its length, 887 * and toss data from remaining mbufs on chain. 888 */ 889 m = mp; 890 if (m->m_flags & M_PKTHDR) 891 m->m_pkthdr.len = count; 892 for (; m; m = m->m_next) { 893 if (m->m_len >= count) { 894 m->m_len = count; 895 break; 896 } 897 count -= m->m_len; 898 } 899 if (m) 900 while (m->m_next) 901 (m = m->m_next)->m_len = 0; 902 } 903 } 904 905 /* 906 * Rearrange an mbuf chain so that len bytes are contiguous 907 * and in the data area of an mbuf (so that mtod and dtom 908 * will work for a structure of size len). Returns the resulting 909 * mbuf chain on success, frees it and returns null on failure. 910 * If there is room, it will add up to max_protohdr-len extra bytes to the 911 * contiguous region in an attempt to avoid being called next time. 912 */ 913 int MPFail; 914 915 struct mbuf * 916 m_pullup(struct mbuf *n, int len) 917 { 918 struct mbuf *m; 919 int count; 920 int space; 921 922 /* 923 * If first mbuf has no cluster, and has room for len bytes 924 * without shifting current data, pullup into it, 925 * otherwise allocate a new mbuf to prepend to the chain. 926 */ 927 if ((n->m_flags & M_EXT) == 0 && 928 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 929 if (n->m_len >= len) 930 return (n); 931 m = n; 932 n = n->m_next; 933 len -= m->m_len; 934 } else { 935 if (len > MHLEN) 936 goto bad; 937 MGET(m, M_DONTWAIT, n->m_type); 938 if (m == 0) 939 goto bad; 940 MCLAIM(m, n->m_owner); 941 m->m_len = 0; 942 if (n->m_flags & M_PKTHDR) { 943 M_MOVE_PKTHDR(m, n); 944 } 945 } 946 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 947 do { 948 count = min(min(max(len, max_protohdr), space), n->m_len); 949 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 950 (unsigned)count); 951 len -= count; 952 m->m_len += count; 953 n->m_len -= count; 954 space -= count; 955 if (n->m_len) 956 n->m_data += count; 957 else 958 n = m_free(n); 959 } while (len > 0 && n); 960 if (len > 0) { 961 (void) m_free(m); 962 goto bad; 963 } 964 m->m_next = n; 965 return (m); 966 bad: 967 m_freem(n); 968 MPFail++; 969 return (NULL); 970 } 971 972 /* 973 * Like m_pullup(), except a new mbuf is always allocated, and we allow 974 * the amount of empty space before the data in the new mbuf to be specified 975 * (in the event that the caller expects to prepend later). 976 */ 977 int MSFail; 978 979 struct mbuf * 980 m_copyup(struct mbuf *n, int len, int dstoff) 981 { 982 struct mbuf *m; 983 int count, space; 984 985 if (len > (MHLEN - dstoff)) 986 goto bad; 987 MGET(m, M_DONTWAIT, n->m_type); 988 if (m == NULL) 989 goto bad; 990 MCLAIM(m, n->m_owner); 991 m->m_len = 0; 992 if (n->m_flags & M_PKTHDR) { 993 M_MOVE_PKTHDR(m, n); 994 } 995 m->m_data += dstoff; 996 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 997 do { 998 count = min(min(max(len, max_protohdr), space), n->m_len); 999 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1000 (unsigned)count); 1001 len -= count; 1002 m->m_len += count; 1003 n->m_len -= count; 1004 space -= count; 1005 if (n->m_len) 1006 n->m_data += count; 1007 else 1008 n = m_free(n); 1009 } while (len > 0 && n); 1010 if (len > 0) { 1011 (void) m_free(m); 1012 goto bad; 1013 } 1014 m->m_next = n; 1015 return (m); 1016 bad: 1017 m_freem(n); 1018 MSFail++; 1019 return (NULL); 1020 } 1021 1022 /* 1023 * Partition an mbuf chain in two pieces, returning the tail -- 1024 * all but the first len0 bytes. In case of failure, it returns NULL and 1025 * attempts to restore the chain to its original state. 1026 */ 1027 struct mbuf * 1028 m_split(struct mbuf *m0, int len0, int wait) 1029 { 1030 1031 return m_split0(m0, len0, wait, 1); 1032 } 1033 1034 static struct mbuf * 1035 m_split0(struct mbuf *m0, int len0, int wait, int copyhdr) 1036 { 1037 struct mbuf *m, *n; 1038 unsigned len = len0, remain, len_save; 1039 1040 for (m = m0; m && len > m->m_len; m = m->m_next) 1041 len -= m->m_len; 1042 if (m == 0) 1043 return (NULL); 1044 remain = m->m_len - len; 1045 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1046 MGETHDR(n, wait, m0->m_type); 1047 if (n == 0) 1048 return (NULL); 1049 MCLAIM(n, m0->m_owner); 1050 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 1051 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1052 len_save = m0->m_pkthdr.len; 1053 m0->m_pkthdr.len = len0; 1054 if (m->m_flags & M_EXT) 1055 goto extpacket; 1056 if (remain > MHLEN) { 1057 /* m can't be the lead packet */ 1058 MH_ALIGN(n, 0); 1059 n->m_next = m_split(m, len, wait); 1060 if (n->m_next == 0) { 1061 (void) m_free(n); 1062 m0->m_pkthdr.len = len_save; 1063 return (NULL); 1064 } else 1065 return (n); 1066 } else 1067 MH_ALIGN(n, remain); 1068 } else if (remain == 0) { 1069 n = m->m_next; 1070 m->m_next = 0; 1071 return (n); 1072 } else { 1073 MGET(n, wait, m->m_type); 1074 if (n == 0) 1075 return (NULL); 1076 MCLAIM(n, m->m_owner); 1077 M_ALIGN(n, remain); 1078 } 1079 extpacket: 1080 if (m->m_flags & M_EXT) { 1081 n->m_data = m->m_data + len; 1082 MCLADDREFERENCE(m, n); 1083 } else { 1084 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1085 } 1086 n->m_len = remain; 1087 m->m_len = len; 1088 n->m_next = m->m_next; 1089 m->m_next = 0; 1090 return (n); 1091 } 1092 /* 1093 * Routine to copy from device local memory into mbufs. 1094 */ 1095 struct mbuf * 1096 m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 1097 void (*copy)(const void *from, void *to, size_t len)) 1098 { 1099 struct mbuf *m; 1100 struct mbuf *top = 0, **mp = ⊤ 1101 int off = off0, len; 1102 char *cp; 1103 char *epkt; 1104 1105 cp = buf; 1106 epkt = cp + totlen; 1107 if (off) { 1108 /* 1109 * If 'off' is non-zero, packet is trailer-encapsulated, 1110 * so we have to skip the type and length fields. 1111 */ 1112 cp += off + 2 * sizeof(uint16_t); 1113 totlen -= 2 * sizeof(uint16_t); 1114 } 1115 MGETHDR(m, M_DONTWAIT, MT_DATA); 1116 if (m == 0) 1117 return (NULL); 1118 m->m_pkthdr.rcvif = ifp; 1119 m->m_pkthdr.len = totlen; 1120 m->m_len = MHLEN; 1121 1122 while (totlen > 0) { 1123 if (top) { 1124 MGET(m, M_DONTWAIT, MT_DATA); 1125 if (m == 0) { 1126 m_freem(top); 1127 return (NULL); 1128 } 1129 m->m_len = MLEN; 1130 } 1131 len = min(totlen, epkt - cp); 1132 if (len >= MINCLSIZE) { 1133 MCLGET(m, M_DONTWAIT); 1134 if ((m->m_flags & M_EXT) == 0) { 1135 m_free(m); 1136 m_freem(top); 1137 return (NULL); 1138 } 1139 m->m_len = len = min(len, MCLBYTES); 1140 } else { 1141 /* 1142 * Place initial small packet/header at end of mbuf. 1143 */ 1144 if (len < m->m_len) { 1145 if (top == 0 && len + max_linkhdr <= m->m_len) 1146 m->m_data += max_linkhdr; 1147 m->m_len = len; 1148 } else 1149 len = m->m_len; 1150 } 1151 if (copy) 1152 copy(cp, mtod(m, void *), (size_t)len); 1153 else 1154 memcpy(mtod(m, void *), cp, (size_t)len); 1155 cp += len; 1156 *mp = m; 1157 mp = &m->m_next; 1158 totlen -= len; 1159 if (cp == epkt) 1160 cp = buf; 1161 } 1162 return (top); 1163 } 1164 1165 /* 1166 * Copy data from a buffer back into the indicated mbuf chain, 1167 * starting "off" bytes from the beginning, extending the mbuf 1168 * chain if necessary. 1169 */ 1170 void 1171 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1172 { 1173 #if defined(DEBUG) 1174 struct mbuf *origm = m0; 1175 int error; 1176 #endif /* defined(DEBUG) */ 1177 1178 if (m0 == NULL) 1179 return; 1180 1181 #if defined(DEBUG) 1182 error = 1183 #endif /* defined(DEBUG) */ 1184 m_copyback0(&m0, off, len, cp, 1185 M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT); 1186 1187 #if defined(DEBUG) 1188 if (error != 0 || (m0 != NULL && origm != m0)) 1189 panic("m_copyback"); 1190 #endif /* defined(DEBUG) */ 1191 } 1192 1193 struct mbuf * 1194 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1195 { 1196 int error; 1197 1198 /* don't support chain expansion */ 1199 KDASSERT(off + len <= m_length(m0)); 1200 1201 error = m_copyback0(&m0, off, len, cp, 1202 M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how); 1203 if (error) { 1204 /* 1205 * no way to recover from partial success. 1206 * just free the chain. 1207 */ 1208 m_freem(m0); 1209 return NULL; 1210 } 1211 return m0; 1212 } 1213 1214 /* 1215 * m_makewritable: ensure the specified range writable. 1216 */ 1217 int 1218 m_makewritable(struct mbuf **mp, int off, int len, int how) 1219 { 1220 int error; 1221 #if defined(DEBUG) 1222 struct mbuf *n; 1223 int origlen, reslen; 1224 1225 origlen = m_length(*mp); 1226 #endif /* defined(DEBUG) */ 1227 1228 #if 0 /* M_COPYALL is large enough */ 1229 if (len == M_COPYALL) 1230 len = m_length(*mp) - off; /* XXX */ 1231 #endif 1232 1233 error = m_copyback0(mp, off, len, NULL, 1234 M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how); 1235 1236 #if defined(DEBUG) 1237 reslen = 0; 1238 for (n = *mp; n; n = n->m_next) 1239 reslen += n->m_len; 1240 if (origlen != reslen) 1241 panic("m_makewritable: length changed"); 1242 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1243 panic("m_makewritable: inconsist"); 1244 #endif /* defined(DEBUG) */ 1245 1246 return error; 1247 } 1248 1249 int 1250 m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags, 1251 int how) 1252 { 1253 int mlen; 1254 struct mbuf *m, *n; 1255 struct mbuf **mp; 1256 int totlen = 0; 1257 const char *cp = vp; 1258 1259 KASSERT(mp0 != NULL); 1260 KASSERT(*mp0 != NULL); 1261 KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL); 1262 KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL); 1263 1264 /* 1265 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW, 1266 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive. 1267 */ 1268 1269 KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0); 1270 1271 mp = mp0; 1272 m = *mp; 1273 while (off > (mlen = m->m_len)) { 1274 off -= mlen; 1275 totlen += mlen; 1276 if (m->m_next == NULL) { 1277 int tspace; 1278 extend: 1279 if ((flags & M_COPYBACK0_EXTEND) == 0) 1280 goto out; 1281 1282 /* 1283 * try to make some space at the end of "m". 1284 */ 1285 1286 mlen = m->m_len; 1287 if (off + len >= MINCLSIZE && 1288 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1289 MCLGET(m, how); 1290 } 1291 tspace = M_TRAILINGSPACE(m); 1292 if (tspace > 0) { 1293 tspace = min(tspace, off + len); 1294 KASSERT(tspace > 0); 1295 memset(mtod(m, char *) + m->m_len, 0, 1296 min(off, tspace)); 1297 m->m_len += tspace; 1298 off += mlen; 1299 totlen -= mlen; 1300 continue; 1301 } 1302 1303 /* 1304 * need to allocate an mbuf. 1305 */ 1306 1307 if (off + len >= MINCLSIZE) { 1308 n = m_getcl(how, m->m_type, 0); 1309 } else { 1310 n = m_get(how, m->m_type); 1311 } 1312 if (n == NULL) { 1313 goto out; 1314 } 1315 n->m_len = 0; 1316 n->m_len = min(M_TRAILINGSPACE(n), off + len); 1317 memset(mtod(n, char *), 0, min(n->m_len, off)); 1318 m->m_next = n; 1319 } 1320 mp = &m->m_next; 1321 m = m->m_next; 1322 } 1323 while (len > 0) { 1324 mlen = m->m_len - off; 1325 if (mlen != 0 && M_READONLY(m)) { 1326 char *datap; 1327 int eatlen; 1328 1329 /* 1330 * this mbuf is read-only. 1331 * allocate a new writable mbuf and try again. 1332 */ 1333 1334 #if defined(DIAGNOSTIC) 1335 if ((flags & M_COPYBACK0_COW) == 0) 1336 panic("m_copyback0: read-only"); 1337 #endif /* defined(DIAGNOSTIC) */ 1338 1339 /* 1340 * if we're going to write into the middle of 1341 * a mbuf, split it first. 1342 */ 1343 if (off > 0 && len < mlen) { 1344 n = m_split0(m, off, how, 0); 1345 if (n == NULL) 1346 goto enobufs; 1347 m->m_next = n; 1348 mp = &m->m_next; 1349 m = n; 1350 off = 0; 1351 continue; 1352 } 1353 1354 /* 1355 * XXX TODO coalesce into the trailingspace of 1356 * the previous mbuf when possible. 1357 */ 1358 1359 /* 1360 * allocate a new mbuf. copy packet header if needed. 1361 */ 1362 MGET(n, how, m->m_type); 1363 if (n == NULL) 1364 goto enobufs; 1365 MCLAIM(n, m->m_owner); 1366 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1367 M_MOVE_PKTHDR(n, m); 1368 n->m_len = MHLEN; 1369 } else { 1370 if (len >= MINCLSIZE) 1371 MCLGET(n, M_DONTWAIT); 1372 n->m_len = 1373 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1374 } 1375 if (n->m_len > len) 1376 n->m_len = len; 1377 1378 /* 1379 * free the region which has been overwritten. 1380 * copying data from old mbufs if requested. 1381 */ 1382 if (flags & M_COPYBACK0_PRESERVE) 1383 datap = mtod(n, char *); 1384 else 1385 datap = NULL; 1386 eatlen = n->m_len; 1387 KDASSERT(off == 0 || eatlen >= mlen); 1388 if (off > 0) { 1389 KDASSERT(len >= mlen); 1390 m->m_len = off; 1391 m->m_next = n; 1392 if (datap) { 1393 m_copydata(m, off, mlen, datap); 1394 datap += mlen; 1395 } 1396 eatlen -= mlen; 1397 mp = &m->m_next; 1398 m = m->m_next; 1399 } 1400 while (m != NULL && M_READONLY(m) && 1401 n->m_type == m->m_type && eatlen > 0) { 1402 mlen = min(eatlen, m->m_len); 1403 if (datap) { 1404 m_copydata(m, 0, mlen, datap); 1405 datap += mlen; 1406 } 1407 m->m_data += mlen; 1408 m->m_len -= mlen; 1409 eatlen -= mlen; 1410 if (m->m_len == 0) 1411 *mp = m = m_free(m); 1412 } 1413 if (eatlen > 0) 1414 n->m_len -= eatlen; 1415 n->m_next = m; 1416 *mp = m = n; 1417 continue; 1418 } 1419 mlen = min(mlen, len); 1420 if (flags & M_COPYBACK0_COPYBACK) { 1421 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1422 cp += mlen; 1423 } 1424 len -= mlen; 1425 mlen += off; 1426 off = 0; 1427 totlen += mlen; 1428 if (len == 0) 1429 break; 1430 if (m->m_next == NULL) { 1431 goto extend; 1432 } 1433 mp = &m->m_next; 1434 m = m->m_next; 1435 } 1436 out: if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1437 KASSERT((flags & M_COPYBACK0_EXTEND) != 0); 1438 m->m_pkthdr.len = totlen; 1439 } 1440 1441 return 0; 1442 1443 enobufs: 1444 return ENOBUFS; 1445 } 1446 1447 void 1448 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1449 { 1450 1451 KASSERT((to->m_flags & M_EXT) == 0); 1452 KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL); 1453 KASSERT((from->m_flags & M_PKTHDR) != 0); 1454 1455 to->m_pkthdr = from->m_pkthdr; 1456 to->m_flags = from->m_flags & M_COPYFLAGS; 1457 to->m_data = to->m_pktdat; 1458 1459 from->m_flags &= ~M_PKTHDR; 1460 } 1461 1462 /* 1463 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1464 * beginning, continuing for "len" bytes. 1465 */ 1466 int 1467 m_apply(struct mbuf *m, int off, int len, 1468 int (*f)(void *, void *, unsigned int), void *arg) 1469 { 1470 unsigned int count; 1471 int rval; 1472 1473 KASSERT(len >= 0); 1474 KASSERT(off >= 0); 1475 1476 while (off > 0) { 1477 KASSERT(m != NULL); 1478 if (off < m->m_len) 1479 break; 1480 off -= m->m_len; 1481 m = m->m_next; 1482 } 1483 while (len > 0) { 1484 KASSERT(m != NULL); 1485 count = min(m->m_len - off, len); 1486 1487 rval = (*f)(arg, mtod(m, char *) + off, count); 1488 if (rval) 1489 return (rval); 1490 1491 len -= count; 1492 off = 0; 1493 m = m->m_next; 1494 } 1495 1496 return (0); 1497 } 1498 1499 /* 1500 * Return a pointer to mbuf/offset of location in mbuf chain. 1501 */ 1502 struct mbuf * 1503 m_getptr(struct mbuf *m, int loc, int *off) 1504 { 1505 1506 while (loc >= 0) { 1507 /* Normal end of search */ 1508 if (m->m_len > loc) { 1509 *off = loc; 1510 return (m); 1511 } else { 1512 loc -= m->m_len; 1513 1514 if (m->m_next == NULL) { 1515 if (loc == 0) { 1516 /* Point at the end of valid data */ 1517 *off = m->m_len; 1518 return (m); 1519 } else 1520 return (NULL); 1521 } else 1522 m = m->m_next; 1523 } 1524 } 1525 1526 return (NULL); 1527 } 1528 1529 /* 1530 * m_ext_free: release a reference to the mbuf external storage. 1531 * 1532 * => free the mbuf m itsself as well. 1533 */ 1534 1535 void 1536 m_ext_free(struct mbuf *m) 1537 { 1538 bool embedded = MEXT_ISEMBEDDED(m); 1539 bool dofree = true; 1540 u_int refcnt; 1541 1542 KASSERT((m->m_flags & M_EXT) != 0); 1543 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1544 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1545 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1546 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1547 1548 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1549 refcnt = m->m_ext.ext_refcnt = 0; 1550 } else { 1551 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1552 } 1553 if (refcnt > 0) { 1554 if (embedded) { 1555 /* 1556 * other mbuf's m_ext_ref still points to us. 1557 */ 1558 dofree = false; 1559 } else { 1560 m->m_ext_ref = m; 1561 } 1562 } else { 1563 /* 1564 * dropping the last reference 1565 */ 1566 if (!embedded) { 1567 m->m_ext.ext_refcnt++; /* XXX */ 1568 m_ext_free(m->m_ext_ref); 1569 m->m_ext_ref = m; 1570 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 1571 pool_cache_put_paddr((struct pool_cache *) 1572 m->m_ext.ext_arg, 1573 m->m_ext.ext_buf, m->m_ext.ext_paddr); 1574 } else if (m->m_ext.ext_free) { 1575 (*m->m_ext.ext_free)(m, 1576 m->m_ext.ext_buf, m->m_ext.ext_size, 1577 m->m_ext.ext_arg); 1578 /* 1579 * 'm' is already freed by the ext_free callback. 1580 */ 1581 dofree = false; 1582 } else { 1583 free(m->m_ext.ext_buf, m->m_ext.ext_type); 1584 } 1585 } 1586 if (dofree) { 1587 pool_cache_put(mb_cache, m); 1588 } 1589 } 1590 1591 #if defined(DDB) 1592 void 1593 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 1594 { 1595 char ch; 1596 bool opt_c = false; 1597 char buf[512]; 1598 1599 while ((ch = *(modif++)) != '\0') { 1600 switch (ch) { 1601 case 'c': 1602 opt_c = true; 1603 break; 1604 } 1605 } 1606 1607 nextchain: 1608 (*pr)("MBUF %p\n", m); 1609 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 1610 (*pr)(" data=%p, len=%d, type=%d, flags=0x%s\n", 1611 m->m_data, m->m_len, m->m_type, buf); 1612 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 1613 m->m_nextpkt); 1614 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 1615 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 1616 (int)M_READONLY(m)); 1617 if ((m->m_flags & M_PKTHDR) != 0) { 1618 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 1619 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%" 1620 PRIx32 ", segsz=%u\n", 1621 m->m_pkthdr.len, m->m_pkthdr.rcvif, 1622 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 1623 } 1624 if ((m->m_flags & M_EXT)) { 1625 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 1626 "ext_free=%p, ext_arg=%p\n", 1627 m->m_ext.ext_refcnt, 1628 m->m_ext.ext_buf, m->m_ext.ext_size, 1629 m->m_ext.ext_free, m->m_ext.ext_arg); 1630 } 1631 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 1632 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 1633 vaddr_t eva = sva + m->m_ext.ext_size; 1634 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 1635 int i; 1636 1637 (*pr)(" pages:"); 1638 for (i = 0; i < n; i ++) { 1639 (*pr)(" %p", m->m_ext.ext_pgs[i]); 1640 } 1641 (*pr)("\n"); 1642 } 1643 1644 if (opt_c) { 1645 m = m->m_next; 1646 if (m != NULL) { 1647 goto nextchain; 1648 } 1649 } 1650 } 1651 #endif /* defined(DDB) */ 1652 1653 void 1654 mbstat_type_add(int type, int diff) 1655 { 1656 struct mbstat_cpu *mb; 1657 int s; 1658 1659 s = splvm(); 1660 mb = percpu_getref(mbstat_percpu); 1661 mb->m_mtypes[type] += diff; 1662 percpu_putref(mbstat_percpu); 1663 splx(s); 1664 } 1665 1666 #if defined(MBUFTRACE) 1667 void 1668 mowner_attach(struct mowner *mo) 1669 { 1670 1671 KASSERT(mo->mo_counters == NULL); 1672 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 1673 1674 /* XXX lock */ 1675 LIST_INSERT_HEAD(&mowners, mo, mo_link); 1676 } 1677 1678 void 1679 mowner_detach(struct mowner *mo) 1680 { 1681 1682 KASSERT(mo->mo_counters != NULL); 1683 1684 /* XXX lock */ 1685 LIST_REMOVE(mo, mo_link); 1686 1687 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 1688 mo->mo_counters = NULL; 1689 } 1690 1691 void 1692 mowner_init(struct mbuf *m, int type) 1693 { 1694 struct mowner_counter *mc; 1695 struct mowner *mo; 1696 int s; 1697 1698 m->m_owner = mo = &unknown_mowners[type]; 1699 s = splvm(); 1700 mc = percpu_getref(mo->mo_counters); 1701 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1702 percpu_putref(mo->mo_counters); 1703 splx(s); 1704 } 1705 1706 void 1707 mowner_ref(struct mbuf *m, int flags) 1708 { 1709 struct mowner *mo = m->m_owner; 1710 struct mowner_counter *mc; 1711 int s; 1712 1713 s = splvm(); 1714 mc = percpu_getref(mo->mo_counters); 1715 if ((flags & M_EXT) != 0) 1716 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1717 if ((flags & M_CLUSTER) != 0) 1718 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1719 percpu_putref(mo->mo_counters); 1720 splx(s); 1721 } 1722 1723 void 1724 mowner_revoke(struct mbuf *m, bool all, int flags) 1725 { 1726 struct mowner *mo = m->m_owner; 1727 struct mowner_counter *mc; 1728 int s; 1729 1730 s = splvm(); 1731 mc = percpu_getref(mo->mo_counters); 1732 if ((flags & M_EXT) != 0) 1733 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 1734 if ((flags & M_CLUSTER) != 0) 1735 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 1736 if (all) 1737 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 1738 percpu_putref(mo->mo_counters); 1739 splx(s); 1740 if (all) 1741 m->m_owner = &revoked_mowner; 1742 } 1743 1744 static void 1745 mowner_claim(struct mbuf *m, struct mowner *mo) 1746 { 1747 struct mowner_counter *mc; 1748 int flags = m->m_flags; 1749 int s; 1750 1751 s = splvm(); 1752 mc = percpu_getref(mo->mo_counters); 1753 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1754 if ((flags & M_EXT) != 0) 1755 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1756 if ((flags & M_CLUSTER) != 0) 1757 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1758 percpu_putref(mo->mo_counters); 1759 splx(s); 1760 m->m_owner = mo; 1761 } 1762 1763 void 1764 m_claim(struct mbuf *m, struct mowner *mo) 1765 { 1766 1767 if (m->m_owner == mo || mo == NULL) 1768 return; 1769 1770 mowner_revoke(m, true, m->m_flags); 1771 mowner_claim(m, mo); 1772 } 1773 #endif /* defined(MBUFTRACE) */ 1774