/*	$NetBSD: uipc_mbuf.c,v 1.145 2012/02/10 17:35:47 para Exp $	*/

/*-
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.145 2012/02/10 17:35:47 para Exp $");

#include "opt_mbuftrace.h"
#include "opt_nmbclusters.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

pool_cache_t mb_cache;	/* mbuf cache */
pool_cache_t mcl_cache;	/* mbuf cluster cache */

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static int mb_ctor(void *, void *, int);

static void sysctl_kern_mbuf_setup(void);

static struct sysctllog *mbuf_sysctllog;

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define M_COPYBACK0_EXTEND	0x0008	/* extend chain */

static const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

static percpu_t *mbstat_percpu;

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
    MOWNER_INIT("unknown", "free"),
    MOWNER_INIT("unknown", "data"),
    MOWNER_INIT("unknown", "header"),
    MOWNER_INIT("unknown", "soname"),
    MOWNER_INIT("unknown", "soopts"),
    MOWNER_INIT("unknown", "ftable"),
    MOWNER_INIT("unknown", "control"),
    MOWNER_INIT("unknown", "oobdata"),
};
struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
#endif

#define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m))

#define MCLADDREFERENCE(o, n)						\
do {									\
    KASSERT(((o)->m_flags & M_EXT) != 0);				\
    KASSERT(((n)->m_flags & M_EXT) == 0);				\
    KASSERT((o)->m_ext.ext_refcnt >= 1);				\
    (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS);			\
    atomic_inc_uint(&(o)->m_ext.ext_refcnt);				\
    (n)->m_ext_ref = (o)->m_ext_ref;					\
    mowner_ref((n), (n)->m_flags);					\
    MCLREFDEBUGN((n), __FILE__, __LINE__);				\
} while (/* CONSTCOND */ 0)

static int
nmbclusters_limit(void)
{
#if defined(PMAP_MAP_POOLPAGE)
    /* direct mapping, doesn't use space in kmem_map */
    vsize_t max_size = physmem / 4;
#else
    vsize_t max_size = MIN(physmem / 4, nkmempages / 4);
#endif

    max_size = max_size * PAGE_SIZE / MCLBYTES;
#ifdef NMBCLUSTERS_MAX
    max_size = MIN(max_size, NMBCLUSTERS_MAX);
#endif

#ifdef NMBCLUSTERS
    return MIN(max_size, NMBCLUSTERS);
#else
    return max_size;
#endif
}
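
/*
 * Illustrative arithmetic for the limit above (not part of the original
 * source): with the common values PAGE_SIZE = 4096 and MCLBYTES = 2048,
 * a direct-mapped machine with 1 GiB of RAM has physmem = 262144 pages,
 * so max_size starts at 262144 / 4 = 65536 pages, which converts to
 * 65536 * 4096 / 2048 = 131072 clusters before the optional
 * NMBCLUSTERS_MAX and NMBCLUSTERS clamps are applied.
 */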

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

    CTASSERT(sizeof(struct _m_ext) <= MHLEN);
    CTASSERT(sizeof(struct mbuf) == MSIZE);

    sysctl_kern_mbuf_setup();

    mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl",
        NULL, IPL_VM, mb_ctor, NULL, NULL);
    KASSERT(mb_cache != NULL);

    mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL,
        IPL_VM, NULL, NULL, NULL);
    KASSERT(mcl_cache != NULL);

    pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL);
    pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL);

    /*
     * Set an arbitrary default limit on the number of mbuf clusters.
     */
#ifdef NMBCLUSTERS
    nmbclusters = nmbclusters_limit();
#else
    nmbclusters = MAX(1024,
        (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16);
    nmbclusters = MIN(nmbclusters, nmbclusters_limit());
#endif

    /*
     * Set the hard limit on the mclpool to the number of
     * mbuf clusters the kernel is to support.  Log the limit
     * reached message at most once a minute.
     */
    pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);

    mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));

    /*
     * Set a low water mark for both mbufs and clusters.  This should
     * help ensure that they can be allocated in a memory starvation
     * situation.  This is important for e.g. diskless systems which
     * must allocate mbufs in order for the pagedaemon to clean pages.
     */
    pool_cache_setlowat(mb_cache, mblowat);
    pool_cache_setlowat(mcl_cache, mcllowat);

#ifdef MBUFTRACE
    {
        /*
         * Attach the unknown mowners.
         */
        int i;
        MOWNER_ATTACH(&revoked_mowner);
        for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
             i-- > 0; )
            MOWNER_ATTACH(&unknown_mowners[i]);
    }
#endif
}

/*
 * sysctl helper routine for the kern.mbuf subtree.
 * nmbclusters, mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
    int error, newval;
    struct sysctlnode node;

    node = *rnode;
    node.sysctl_data = &newval;
    switch (rnode->sysctl_num) {
    case MBUF_NMBCLUSTERS:
    case MBUF_MBLOWAT:
    case MBUF_MCLLOWAT:
        newval = *(int *)rnode->sysctl_data;
        break;
    default:
        return (EOPNOTSUPP);
    }

    error = sysctl_lookup(SYSCTLFN_CALL(&node));
    if (error || newp == NULL)
        return (error);
    if (newval < 0)
        return (EINVAL);

    switch (node.sysctl_num) {
    case MBUF_NMBCLUSTERS:
        if (newval < nmbclusters)
            return (EINVAL);
        if (newval > nmbclusters_limit())
            return (EINVAL);
        nmbclusters = newval;
        pool_cache_sethardlimit(mcl_cache, nmbclusters,
            mclpool_warnmsg, 60);
        break;
    case MBUF_MBLOWAT:
        mblowat = newval;
        pool_cache_setlowat(mb_cache, mblowat);
        break;
    case MBUF_MCLLOWAT:
        mcllowat = newval;
        pool_cache_setlowat(mcl_cache, mcllowat);
        break;
    }

    return (0);
}

#ifdef MBUFTRACE
static void
mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
    struct mowner_counter *mc = v1;
    struct mowner_user *mo_user = v2;
    int i;

    for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
        mo_user->mo_counter[i] += mc->mc_counter[i];
    }
}

static void
mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
{

    memset(mo_user, 0, sizeof(*mo_user));
    CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
    CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
    memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
    memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
    percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user);
}

static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
    struct mowner *mo;
    size_t len = 0;
    int error = 0;

    if (namelen != 0)
        return (EINVAL);
    if (newp != NULL)
        return (EPERM);

    LIST_FOREACH(mo, &mowners, mo_link) {
        struct mowner_user mo_user;

        mowner_convert_to_user(mo, &mo_user);

        if (oldp != NULL) {
            if (*oldlenp - len < sizeof(mo_user)) {
                error = ENOMEM;
                break;
            }
            error = copyout(&mo_user, (char *)oldp + len,
                sizeof(mo_user));
            if (error)
                break;
        }
        len += sizeof(mo_user);
    }

    if (error == 0)
        *oldlenp = len;

    return (error);
}
#endif /* MBUFTRACE */

static void
mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
    struct mbstat_cpu *mbsc = v1;
    struct mbstat *mbs = v2;
    int i;

    for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
        mbs->m_mtypes[i] += mbsc->m_mtypes[i];
    }
}

static void
mbstat_convert_to_user(struct mbstat *mbs)
{

    memset(mbs, 0, sizeof(*mbs));
    mbs->m_drain = mbstat.m_drain;
    percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs);
}

static int
sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
{
    struct sysctlnode node;
    struct mbstat mbs;

    mbstat_convert_to_user(&mbs);
    node = *rnode;
    node.sysctl_data = &mbs;
    node.sysctl_size = sizeof(mbs);
    return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static void
sysctl_kern_mbuf_setup(void)
{

    KASSERT(mbuf_sysctllog == NULL);
    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT,
        CTLTYPE_NODE, "kern", NULL,
        NULL, 0, NULL, 0,
        CTL_KERN, CTL_EOL);
    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT,
        CTLTYPE_NODE, "mbuf",
        SYSCTL_DESCR("mbuf control variables"),
        NULL, 0, NULL, 0,
        CTL_KERN, KERN_MBUF, CTL_EOL);

    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
        CTLTYPE_INT, "msize",
        SYSCTL_DESCR("mbuf base size"),
        NULL, msize, NULL, 0,
        CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
        CTLTYPE_INT, "mclbytes",
        SYSCTL_DESCR("mbuf cluster size"),
        NULL, mclbytes, NULL, 0,
        CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
        CTLTYPE_INT, "nmbclusters",
        SYSCTL_DESCR("Limit on the number of mbuf clusters"),
        sysctl_kern_mbuf, 0, &nmbclusters, 0,
        CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
        CTLTYPE_INT, "mblowat",
        SYSCTL_DESCR("mbuf low water mark"),
        sysctl_kern_mbuf, 0, &mblowat, 0,
        CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
        CTLTYPE_INT, "mcllowat",
        SYSCTL_DESCR("mbuf cluster low water mark"),
        sysctl_kern_mbuf, 0, &mcllowat, 0,
        CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT,
        CTLTYPE_STRUCT, "stats",
        SYSCTL_DESCR("mbuf allocation statistics"),
        sysctl_kern_mbuf_stats, 0, NULL, 0,
        CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
    sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
        CTLFLAG_PERMANENT,
        CTLTYPE_STRUCT, "mowners",
        SYSCTL_DESCR("Information about mbuf owners"),
        sysctl_kern_mbuf_mowners, 0, NULL, 0,
        CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}

static int
mb_ctor(void *arg, void *object, int flags)
{
    struct mbuf *m = object;

#ifdef POOL_VTOPHYS
    m->m_paddr = POOL_VTOPHYS(m);
#else
    m->m_paddr = M_PADDR_INVALID;
#endif
    return (0);
}

void
m_reclaim(void *arg, int flags)
{
    struct domain *dp;
    const struct protosw *pr;
    struct ifnet *ifp;
    int s;

    KERNEL_LOCK(1, NULL);
    s = splvm();
    DOMAIN_FOREACH(dp) {
        for (pr = dp->dom_protosw;
             pr < dp->dom_protoswNPROTOSW; pr++)
            if (pr->pr_drain)
                (*pr->pr_drain)();
    }
    IFNET_FOREACH(ifp) {
        if (ifp->if_drain)
            (*ifp->if_drain)(ifp);
    }
    splx(s);
    mbstat.m_drain++;
    KERNEL_UNLOCK_ONE(NULL);
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int nowait, int type)
{
    struct mbuf *m;

    KASSERT(type != MT_FREE);

    m = pool_cache_get(mb_cache,
        nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0);
    if (m == NULL)
        return NULL;

    mbstat_type_add(type, 1);
    mowner_init(m, type);
    m->m_ext_ref = m;
    m->m_type = type;
    m->m_next = NULL;
    m->m_nextpkt = NULL;
    m->m_data = m->m_dat;
    m->m_flags = 0;

    return m;
}

struct mbuf *
m_gethdr(int nowait, int type)
{
    struct mbuf *m;

    m = m_get(nowait, type);
    if (m == NULL)
        return NULL;

    m->m_data = m->m_pktdat;
    m->m_flags = M_PKTHDR;
    m->m_pkthdr.rcvif = NULL;
    m->m_pkthdr.csum_flags = 0;
    m->m_pkthdr.csum_data = 0;
    SLIST_INIT(&m->m_pkthdr.tags);

    return m;
}

struct mbuf *
m_getclr(int nowait, int type)
{
    struct mbuf *m;

    MGET(m, nowait, type);
    if (m == 0)
        return (NULL);
    memset(mtod(m, void *), 0, MLEN);
    return (m);
}

void
m_clget(struct mbuf *m, int nowait)
{

    MCLGET(m, nowait);
}

struct mbuf *
m_free(struct mbuf *m)
{
    struct mbuf *n;

    MFREE(m, n);
    return (n);
}

void
m_freem(struct mbuf *m)
{
    struct mbuf *n;

    if (m == NULL)
        return;
    do {
        MFREE(m, n);
        m = n;
    } while (m);
}

#ifdef MBUFTRACE
/*
 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain.
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

    for (; m != NULL; m = m->m_next)
        MCLAIM(m, mo);
}
#endif

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
    struct mbuf *mn;

    MGET(mn, how, m->m_type);
    if (mn == NULL) {
        m_freem(m);
        return (NULL);
    }
    if (m->m_flags & M_PKTHDR) {
        M_MOVE_PKTHDR(mn, m);
    } else {
        MCLAIM(mn, m->m_owner);
    }
    mn->m_next = m;
    m = mn;
    if (len < MHLEN)
        MH_ALIGN(m, len);
    m->m_len = len;
    return (m);
}
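
/*
 * Illustrative sketch (not part of the original source): the usual way a
 * protocol gains header space, letting M_PREPEND() take the fast path when
 * leading space exists and fall back to m_prepend() otherwise.  The header
 * type "struct my_hdr" is hypothetical.
 */
#if 0
static struct mbuf *
example_prepend_header(struct mbuf *m)
{
    struct my_hdr *mh;

    M_PREPEND(m, sizeof(struct my_hdr), M_DONTWAIT);
    if (m == NULL)
        return NULL;		/* the original chain was freed */
    mh = mtod(m, struct my_hdr *);
    memset(mh, 0, sizeof(*mh));
    return m;
}
#endif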

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{

    return m_copym0(m, off0, len, wait, 0);	/* shallow copy on M_EXT */
}

struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{

    return m_copym0(m, off0, len, wait, 1);	/* deep copy */
}

static struct mbuf *
m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
{
    struct mbuf *n, **np;
    int off = off0;
    struct mbuf *top;
    int copyhdr = 0;

    if (off < 0 || len < 0)
        panic("m_copym: off %d, len %d", off, len);
    if (off == 0 && m->m_flags & M_PKTHDR)
        copyhdr = 1;
    while (off > 0) {
        if (m == 0)
            panic("m_copym: m == 0, off %d", off);
        if (off < m->m_len)
            break;
        off -= m->m_len;
        m = m->m_next;
    }
    np = &top;
    top = 0;
    while (len > 0) {
        if (m == 0) {
            if (len != M_COPYALL)
                panic("m_copym: m == 0, len %d [!COPYALL]",
                    len);
            break;
        }
        MGET(n, wait, m->m_type);
        *np = n;
        if (n == 0)
            goto nospace;
        MCLAIM(n, m->m_owner);
        if (copyhdr) {
            M_COPY_PKTHDR(n, m);
            if (len == M_COPYALL)
                n->m_pkthdr.len -= off0;
            else
                n->m_pkthdr.len = len;
            copyhdr = 0;
        }
        n->m_len = min(len, m->m_len - off);
        if (m->m_flags & M_EXT) {
            if (!deep) {
                n->m_data = m->m_data + off;
                MCLADDREFERENCE(m, n);
            } else {
                /*
                 * we are unsure about the way m was allocated.
                 * copy into multiple MCLBYTES cluster mbufs.
                 */
                MCLGET(n, wait);
                n->m_len = 0;
                n->m_len = M_TRAILINGSPACE(n);
                n->m_len = min(n->m_len, len);
                n->m_len = min(n->m_len, m->m_len - off);
                memcpy(mtod(n, void *), mtod(m, char *) + off,
                    (unsigned)n->m_len);
            }
        } else
            memcpy(mtod(n, void *), mtod(m, char *) + off,
                (unsigned)n->m_len);
        if (len != M_COPYALL)
            len -= n->m_len;
        off += n->m_len;
#ifdef DIAGNOSTIC
        if (off > m->m_len)
            panic("m_copym0 overrun");
#endif
        if (off == m->m_len) {
            m = m->m_next;
            off = 0;
        }
        np = &n->m_next;
    }
    if (top == 0)
        MCFail++;
    return (top);
nospace:
    m_freem(top);
    MCFail++;
    return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
    struct mbuf *top, *n, *o;

    MGET(n, how, m->m_type);
    top = n;
    if (!n)
        goto nospace;

    MCLAIM(n, m->m_owner);
    M_COPY_PKTHDR(n, m);
    n->m_len = m->m_len;
    if (m->m_flags & M_EXT) {
        n->m_data = m->m_data;
        MCLADDREFERENCE(m, n);
    } else {
        memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
    }

    m = m->m_next;
    while (m) {
        MGET(o, how, m->m_type);
        if (!o)
            goto nospace;

        MCLAIM(o, m->m_owner);
        n->m_next = o;
        n = n->m_next;

        n->m_len = m->m_len;
        if (m->m_flags & M_EXT) {
            n->m_data = m->m_data;
            MCLADDREFERENCE(m, n);
        } else {
            memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
        }

        m = m->m_next;
    }
    return top;
nospace:
    m_freem(top);
    MCFail++;
    return NULL;
}
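
/*
 * Illustrative sketch (not part of the original source): m_copym() with
 * M_COPYALL yields a copy whose cluster storage is shared through
 * MCLADDREFERENCE() and must be treated as read-only, while m_dup()
 * copies the data into fresh storage that may be modified freely.
 */
#if 0
    struct mbuf *shallow, *deep;

    shallow = m_copym(m, 0, M_COPYALL, M_DONTWAIT);	/* shares M_EXT storage */
    deep = m_dup(m, 0, M_COPYALL, M_DONTWAIT);		/* private copy */
    if (shallow == NULL || deep == NULL) {
        m_freem(shallow);	/* m_freem(NULL) is a no-op */
        m_freem(deep);
        /* out of mbufs */
    }
#endif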

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *vp)
{
    unsigned count;
    void *cp = vp;

    if (off < 0 || len < 0)
        panic("m_copydata: off %d, len %d", off, len);
    while (off > 0) {
        if (m == NULL)
            panic("m_copydata: m == NULL, off %d", off);
        if (off < m->m_len)
            break;
        off -= m->m_len;
        m = m->m_next;
    }
    while (len > 0) {
        if (m == NULL)
            panic("m_copydata: m == NULL, len %d", len);
        count = min(m->m_len - off, len);
        memcpy(cp, mtod(m, char *) + off, count);
        len -= count;
        cp = (char *)cp + count;
        off = 0;
        m = m->m_next;
    }
}

/*
 * Concatenate mbuf chain n to m.
 * n may be copied into m (when n->m_len is small), so the data portion of
 * n can end up in an mbuf of a different mbuf type.
 * The m_pkthdr, if any, is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{

    while (m->m_next)
        m = m->m_next;
    while (n) {
        if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
            /* just join the two chains */
            m->m_next = n;
            return;
        }
        /* splat the data from one into the other */
        memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
            (u_int)n->m_len);
        m->m_len += n->m_len;
        n = m_free(n);
    }
}

void
m_adj(struct mbuf *mp, int req_len)
{
    int len = req_len;
    struct mbuf *m;
    int count;

    if ((m = mp) == NULL)
        return;
    if (len >= 0) {
        /*
         * Trim from head.
         */
        while (m != NULL && len > 0) {
            if (m->m_len <= len) {
                len -= m->m_len;
                m->m_len = 0;
                m = m->m_next;
            } else {
                m->m_len -= len;
                m->m_data += len;
                len = 0;
            }
        }
        m = mp;
        if (mp->m_flags & M_PKTHDR)
            m->m_pkthdr.len -= (req_len - len);
    } else {
        /*
         * Trim from tail.  Scan the mbuf chain,
         * calculating its length and finding the last mbuf.
         * If the adjustment only affects this mbuf, then just
         * adjust and return.  Otherwise, rescan and truncate
         * after the remaining size.
         */
        len = -len;
        count = 0;
        for (;;) {
            count += m->m_len;
            if (m->m_next == (struct mbuf *)0)
                break;
            m = m->m_next;
        }
        if (m->m_len >= len) {
            m->m_len -= len;
            if (mp->m_flags & M_PKTHDR)
                mp->m_pkthdr.len -= len;
            return;
        }
        count -= len;
        if (count < 0)
            count = 0;
        /*
         * Correct length for chain is "count".
         * Find the mbuf with last data, adjust its length,
         * and toss data from remaining mbufs on chain.
         */
        m = mp;
        if (m->m_flags & M_PKTHDR)
            m->m_pkthdr.len = count;
        for (; m; m = m->m_next) {
            if (m->m_len >= count) {
                m->m_len = count;
                break;
            }
            count -= m->m_len;
        }
        if (m)
            while (m->m_next)
                (m = m->m_next)->m_len = 0;
    }
}
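
/*
 * Illustrative sketch (not part of the original source): trimming a
 * hypothetical 14-byte link-level header from the front of a packet and a
 * 4-byte trailer from the end; m_adj() also fixes up m_pkthdr.len.
 */
#if 0
    m_adj(m, 14);	/* positive count trims from the head */
    m_adj(m, -4);	/* negative count trims from the tail */
#endif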

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(struct mbuf *n, int len)
{
    struct mbuf *m;
    int count;
    int space;

    /*
     * If first mbuf has no cluster, and has room for len bytes
     * without shifting current data, pullup into it,
     * otherwise allocate a new mbuf to prepend to the chain.
     */
    if ((n->m_flags & M_EXT) == 0 &&
        n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
        if (n->m_len >= len)
            return (n);
        m = n;
        n = n->m_next;
        len -= m->m_len;
    } else {
        if (len > MHLEN)
            goto bad;
        MGET(m, M_DONTWAIT, n->m_type);
        if (m == 0)
            goto bad;
        MCLAIM(m, n->m_owner);
        m->m_len = 0;
        if (n->m_flags & M_PKTHDR) {
            M_MOVE_PKTHDR(m, n);
        }
    }
    space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
    do {
        count = min(min(max(len, max_protohdr), space), n->m_len);
        memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
            (unsigned)count);
        len -= count;
        m->m_len += count;
        n->m_len -= count;
        space -= count;
        if (n->m_len)
            n->m_data += count;
        else
            n = m_free(n);
    } while (len > 0 && n);
    if (len > 0) {
        (void) m_free(m);
        goto bad;
    }
    m->m_next = n;
    return (m);
bad:
    m_freem(n);
    MPFail++;
    return (NULL);
}

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
    struct mbuf *m;
    int count, space;

    if (len > (MHLEN - dstoff))
        goto bad;
    MGET(m, M_DONTWAIT, n->m_type);
    if (m == NULL)
        goto bad;
    MCLAIM(m, n->m_owner);
    m->m_len = 0;
    if (n->m_flags & M_PKTHDR) {
        M_MOVE_PKTHDR(m, n);
    }
    m->m_data += dstoff;
    space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
    do {
        count = min(min(max(len, max_protohdr), space), n->m_len);
        memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
            (unsigned)count);
        len -= count;
        m->m_len += count;
        n->m_len -= count;
        space -= count;
        if (n->m_len)
            n->m_data += count;
        else
            n = m_free(n);
    } while (len > 0 && n);
    if (len > 0) {
        (void) m_free(m);
        goto bad;
    }
    m->m_next = n;
    return (m);
bad:
    m_freem(n);
    MSFail++;
    return (NULL);
}
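
/*
 * Illustrative sketch (not part of the original source): the classic
 * m_pullup() idiom used before casting the front of a chain to a protocol
 * header, shown here for a hypothetical IP input path.
 */
#if 0
    struct ip *ip;

    if (m->m_len < sizeof(struct ip) &&
        (m = m_pullup(m, sizeof(struct ip))) == NULL)
        return;			/* the chain was freed by m_pullup() */
    ip = mtod(m, struct ip *);
#endif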

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{

    return m_split0(m0, len0, wait, 1);
}

static struct mbuf *
m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
{
    struct mbuf *m, *n;
    unsigned len = len0, remain, len_save;

    for (m = m0; m && len > m->m_len; m = m->m_next)
        len -= m->m_len;
    if (m == 0)
        return (NULL);
    remain = m->m_len - len;
    if (copyhdr && (m0->m_flags & M_PKTHDR)) {
        MGETHDR(n, wait, m0->m_type);
        if (n == 0)
            return (NULL);
        MCLAIM(n, m0->m_owner);
        n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
        n->m_pkthdr.len = m0->m_pkthdr.len - len0;
        len_save = m0->m_pkthdr.len;
        m0->m_pkthdr.len = len0;
        if (m->m_flags & M_EXT)
            goto extpacket;
        if (remain > MHLEN) {
            /* m can't be the lead packet */
            MH_ALIGN(n, 0);
            n->m_len = 0;
            n->m_next = m_split(m, len, wait);
            if (n->m_next == 0) {
                (void) m_free(n);
                m0->m_pkthdr.len = len_save;
                return (NULL);
            } else
                return (n);
        } else
            MH_ALIGN(n, remain);
    } else if (remain == 0) {
        n = m->m_next;
        m->m_next = 0;
        return (n);
    } else {
        MGET(n, wait, m->m_type);
        if (n == 0)
            return (NULL);
        MCLAIM(n, m->m_owner);
        M_ALIGN(n, remain);
    }
extpacket:
    if (m->m_flags & M_EXT) {
        n->m_data = m->m_data + len;
        MCLADDREFERENCE(m, n);
    } else {
        memcpy(mtod(n, void *), mtod(m, char *) + len, remain);
    }
    n->m_len = remain;
    m->m_len = len;
    n->m_next = m->m_next;
    m->m_next = 0;
    return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(const void *from, void *to, size_t len))
{
    struct mbuf *m;
    struct mbuf *top = 0, **mp = &top;
    int off = off0, len;
    char *cp;
    char *epkt;

    cp = buf;
    epkt = cp + totlen;
    if (off) {
        /*
         * If 'off' is non-zero, packet is trailer-encapsulated,
         * so we have to skip the type and length fields.
         */
        cp += off + 2 * sizeof(uint16_t);
        totlen -= 2 * sizeof(uint16_t);
    }
    MGETHDR(m, M_DONTWAIT, MT_DATA);
    if (m == 0)
        return (NULL);
    m->m_pkthdr.rcvif = ifp;
    m->m_pkthdr.len = totlen;
    m->m_len = MHLEN;

    while (totlen > 0) {
        if (top) {
            MGET(m, M_DONTWAIT, MT_DATA);
            if (m == 0) {
                m_freem(top);
                return (NULL);
            }
            m->m_len = MLEN;
        }
        len = min(totlen, epkt - cp);
        if (len >= MINCLSIZE) {
            MCLGET(m, M_DONTWAIT);
            if ((m->m_flags & M_EXT) == 0) {
                m_free(m);
                m_freem(top);
                return (NULL);
            }
            m->m_len = len = min(len, MCLBYTES);
        } else {
            /*
             * Place initial small packet/header at end of mbuf.
             */
            if (len < m->m_len) {
                if (top == 0 && len + max_linkhdr <= m->m_len)
                    m->m_data += max_linkhdr;
                m->m_len = len;
            } else
                len = m->m_len;
        }
        if (copy)
            copy(cp, mtod(m, void *), (size_t)len);
        else
            memcpy(mtod(m, void *), cp, (size_t)len);
        cp += len;
        *mp = m;
        mp = &m->m_next;
        totlen -= len;
        if (cp == epkt)
            cp = buf;
    }
    return (top);
}
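
/*
 * Illustrative sketch (not part of the original source): splitting the
 * first "reclen" bytes off a chain, e.g. to peel one record off a stream;
 * on allocation failure the original chain is left intact.
 */
#if 0
    struct mbuf *rest;

    rest = m_split(m, reclen, M_DONTWAIT);
    if (rest == NULL && m->m_pkthdr.len > reclen) {
        /* allocation failed; "m" still holds the whole record */
    }
#endif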

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
    struct mbuf *origm = m0;
    int error;
#endif /* defined(DEBUG) */

    if (m0 == NULL)
        return;

#if defined(DEBUG)
    error =
#endif /* defined(DEBUG) */
    m_copyback0(&m0, off, len, cp,
        M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);

#if defined(DEBUG)
    if (error != 0 || (m0 != NULL && origm != m0))
        panic("m_copyback");
#endif /* defined(DEBUG) */
}

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
    int error;

    /* don't support chain expansion */
    KDASSERT(off + len <= m_length(m0));

    error = m_copyback0(&m0, off, len, cp,
        M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
    if (error) {
        /*
         * no way to recover from partial success.
         * just free the chain.
         */
        m_freem(m0);
        return NULL;
    }
    return m0;
}

/*
 * m_makewritable: ensure the specified range is writable.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
    int error;
#if defined(DEBUG)
    struct mbuf *n;
    int origlen, reslen;

    origlen = m_length(*mp);
#endif /* defined(DEBUG) */

#if 0 /* M_COPYALL is large enough */
    if (len == M_COPYALL)
        len = m_length(*mp) - off; /* XXX */
#endif

    error = m_copyback0(mp, off, len, NULL,
        M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);

#if defined(DEBUG)
    reslen = 0;
    for (n = *mp; n; n = n->m_next)
        reslen += n->m_len;
    if (origlen != reslen)
        panic("m_makewritable: length changed");
    if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
        panic("m_makewritable: inconsist");
#endif /* defined(DEBUG) */

    return error;
}
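
/*
 * Illustrative sketch (not part of the original source): making the first
 * "hdrlen" bytes writable before editing them in place, so that cluster
 * storage shared via m_copym()/MCLADDREFERENCE() is copied first.
 */
#if 0
    if (m_makewritable(&m, 0, hdrlen, M_DONTWAIT) != 0) {
        m_freem(m);
        return ENOBUFS;
    }
    /* the first hdrlen bytes of the chain may now be modified */
#endif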

/*
 * Copy the mbuf chain to a new mbuf chain that is as short as possible.
 * Return the new mbuf chain on success, NULL on failure.  On success,
 * free the old mbuf chain.
 */
struct mbuf *
m_defrag(struct mbuf *mold, int flags)
{
    struct mbuf *m0, *mn, *n;
    size_t sz = mold->m_pkthdr.len;

#ifdef DIAGNOSTIC
    if ((mold->m_flags & M_PKTHDR) == 0)
        panic("m_defrag: not a mbuf chain header");
#endif

    MGETHDR(m0, flags, MT_DATA);
    if (m0 == NULL)
        return NULL;
    M_COPY_PKTHDR(m0, mold);
    mn = m0;

    do {
        if (sz > MHLEN) {
            MCLGET(mn, M_DONTWAIT);
            if ((mn->m_flags & M_EXT) == 0) {
                m_freem(m0);
                return NULL;
            }
        }

        mn->m_len = MIN(sz, MCLBYTES);

        m_copydata(mold, mold->m_pkthdr.len - sz, mn->m_len,
            mtod(mn, void *));

        sz -= mn->m_len;

        if (sz > 0) {
            /* need more mbufs */
            MGET(n, M_NOWAIT, MT_DATA);
            if (n == NULL) {
                m_freem(m0);
                return NULL;
            }

            mn->m_next = n;
            mn = n;
        }
    } while (sz > 0);

    m_freem(mold);

    return m0;
}

int
m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
    int how)
{
    int mlen;
    struct mbuf *m, *n;
    struct mbuf **mp;
    int totlen = 0;
    const char *cp = vp;

    KASSERT(mp0 != NULL);
    KASSERT(*mp0 != NULL);
    KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
    KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);

    /*
     * we don't bother to update "totlen" in the case of M_COPYBACK0_COW,
     * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive.
     */

    KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0);

    mp = mp0;
    m = *mp;
    while (off > (mlen = m->m_len)) {
        off -= mlen;
        totlen += mlen;
        if (m->m_next == NULL) {
            int tspace;
extend:
            if ((flags & M_COPYBACK0_EXTEND) == 0)
                goto out;

            /*
             * try to make some space at the end of "m".
             */

            mlen = m->m_len;
            if (off + len >= MINCLSIZE &&
                (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
                MCLGET(m, how);
            }
            tspace = M_TRAILINGSPACE(m);
            if (tspace > 0) {
                tspace = min(tspace, off + len);
                KASSERT(tspace > 0);
                memset(mtod(m, char *) + m->m_len, 0,
                    min(off, tspace));
                m->m_len += tspace;
                off += mlen;
                totlen -= mlen;
                continue;
            }

            /*
             * need to allocate an mbuf.
             */

            if (off + len >= MINCLSIZE) {
                n = m_getcl(how, m->m_type, 0);
            } else {
                n = m_get(how, m->m_type);
            }
            if (n == NULL) {
                goto out;
            }
            n->m_len = 0;
            n->m_len = min(M_TRAILINGSPACE(n), off + len);
            memset(mtod(n, char *), 0, min(n->m_len, off));
            m->m_next = n;
        }
        mp = &m->m_next;
        m = m->m_next;
    }
    while (len > 0) {
        mlen = m->m_len - off;
        if (mlen != 0 && M_READONLY(m)) {
            char *datap;
            int eatlen;

            /*
             * this mbuf is read-only.
             * allocate a new writable mbuf and try again.
             */

#if defined(DIAGNOSTIC)
            if ((flags & M_COPYBACK0_COW) == 0)
                panic("m_copyback0: read-only");
#endif /* defined(DIAGNOSTIC) */

            /*
             * if we're going to write into the middle of
             * a mbuf, split it first.
             */
            if (off > 0) {
                n = m_split0(m, off, how, 0);
                if (n == NULL)
                    goto enobufs;
                m->m_next = n;
                mp = &m->m_next;
                m = n;
                off = 0;
                continue;
            }

            /*
             * XXX TODO coalesce into the trailingspace of
             * the previous mbuf when possible.
             */

            /*
             * allocate a new mbuf.  copy packet header if needed.
             */
            MGET(n, how, m->m_type);
            if (n == NULL)
                goto enobufs;
            MCLAIM(n, m->m_owner);
            if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
                M_MOVE_PKTHDR(n, m);
                n->m_len = MHLEN;
            } else {
                if (len >= MINCLSIZE)
                    MCLGET(n, M_DONTWAIT);
                n->m_len =
                    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
            }
            if (n->m_len > len)
                n->m_len = len;

            /*
             * free the region which has been overwritten.
             * copying data from old mbufs if requested.
             */
            if (flags & M_COPYBACK0_PRESERVE)
                datap = mtod(n, char *);
            else
                datap = NULL;
            eatlen = n->m_len;
            while (m != NULL && M_READONLY(m) &&
                n->m_type == m->m_type && eatlen > 0) {
                mlen = min(eatlen, m->m_len);
                if (datap) {
                    m_copydata(m, 0, mlen, datap);
                    datap += mlen;
                }
                m->m_data += mlen;
                m->m_len -= mlen;
                eatlen -= mlen;
                if (m->m_len == 0)
                    *mp = m = m_free(m);
            }
            if (eatlen > 0)
                n->m_len -= eatlen;
            n->m_next = m;
            *mp = m = n;
            continue;
        }
        mlen = min(mlen, len);
        if (flags & M_COPYBACK0_COPYBACK) {
            memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
            cp += mlen;
        }
        len -= mlen;
        mlen += off;
        off = 0;
        totlen += mlen;
        if (len == 0)
            break;
        if (m->m_next == NULL) {
            goto extend;
        }
        mp = &m->m_next;
        m = m->m_next;
    }
out:
    if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
        KASSERT((flags & M_COPYBACK0_EXTEND) != 0);
        m->m_pkthdr.len = totlen;
    }

    return 0;

enobufs:
    return ENOBUFS;
}

void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

    KASSERT((to->m_flags & M_EXT) == 0);
    KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL);
    KASSERT((from->m_flags & M_PKTHDR) != 0);

    to->m_pkthdr = from->m_pkthdr;
    to->m_flags = from->m_flags & M_COPYFLAGS;
    to->m_data = to->m_pktdat;

    from->m_flags &= ~M_PKTHDR;
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, unsigned int), void *arg)
{
    unsigned int count;
    int rval;

    KASSERT(len >= 0);
    KASSERT(off >= 0);

    while (off > 0) {
        KASSERT(m != NULL);
        if (off < m->m_len)
            break;
        off -= m->m_len;
        m = m->m_next;
    }
    while (len > 0) {
        KASSERT(m != NULL);
        count = min(m->m_len - off, len);

        rval = (*f)(arg, mtod(m, char *) + off, count);
        if (rval)
            return (rval);

        len -= count;
        off = 0;
        m = m->m_next;
    }

    return (0);
}
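
/*
 * Illustrative sketch (not part of the original source): using m_apply()
 * to walk a byte range without forcing it contiguous, feeding each
 * fragment to a hypothetical digest routine.
 */
#if 0
/* Hypothetical callback: hash one fragment. */
static int
example_digest_cb(void *arg, void *data, unsigned int len)
{

    example_digest_update(arg, data, len);	/* hypothetical routine */
    return 0;					/* non-zero aborts the walk */
}

/* In the caller:  error = m_apply(m, off, len, example_digest_cb, &ctx); */
#endif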

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

    while (loc >= 0) {
        /* Normal end of search */
        if (m->m_len > loc) {
            *off = loc;
            return (m);
        } else {
            loc -= m->m_len;

            if (m->m_next == NULL) {
                if (loc == 0) {
                    /* Point at the end of valid data */
                    *off = m->m_len;
                    return (m);
                } else
                    return (NULL);
            } else
                m = m->m_next;
        }
    }

    return (NULL);
}

/*
 * m_ext_free: release a reference to the mbuf external storage.
 *
 * => free the mbuf m itself as well.
 */

void
m_ext_free(struct mbuf *m)
{
    bool embedded = MEXT_ISEMBEDDED(m);
    bool dofree = true;
    u_int refcnt;

    KASSERT((m->m_flags & M_EXT) != 0);
    KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
    KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
    KASSERT((m->m_flags & M_EXT_CLUSTER) ==
        (m->m_ext_ref->m_flags & M_EXT_CLUSTER));

    if (__predict_true(m->m_ext.ext_refcnt == 1)) {
        refcnt = m->m_ext.ext_refcnt = 0;
    } else {
        refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
    }
    if (refcnt > 0) {
        if (embedded) {
            /*
             * other mbuf's m_ext_ref still points to us.
             */
            dofree = false;
        } else {
            m->m_ext_ref = m;
        }
    } else {
        /*
         * dropping the last reference
         */
        if (!embedded) {
            m->m_ext.ext_refcnt++; /* XXX */
            m_ext_free(m->m_ext_ref);
            m->m_ext_ref = m;
        } else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
            pool_cache_put_paddr((struct pool_cache *)
                m->m_ext.ext_arg,
                m->m_ext.ext_buf, m->m_ext.ext_paddr);
        } else if (m->m_ext.ext_free) {
            (*m->m_ext.ext_free)(m,
                m->m_ext.ext_buf, m->m_ext.ext_size,
                m->m_ext.ext_arg);
            /*
             * 'm' is already freed by the ext_free callback.
             */
            dofree = false;
        } else {
            free(m->m_ext.ext_buf, m->m_ext.ext_type);
        }
    }
    if (dofree) {
        pool_cache_put(mb_cache, m);
    }
}

#if defined(DDB)
void
m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
{
    char ch;
    bool opt_c = false;
    char buf[512];

    while ((ch = *(modif++)) != '\0') {
        switch (ch) {
        case 'c':
            opt_c = true;
            break;
        }
    }

nextchain:
    (*pr)("MBUF %p\n", m);
    snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags);
    (*pr)("  data=%p, len=%d, type=%d, flags=%s\n",
        m->m_data, m->m_len, m->m_type, buf);
    (*pr)("  owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next,
        m->m_nextpkt);
    (*pr)("  leadingspace=%u, trailingspace=%u, readonly=%u\n",
        (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m),
        (int)M_READONLY(m));
    if ((m->m_flags & M_PKTHDR) != 0) {
        snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags);
        (*pr)("  pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%"
            PRIx32 ", segsz=%u\n",
            m->m_pkthdr.len, m->m_pkthdr.rcvif,
            buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
    }
    if ((m->m_flags & M_EXT)) {
        (*pr)("  ext_refcnt=%u, ext_buf=%p, ext_size=%zd, "
            "ext_free=%p, ext_arg=%p\n",
            m->m_ext.ext_refcnt,
            m->m_ext.ext_buf, m->m_ext.ext_size,
            m->m_ext.ext_free, m->m_ext.ext_arg);
    }
    if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
        vaddr_t sva = (vaddr_t)m->m_ext.ext_buf;
        vaddr_t eva = sva + m->m_ext.ext_size;
        int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT;
        int i;

        (*pr)("  pages:");
        for (i = 0; i < n; i ++) {
            (*pr)(" %p", m->m_ext.ext_pgs[i]);
        }
        (*pr)("\n");
    }

    if (opt_c) {
        m = m->m_next;
        if (m != NULL) {
            goto nextchain;
        }
    }
}
#endif /* defined(DDB) */

void
mbstat_type_add(int type, int diff)
{
    struct mbstat_cpu *mb;
    int s;

    s = splvm();
    mb = percpu_getref(mbstat_percpu);
    mb->m_mtypes[type] += diff;
    percpu_putref(mbstat_percpu);
    splx(s);
}

#if defined(MBUFTRACE)
void
mowner_attach(struct mowner *mo)
{

    KASSERT(mo->mo_counters == NULL);
    mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));

    /* XXX lock */
    LIST_INSERT_HEAD(&mowners, mo, mo_link);
}

void
mowner_detach(struct mowner *mo)
{

    KASSERT(mo->mo_counters != NULL);

    /* XXX lock */
    LIST_REMOVE(mo, mo_link);

    percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
    mo->mo_counters = NULL;
}

void
mowner_init(struct mbuf *m, int type)
{
    struct mowner_counter *mc;
    struct mowner *mo;
    int s;

    m->m_owner = mo = &unknown_mowners[type];
    s = splvm();
    mc = percpu_getref(mo->mo_counters);
    mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
    percpu_putref(mo->mo_counters);
    splx(s);
}

void
mowner_ref(struct mbuf *m, int flags)
{
    struct mowner *mo = m->m_owner;
    struct mowner_counter *mc;
    int s;

    s = splvm();
    mc = percpu_getref(mo->mo_counters);
    if ((flags & M_EXT) != 0)
        mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
    if ((flags & M_CLUSTER) != 0)
        mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
    percpu_putref(mo->mo_counters);
    splx(s);
}
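
/*
 * Illustrative sketch (not part of the original source): how a driver or
 * protocol typically uses the MBUFTRACE ownership hooks above; the
 * "example" names are hypothetical.
 */
#if 0
static struct mowner example_mowner = MOWNER_INIT("example", "rx");

    /* once, at attach time */
    MOWNER_ATTACH(&example_mowner);

    /* per mbuf or per chain */
    MCLAIM(m, &example_mowner);
    m_claimm(m, &example_mowner);
#endif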

void
mowner_revoke(struct mbuf *m, bool all, int flags)
{
    struct mowner *mo = m->m_owner;
    struct mowner_counter *mc;
    int s;

    s = splvm();
    mc = percpu_getref(mo->mo_counters);
    if ((flags & M_EXT) != 0)
        mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
    if ((flags & M_CLUSTER) != 0)
        mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
    if (all)
        mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
    percpu_putref(mo->mo_counters);
    splx(s);
    if (all)
        m->m_owner = &revoked_mowner;
}

static void
mowner_claim(struct mbuf *m, struct mowner *mo)
{
    struct mowner_counter *mc;
    int flags = m->m_flags;
    int s;

    s = splvm();
    mc = percpu_getref(mo->mo_counters);
    mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
    if ((flags & M_EXT) != 0)
        mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
    if ((flags & M_CLUSTER) != 0)
        mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
    percpu_putref(mo->mo_counters);
    splx(s);
    m->m_owner = mo;
}

void
m_claim(struct mbuf *m, struct mowner *mo)
{

    if (m->m_owner == mo || mo == NULL)
        return;

    mowner_revoke(m, true, m->m_flags);
    mowner_claim(m, mo);
}
#endif /* defined(MBUFTRACE) */