/*	$NetBSD: uipc_mbuf.c,v 1.134 2010/02/08 22:55:36 joerg Exp $	*/

/*-
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.134 2010/02/08 22:55:36 joerg Exp $");

#include "opt_mbuftrace.h"
#include "opt_nmbclusters.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

#include <uvm/uvm.h>

pool_cache_t mb_cache;	/* mbuf cache */
pool_cache_t mcl_cache;	/* mbuf cluster cache */

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static int mb_ctor(void *, void *, int);

static void *mclpool_alloc(struct pool *, int);
static void mclpool_release(struct pool *, void *);

static void sysctl_kern_mbuf_setup(void);

static struct sysctllog *mbuf_sysctllog;

static struct pool_allocator mclpool_allocator = {
	.pa_alloc = mclpool_alloc,
	.pa_free = mclpool_release,
};

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */

static const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

static percpu_t *mbstat_percpu;

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	MOWNER_INIT("unknown", "free"),
	MOWNER_INIT("unknown", "data"),
	MOWNER_INIT("unknown", "header"),
	MOWNER_INIT("unknown", "soname"),
	MOWNER_INIT("unknown", "soopts"),
	MOWNER_INIT("unknown", "ftable"),
	MOWNER_INIT("unknown", "control"),
	MOWNER_INIT("unknown", "oobdata"),
};
struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
#endif

#define	MEXT_ISEMBEDDED(m)	((m)->m_ext_ref == (m))

#define	MCLADDREFERENCE(o, n)						\
do {									\
	KASSERT(((o)->m_flags & M_EXT) != 0);				\
	KASSERT(((n)->m_flags & M_EXT) == 0);				\
	KASSERT((o)->m_ext.ext_refcnt >= 1);				\
	(n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS);		\
	atomic_inc_uint(&(o)->m_ext.ext_refcnt);			\
	(n)->m_ext_ref = (o)->m_ext_ref;				\
	mowner_ref((n), (n)->m_flags);					\
	MCLREFDEBUGN((n), __FILE__, __LINE__);				\
} while (/* CONSTCOND */ 0)

static int
nmbclusters_limit(void)
{
#if defined(PMAP_MAP_POOLPAGE) || defined(_RUMPKERNEL)
	/* direct mapping, doesn't use space in kmem_map */
	vsize_t max_size = physmem / 4;
#else
	vsize_t max_size = MIN(physmem / 4, nkmempages / 2);
#endif

	max_size = max_size * PAGE_SIZE / MCLBYTES;
#ifdef NMBCLUSTERS_MAX
	max_size = MIN(max_size, NMBCLUSTERS_MAX);
#endif

#ifdef NMBCLUSTERS
	return MIN(max_size, NMBCLUSTERS);
#else
	return max_size;
#endif
}

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

	CTASSERT(sizeof(struct _m_ext) <= MHLEN);
	CTASSERT(sizeof(struct mbuf) == MSIZE);

	sysctl_kern_mbuf_setup();

	mclpool_allocator.pa_backingmap = kmem_map;

	mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl",
	    NULL, IPL_VM, mb_ctor, NULL, NULL);
	KASSERT(mb_cache != NULL);

	mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl",
	    &mclpool_allocator, IPL_VM, NULL, NULL, NULL);
	KASSERT(mcl_cache != NULL);

	pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL);
	pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL);

	/*
	 * Set an arbitrary default limit on the number of mbuf clusters.
	 */
#ifdef NMBCLUSTERS
	nmbclusters = nmbclusters_limit();
#else
	nmbclusters = MAX(1024,
	    (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16);
	nmbclusters = MIN(nmbclusters, nmbclusters_limit());
#endif

	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the limit
	 * reached message max once a minute.
	 */
	pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);

	mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));

	/*
	 * Set a low water mark for both mbufs and clusters.  This should
	 * help ensure that they can be allocated in a memory starvation
	 * situation.  This is important for e.g. diskless systems which
	 * must allocate mbufs in order for the pagedaemon to clean pages.
	 */
	pool_cache_setlowat(mb_cache, mblowat);
	pool_cache_setlowat(mcl_cache, mcllowat);

#ifdef MBUFTRACE
	{
		/*
		 * Attach the unknown mowners.
		 */
		int i;
		MOWNER_ATTACH(&revoked_mowner);
		for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
		     i-- > 0; )
			MOWNER_ATTACH(&unknown_mowners[i]);
	}
#endif
}
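
/*
 * Added note (illustrative, not from the original sources): the limits set
 * up above are exported through the kern.mbuf sysctl subtree created below,
 * so they can be inspected and, for the read-write nodes, tuned at run time,
 * e.g. from userland:
 *
 *	sysctl kern.mbuf.nmbclusters
 *	sysctl -w kern.mbuf.nmbclusters=32768
 *
 * A new value is accepted only if it is not smaller than the current value
 * and does not exceed nmbclusters_limit() (see sysctl_kern_mbuf() below).
 */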

/*
 * sysctl helper routine for the kern.mbuf subtree.
 * nmbclusters, mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
	int error, newval;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	switch (rnode->sysctl_num) {
	case MBUF_NMBCLUSTERS:
	case MBUF_MBLOWAT:
	case MBUF_MCLLOWAT:
		newval = *(int*)rnode->sysctl_data;
		break;
	default:
		return (EOPNOTSUPP);
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);
	if (newval < 0)
		return (EINVAL);

	switch (node.sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (newval < nmbclusters)
			return (EINVAL);
		if (newval > nmbclusters_limit())
			return (EINVAL);
		nmbclusters = newval;
		pool_cache_sethardlimit(mcl_cache, nmbclusters,
		    mclpool_warnmsg, 60);
		break;
	case MBUF_MBLOWAT:
		mblowat = newval;
		pool_cache_setlowat(mb_cache, mblowat);
		break;
	case MBUF_MCLLOWAT:
		mcllowat = newval;
		pool_cache_setlowat(mcl_cache, mcllowat);
		break;
	}

	return (0);
}

#ifdef MBUFTRACE
static void
mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mowner_counter *mc = v1;
	struct mowner_user *mo_user = v2;
	int i;

	for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
		mo_user->mo_counter[i] += mc->mc_counter[i];
	}
}

static void
mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
{

	memset(mo_user, 0, sizeof(*mo_user));
	CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
	CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
	memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
	memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
	percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user);
}

static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *mo;
	size_t len = 0;
	int error = 0;

	if (namelen != 0)
		return (EINVAL);
	if (newp != NULL)
		return (EPERM);

	LIST_FOREACH(mo, &mowners, mo_link) {
		struct mowner_user mo_user;

		mowner_convert_to_user(mo, &mo_user);

		if (oldp != NULL) {
			if (*oldlenp - len < sizeof(mo_user)) {
				error = ENOMEM;
				break;
			}
			error = copyout(&mo_user, (char *)oldp + len,
			    sizeof(mo_user));
			if (error)
				break;
		}
		len += sizeof(mo_user);
	}

	if (error == 0)
		*oldlenp = len;

	return (error);
}
#endif /* MBUFTRACE */

static void
mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mbstat_cpu *mbsc = v1;
	struct mbstat *mbs = v2;
	int i;

	for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
		mbs->m_mtypes[i] += mbsc->m_mtypes[i];
	}
}

static void
mbstat_convert_to_user(struct mbstat *mbs)
{

	memset(mbs, 0, sizeof(*mbs));
	mbs->m_drain = mbstat.m_drain;
	percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs);
}

static int
sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct mbstat mbs;

	mbstat_convert_to_user(&mbs);
	node = *rnode;
	node.sysctl_data = &mbs;
	node.sysctl_size = sizeof(mbs);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static void
sysctl_kern_mbuf_setup(void)
{

	KASSERT(mbuf_sysctllog == NULL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "kern", NULL,
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "mbuf",
	    SYSCTL_DESCR("mbuf control variables"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "msize",
	    SYSCTL_DESCR("mbuf base size"),
	    NULL, msize, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "mclbytes",
	    SYSCTL_DESCR("mbuf cluster size"),
	    NULL, mclbytes, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "nmbclusters",
	    SYSCTL_DESCR("Limit on the number of mbuf clusters"),
	    sysctl_kern_mbuf, 0, &nmbclusters, 0,
	    CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "mblowat",
	    SYSCTL_DESCR("mbuf low water mark"),
	    sysctl_kern_mbuf, 0, &mblowat, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "mcllowat",
	    SYSCTL_DESCR("mbuf cluster low water mark"),
	    sysctl_kern_mbuf, 0, &mcllowat, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "stats",
	    SYSCTL_DESCR("mbuf allocation statistics"),
	    sysctl_kern_mbuf_stats, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "mowners",
	    SYSCTL_DESCR("Information about mbuf owners"),
	    sysctl_kern_mbuf_mowners, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}

static void *
mclpool_alloc(struct pool *pp, int flags)
{
	bool waitok = (flags & PR_WAITOK) ? true : false;

	return ((void *)uvm_km_alloc_poolpage(kmem_map, waitok));
}

static void
mclpool_release(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(kmem_map, (vaddr_t)v);
}

/*ARGSUSED*/
static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return (0);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	const struct protosw *pr;
	struct ifnet *ifp;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splvm();
	DOMAIN_FOREACH(dp) {
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	}
	IFNET_FOREACH(ifp) {
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	}
	splx(s);
	mbstat.m_drain++;
	KERNEL_UNLOCK_ONE(NULL);
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;

	m = pool_cache_get(mb_cache,
	    nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0);
	if (m == NULL)
		return NULL;

	mbstat_type_add(type, 1);
	mowner_init(m, type);
	m->m_ext_ref = m;
	m->m_type = type;
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return m;
}

struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;

	m = m_get(nowait, type);
	if (m == NULL)
		return NULL;

	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	SLIST_INIT(&m->m_pkthdr.tags);

	return m;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (NULL);
	memset(mtod(m, void *), 0, MLEN);
	return (m);
}

void
m_clget(struct mbuf *m, int nowait)
{

	MCLGET(m, nowait);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}

#ifdef MBUFTRACE
/*
 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain.
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	for (; m != NULL; m = m->m_next)
		MCLAIM(m, mo);
}
#endif

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(mn, m);
	} else {
		MCLAIM(mn, m->m_owner);
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
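
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * a common allocation pattern built on the routines above.  A packet header
 * mbuf is obtained with MGETHDR(), a cluster is attached with MCLGET() when
 * the payload does not fit in the mbuf itself, and everything is released
 * with m_freem() on error.  The function name and its assumption that the
 * payload fits in one cluster (paylen <= MCLBYTES) are hypothetical.
 */
#if 0	/* example only, never compiled */
static struct mbuf *
example_alloc_frame(const void *payload, int paylen)
{
	struct mbuf *m;

	MGETHDR(m, M_DONTWAIT, MT_DATA);	/* may fail under pressure */
	if (m == NULL)
		return NULL;

	if (paylen > MHLEN) {
		MCLGET(m, M_DONTWAIT);		/* attach cluster storage */
		if ((m->m_flags & M_EXT) == 0) {
			m_freem(m);		/* no cluster available */
			return NULL;
		}
	}

	memcpy(mtod(m, void *), payload, paylen);
	m->m_len = m->m_pkthdr.len = paylen;
	return m;
}
#endif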

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 0);	/* shallow copy on M_EXT */
}

struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 1);	/* deep copy */
}

static struct mbuf *
m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym: m == 0, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym: m == 0, len %d [!COPYALL]",
				    len);
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		MCLAIM(n, m->m_owner);
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * we are unsure about the way m was allocated.
				 * copy into multiple MCLBYTES cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, void *), mtod(m, char *) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, void *), mtod(m, char *) + off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	MCLAIM(n, m->m_owner);
	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		MCLAIM(o, m->m_owner);
		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return NULL;
}
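
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * the difference between the two copy routines above.  m_copym() only adds
 * references to cluster storage via MCLADDREFERENCE(), so the copy shares
 * data with the original and must be treated as read-only; m_dup() copies
 * the bytes into fresh storage.  The function name is hypothetical.
 */
#if 0	/* example only, never compiled */
static void
example_copy_semantics(struct mbuf *m)
{
	struct mbuf *shallow, *deep;

	/*
	 * Shares any M_EXT storage with 'm'; cheap, but needs
	 * copy-on-write (see m_makewritable() below) before modification.
	 */
	shallow = m_copym(m, 0, M_COPYALL, M_DONTWAIT);

	/* Independent copy of the data, safe to modify. */
	deep = m_dup(m, 0, M_COPYALL, M_DONTWAIT);

	if (shallow != NULL)
		m_freem(shallow);
	if (deep != NULL)
		m_freem(deep);
}
#endif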

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *vp)
{
	unsigned count;
	void *cp = vp;

	if (off < 0 || len < 0)
		panic("m_copydata: off %d, len %d", off, len);
	while (off > 0) {
		if (m == NULL)
			panic("m_copydata: m == NULL, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: m == NULL, len %d", len);
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, char *) + off, count);
		len -= count;
		cp = (char *)cp + count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), therefore data portion of
 * n could be copied into an mbuf of different mbuf type.
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{

	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		if (m)
			while (m->m_next)
				(m = m->m_next)->m_len = 0;
	}
}
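
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * m_adj() trims from the front (positive length) or the back (negative
 * length) of a chain while keeping m_pkthdr.len consistent, and
 * m_copydata() then linearizes a range into a caller-supplied buffer.
 * The function name and the 14-byte link-header length are hypothetical;
 * the chain is assumed to carry M_PKTHDR.
 */
#if 0	/* example only, never compiled */
static int
example_strip_and_copy(struct mbuf *m, void *buf, int buflen)
{
	int len;

	m_adj(m, 14);				/* drop the link-level header */

	len = min(m->m_pkthdr.len, buflen);	/* copy what fits in 'buf' */
	m_copydata(m, 0, len, buf);
	return len;
}
#endif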

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		MCLAIM(m, n->m_owner);
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_MOVE_PKTHDR(m, n);
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (NULL);
}

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(m, n);
	}
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}
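
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * the classic input-path use of m_pullup(): make the first few bytes
 * contiguous before casting the data pointer with mtod().  Note that
 * m_pullup() frees the chain on failure.  struct ip would come from
 * <netinet/ip.h>; the function name is hypothetical.
 */
#if 0	/* example only, never compiled */
static struct ip *
example_get_ip_header(struct mbuf **mp)
{
	struct mbuf *m = *mp;

	if (m->m_len < sizeof(struct ip)) {
		m = m_pullup(m, sizeof(struct ip));
		if (m == NULL) {
			*mp = NULL;	/* chain already freed */
			return NULL;
		}
		*mp = m;
	}
	return mtod(m, struct ip *);
}
#endif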

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{

	return m_split0(m0, len0, wait, 1);
}

static struct mbuf *
m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (NULL);
	remain = m->m_len - len;
	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m0->m_owner);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_len = 0;
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m->m_owner);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, void *), mtod(m, char *) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(const void *from, void *to, size_t len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(uint16_t);
		totlen -= 2 * sizeof(uint16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (NULL);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, void *), (size_t)len);
		else
			memcpy(mtod(m, void *), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
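
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * peeling the first record off a chain with m_split(), as record-oriented
 * socket code does.  The head keeps the first 'reclen' bytes; the returned
 * tail is the remainder, or NULL if nothing follows or if the allocation
 * inside m_split() failed.  The function name is hypothetical.
 */
#if 0	/* example only, never compiled */
static struct mbuf *
example_take_record(struct mbuf **chainp, int reclen)
{
	struct mbuf *head = *chainp;
	struct mbuf *tail;

	tail = m_split(head, reclen, M_DONTWAIT);
	*chainp = tail;			/* remainder of the chain */
	return head;			/* first 'reclen' bytes */
}
#endif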

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif /* defined(DEBUG) */

	if (m0 == NULL)
		return;

#if defined(DEBUG)
	error =
#endif /* defined(DEBUG) */
	m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif /* defined(DEBUG) */
}

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}

/*
 * m_makewritable: ensure the specified range is writable.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	struct mbuf *n;
	int origlen, reslen;

	origlen = m_length(*mp);
#endif /* defined(DEBUG) */

#if 0 /* M_COPYALL is large enough */
	if (len == M_COPYALL)
		len = m_length(*mp) - off; /* XXX */
#endif

	error = m_copyback0(mp, off, len, NULL,
	    M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);

#if defined(DEBUG)
	reslen = 0;
	for (n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsist");
#endif /* defined(DEBUG) */

	return error;
}

int
m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
    int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);

	/*
	 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW,
	 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive.
	 */

	KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0);

	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			int tspace;
extend:
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;

			/*
			 * try to make some space at the end of "m".
			 */

			mlen = m->m_len;
			if (off + len >= MINCLSIZE &&
			    (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
				MCLGET(m, how);
			}
			tspace = M_TRAILINGSPACE(m);
			if (tspace > 0) {
				tspace = min(tspace, off + len);
				KASSERT(tspace > 0);
				memset(mtod(m, char *) + m->m_len, 0,
				    min(off, tspace));
				m->m_len += tspace;
				off += mlen;
				totlen -= mlen;
				continue;
			}

			/*
			 * need to allocate an mbuf.
			 */

			if (off + len >= MINCLSIZE) {
				n = m_getcl(how, m->m_type, 0);
			} else {
				n = m_get(how, m->m_type);
			}
			if (n == NULL) {
				goto out;
			}
			n->m_len = 0;
			n->m_len = min(M_TRAILINGSPACE(n), off + len);
			memset(mtod(n, char *), 0, min(n->m_len, off));
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			char *datap;
			int eatlen;

			/*
			 * this mbuf is read-only.
			 * allocate a new writable mbuf and try again.
			 */

#if defined(DIAGNOSTIC)
			if ((flags & M_COPYBACK0_COW) == 0)
				panic("m_copyback0: read-only");
#endif /* defined(DIAGNOSTIC) */

			/*
			 * if we're going to write into the middle of
			 * a mbuf, split it first.
			 */
			if (off > 0 && len < mlen) {
				n = m_split0(m, off, how, 0);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf.  copy packet header if needed.
			 */
			MGET(n, how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				M_MOVE_PKTHDR(n, m);
				n->m_len = MHLEN;
			} else {
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * free the region which has been overwritten.
			 * copying data from old mbufs if requested.
			 */
			if (flags & M_COPYBACK0_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			KDASSERT(off == 0 || eatlen >= mlen);
			if (off > 0) {
				KDASSERT(len >= mlen);
				m->m_len = off;
				m->m_next = n;
				if (datap) {
					m_copydata(m, off, mlen, datap);
					datap += mlen;
				}
				eatlen -= mlen;
				mp = &m->m_next;
				m = m->m_next;
			}
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = min(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = min(mlen, len);
		if (flags & M_COPYBACK0_COPYBACK) {
			memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			goto extend;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
out:
	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
		KASSERT((flags & M_COPYBACK0_EXTEND) != 0);
		m->m_pkthdr.len = totlen;
	}

	return 0;

enobufs:
	return ENOBUFS;
}
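
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * how a caller typically combines the routines above before modifying data
 * that may live in shared, read-only cluster storage (for instance a chain
 * produced by m_copym()).  m_makewritable() performs the copy-on-write,
 * after which a plain m_copyback() cannot hit a read-only mbuf.  The
 * function name is hypothetical.
 */
#if 0	/* example only, never compiled */
static int
example_rewrite_field(struct mbuf **mp, int off, const void *src, int len)
{
	int error;

	error = m_makewritable(mp, off, len, M_DONTWAIT);
	if (error)
		return error;		/* typically ENOBUFS */

	m_copyback(*mp, off, len, src);	/* now guaranteed writable */
	return 0;
}
#endif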

void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;

	from->m_flags &= ~M_PKTHDR;
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, unsigned int), void *arg)
{
	unsigned int count;
	int rval;

	KASSERT(len >= 0);
	KASSERT(off >= 0);

	while (off > 0) {
		KASSERT(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL);
		count = min(m->m_len - off, len);

		rval = (*f)(arg, mtod(m, char *) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else
					return (NULL);
			} else
				m = m->m_next;
		}
	}

	return (NULL);
}

/*
 * m_ext_free: release a reference to the mbuf external storage.
 *
 * => free the mbuf m itself as well.
 */

void
m_ext_free(struct mbuf *m)
{
	bool embedded = MEXT_ISEMBEDDED(m);
	bool dofree = true;
	u_int refcnt;

	KASSERT((m->m_flags & M_EXT) != 0);
	KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
	KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
	KASSERT((m->m_flags & M_EXT_CLUSTER) ==
	    (m->m_ext_ref->m_flags & M_EXT_CLUSTER));

	if (__predict_true(m->m_ext.ext_refcnt == 1)) {
		refcnt = m->m_ext.ext_refcnt = 0;
	} else {
		refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
	}
	if (refcnt > 0) {
		if (embedded) {
			/*
			 * other mbuf's m_ext_ref still points to us.
			 */
			dofree = false;
		} else {
			m->m_ext_ref = m;
		}
	} else {
		/*
		 * dropping the last reference
		 */
		if (!embedded) {
			m->m_ext.ext_refcnt++; /* XXX */
			m_ext_free(m->m_ext_ref);
			m->m_ext_ref = m;
		} else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
			pool_cache_put_paddr((struct pool_cache *)
			    m->m_ext.ext_arg,
			    m->m_ext.ext_buf, m->m_ext.ext_paddr);
		} else if (m->m_ext.ext_free) {
			(*m->m_ext.ext_free)(m,
			    m->m_ext.ext_buf, m->m_ext.ext_size,
			    m->m_ext.ext_arg);
			/*
			 * 'm' is already freed by the ext_free callback.
			 */
			dofree = false;
		} else {
			free(m->m_ext.ext_buf, m->m_ext.ext_type);
		}
	}
	if (dofree) {
		pool_cache_put(mb_cache, m);
	}
}

#if defined(DDB)
void
m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
{
	char ch;
	bool opt_c = false;
	char buf[512];

	while ((ch = *(modif++)) != '\0') {
		switch (ch) {
		case 'c':
			opt_c = true;
			break;
		}
	}

nextchain:
	(*pr)("MBUF %p\n", m);
	snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags);
	(*pr)("  data=%p, len=%d, type=%d, flags=0x%s\n",
	    m->m_data, m->m_len, m->m_type, buf);
	(*pr)("  owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next,
	    m->m_nextpkt);
	(*pr)("  leadingspace=%u, trailingspace=%u, readonly=%u\n",
	    (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m),
	    (int)M_READONLY(m));
	if ((m->m_flags & M_PKTHDR) != 0) {
		snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags);
		(*pr)("  pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%"
		    PRIx32 ", segsz=%u\n",
		    m->m_pkthdr.len, m->m_pkthdr.rcvif,
		    buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
	}
	if ((m->m_flags & M_EXT)) {
		(*pr)("  ext_refcnt=%u, ext_buf=%p, ext_size=%zd, "
		    "ext_free=%p, ext_arg=%p\n",
		    m->m_ext.ext_refcnt,
		    m->m_ext.ext_buf, m->m_ext.ext_size,
		    m->m_ext.ext_free, m->m_ext.ext_arg);
	}
	if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
		vaddr_t sva = (vaddr_t)m->m_ext.ext_buf;
		vaddr_t eva = sva + m->m_ext.ext_size;
		int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT;
		int i;

		(*pr)("  pages:");
		for (i = 0; i < n; i++) {
			(*pr)(" %p", m->m_ext.ext_pgs[i]);
		}
		(*pr)("\n");
	}

	if (opt_c) {
		m = m->m_next;
		if (m != NULL) {
			goto nextchain;
		}
	}
}
#endif /* defined(DDB) */

void
mbstat_type_add(int type, int diff)
{
	struct mbstat_cpu *mb;
	int s;

	s = splvm();
	mb = percpu_getref(mbstat_percpu);
	mb->m_mtypes[type] += diff;
	percpu_putref(mbstat_percpu);
	splx(s);
}

#if defined(MBUFTRACE)
void
mowner_attach(struct mowner *mo)
{

	KASSERT(mo->mo_counters == NULL);
	mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));

	/* XXX lock */
	LIST_INSERT_HEAD(&mowners, mo, mo_link);
}

void
mowner_detach(struct mowner *mo)
{

	KASSERT(mo->mo_counters != NULL);

	/* XXX lock */
	LIST_REMOVE(mo, mo_link);

	percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
	mo->mo_counters = NULL;
}

void
mowner_init(struct mbuf *m, int type)
{
	struct mowner_counter *mc;
	struct mowner *mo;
	int s;

	m->m_owner = mo = &unknown_mowners[type];
	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
}

void
mowner_ref(struct mbuf *m, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
}

void
mowner_revoke(struct mbuf *m, bool all, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
	if (all)
		mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	if (all)
		m->m_owner = &revoked_mowner;
}

static void
mowner_claim(struct mbuf *m, struct mowner *mo)
{
	struct mowner_counter *mc;
	int flags = m->m_flags;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	m->m_owner = mo;
}

void
m_claim(struct mbuf *m, struct mowner *mo)
{

	if (m->m_owner == mo || mo == NULL)
		return;

	mowner_revoke(m, true, m->m_flags);
	mowner_claim(m, mo);
}
#endif /* defined(MBUFTRACE) */
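
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * how a driver or protocol uses the MBUFTRACE hooks above.  It defines its
 * own struct mowner, attaches it once, and claims mbufs as they pass
 * through; the per-owner counters then show up under kern.mbuf.mowners.
 * The "exampleif" owner and the function names are hypothetical.
 */
#if 0	/* example only, never compiled */
#ifdef MBUFTRACE
static struct mowner example_mowner = MOWNER_INIT("exampleif", "rx");

static void
example_attach(void)
{

	MOWNER_ATTACH(&example_mowner);		/* e.g. at driver attach */
}

static void
example_input(struct mbuf *m)
{

	m_claimm(m, &example_mowner);		/* claim the whole chain */
}
#endif /* MBUFTRACE */
#endif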