1 /* $NetBSD: uipc_mbuf.c,v 1.157 2013/11/15 17:48:55 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.157 2013/11/15 17:48:55 christos Exp $"); 66 67 #include "opt_mbuftrace.h" 68 #include "opt_nmbclusters.h" 69 #include "opt_ddb.h" 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/atomic.h> 74 #include <sys/cpu.h> 75 #include <sys/proc.h> 76 #include <sys/mbuf.h> 77 #include <sys/kernel.h> 78 #include <sys/syslog.h> 79 #include <sys/domain.h> 80 #include <sys/protosw.h> 81 #include <sys/percpu.h> 82 #include <sys/pool.h> 83 #include <sys/socket.h> 84 #include <sys/sysctl.h> 85 86 #include <net/if.h> 87 88 pool_cache_t mb_cache; /* mbuf cache */ 89 pool_cache_t mcl_cache; /* mbuf cluster cache */ 90 91 struct mbstat mbstat; 92 int max_linkhdr; 93 int max_protohdr; 94 int max_hdr; 95 int max_datalen; 96 97 static int mb_ctor(void *, void *, int); 98 99 static void sysctl_kern_mbuf_setup(void); 100 101 static struct sysctllog *mbuf_sysctllog; 102 103 static struct mbuf *m_copym0(struct mbuf *, int, int, int, int); 104 static struct mbuf *m_split0(struct mbuf *, int, int, int); 105 static int m_copyback0(struct mbuf **, int, int, const void *, int, int); 106 107 /* flags for m_copyback0 */ 108 #define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */ 109 #define M_COPYBACK0_PRESERVE 0x0002 /* preserve original data */ 110 #define M_COPYBACK0_COW 0x0004 /* do copy-on-write */ 111 #define M_COPYBACK0_EXTEND 0x0008 /* extend chain */ 112 113 static const char mclpool_warnmsg[] = 114 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; 115 116 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 117 118 static percpu_t *mbstat_percpu; 119 120 #ifdef MBUFTRACE 121 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 122 struct mowner unknown_mowners[] = { 123 MOWNER_INIT("unknown", "free"), 124 MOWNER_INIT("unknown", "data"), 125 MOWNER_INIT("unknown", "header"), 126 MOWNER_INIT("unknown", "soname"), 127 MOWNER_INIT("unknown", "soopts"), 128 MOWNER_INIT("unknown", "ftable"), 129 MOWNER_INIT("unknown", "control"), 130 MOWNER_INIT("unknown", "oobdata"), 131 }; 132 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 133 #endif 134 135 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 136 137 #define MCLADDREFERENCE(o, n) \ 138 do { \ 139 KASSERT(((o)->m_flags & M_EXT) != 0); \ 140 KASSERT(((n)->m_flags & M_EXT) == 0); \ 141 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 142 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 143 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 144 (n)->m_ext_ref = (o)->m_ext_ref; \ 145 mowner_ref((n), (n)->m_flags); \ 146 MCLREFDEBUGN((n), __FILE__, __LINE__); \ 147 } while (/* CONSTCOND */ 0) 148 149 static int 150 nmbclusters_limit(void) 151 { 152 #if defined(PMAP_MAP_POOLPAGE) 153 /* direct mapping, doesn't use space in kmem_arena */ 154 vsize_t max_size = physmem / 4; 155 #else 156 vsize_t max_size = MIN(physmem / 4, nkmempages / 4); 157 #endif 158 159 max_size = max_size * PAGE_SIZE / MCLBYTES; 160 #ifdef NMBCLUSTERS_MAX 161 max_size = MIN(max_size, NMBCLUSTERS_MAX); 162 #endif 163 164 #ifdef NMBCLUSTERS 165 return MIN(max_size, NMBCLUSTERS); 166 #else 167 return max_size; 168 #endif 169 } 170 171 /* 172 * Initialize the mbuf allocator. 173 */ 174 void 175 mbinit(void) 176 { 177 178 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 179 CTASSERT(sizeof(struct mbuf) == MSIZE); 180 181 sysctl_kern_mbuf_setup(); 182 183 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 184 NULL, IPL_VM, mb_ctor, NULL, NULL); 185 KASSERT(mb_cache != NULL); 186 187 mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL, 188 IPL_VM, NULL, NULL, NULL); 189 KASSERT(mcl_cache != NULL); 190 191 pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL); 192 pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL); 193 194 /* 195 * Set an arbitrary default limit on the number of mbuf clusters. 196 */ 197 #ifdef NMBCLUSTERS 198 nmbclusters = nmbclusters_limit(); 199 #else 200 nmbclusters = MAX(1024, 201 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 202 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 203 #endif 204 205 /* 206 * Set the hard limit on the mclpool to the number of 207 * mbuf clusters the kernel is to support. Log the limit 208 * reached message max once a minute. 209 */ 210 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 211 212 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 213 214 /* 215 * Set a low water mark for both mbufs and clusters. This should 216 * help ensure that they can be allocated in a memory starvation 217 * situation. This is important for e.g. diskless systems which 218 * must allocate mbufs in order for the pagedaemon to clean pages. 219 */ 220 pool_cache_setlowat(mb_cache, mblowat); 221 pool_cache_setlowat(mcl_cache, mcllowat); 222 223 #ifdef MBUFTRACE 224 { 225 /* 226 * Attach the unknown mowners. 227 */ 228 int i; 229 MOWNER_ATTACH(&revoked_mowner); 230 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 231 i-- > 0; ) 232 MOWNER_ATTACH(&unknown_mowners[i]); 233 } 234 #endif 235 } 236 237 /* 238 * sysctl helper routine for the kern.mbuf subtree. 239 * nmbclusters, mblowat and mcllowat need range 240 * checking and pool tweaking after being reset. 241 */ 242 static int 243 sysctl_kern_mbuf(SYSCTLFN_ARGS) 244 { 245 int error, newval; 246 struct sysctlnode node; 247 248 node = *rnode; 249 node.sysctl_data = &newval; 250 switch (rnode->sysctl_num) { 251 case MBUF_NMBCLUSTERS: 252 case MBUF_MBLOWAT: 253 case MBUF_MCLLOWAT: 254 newval = *(int*)rnode->sysctl_data; 255 break; 256 default: 257 return (EOPNOTSUPP); 258 } 259 260 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 261 if (error || newp == NULL) 262 return (error); 263 if (newval < 0) 264 return (EINVAL); 265 266 switch (node.sysctl_num) { 267 case MBUF_NMBCLUSTERS: 268 if (newval < nmbclusters) 269 return (EINVAL); 270 if (newval > nmbclusters_limit()) 271 return (EINVAL); 272 nmbclusters = newval; 273 pool_cache_sethardlimit(mcl_cache, nmbclusters, 274 mclpool_warnmsg, 60); 275 break; 276 case MBUF_MBLOWAT: 277 mblowat = newval; 278 pool_cache_setlowat(mb_cache, mblowat); 279 break; 280 case MBUF_MCLLOWAT: 281 mcllowat = newval; 282 pool_cache_setlowat(mcl_cache, mcllowat); 283 break; 284 } 285 286 return (0); 287 } 288 289 #ifdef MBUFTRACE 290 static void 291 mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 292 { 293 struct mowner_counter *mc = v1; 294 struct mowner_user *mo_user = v2; 295 int i; 296 297 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 298 mo_user->mo_counter[i] += mc->mc_counter[i]; 299 } 300 } 301 302 static void 303 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 304 { 305 306 memset(mo_user, 0, sizeof(*mo_user)); 307 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 308 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 309 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 310 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 311 percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user); 312 } 313 314 static int 315 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 316 { 317 struct mowner *mo; 318 size_t len = 0; 319 int error = 0; 320 321 if (namelen != 0) 322 return (EINVAL); 323 if (newp != NULL) 324 return (EPERM); 325 326 LIST_FOREACH(mo, &mowners, mo_link) { 327 struct mowner_user mo_user; 328 329 mowner_convert_to_user(mo, &mo_user); 330 331 if (oldp != NULL) { 332 if (*oldlenp - len < sizeof(mo_user)) { 333 error = ENOMEM; 334 break; 335 } 336 error = copyout(&mo_user, (char *)oldp + len, 337 sizeof(mo_user)); 338 if (error) 339 break; 340 } 341 len += sizeof(mo_user); 342 } 343 344 if (error == 0) 345 *oldlenp = len; 346 347 return (error); 348 } 349 #endif /* MBUFTRACE */ 350 351 static void 352 mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 353 { 354 struct mbstat_cpu *mbsc = v1; 355 struct mbstat *mbs = v2; 356 int i; 357 358 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 359 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 360 } 361 } 362 363 static void 364 mbstat_convert_to_user(struct mbstat *mbs) 365 { 366 367 memset(mbs, 0, sizeof(*mbs)); 368 mbs->m_drain = mbstat.m_drain; 369 percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs); 370 } 371 372 static int 373 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 374 { 375 struct sysctlnode node; 376 struct mbstat mbs; 377 378 mbstat_convert_to_user(&mbs); 379 node = *rnode; 380 node.sysctl_data = &mbs; 381 node.sysctl_size = sizeof(mbs); 382 return sysctl_lookup(SYSCTLFN_CALL(&node)); 383 } 384 385 static void 386 sysctl_kern_mbuf_setup(void) 387 { 388 389 KASSERT(mbuf_sysctllog == NULL); 390 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 391 CTLFLAG_PERMANENT, 392 CTLTYPE_NODE, "kern", NULL, 393 NULL, 0, NULL, 0, 394 CTL_KERN, CTL_EOL); 395 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 396 CTLFLAG_PERMANENT, 397 CTLTYPE_NODE, "mbuf", 398 SYSCTL_DESCR("mbuf control variables"), 399 NULL, 0, NULL, 0, 400 CTL_KERN, KERN_MBUF, CTL_EOL); 401 402 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 403 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 404 CTLTYPE_INT, "msize", 405 SYSCTL_DESCR("mbuf base size"), 406 NULL, msize, NULL, 0, 407 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 408 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 409 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 410 CTLTYPE_INT, "mclbytes", 411 SYSCTL_DESCR("mbuf cluster size"), 412 NULL, mclbytes, NULL, 0, 413 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 414 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 415 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 416 CTLTYPE_INT, "nmbclusters", 417 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 418 sysctl_kern_mbuf, 0, &nmbclusters, 0, 419 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 420 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 421 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 422 CTLTYPE_INT, "mblowat", 423 SYSCTL_DESCR("mbuf low water mark"), 424 sysctl_kern_mbuf, 0, &mblowat, 0, 425 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 426 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 427 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 428 CTLTYPE_INT, "mcllowat", 429 SYSCTL_DESCR("mbuf cluster low water mark"), 430 sysctl_kern_mbuf, 0, &mcllowat, 0, 431 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 432 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 433 CTLFLAG_PERMANENT, 434 CTLTYPE_STRUCT, "stats", 435 SYSCTL_DESCR("mbuf allocation statistics"), 436 sysctl_kern_mbuf_stats, 0, NULL, 0, 437 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 438 #ifdef MBUFTRACE 439 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 440 CTLFLAG_PERMANENT, 441 CTLTYPE_STRUCT, "mowners", 442 SYSCTL_DESCR("Information about mbuf owners"), 443 sysctl_kern_mbuf_mowners, 0, NULL, 0, 444 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 445 #endif /* MBUFTRACE */ 446 } 447 448 static int 449 mb_ctor(void *arg, void *object, int flags) 450 { 451 struct mbuf *m = object; 452 453 #ifdef POOL_VTOPHYS 454 m->m_paddr = POOL_VTOPHYS(m); 455 #else 456 m->m_paddr = M_PADDR_INVALID; 457 #endif 458 return (0); 459 } 460 461 /* 462 * Add mbuf to the end of a chain 463 */ 464 struct mbuf * 465 m_add(struct mbuf *c, struct mbuf *m) { 466 struct mbuf *n; 467 468 if (c == NULL) 469 return m; 470 471 for (n = c; n->m_next != NULL; n = n->m_next) 472 continue; 473 n->m_next = m; 474 return c; 475 } 476 477 /* 478 * Set the m_data pointer of a newly-allocated mbuf 479 * to place an object of the specified size at the 480 * end of the mbuf, longword aligned. 481 */ 482 void 483 m_align(struct mbuf *m, int len) 484 { 485 int adjust; 486 487 KASSERT(len != M_COPYALL); 488 489 if (m->m_flags & M_EXT) 490 adjust = m->m_ext.ext_size - len; 491 else if (m->m_flags & M_PKTHDR) 492 adjust = MHLEN - len; 493 else 494 adjust = MLEN - len; 495 m->m_data += adjust &~ (sizeof(long)-1); 496 } 497 498 /* 499 * Append the specified data to the indicated mbuf chain, 500 * Extend the mbuf chain if the new data does not fit in 501 * existing space. 502 * 503 * Return 1 if able to complete the job; otherwise 0. 504 */ 505 int 506 m_append(struct mbuf *m0, int len, const void *cpv) 507 { 508 struct mbuf *m, *n; 509 int remainder, space; 510 const char *cp = cpv; 511 512 KASSERT(len != M_COPYALL); 513 for (m = m0; m->m_next != NULL; m = m->m_next) 514 continue; 515 remainder = len; 516 space = M_TRAILINGSPACE(m); 517 if (space > 0) { 518 /* 519 * Copy into available space. 520 */ 521 if (space > remainder) 522 space = remainder; 523 memmove(mtod(m, char *) + m->m_len, cp, space); 524 m->m_len += space; 525 cp = cp + space, remainder -= space; 526 } 527 while (remainder > 0) { 528 /* 529 * Allocate a new mbuf; could check space 530 * and allocate a cluster instead. 531 */ 532 n = m_get(M_DONTWAIT, m->m_type); 533 if (n == NULL) 534 break; 535 n->m_len = min(MLEN, remainder); 536 memmove(mtod(n, void *), cp, n->m_len); 537 cp += n->m_len, remainder -= n->m_len; 538 m->m_next = n; 539 m = n; 540 } 541 if (m0->m_flags & M_PKTHDR) 542 m0->m_pkthdr.len += len - remainder; 543 return (remainder == 0); 544 } 545 546 void 547 m_reclaim(void *arg, int flags) 548 { 549 struct domain *dp; 550 const struct protosw *pr; 551 struct ifnet *ifp; 552 int s; 553 554 KERNEL_LOCK(1, NULL); 555 s = splvm(); 556 DOMAIN_FOREACH(dp) { 557 for (pr = dp->dom_protosw; 558 pr < dp->dom_protoswNPROTOSW; pr++) 559 if (pr->pr_drain) 560 (*pr->pr_drain)(); 561 } 562 IFNET_FOREACH(ifp) { 563 if (ifp->if_drain) 564 (*ifp->if_drain)(ifp); 565 } 566 splx(s); 567 mbstat.m_drain++; 568 KERNEL_UNLOCK_ONE(NULL); 569 } 570 571 /* 572 * Space allocation routines. 573 * These are also available as macros 574 * for critical paths. 575 */ 576 struct mbuf * 577 m_get(int nowait, int type) 578 { 579 struct mbuf *m; 580 581 KASSERT(type != MT_FREE); 582 583 m = pool_cache_get(mb_cache, 584 nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0); 585 if (m == NULL) 586 return NULL; 587 588 mbstat_type_add(type, 1); 589 mowner_init(m, type); 590 m->m_ext_ref = m; 591 m->m_type = type; 592 m->m_len = 0; 593 m->m_next = NULL; 594 m->m_nextpkt = NULL; 595 m->m_data = m->m_dat; 596 m->m_flags = 0; 597 598 return m; 599 } 600 601 struct mbuf * 602 m_gethdr(int nowait, int type) 603 { 604 struct mbuf *m; 605 606 m = m_get(nowait, type); 607 if (m == NULL) 608 return NULL; 609 610 m->m_data = m->m_pktdat; 611 m->m_flags = M_PKTHDR; 612 m->m_pkthdr.rcvif = NULL; 613 m->m_pkthdr.len = 0; 614 m->m_pkthdr.csum_flags = 0; 615 m->m_pkthdr.csum_data = 0; 616 SLIST_INIT(&m->m_pkthdr.tags); 617 618 return m; 619 } 620 621 struct mbuf * 622 m_getclr(int nowait, int type) 623 { 624 struct mbuf *m; 625 626 m = m_get(nowait, type); 627 if (m == 0) 628 return (NULL); 629 memset(mtod(m, void *), 0, MLEN); 630 return (m); 631 } 632 633 void 634 m_clget(struct mbuf *m, int nowait) 635 { 636 637 MCLGET(m, nowait); 638 } 639 640 struct mbuf * 641 m_free(struct mbuf *m) 642 { 643 struct mbuf *n; 644 645 MFREE(m, n); 646 return (n); 647 } 648 649 void 650 m_freem(struct mbuf *m) 651 { 652 struct mbuf *n; 653 654 if (m == NULL) 655 return; 656 do { 657 MFREE(m, n); 658 m = n; 659 } while (m); 660 } 661 662 #ifdef MBUFTRACE 663 /* 664 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain. 665 */ 666 void 667 m_claimm(struct mbuf *m, struct mowner *mo) 668 { 669 670 for (; m != NULL; m = m->m_next) 671 MCLAIM(m, mo); 672 } 673 #endif 674 675 /* 676 * Mbuffer utility routines. 677 */ 678 679 /* 680 * Lesser-used path for M_PREPEND: 681 * allocate new mbuf to prepend to chain, 682 * copy junk along. 683 */ 684 struct mbuf * 685 m_prepend(struct mbuf *m, int len, int how) 686 { 687 struct mbuf *mn; 688 689 KASSERT(len != M_COPYALL); 690 mn = m_get(how, m->m_type); 691 if (mn == NULL) { 692 m_freem(m); 693 return (NULL); 694 } 695 if (m->m_flags & M_PKTHDR) { 696 M_MOVE_PKTHDR(mn, m); 697 } else { 698 MCLAIM(mn, m->m_owner); 699 } 700 mn->m_next = m; 701 m = mn; 702 if (len < MHLEN) 703 MH_ALIGN(m, len); 704 m->m_len = len; 705 return (m); 706 } 707 708 /* 709 * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 710 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 711 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. 712 */ 713 int MCFail; 714 715 struct mbuf * 716 m_copym(struct mbuf *m, int off0, int len, int wait) 717 { 718 719 return m_copym0(m, off0, len, wait, 0); /* shallow copy on M_EXT */ 720 } 721 722 struct mbuf * 723 m_dup(struct mbuf *m, int off0, int len, int wait) 724 { 725 726 return m_copym0(m, off0, len, wait, 1); /* deep copy */ 727 } 728 729 static inline int 730 m_copylen(int len, int copylen) { 731 return len == M_COPYALL ? copylen : min(len, copylen); 732 } 733 734 static struct mbuf * 735 m_copym0(struct mbuf *m, int off0, int len, int wait, int deep) 736 { 737 struct mbuf *n, **np; 738 int off = off0; 739 struct mbuf *top; 740 int copyhdr = 0; 741 742 if (off < 0 || (len != M_COPYALL && len < 0)) 743 panic("m_copym: off %d, len %d", off, len); 744 if (off == 0 && m->m_flags & M_PKTHDR) 745 copyhdr = 1; 746 while (off > 0) { 747 if (m == 0) 748 panic("m_copym: m == 0, off %d", off); 749 if (off < m->m_len) 750 break; 751 off -= m->m_len; 752 m = m->m_next; 753 } 754 np = ⊤ 755 top = 0; 756 while (len == M_COPYALL || len > 0) { 757 if (m == 0) { 758 if (len != M_COPYALL) 759 panic("m_copym: m == 0, len %d [!COPYALL]", 760 len); 761 break; 762 } 763 n = m_get(wait, m->m_type); 764 *np = n; 765 if (n == 0) 766 goto nospace; 767 MCLAIM(n, m->m_owner); 768 if (copyhdr) { 769 M_COPY_PKTHDR(n, m); 770 if (len == M_COPYALL) 771 n->m_pkthdr.len -= off0; 772 else 773 n->m_pkthdr.len = len; 774 copyhdr = 0; 775 } 776 n->m_len = m_copylen(len, m->m_len - off); 777 if (m->m_flags & M_EXT) { 778 if (!deep) { 779 n->m_data = m->m_data + off; 780 MCLADDREFERENCE(m, n); 781 } else { 782 /* 783 * we are unsure about the way m was allocated. 784 * copy into multiple MCLBYTES cluster mbufs. 785 */ 786 MCLGET(n, wait); 787 n->m_len = M_TRAILINGSPACE(n); 788 n->m_len = m_copylen(len, n->m_len); 789 n->m_len = min(n->m_len, m->m_len - off); 790 memcpy(mtod(n, void *), mtod(m, char *) + off, 791 (unsigned)n->m_len); 792 } 793 } else 794 memcpy(mtod(n, void *), mtod(m, char *) + off, 795 (unsigned)n->m_len); 796 if (len != M_COPYALL) 797 len -= n->m_len; 798 off += n->m_len; 799 #ifdef DIAGNOSTIC 800 if (off > m->m_len) 801 panic("m_copym0 overrun %d %d", off, m->m_len); 802 #endif 803 if (off == m->m_len) { 804 m = m->m_next; 805 off = 0; 806 } 807 np = &n->m_next; 808 } 809 if (top == 0) 810 MCFail++; 811 return (top); 812 nospace: 813 m_freem(top); 814 MCFail++; 815 return (NULL); 816 } 817 818 /* 819 * Copy an entire packet, including header (which must be present). 820 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 821 */ 822 struct mbuf * 823 m_copypacket(struct mbuf *m, int how) 824 { 825 struct mbuf *top, *n, *o; 826 827 n = m_get(how, m->m_type); 828 top = n; 829 if (!n) 830 goto nospace; 831 832 MCLAIM(n, m->m_owner); 833 M_COPY_PKTHDR(n, m); 834 n->m_len = m->m_len; 835 if (m->m_flags & M_EXT) { 836 n->m_data = m->m_data; 837 MCLADDREFERENCE(m, n); 838 } else { 839 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 840 } 841 842 m = m->m_next; 843 while (m) { 844 o = m_get(how, m->m_type); 845 if (!o) 846 goto nospace; 847 848 MCLAIM(o, m->m_owner); 849 n->m_next = o; 850 n = n->m_next; 851 852 n->m_len = m->m_len; 853 if (m->m_flags & M_EXT) { 854 n->m_data = m->m_data; 855 MCLADDREFERENCE(m, n); 856 } else { 857 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 858 } 859 860 m = m->m_next; 861 } 862 return top; 863 nospace: 864 m_freem(top); 865 MCFail++; 866 return NULL; 867 } 868 869 /* 870 * Copy data from an mbuf chain starting "off" bytes from the beginning, 871 * continuing for "len" bytes, into the indicated buffer. 872 */ 873 void 874 m_copydata(struct mbuf *m, int off, int len, void *vp) 875 { 876 unsigned count; 877 void * cp = vp; 878 struct mbuf *m0 = m; 879 int len0 = len; 880 int off0 = off; 881 void *vp0 = vp; 882 883 KASSERT(len != M_COPYALL); 884 if (off < 0 || len < 0) 885 panic("m_copydata: off %d, len %d", off, len); 886 while (off > 0) { 887 if (m == NULL) 888 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", 889 m0, len0, off0, vp0, off, off0 - off); 890 if (off < m->m_len) 891 break; 892 off -= m->m_len; 893 m = m->m_next; 894 } 895 while (len > 0) { 896 if (m == NULL) 897 panic("m_copydata(%p,%d,%d,%p): " 898 "m=NULL, off=%d (%d), len=%d (%d)", 899 m0, len0, off0, vp0, 900 off, off0 - off, len, len0 - len); 901 count = min(m->m_len - off, len); 902 memcpy(cp, mtod(m, char *) + off, count); 903 len -= count; 904 cp = (char *)cp + count; 905 off = 0; 906 m = m->m_next; 907 } 908 } 909 910 /* 911 * Concatenate mbuf chain n to m. 912 * n might be copied into m (when n->m_len is small), therefore data portion of 913 * n could be copied into an mbuf of different mbuf type. 914 * Any m_pkthdr is not updated. 915 */ 916 void 917 m_cat(struct mbuf *m, struct mbuf *n) 918 { 919 920 while (m->m_next) 921 m = m->m_next; 922 while (n) { 923 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 924 /* just join the two chains */ 925 m->m_next = n; 926 return; 927 } 928 /* splat the data from one into the other */ 929 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 930 (u_int)n->m_len); 931 m->m_len += n->m_len; 932 n = m_free(n); 933 } 934 } 935 936 void 937 m_adj(struct mbuf *mp, int req_len) 938 { 939 int len = req_len; 940 struct mbuf *m; 941 int count; 942 943 if ((m = mp) == NULL) 944 return; 945 if (len >= 0) { 946 /* 947 * Trim from head. 948 */ 949 while (m != NULL && len > 0) { 950 if (m->m_len <= len) { 951 len -= m->m_len; 952 m->m_len = 0; 953 m = m->m_next; 954 } else { 955 m->m_len -= len; 956 m->m_data += len; 957 len = 0; 958 } 959 } 960 m = mp; 961 if (mp->m_flags & M_PKTHDR) 962 m->m_pkthdr.len -= (req_len - len); 963 } else { 964 /* 965 * Trim from tail. Scan the mbuf chain, 966 * calculating its length and finding the last mbuf. 967 * If the adjustment only affects this mbuf, then just 968 * adjust and return. Otherwise, rescan and truncate 969 * after the remaining size. 970 */ 971 len = -len; 972 count = 0; 973 for (;;) { 974 count += m->m_len; 975 if (m->m_next == (struct mbuf *)0) 976 break; 977 m = m->m_next; 978 } 979 if (m->m_len >= len) { 980 m->m_len -= len; 981 if (mp->m_flags & M_PKTHDR) 982 mp->m_pkthdr.len -= len; 983 return; 984 } 985 count -= len; 986 if (count < 0) 987 count = 0; 988 /* 989 * Correct length for chain is "count". 990 * Find the mbuf with last data, adjust its length, 991 * and toss data from remaining mbufs on chain. 992 */ 993 m = mp; 994 if (m->m_flags & M_PKTHDR) 995 m->m_pkthdr.len = count; 996 for (; m; m = m->m_next) { 997 if (m->m_len >= count) { 998 m->m_len = count; 999 break; 1000 } 1001 count -= m->m_len; 1002 } 1003 if (m) 1004 while (m->m_next) 1005 (m = m->m_next)->m_len = 0; 1006 } 1007 } 1008 1009 /* 1010 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 1011 * would be contiguous and in the data area of an mbuf (therefore, mtod() 1012 * would work for a structure of given length). 1013 * 1014 * => On success, returns true and the resulting mbuf chain; false otherwise. 1015 * => The mbuf chain may change, but is always preserved valid. 1016 */ 1017 bool 1018 m_ensure_contig(struct mbuf **m0, int len) 1019 { 1020 struct mbuf *n = *m0, *m; 1021 size_t count, space; 1022 1023 KASSERT(len != M_COPYALL); 1024 /* 1025 * If first mbuf has no cluster, and has room for len bytes 1026 * without shifting current data, pullup into it, 1027 * otherwise allocate a new mbuf to prepend to the chain. 1028 */ 1029 if ((n->m_flags & M_EXT) == 0 && 1030 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 1031 if (n->m_len >= len) { 1032 return true; 1033 } 1034 m = n; 1035 n = n->m_next; 1036 len -= m->m_len; 1037 } else { 1038 if (len > MHLEN) { 1039 return false; 1040 } 1041 m = m_get(M_DONTWAIT, n->m_type); 1042 if (m == NULL) { 1043 return false; 1044 } 1045 MCLAIM(m, n->m_owner); 1046 if (n->m_flags & M_PKTHDR) { 1047 M_MOVE_PKTHDR(m, n); 1048 } 1049 } 1050 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1051 do { 1052 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 1053 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1054 (unsigned)count); 1055 len -= count; 1056 m->m_len += count; 1057 n->m_len -= count; 1058 space -= count; 1059 if (n->m_len) 1060 n->m_data += count; 1061 else 1062 n = m_free(n); 1063 } while (len > 0 && n); 1064 1065 m->m_next = n; 1066 *m0 = m; 1067 1068 return len <= 0; 1069 } 1070 1071 /* 1072 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 1073 */ 1074 int MPFail; 1075 1076 struct mbuf * 1077 m_pullup(struct mbuf *n, int len) 1078 { 1079 struct mbuf *m = n; 1080 1081 KASSERT(len != M_COPYALL); 1082 if (!m_ensure_contig(&m, len)) { 1083 KASSERT(m != NULL); 1084 m_freem(m); 1085 MPFail++; 1086 m = NULL; 1087 } 1088 return m; 1089 } 1090 1091 /* 1092 * Like m_pullup(), except a new mbuf is always allocated, and we allow 1093 * the amount of empty space before the data in the new mbuf to be specified 1094 * (in the event that the caller expects to prepend later). 1095 */ 1096 int MSFail; 1097 1098 struct mbuf * 1099 m_copyup(struct mbuf *n, int len, int dstoff) 1100 { 1101 struct mbuf *m; 1102 int count, space; 1103 1104 KASSERT(len != M_COPYALL); 1105 if (len > (MHLEN - dstoff)) 1106 goto bad; 1107 m = m_get(M_DONTWAIT, n->m_type); 1108 if (m == NULL) 1109 goto bad; 1110 MCLAIM(m, n->m_owner); 1111 if (n->m_flags & M_PKTHDR) { 1112 M_MOVE_PKTHDR(m, n); 1113 } 1114 m->m_data += dstoff; 1115 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1116 do { 1117 count = min(min(max(len, max_protohdr), space), n->m_len); 1118 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1119 (unsigned)count); 1120 len -= count; 1121 m->m_len += count; 1122 n->m_len -= count; 1123 space -= count; 1124 if (n->m_len) 1125 n->m_data += count; 1126 else 1127 n = m_free(n); 1128 } while (len > 0 && n); 1129 if (len > 0) { 1130 (void) m_free(m); 1131 goto bad; 1132 } 1133 m->m_next = n; 1134 return (m); 1135 bad: 1136 m_freem(n); 1137 MSFail++; 1138 return (NULL); 1139 } 1140 1141 /* 1142 * Partition an mbuf chain in two pieces, returning the tail -- 1143 * all but the first len0 bytes. In case of failure, it returns NULL and 1144 * attempts to restore the chain to its original state. 1145 */ 1146 struct mbuf * 1147 m_split(struct mbuf *m0, int len0, int wait) 1148 { 1149 1150 return m_split0(m0, len0, wait, 1); 1151 } 1152 1153 static struct mbuf * 1154 m_split0(struct mbuf *m0, int len0, int wait, int copyhdr) 1155 { 1156 struct mbuf *m, *n; 1157 unsigned len = len0, remain, len_save; 1158 1159 KASSERT(len0 != M_COPYALL); 1160 for (m = m0; m && len > m->m_len; m = m->m_next) 1161 len -= m->m_len; 1162 if (m == 0) 1163 return (NULL); 1164 remain = m->m_len - len; 1165 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1166 n = m_gethdr(wait, m0->m_type); 1167 if (n == NULL) 1168 return NULL; 1169 MCLAIM(n, m0->m_owner); 1170 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 1171 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1172 len_save = m0->m_pkthdr.len; 1173 m0->m_pkthdr.len = len0; 1174 if (m->m_flags & M_EXT) 1175 goto extpacket; 1176 if (remain > MHLEN) { 1177 /* m can't be the lead packet */ 1178 MH_ALIGN(n, 0); 1179 n->m_len = 0; 1180 n->m_next = m_split(m, len, wait); 1181 if (n->m_next == 0) { 1182 (void) m_free(n); 1183 m0->m_pkthdr.len = len_save; 1184 return (NULL); 1185 } else 1186 return (n); 1187 } else 1188 MH_ALIGN(n, remain); 1189 } else if (remain == 0) { 1190 n = m->m_next; 1191 m->m_next = 0; 1192 return (n); 1193 } else { 1194 n = m_get(wait, m->m_type); 1195 if (n == 0) 1196 return (NULL); 1197 MCLAIM(n, m->m_owner); 1198 M_ALIGN(n, remain); 1199 } 1200 extpacket: 1201 if (m->m_flags & M_EXT) { 1202 n->m_data = m->m_data + len; 1203 MCLADDREFERENCE(m, n); 1204 } else { 1205 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1206 } 1207 n->m_len = remain; 1208 m->m_len = len; 1209 n->m_next = m->m_next; 1210 m->m_next = 0; 1211 return (n); 1212 } 1213 /* 1214 * Routine to copy from device local memory into mbufs. 1215 */ 1216 struct mbuf * 1217 m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 1218 void (*copy)(const void *from, void *to, size_t len)) 1219 { 1220 struct mbuf *m; 1221 struct mbuf *top = 0, **mp = ⊤ 1222 int off = off0, len; 1223 char *cp; 1224 char *epkt; 1225 1226 cp = buf; 1227 epkt = cp + totlen; 1228 if (off) { 1229 /* 1230 * If 'off' is non-zero, packet is trailer-encapsulated, 1231 * so we have to skip the type and length fields. 1232 */ 1233 cp += off + 2 * sizeof(uint16_t); 1234 totlen -= 2 * sizeof(uint16_t); 1235 } 1236 m = m_gethdr(M_DONTWAIT, MT_DATA); 1237 if (m == NULL) 1238 return NULL; 1239 m->m_pkthdr.rcvif = ifp; 1240 m->m_pkthdr.len = totlen; 1241 m->m_len = MHLEN; 1242 1243 while (totlen > 0) { 1244 if (top) { 1245 m = m_get(M_DONTWAIT, MT_DATA); 1246 if (m == 0) { 1247 m_freem(top); 1248 return (NULL); 1249 } 1250 m->m_len = MLEN; 1251 } 1252 len = min(totlen, epkt - cp); 1253 if (len >= MINCLSIZE) { 1254 MCLGET(m, M_DONTWAIT); 1255 if ((m->m_flags & M_EXT) == 0) { 1256 m_free(m); 1257 m_freem(top); 1258 return (NULL); 1259 } 1260 m->m_len = len = min(len, MCLBYTES); 1261 } else { 1262 /* 1263 * Place initial small packet/header at end of mbuf. 1264 */ 1265 if (len < m->m_len) { 1266 if (top == 0 && len + max_linkhdr <= m->m_len) 1267 m->m_data += max_linkhdr; 1268 m->m_len = len; 1269 } else 1270 len = m->m_len; 1271 } 1272 if (copy) 1273 copy(cp, mtod(m, void *), (size_t)len); 1274 else 1275 memcpy(mtod(m, void *), cp, (size_t)len); 1276 cp += len; 1277 *mp = m; 1278 mp = &m->m_next; 1279 totlen -= len; 1280 if (cp == epkt) 1281 cp = buf; 1282 } 1283 return (top); 1284 } 1285 1286 /* 1287 * Copy data from a buffer back into the indicated mbuf chain, 1288 * starting "off" bytes from the beginning, extending the mbuf 1289 * chain if necessary. 1290 */ 1291 void 1292 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1293 { 1294 #if defined(DEBUG) 1295 struct mbuf *origm = m0; 1296 int error; 1297 #endif /* defined(DEBUG) */ 1298 1299 if (m0 == NULL) 1300 return; 1301 1302 #if defined(DEBUG) 1303 error = 1304 #endif /* defined(DEBUG) */ 1305 m_copyback0(&m0, off, len, cp, 1306 M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT); 1307 1308 #if defined(DEBUG) 1309 if (error != 0 || (m0 != NULL && origm != m0)) 1310 panic("m_copyback"); 1311 #endif /* defined(DEBUG) */ 1312 } 1313 1314 struct mbuf * 1315 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1316 { 1317 int error; 1318 1319 /* don't support chain expansion */ 1320 KASSERT(len != M_COPYALL); 1321 KDASSERT(off + len <= m_length(m0)); 1322 1323 error = m_copyback0(&m0, off, len, cp, 1324 M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how); 1325 if (error) { 1326 /* 1327 * no way to recover from partial success. 1328 * just free the chain. 1329 */ 1330 m_freem(m0); 1331 return NULL; 1332 } 1333 return m0; 1334 } 1335 1336 /* 1337 * m_makewritable: ensure the specified range writable. 1338 */ 1339 int 1340 m_makewritable(struct mbuf **mp, int off, int len, int how) 1341 { 1342 int error; 1343 #if defined(DEBUG) 1344 int origlen = m_length(*mp); 1345 #endif /* defined(DEBUG) */ 1346 1347 error = m_copyback0(mp, off, len, NULL, 1348 M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how); 1349 1350 #if defined(DEBUG) 1351 int reslen = 0; 1352 for (struct mbuf *n = *mp; n; n = n->m_next) 1353 reslen += n->m_len; 1354 if (origlen != reslen) 1355 panic("m_makewritable: length changed"); 1356 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1357 panic("m_makewritable: inconsist"); 1358 #endif /* defined(DEBUG) */ 1359 1360 return error; 1361 } 1362 1363 /* 1364 * Copy the mbuf chain to a new mbuf chain that is as short as possible. 1365 * Return the new mbuf chain on success, NULL on failure. On success, 1366 * free the old mbuf chain. 1367 */ 1368 struct mbuf * 1369 m_defrag(struct mbuf *mold, int flags) 1370 { 1371 struct mbuf *m0, *mn, *n; 1372 size_t sz = mold->m_pkthdr.len; 1373 1374 #ifdef DIAGNOSTIC 1375 if ((mold->m_flags & M_PKTHDR) == 0) 1376 panic("m_defrag: not a mbuf chain header"); 1377 #endif 1378 1379 m0 = m_gethdr(flags, MT_DATA); 1380 if (m0 == NULL) 1381 return NULL; 1382 M_COPY_PKTHDR(m0, mold); 1383 mn = m0; 1384 1385 do { 1386 if (sz > MHLEN) { 1387 MCLGET(mn, M_DONTWAIT); 1388 if ((mn->m_flags & M_EXT) == 0) { 1389 m_freem(m0); 1390 return NULL; 1391 } 1392 } 1393 1394 mn->m_len = MIN(sz, MCLBYTES); 1395 1396 m_copydata(mold, mold->m_pkthdr.len - sz, mn->m_len, 1397 mtod(mn, void *)); 1398 1399 sz -= mn->m_len; 1400 1401 if (sz > 0) { 1402 /* need more mbufs */ 1403 n = m_get(M_NOWAIT, MT_DATA); 1404 if (n == NULL) { 1405 m_freem(m0); 1406 return NULL; 1407 } 1408 1409 mn->m_next = n; 1410 mn = n; 1411 } 1412 } while (sz > 0); 1413 1414 m_freem(mold); 1415 1416 return m0; 1417 } 1418 1419 int 1420 m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags, 1421 int how) 1422 { 1423 int mlen; 1424 struct mbuf *m, *n; 1425 struct mbuf **mp; 1426 int totlen = 0; 1427 const char *cp = vp; 1428 1429 KASSERT(mp0 != NULL); 1430 KASSERT(*mp0 != NULL); 1431 KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL); 1432 KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL); 1433 1434 if (len == M_COPYALL) 1435 len = m_length(*mp0) - off; 1436 1437 /* 1438 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW, 1439 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive. 1440 */ 1441 1442 KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0); 1443 1444 mp = mp0; 1445 m = *mp; 1446 while (off > (mlen = m->m_len)) { 1447 off -= mlen; 1448 totlen += mlen; 1449 if (m->m_next == NULL) { 1450 int tspace; 1451 extend: 1452 if ((flags & M_COPYBACK0_EXTEND) == 0) 1453 goto out; 1454 1455 /* 1456 * try to make some space at the end of "m". 1457 */ 1458 1459 mlen = m->m_len; 1460 if (off + len >= MINCLSIZE && 1461 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1462 MCLGET(m, how); 1463 } 1464 tspace = M_TRAILINGSPACE(m); 1465 if (tspace > 0) { 1466 tspace = min(tspace, off + len); 1467 KASSERT(tspace > 0); 1468 memset(mtod(m, char *) + m->m_len, 0, 1469 min(off, tspace)); 1470 m->m_len += tspace; 1471 off += mlen; 1472 totlen -= mlen; 1473 continue; 1474 } 1475 1476 /* 1477 * need to allocate an mbuf. 1478 */ 1479 1480 if (off + len >= MINCLSIZE) { 1481 n = m_getcl(how, m->m_type, 0); 1482 } else { 1483 n = m_get(how, m->m_type); 1484 } 1485 if (n == NULL) { 1486 goto out; 1487 } 1488 n->m_len = min(M_TRAILINGSPACE(n), off + len); 1489 memset(mtod(n, char *), 0, min(n->m_len, off)); 1490 m->m_next = n; 1491 } 1492 mp = &m->m_next; 1493 m = m->m_next; 1494 } 1495 while (len > 0) { 1496 mlen = m->m_len - off; 1497 if (mlen != 0 && M_READONLY(m)) { 1498 char *datap; 1499 int eatlen; 1500 1501 /* 1502 * this mbuf is read-only. 1503 * allocate a new writable mbuf and try again. 1504 */ 1505 1506 #if defined(DIAGNOSTIC) 1507 if ((flags & M_COPYBACK0_COW) == 0) 1508 panic("m_copyback0: read-only"); 1509 #endif /* defined(DIAGNOSTIC) */ 1510 1511 /* 1512 * if we're going to write into the middle of 1513 * a mbuf, split it first. 1514 */ 1515 if (off > 0) { 1516 n = m_split0(m, off, how, 0); 1517 if (n == NULL) 1518 goto enobufs; 1519 m->m_next = n; 1520 mp = &m->m_next; 1521 m = n; 1522 off = 0; 1523 continue; 1524 } 1525 1526 /* 1527 * XXX TODO coalesce into the trailingspace of 1528 * the previous mbuf when possible. 1529 */ 1530 1531 /* 1532 * allocate a new mbuf. copy packet header if needed. 1533 */ 1534 n = m_get(how, m->m_type); 1535 if (n == NULL) 1536 goto enobufs; 1537 MCLAIM(n, m->m_owner); 1538 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1539 M_MOVE_PKTHDR(n, m); 1540 n->m_len = MHLEN; 1541 } else { 1542 if (len >= MINCLSIZE) 1543 MCLGET(n, M_DONTWAIT); 1544 n->m_len = 1545 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1546 } 1547 if (n->m_len > len) 1548 n->m_len = len; 1549 1550 /* 1551 * free the region which has been overwritten. 1552 * copying data from old mbufs if requested. 1553 */ 1554 if (flags & M_COPYBACK0_PRESERVE) 1555 datap = mtod(n, char *); 1556 else 1557 datap = NULL; 1558 eatlen = n->m_len; 1559 while (m != NULL && M_READONLY(m) && 1560 n->m_type == m->m_type && eatlen > 0) { 1561 mlen = min(eatlen, m->m_len); 1562 if (datap) { 1563 m_copydata(m, 0, mlen, datap); 1564 datap += mlen; 1565 } 1566 m->m_data += mlen; 1567 m->m_len -= mlen; 1568 eatlen -= mlen; 1569 if (m->m_len == 0) 1570 *mp = m = m_free(m); 1571 } 1572 if (eatlen > 0) 1573 n->m_len -= eatlen; 1574 n->m_next = m; 1575 *mp = m = n; 1576 continue; 1577 } 1578 mlen = min(mlen, len); 1579 if (flags & M_COPYBACK0_COPYBACK) { 1580 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1581 cp += mlen; 1582 } 1583 len -= mlen; 1584 mlen += off; 1585 off = 0; 1586 totlen += mlen; 1587 if (len == 0) 1588 break; 1589 if (m->m_next == NULL) { 1590 goto extend; 1591 } 1592 mp = &m->m_next; 1593 m = m->m_next; 1594 } 1595 out: if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1596 KASSERT((flags & M_COPYBACK0_EXTEND) != 0); 1597 m->m_pkthdr.len = totlen; 1598 } 1599 1600 return 0; 1601 1602 enobufs: 1603 return ENOBUFS; 1604 } 1605 1606 void 1607 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1608 { 1609 1610 KASSERT((to->m_flags & M_EXT) == 0); 1611 KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL); 1612 KASSERT((from->m_flags & M_PKTHDR) != 0); 1613 1614 to->m_pkthdr = from->m_pkthdr; 1615 to->m_flags = from->m_flags & M_COPYFLAGS; 1616 to->m_data = to->m_pktdat; 1617 1618 from->m_flags &= ~M_PKTHDR; 1619 } 1620 1621 /* 1622 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1623 * beginning, continuing for "len" bytes. 1624 */ 1625 int 1626 m_apply(struct mbuf *m, int off, int len, 1627 int (*f)(void *, void *, unsigned int), void *arg) 1628 { 1629 unsigned int count; 1630 int rval; 1631 1632 KASSERT(len != M_COPYALL); 1633 KASSERT(len >= 0); 1634 KASSERT(off >= 0); 1635 1636 while (off > 0) { 1637 KASSERT(m != NULL); 1638 if (off < m->m_len) 1639 break; 1640 off -= m->m_len; 1641 m = m->m_next; 1642 } 1643 while (len > 0) { 1644 KASSERT(m != NULL); 1645 count = min(m->m_len - off, len); 1646 1647 rval = (*f)(arg, mtod(m, char *) + off, count); 1648 if (rval) 1649 return (rval); 1650 1651 len -= count; 1652 off = 0; 1653 m = m->m_next; 1654 } 1655 1656 return (0); 1657 } 1658 1659 /* 1660 * Return a pointer to mbuf/offset of location in mbuf chain. 1661 */ 1662 struct mbuf * 1663 m_getptr(struct mbuf *m, int loc, int *off) 1664 { 1665 1666 while (loc >= 0) { 1667 /* Normal end of search */ 1668 if (m->m_len > loc) { 1669 *off = loc; 1670 return (m); 1671 } else { 1672 loc -= m->m_len; 1673 1674 if (m->m_next == NULL) { 1675 if (loc == 0) { 1676 /* Point at the end of valid data */ 1677 *off = m->m_len; 1678 return (m); 1679 } else 1680 return (NULL); 1681 } else 1682 m = m->m_next; 1683 } 1684 } 1685 1686 return (NULL); 1687 } 1688 1689 /* 1690 * m_ext_free: release a reference to the mbuf external storage. 1691 * 1692 * => free the mbuf m itsself as well. 1693 */ 1694 1695 void 1696 m_ext_free(struct mbuf *m) 1697 { 1698 bool embedded = MEXT_ISEMBEDDED(m); 1699 bool dofree = true; 1700 u_int refcnt; 1701 1702 KASSERT((m->m_flags & M_EXT) != 0); 1703 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1704 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1705 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1706 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1707 1708 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1709 refcnt = m->m_ext.ext_refcnt = 0; 1710 } else { 1711 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1712 } 1713 if (refcnt > 0) { 1714 if (embedded) { 1715 /* 1716 * other mbuf's m_ext_ref still points to us. 1717 */ 1718 dofree = false; 1719 } else { 1720 m->m_ext_ref = m; 1721 } 1722 } else { 1723 /* 1724 * dropping the last reference 1725 */ 1726 if (!embedded) { 1727 m->m_ext.ext_refcnt++; /* XXX */ 1728 m_ext_free(m->m_ext_ref); 1729 m->m_ext_ref = m; 1730 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 1731 pool_cache_put_paddr((struct pool_cache *) 1732 m->m_ext.ext_arg, 1733 m->m_ext.ext_buf, m->m_ext.ext_paddr); 1734 } else if (m->m_ext.ext_free) { 1735 (*m->m_ext.ext_free)(m, 1736 m->m_ext.ext_buf, m->m_ext.ext_size, 1737 m->m_ext.ext_arg); 1738 /* 1739 * 'm' is already freed by the ext_free callback. 1740 */ 1741 dofree = false; 1742 } else { 1743 free(m->m_ext.ext_buf, m->m_ext.ext_type); 1744 } 1745 } 1746 if (dofree) { 1747 m->m_type = MT_FREE; 1748 pool_cache_put(mb_cache, m); 1749 } 1750 } 1751 1752 #if defined(DDB) 1753 void 1754 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 1755 { 1756 char ch; 1757 bool opt_c = false; 1758 char buf[512]; 1759 1760 while ((ch = *(modif++)) != '\0') { 1761 switch (ch) { 1762 case 'c': 1763 opt_c = true; 1764 break; 1765 } 1766 } 1767 1768 nextchain: 1769 (*pr)("MBUF %p\n", m); 1770 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 1771 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 1772 m->m_data, m->m_len, m->m_type, buf); 1773 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 1774 m->m_nextpkt); 1775 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 1776 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 1777 (int)M_READONLY(m)); 1778 if ((m->m_flags & M_PKTHDR) != 0) { 1779 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 1780 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%" 1781 PRIx32 ", segsz=%u\n", 1782 m->m_pkthdr.len, m->m_pkthdr.rcvif, 1783 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 1784 } 1785 if ((m->m_flags & M_EXT)) { 1786 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 1787 "ext_free=%p, ext_arg=%p\n", 1788 m->m_ext.ext_refcnt, 1789 m->m_ext.ext_buf, m->m_ext.ext_size, 1790 m->m_ext.ext_free, m->m_ext.ext_arg); 1791 } 1792 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 1793 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 1794 vaddr_t eva = sva + m->m_ext.ext_size; 1795 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 1796 int i; 1797 1798 (*pr)(" pages:"); 1799 for (i = 0; i < n; i ++) { 1800 (*pr)(" %p", m->m_ext.ext_pgs[i]); 1801 } 1802 (*pr)("\n"); 1803 } 1804 1805 if (opt_c) { 1806 m = m->m_next; 1807 if (m != NULL) { 1808 goto nextchain; 1809 } 1810 } 1811 } 1812 #endif /* defined(DDB) */ 1813 1814 void 1815 mbstat_type_add(int type, int diff) 1816 { 1817 struct mbstat_cpu *mb; 1818 int s; 1819 1820 s = splvm(); 1821 mb = percpu_getref(mbstat_percpu); 1822 mb->m_mtypes[type] += diff; 1823 percpu_putref(mbstat_percpu); 1824 splx(s); 1825 } 1826 1827 #if defined(MBUFTRACE) 1828 void 1829 mowner_attach(struct mowner *mo) 1830 { 1831 1832 KASSERT(mo->mo_counters == NULL); 1833 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 1834 1835 /* XXX lock */ 1836 LIST_INSERT_HEAD(&mowners, mo, mo_link); 1837 } 1838 1839 void 1840 mowner_detach(struct mowner *mo) 1841 { 1842 1843 KASSERT(mo->mo_counters != NULL); 1844 1845 /* XXX lock */ 1846 LIST_REMOVE(mo, mo_link); 1847 1848 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 1849 mo->mo_counters = NULL; 1850 } 1851 1852 void 1853 mowner_init(struct mbuf *m, int type) 1854 { 1855 struct mowner_counter *mc; 1856 struct mowner *mo; 1857 int s; 1858 1859 m->m_owner = mo = &unknown_mowners[type]; 1860 s = splvm(); 1861 mc = percpu_getref(mo->mo_counters); 1862 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1863 percpu_putref(mo->mo_counters); 1864 splx(s); 1865 } 1866 1867 void 1868 mowner_ref(struct mbuf *m, int flags) 1869 { 1870 struct mowner *mo = m->m_owner; 1871 struct mowner_counter *mc; 1872 int s; 1873 1874 s = splvm(); 1875 mc = percpu_getref(mo->mo_counters); 1876 if ((flags & M_EXT) != 0) 1877 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1878 if ((flags & M_CLUSTER) != 0) 1879 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1880 percpu_putref(mo->mo_counters); 1881 splx(s); 1882 } 1883 1884 void 1885 mowner_revoke(struct mbuf *m, bool all, int flags) 1886 { 1887 struct mowner *mo = m->m_owner; 1888 struct mowner_counter *mc; 1889 int s; 1890 1891 s = splvm(); 1892 mc = percpu_getref(mo->mo_counters); 1893 if ((flags & M_EXT) != 0) 1894 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 1895 if ((flags & M_CLUSTER) != 0) 1896 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 1897 if (all) 1898 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 1899 percpu_putref(mo->mo_counters); 1900 splx(s); 1901 if (all) 1902 m->m_owner = &revoked_mowner; 1903 } 1904 1905 static void 1906 mowner_claim(struct mbuf *m, struct mowner *mo) 1907 { 1908 struct mowner_counter *mc; 1909 int flags = m->m_flags; 1910 int s; 1911 1912 s = splvm(); 1913 mc = percpu_getref(mo->mo_counters); 1914 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1915 if ((flags & M_EXT) != 0) 1916 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1917 if ((flags & M_CLUSTER) != 0) 1918 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1919 percpu_putref(mo->mo_counters); 1920 splx(s); 1921 m->m_owner = mo; 1922 } 1923 1924 void 1925 m_claim(struct mbuf *m, struct mowner *mo) 1926 { 1927 1928 if (m->m_owner == mo || mo == NULL) 1929 return; 1930 1931 mowner_revoke(m, true, m->m_flags); 1932 mowner_claim(m, mo); 1933 } 1934 #endif /* defined(MBUFTRACE) */ 1935