1 /* $NetBSD: uipc_mbuf.c,v 1.158 2014/02/25 18:30:11 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.158 2014/02/25 18:30:11 pooka Exp $"); 66 67 #include "opt_mbuftrace.h" 68 #include "opt_nmbclusters.h" 69 #include "opt_ddb.h" 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/atomic.h> 74 #include <sys/cpu.h> 75 #include <sys/proc.h> 76 #include <sys/mbuf.h> 77 #include <sys/kernel.h> 78 #include <sys/syslog.h> 79 #include <sys/domain.h> 80 #include <sys/protosw.h> 81 #include <sys/percpu.h> 82 #include <sys/pool.h> 83 #include <sys/socket.h> 84 #include <sys/sysctl.h> 85 86 #include <net/if.h> 87 88 pool_cache_t mb_cache; /* mbuf cache */ 89 pool_cache_t mcl_cache; /* mbuf cluster cache */ 90 91 struct mbstat mbstat; 92 int max_linkhdr; 93 int max_protohdr; 94 int max_hdr; 95 int max_datalen; 96 97 static int mb_ctor(void *, void *, int); 98 99 static void sysctl_kern_mbuf_setup(void); 100 101 static struct sysctllog *mbuf_sysctllog; 102 103 static struct mbuf *m_copym0(struct mbuf *, int, int, int, int); 104 static struct mbuf *m_split0(struct mbuf *, int, int, int); 105 static int m_copyback0(struct mbuf **, int, int, const void *, int, int); 106 107 /* flags for m_copyback0 */ 108 #define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */ 109 #define M_COPYBACK0_PRESERVE 0x0002 /* preserve original data */ 110 #define M_COPYBACK0_COW 0x0004 /* do copy-on-write */ 111 #define M_COPYBACK0_EXTEND 0x0008 /* extend chain */ 112 113 static const char mclpool_warnmsg[] = 114 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; 115 116 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 117 118 static percpu_t *mbstat_percpu; 119 120 #ifdef MBUFTRACE 121 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 122 struct mowner unknown_mowners[] = { 123 MOWNER_INIT("unknown", "free"), 124 MOWNER_INIT("unknown", "data"), 125 MOWNER_INIT("unknown", "header"), 126 MOWNER_INIT("unknown", "soname"), 127 MOWNER_INIT("unknown", "soopts"), 128 MOWNER_INIT("unknown", "ftable"), 129 MOWNER_INIT("unknown", "control"), 130 MOWNER_INIT("unknown", "oobdata"), 131 }; 132 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 133 #endif 134 135 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 136 137 #define MCLADDREFERENCE(o, n) \ 138 do { \ 139 KASSERT(((o)->m_flags & M_EXT) != 0); \ 140 KASSERT(((n)->m_flags & M_EXT) == 0); \ 141 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 142 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 143 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 144 (n)->m_ext_ref = (o)->m_ext_ref; \ 145 mowner_ref((n), (n)->m_flags); \ 146 MCLREFDEBUGN((n), __FILE__, __LINE__); \ 147 } while (/* CONSTCOND */ 0) 148 149 static int 150 nmbclusters_limit(void) 151 { 152 #if defined(PMAP_MAP_POOLPAGE) 153 /* direct mapping, doesn't use space in kmem_arena */ 154 vsize_t max_size = physmem / 4; 155 #else 156 vsize_t max_size = MIN(physmem / 4, nkmempages / 4); 157 #endif 158 159 max_size = max_size * PAGE_SIZE / MCLBYTES; 160 #ifdef NMBCLUSTERS_MAX 161 max_size = MIN(max_size, NMBCLUSTERS_MAX); 162 #endif 163 164 #ifdef NMBCLUSTERS 165 return MIN(max_size, NMBCLUSTERS); 166 #else 167 return max_size; 168 #endif 169 } 170 171 /* 172 * Initialize the mbuf allocator. 173 */ 174 void 175 mbinit(void) 176 { 177 178 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 179 CTASSERT(sizeof(struct mbuf) == MSIZE); 180 181 sysctl_kern_mbuf_setup(); 182 183 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 184 NULL, IPL_VM, mb_ctor, NULL, NULL); 185 KASSERT(mb_cache != NULL); 186 187 mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL, 188 IPL_VM, NULL, NULL, NULL); 189 KASSERT(mcl_cache != NULL); 190 191 pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL); 192 pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL); 193 194 /* 195 * Set an arbitrary default limit on the number of mbuf clusters. 196 */ 197 #ifdef NMBCLUSTERS 198 nmbclusters = nmbclusters_limit(); 199 #else 200 nmbclusters = MAX(1024, 201 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 202 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 203 #endif 204 205 /* 206 * Set the hard limit on the mclpool to the number of 207 * mbuf clusters the kernel is to support. Log the limit 208 * reached message max once a minute. 209 */ 210 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 211 212 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 213 214 /* 215 * Set a low water mark for both mbufs and clusters. This should 216 * help ensure that they can be allocated in a memory starvation 217 * situation. This is important for e.g. diskless systems which 218 * must allocate mbufs in order for the pagedaemon to clean pages. 219 */ 220 pool_cache_setlowat(mb_cache, mblowat); 221 pool_cache_setlowat(mcl_cache, mcllowat); 222 223 #ifdef MBUFTRACE 224 { 225 /* 226 * Attach the unknown mowners. 227 */ 228 int i; 229 MOWNER_ATTACH(&revoked_mowner); 230 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 231 i-- > 0; ) 232 MOWNER_ATTACH(&unknown_mowners[i]); 233 } 234 #endif 235 } 236 237 /* 238 * sysctl helper routine for the kern.mbuf subtree. 239 * nmbclusters, mblowat and mcllowat need range 240 * checking and pool tweaking after being reset. 241 */ 242 static int 243 sysctl_kern_mbuf(SYSCTLFN_ARGS) 244 { 245 int error, newval; 246 struct sysctlnode node; 247 248 node = *rnode; 249 node.sysctl_data = &newval; 250 switch (rnode->sysctl_num) { 251 case MBUF_NMBCLUSTERS: 252 case MBUF_MBLOWAT: 253 case MBUF_MCLLOWAT: 254 newval = *(int*)rnode->sysctl_data; 255 break; 256 default: 257 return (EOPNOTSUPP); 258 } 259 260 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 261 if (error || newp == NULL) 262 return (error); 263 if (newval < 0) 264 return (EINVAL); 265 266 switch (node.sysctl_num) { 267 case MBUF_NMBCLUSTERS: 268 if (newval < nmbclusters) 269 return (EINVAL); 270 if (newval > nmbclusters_limit()) 271 return (EINVAL); 272 nmbclusters = newval; 273 pool_cache_sethardlimit(mcl_cache, nmbclusters, 274 mclpool_warnmsg, 60); 275 break; 276 case MBUF_MBLOWAT: 277 mblowat = newval; 278 pool_cache_setlowat(mb_cache, mblowat); 279 break; 280 case MBUF_MCLLOWAT: 281 mcllowat = newval; 282 pool_cache_setlowat(mcl_cache, mcllowat); 283 break; 284 } 285 286 return (0); 287 } 288 289 #ifdef MBUFTRACE 290 static void 291 mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 292 { 293 struct mowner_counter *mc = v1; 294 struct mowner_user *mo_user = v2; 295 int i; 296 297 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 298 mo_user->mo_counter[i] += mc->mc_counter[i]; 299 } 300 } 301 302 static void 303 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 304 { 305 306 memset(mo_user, 0, sizeof(*mo_user)); 307 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 308 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 309 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 310 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 311 percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user); 312 } 313 314 static int 315 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 316 { 317 struct mowner *mo; 318 size_t len = 0; 319 int error = 0; 320 321 if (namelen != 0) 322 return (EINVAL); 323 if (newp != NULL) 324 return (EPERM); 325 326 LIST_FOREACH(mo, &mowners, mo_link) { 327 struct mowner_user mo_user; 328 329 mowner_convert_to_user(mo, &mo_user); 330 331 if (oldp != NULL) { 332 if (*oldlenp - len < sizeof(mo_user)) { 333 error = ENOMEM; 334 break; 335 } 336 error = copyout(&mo_user, (char *)oldp + len, 337 sizeof(mo_user)); 338 if (error) 339 break; 340 } 341 len += sizeof(mo_user); 342 } 343 344 if (error == 0) 345 *oldlenp = len; 346 347 return (error); 348 } 349 #endif /* MBUFTRACE */ 350 351 static void 352 mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 353 { 354 struct mbstat_cpu *mbsc = v1; 355 struct mbstat *mbs = v2; 356 int i; 357 358 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 359 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 360 } 361 } 362 363 static void 364 mbstat_convert_to_user(struct mbstat *mbs) 365 { 366 367 memset(mbs, 0, sizeof(*mbs)); 368 mbs->m_drain = mbstat.m_drain; 369 percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs); 370 } 371 372 static int 373 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 374 { 375 struct sysctlnode node; 376 struct mbstat mbs; 377 378 mbstat_convert_to_user(&mbs); 379 node = *rnode; 380 node.sysctl_data = &mbs; 381 node.sysctl_size = sizeof(mbs); 382 return sysctl_lookup(SYSCTLFN_CALL(&node)); 383 } 384 385 static void 386 sysctl_kern_mbuf_setup(void) 387 { 388 389 KASSERT(mbuf_sysctllog == NULL); 390 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 391 CTLFLAG_PERMANENT, 392 CTLTYPE_NODE, "mbuf", 393 SYSCTL_DESCR("mbuf control variables"), 394 NULL, 0, NULL, 0, 395 CTL_KERN, KERN_MBUF, CTL_EOL); 396 397 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 398 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 399 CTLTYPE_INT, "msize", 400 SYSCTL_DESCR("mbuf base size"), 401 NULL, msize, NULL, 0, 402 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 403 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 404 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 405 CTLTYPE_INT, "mclbytes", 406 SYSCTL_DESCR("mbuf cluster size"), 407 NULL, mclbytes, NULL, 0, 408 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 409 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 410 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 411 CTLTYPE_INT, "nmbclusters", 412 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 413 sysctl_kern_mbuf, 0, &nmbclusters, 0, 414 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 415 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 416 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 417 CTLTYPE_INT, "mblowat", 418 SYSCTL_DESCR("mbuf low water mark"), 419 sysctl_kern_mbuf, 0, &mblowat, 0, 420 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 421 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 422 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 423 CTLTYPE_INT, "mcllowat", 424 SYSCTL_DESCR("mbuf cluster low water mark"), 425 sysctl_kern_mbuf, 0, &mcllowat, 0, 426 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 427 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 428 CTLFLAG_PERMANENT, 429 CTLTYPE_STRUCT, "stats", 430 SYSCTL_DESCR("mbuf allocation statistics"), 431 sysctl_kern_mbuf_stats, 0, NULL, 0, 432 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 433 #ifdef MBUFTRACE 434 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 435 CTLFLAG_PERMANENT, 436 CTLTYPE_STRUCT, "mowners", 437 SYSCTL_DESCR("Information about mbuf owners"), 438 sysctl_kern_mbuf_mowners, 0, NULL, 0, 439 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 440 #endif /* MBUFTRACE */ 441 } 442 443 static int 444 mb_ctor(void *arg, void *object, int flags) 445 { 446 struct mbuf *m = object; 447 448 #ifdef POOL_VTOPHYS 449 m->m_paddr = POOL_VTOPHYS(m); 450 #else 451 m->m_paddr = M_PADDR_INVALID; 452 #endif 453 return (0); 454 } 455 456 /* 457 * Add mbuf to the end of a chain 458 */ 459 struct mbuf * 460 m_add(struct mbuf *c, struct mbuf *m) { 461 struct mbuf *n; 462 463 if (c == NULL) 464 return m; 465 466 for (n = c; n->m_next != NULL; n = n->m_next) 467 continue; 468 n->m_next = m; 469 return c; 470 } 471 472 /* 473 * Set the m_data pointer of a newly-allocated mbuf 474 * to place an object of the specified size at the 475 * end of the mbuf, longword aligned. 476 */ 477 void 478 m_align(struct mbuf *m, int len) 479 { 480 int adjust; 481 482 KASSERT(len != M_COPYALL); 483 484 if (m->m_flags & M_EXT) 485 adjust = m->m_ext.ext_size - len; 486 else if (m->m_flags & M_PKTHDR) 487 adjust = MHLEN - len; 488 else 489 adjust = MLEN - len; 490 m->m_data += adjust &~ (sizeof(long)-1); 491 } 492 493 /* 494 * Append the specified data to the indicated mbuf chain, 495 * Extend the mbuf chain if the new data does not fit in 496 * existing space. 497 * 498 * Return 1 if able to complete the job; otherwise 0. 499 */ 500 int 501 m_append(struct mbuf *m0, int len, const void *cpv) 502 { 503 struct mbuf *m, *n; 504 int remainder, space; 505 const char *cp = cpv; 506 507 KASSERT(len != M_COPYALL); 508 for (m = m0; m->m_next != NULL; m = m->m_next) 509 continue; 510 remainder = len; 511 space = M_TRAILINGSPACE(m); 512 if (space > 0) { 513 /* 514 * Copy into available space. 515 */ 516 if (space > remainder) 517 space = remainder; 518 memmove(mtod(m, char *) + m->m_len, cp, space); 519 m->m_len += space; 520 cp = cp + space, remainder -= space; 521 } 522 while (remainder > 0) { 523 /* 524 * Allocate a new mbuf; could check space 525 * and allocate a cluster instead. 526 */ 527 n = m_get(M_DONTWAIT, m->m_type); 528 if (n == NULL) 529 break; 530 n->m_len = min(MLEN, remainder); 531 memmove(mtod(n, void *), cp, n->m_len); 532 cp += n->m_len, remainder -= n->m_len; 533 m->m_next = n; 534 m = n; 535 } 536 if (m0->m_flags & M_PKTHDR) 537 m0->m_pkthdr.len += len - remainder; 538 return (remainder == 0); 539 } 540 541 void 542 m_reclaim(void *arg, int flags) 543 { 544 struct domain *dp; 545 const struct protosw *pr; 546 struct ifnet *ifp; 547 int s; 548 549 KERNEL_LOCK(1, NULL); 550 s = splvm(); 551 DOMAIN_FOREACH(dp) { 552 for (pr = dp->dom_protosw; 553 pr < dp->dom_protoswNPROTOSW; pr++) 554 if (pr->pr_drain) 555 (*pr->pr_drain)(); 556 } 557 IFNET_FOREACH(ifp) { 558 if (ifp->if_drain) 559 (*ifp->if_drain)(ifp); 560 } 561 splx(s); 562 mbstat.m_drain++; 563 KERNEL_UNLOCK_ONE(NULL); 564 } 565 566 /* 567 * Space allocation routines. 568 * These are also available as macros 569 * for critical paths. 570 */ 571 struct mbuf * 572 m_get(int nowait, int type) 573 { 574 struct mbuf *m; 575 576 KASSERT(type != MT_FREE); 577 578 m = pool_cache_get(mb_cache, 579 nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0); 580 if (m == NULL) 581 return NULL; 582 583 mbstat_type_add(type, 1); 584 mowner_init(m, type); 585 m->m_ext_ref = m; 586 m->m_type = type; 587 m->m_len = 0; 588 m->m_next = NULL; 589 m->m_nextpkt = NULL; 590 m->m_data = m->m_dat; 591 m->m_flags = 0; 592 593 return m; 594 } 595 596 struct mbuf * 597 m_gethdr(int nowait, int type) 598 { 599 struct mbuf *m; 600 601 m = m_get(nowait, type); 602 if (m == NULL) 603 return NULL; 604 605 m->m_data = m->m_pktdat; 606 m->m_flags = M_PKTHDR; 607 m->m_pkthdr.rcvif = NULL; 608 m->m_pkthdr.len = 0; 609 m->m_pkthdr.csum_flags = 0; 610 m->m_pkthdr.csum_data = 0; 611 SLIST_INIT(&m->m_pkthdr.tags); 612 613 return m; 614 } 615 616 struct mbuf * 617 m_getclr(int nowait, int type) 618 { 619 struct mbuf *m; 620 621 m = m_get(nowait, type); 622 if (m == 0) 623 return (NULL); 624 memset(mtod(m, void *), 0, MLEN); 625 return (m); 626 } 627 628 void 629 m_clget(struct mbuf *m, int nowait) 630 { 631 632 MCLGET(m, nowait); 633 } 634 635 struct mbuf * 636 m_free(struct mbuf *m) 637 { 638 struct mbuf *n; 639 640 MFREE(m, n); 641 return (n); 642 } 643 644 void 645 m_freem(struct mbuf *m) 646 { 647 struct mbuf *n; 648 649 if (m == NULL) 650 return; 651 do { 652 MFREE(m, n); 653 m = n; 654 } while (m); 655 } 656 657 #ifdef MBUFTRACE 658 /* 659 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain. 660 */ 661 void 662 m_claimm(struct mbuf *m, struct mowner *mo) 663 { 664 665 for (; m != NULL; m = m->m_next) 666 MCLAIM(m, mo); 667 } 668 #endif 669 670 /* 671 * Mbuffer utility routines. 672 */ 673 674 /* 675 * Lesser-used path for M_PREPEND: 676 * allocate new mbuf to prepend to chain, 677 * copy junk along. 678 */ 679 struct mbuf * 680 m_prepend(struct mbuf *m, int len, int how) 681 { 682 struct mbuf *mn; 683 684 KASSERT(len != M_COPYALL); 685 mn = m_get(how, m->m_type); 686 if (mn == NULL) { 687 m_freem(m); 688 return (NULL); 689 } 690 if (m->m_flags & M_PKTHDR) { 691 M_MOVE_PKTHDR(mn, m); 692 } else { 693 MCLAIM(mn, m->m_owner); 694 } 695 mn->m_next = m; 696 m = mn; 697 if (len < MHLEN) 698 MH_ALIGN(m, len); 699 m->m_len = len; 700 return (m); 701 } 702 703 /* 704 * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 705 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 706 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. 707 */ 708 int MCFail; 709 710 struct mbuf * 711 m_copym(struct mbuf *m, int off0, int len, int wait) 712 { 713 714 return m_copym0(m, off0, len, wait, 0); /* shallow copy on M_EXT */ 715 } 716 717 struct mbuf * 718 m_dup(struct mbuf *m, int off0, int len, int wait) 719 { 720 721 return m_copym0(m, off0, len, wait, 1); /* deep copy */ 722 } 723 724 static inline int 725 m_copylen(int len, int copylen) { 726 return len == M_COPYALL ? copylen : min(len, copylen); 727 } 728 729 static struct mbuf * 730 m_copym0(struct mbuf *m, int off0, int len, int wait, int deep) 731 { 732 struct mbuf *n, **np; 733 int off = off0; 734 struct mbuf *top; 735 int copyhdr = 0; 736 737 if (off < 0 || (len != M_COPYALL && len < 0)) 738 panic("m_copym: off %d, len %d", off, len); 739 if (off == 0 && m->m_flags & M_PKTHDR) 740 copyhdr = 1; 741 while (off > 0) { 742 if (m == 0) 743 panic("m_copym: m == 0, off %d", off); 744 if (off < m->m_len) 745 break; 746 off -= m->m_len; 747 m = m->m_next; 748 } 749 np = ⊤ 750 top = 0; 751 while (len == M_COPYALL || len > 0) { 752 if (m == 0) { 753 if (len != M_COPYALL) 754 panic("m_copym: m == 0, len %d [!COPYALL]", 755 len); 756 break; 757 } 758 n = m_get(wait, m->m_type); 759 *np = n; 760 if (n == 0) 761 goto nospace; 762 MCLAIM(n, m->m_owner); 763 if (copyhdr) { 764 M_COPY_PKTHDR(n, m); 765 if (len == M_COPYALL) 766 n->m_pkthdr.len -= off0; 767 else 768 n->m_pkthdr.len = len; 769 copyhdr = 0; 770 } 771 n->m_len = m_copylen(len, m->m_len - off); 772 if (m->m_flags & M_EXT) { 773 if (!deep) { 774 n->m_data = m->m_data + off; 775 MCLADDREFERENCE(m, n); 776 } else { 777 /* 778 * we are unsure about the way m was allocated. 779 * copy into multiple MCLBYTES cluster mbufs. 780 */ 781 MCLGET(n, wait); 782 n->m_len = M_TRAILINGSPACE(n); 783 n->m_len = m_copylen(len, n->m_len); 784 n->m_len = min(n->m_len, m->m_len - off); 785 memcpy(mtod(n, void *), mtod(m, char *) + off, 786 (unsigned)n->m_len); 787 } 788 } else 789 memcpy(mtod(n, void *), mtod(m, char *) + off, 790 (unsigned)n->m_len); 791 if (len != M_COPYALL) 792 len -= n->m_len; 793 off += n->m_len; 794 #ifdef DIAGNOSTIC 795 if (off > m->m_len) 796 panic("m_copym0 overrun %d %d", off, m->m_len); 797 #endif 798 if (off == m->m_len) { 799 m = m->m_next; 800 off = 0; 801 } 802 np = &n->m_next; 803 } 804 if (top == 0) 805 MCFail++; 806 return (top); 807 nospace: 808 m_freem(top); 809 MCFail++; 810 return (NULL); 811 } 812 813 /* 814 * Copy an entire packet, including header (which must be present). 815 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 816 */ 817 struct mbuf * 818 m_copypacket(struct mbuf *m, int how) 819 { 820 struct mbuf *top, *n, *o; 821 822 n = m_get(how, m->m_type); 823 top = n; 824 if (!n) 825 goto nospace; 826 827 MCLAIM(n, m->m_owner); 828 M_COPY_PKTHDR(n, m); 829 n->m_len = m->m_len; 830 if (m->m_flags & M_EXT) { 831 n->m_data = m->m_data; 832 MCLADDREFERENCE(m, n); 833 } else { 834 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 835 } 836 837 m = m->m_next; 838 while (m) { 839 o = m_get(how, m->m_type); 840 if (!o) 841 goto nospace; 842 843 MCLAIM(o, m->m_owner); 844 n->m_next = o; 845 n = n->m_next; 846 847 n->m_len = m->m_len; 848 if (m->m_flags & M_EXT) { 849 n->m_data = m->m_data; 850 MCLADDREFERENCE(m, n); 851 } else { 852 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 853 } 854 855 m = m->m_next; 856 } 857 return top; 858 nospace: 859 m_freem(top); 860 MCFail++; 861 return NULL; 862 } 863 864 /* 865 * Copy data from an mbuf chain starting "off" bytes from the beginning, 866 * continuing for "len" bytes, into the indicated buffer. 867 */ 868 void 869 m_copydata(struct mbuf *m, int off, int len, void *vp) 870 { 871 unsigned count; 872 void * cp = vp; 873 struct mbuf *m0 = m; 874 int len0 = len; 875 int off0 = off; 876 void *vp0 = vp; 877 878 KASSERT(len != M_COPYALL); 879 if (off < 0 || len < 0) 880 panic("m_copydata: off %d, len %d", off, len); 881 while (off > 0) { 882 if (m == NULL) 883 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", 884 m0, len0, off0, vp0, off, off0 - off); 885 if (off < m->m_len) 886 break; 887 off -= m->m_len; 888 m = m->m_next; 889 } 890 while (len > 0) { 891 if (m == NULL) 892 panic("m_copydata(%p,%d,%d,%p): " 893 "m=NULL, off=%d (%d), len=%d (%d)", 894 m0, len0, off0, vp0, 895 off, off0 - off, len, len0 - len); 896 count = min(m->m_len - off, len); 897 memcpy(cp, mtod(m, char *) + off, count); 898 len -= count; 899 cp = (char *)cp + count; 900 off = 0; 901 m = m->m_next; 902 } 903 } 904 905 /* 906 * Concatenate mbuf chain n to m. 907 * n might be copied into m (when n->m_len is small), therefore data portion of 908 * n could be copied into an mbuf of different mbuf type. 909 * Any m_pkthdr is not updated. 910 */ 911 void 912 m_cat(struct mbuf *m, struct mbuf *n) 913 { 914 915 while (m->m_next) 916 m = m->m_next; 917 while (n) { 918 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 919 /* just join the two chains */ 920 m->m_next = n; 921 return; 922 } 923 /* splat the data from one into the other */ 924 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 925 (u_int)n->m_len); 926 m->m_len += n->m_len; 927 n = m_free(n); 928 } 929 } 930 931 void 932 m_adj(struct mbuf *mp, int req_len) 933 { 934 int len = req_len; 935 struct mbuf *m; 936 int count; 937 938 if ((m = mp) == NULL) 939 return; 940 if (len >= 0) { 941 /* 942 * Trim from head. 943 */ 944 while (m != NULL && len > 0) { 945 if (m->m_len <= len) { 946 len -= m->m_len; 947 m->m_len = 0; 948 m = m->m_next; 949 } else { 950 m->m_len -= len; 951 m->m_data += len; 952 len = 0; 953 } 954 } 955 m = mp; 956 if (mp->m_flags & M_PKTHDR) 957 m->m_pkthdr.len -= (req_len - len); 958 } else { 959 /* 960 * Trim from tail. Scan the mbuf chain, 961 * calculating its length and finding the last mbuf. 962 * If the adjustment only affects this mbuf, then just 963 * adjust and return. Otherwise, rescan and truncate 964 * after the remaining size. 965 */ 966 len = -len; 967 count = 0; 968 for (;;) { 969 count += m->m_len; 970 if (m->m_next == (struct mbuf *)0) 971 break; 972 m = m->m_next; 973 } 974 if (m->m_len >= len) { 975 m->m_len -= len; 976 if (mp->m_flags & M_PKTHDR) 977 mp->m_pkthdr.len -= len; 978 return; 979 } 980 count -= len; 981 if (count < 0) 982 count = 0; 983 /* 984 * Correct length for chain is "count". 985 * Find the mbuf with last data, adjust its length, 986 * and toss data from remaining mbufs on chain. 987 */ 988 m = mp; 989 if (m->m_flags & M_PKTHDR) 990 m->m_pkthdr.len = count; 991 for (; m; m = m->m_next) { 992 if (m->m_len >= count) { 993 m->m_len = count; 994 break; 995 } 996 count -= m->m_len; 997 } 998 if (m) 999 while (m->m_next) 1000 (m = m->m_next)->m_len = 0; 1001 } 1002 } 1003 1004 /* 1005 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 1006 * would be contiguous and in the data area of an mbuf (therefore, mtod() 1007 * would work for a structure of given length). 1008 * 1009 * => On success, returns true and the resulting mbuf chain; false otherwise. 1010 * => The mbuf chain may change, but is always preserved valid. 1011 */ 1012 bool 1013 m_ensure_contig(struct mbuf **m0, int len) 1014 { 1015 struct mbuf *n = *m0, *m; 1016 size_t count, space; 1017 1018 KASSERT(len != M_COPYALL); 1019 /* 1020 * If first mbuf has no cluster, and has room for len bytes 1021 * without shifting current data, pullup into it, 1022 * otherwise allocate a new mbuf to prepend to the chain. 1023 */ 1024 if ((n->m_flags & M_EXT) == 0 && 1025 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 1026 if (n->m_len >= len) { 1027 return true; 1028 } 1029 m = n; 1030 n = n->m_next; 1031 len -= m->m_len; 1032 } else { 1033 if (len > MHLEN) { 1034 return false; 1035 } 1036 m = m_get(M_DONTWAIT, n->m_type); 1037 if (m == NULL) { 1038 return false; 1039 } 1040 MCLAIM(m, n->m_owner); 1041 if (n->m_flags & M_PKTHDR) { 1042 M_MOVE_PKTHDR(m, n); 1043 } 1044 } 1045 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1046 do { 1047 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 1048 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1049 (unsigned)count); 1050 len -= count; 1051 m->m_len += count; 1052 n->m_len -= count; 1053 space -= count; 1054 if (n->m_len) 1055 n->m_data += count; 1056 else 1057 n = m_free(n); 1058 } while (len > 0 && n); 1059 1060 m->m_next = n; 1061 *m0 = m; 1062 1063 return len <= 0; 1064 } 1065 1066 /* 1067 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 1068 */ 1069 int MPFail; 1070 1071 struct mbuf * 1072 m_pullup(struct mbuf *n, int len) 1073 { 1074 struct mbuf *m = n; 1075 1076 KASSERT(len != M_COPYALL); 1077 if (!m_ensure_contig(&m, len)) { 1078 KASSERT(m != NULL); 1079 m_freem(m); 1080 MPFail++; 1081 m = NULL; 1082 } 1083 return m; 1084 } 1085 1086 /* 1087 * Like m_pullup(), except a new mbuf is always allocated, and we allow 1088 * the amount of empty space before the data in the new mbuf to be specified 1089 * (in the event that the caller expects to prepend later). 1090 */ 1091 int MSFail; 1092 1093 struct mbuf * 1094 m_copyup(struct mbuf *n, int len, int dstoff) 1095 { 1096 struct mbuf *m; 1097 int count, space; 1098 1099 KASSERT(len != M_COPYALL); 1100 if (len > (MHLEN - dstoff)) 1101 goto bad; 1102 m = m_get(M_DONTWAIT, n->m_type); 1103 if (m == NULL) 1104 goto bad; 1105 MCLAIM(m, n->m_owner); 1106 if (n->m_flags & M_PKTHDR) { 1107 M_MOVE_PKTHDR(m, n); 1108 } 1109 m->m_data += dstoff; 1110 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1111 do { 1112 count = min(min(max(len, max_protohdr), space), n->m_len); 1113 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1114 (unsigned)count); 1115 len -= count; 1116 m->m_len += count; 1117 n->m_len -= count; 1118 space -= count; 1119 if (n->m_len) 1120 n->m_data += count; 1121 else 1122 n = m_free(n); 1123 } while (len > 0 && n); 1124 if (len > 0) { 1125 (void) m_free(m); 1126 goto bad; 1127 } 1128 m->m_next = n; 1129 return (m); 1130 bad: 1131 m_freem(n); 1132 MSFail++; 1133 return (NULL); 1134 } 1135 1136 /* 1137 * Partition an mbuf chain in two pieces, returning the tail -- 1138 * all but the first len0 bytes. In case of failure, it returns NULL and 1139 * attempts to restore the chain to its original state. 1140 */ 1141 struct mbuf * 1142 m_split(struct mbuf *m0, int len0, int wait) 1143 { 1144 1145 return m_split0(m0, len0, wait, 1); 1146 } 1147 1148 static struct mbuf * 1149 m_split0(struct mbuf *m0, int len0, int wait, int copyhdr) 1150 { 1151 struct mbuf *m, *n; 1152 unsigned len = len0, remain, len_save; 1153 1154 KASSERT(len0 != M_COPYALL); 1155 for (m = m0; m && len > m->m_len; m = m->m_next) 1156 len -= m->m_len; 1157 if (m == 0) 1158 return (NULL); 1159 remain = m->m_len - len; 1160 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1161 n = m_gethdr(wait, m0->m_type); 1162 if (n == NULL) 1163 return NULL; 1164 MCLAIM(n, m0->m_owner); 1165 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 1166 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1167 len_save = m0->m_pkthdr.len; 1168 m0->m_pkthdr.len = len0; 1169 if (m->m_flags & M_EXT) 1170 goto extpacket; 1171 if (remain > MHLEN) { 1172 /* m can't be the lead packet */ 1173 MH_ALIGN(n, 0); 1174 n->m_len = 0; 1175 n->m_next = m_split(m, len, wait); 1176 if (n->m_next == 0) { 1177 (void) m_free(n); 1178 m0->m_pkthdr.len = len_save; 1179 return (NULL); 1180 } else 1181 return (n); 1182 } else 1183 MH_ALIGN(n, remain); 1184 } else if (remain == 0) { 1185 n = m->m_next; 1186 m->m_next = 0; 1187 return (n); 1188 } else { 1189 n = m_get(wait, m->m_type); 1190 if (n == 0) 1191 return (NULL); 1192 MCLAIM(n, m->m_owner); 1193 M_ALIGN(n, remain); 1194 } 1195 extpacket: 1196 if (m->m_flags & M_EXT) { 1197 n->m_data = m->m_data + len; 1198 MCLADDREFERENCE(m, n); 1199 } else { 1200 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1201 } 1202 n->m_len = remain; 1203 m->m_len = len; 1204 n->m_next = m->m_next; 1205 m->m_next = 0; 1206 return (n); 1207 } 1208 /* 1209 * Routine to copy from device local memory into mbufs. 1210 */ 1211 struct mbuf * 1212 m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 1213 void (*copy)(const void *from, void *to, size_t len)) 1214 { 1215 struct mbuf *m; 1216 struct mbuf *top = 0, **mp = ⊤ 1217 int off = off0, len; 1218 char *cp; 1219 char *epkt; 1220 1221 cp = buf; 1222 epkt = cp + totlen; 1223 if (off) { 1224 /* 1225 * If 'off' is non-zero, packet is trailer-encapsulated, 1226 * so we have to skip the type and length fields. 1227 */ 1228 cp += off + 2 * sizeof(uint16_t); 1229 totlen -= 2 * sizeof(uint16_t); 1230 } 1231 m = m_gethdr(M_DONTWAIT, MT_DATA); 1232 if (m == NULL) 1233 return NULL; 1234 m->m_pkthdr.rcvif = ifp; 1235 m->m_pkthdr.len = totlen; 1236 m->m_len = MHLEN; 1237 1238 while (totlen > 0) { 1239 if (top) { 1240 m = m_get(M_DONTWAIT, MT_DATA); 1241 if (m == 0) { 1242 m_freem(top); 1243 return (NULL); 1244 } 1245 m->m_len = MLEN; 1246 } 1247 len = min(totlen, epkt - cp); 1248 if (len >= MINCLSIZE) { 1249 MCLGET(m, M_DONTWAIT); 1250 if ((m->m_flags & M_EXT) == 0) { 1251 m_free(m); 1252 m_freem(top); 1253 return (NULL); 1254 } 1255 m->m_len = len = min(len, MCLBYTES); 1256 } else { 1257 /* 1258 * Place initial small packet/header at end of mbuf. 1259 */ 1260 if (len < m->m_len) { 1261 if (top == 0 && len + max_linkhdr <= m->m_len) 1262 m->m_data += max_linkhdr; 1263 m->m_len = len; 1264 } else 1265 len = m->m_len; 1266 } 1267 if (copy) 1268 copy(cp, mtod(m, void *), (size_t)len); 1269 else 1270 memcpy(mtod(m, void *), cp, (size_t)len); 1271 cp += len; 1272 *mp = m; 1273 mp = &m->m_next; 1274 totlen -= len; 1275 if (cp == epkt) 1276 cp = buf; 1277 } 1278 return (top); 1279 } 1280 1281 /* 1282 * Copy data from a buffer back into the indicated mbuf chain, 1283 * starting "off" bytes from the beginning, extending the mbuf 1284 * chain if necessary. 1285 */ 1286 void 1287 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1288 { 1289 #if defined(DEBUG) 1290 struct mbuf *origm = m0; 1291 int error; 1292 #endif /* defined(DEBUG) */ 1293 1294 if (m0 == NULL) 1295 return; 1296 1297 #if defined(DEBUG) 1298 error = 1299 #endif /* defined(DEBUG) */ 1300 m_copyback0(&m0, off, len, cp, 1301 M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT); 1302 1303 #if defined(DEBUG) 1304 if (error != 0 || (m0 != NULL && origm != m0)) 1305 panic("m_copyback"); 1306 #endif /* defined(DEBUG) */ 1307 } 1308 1309 struct mbuf * 1310 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1311 { 1312 int error; 1313 1314 /* don't support chain expansion */ 1315 KASSERT(len != M_COPYALL); 1316 KDASSERT(off + len <= m_length(m0)); 1317 1318 error = m_copyback0(&m0, off, len, cp, 1319 M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how); 1320 if (error) { 1321 /* 1322 * no way to recover from partial success. 1323 * just free the chain. 1324 */ 1325 m_freem(m0); 1326 return NULL; 1327 } 1328 return m0; 1329 } 1330 1331 /* 1332 * m_makewritable: ensure the specified range writable. 1333 */ 1334 int 1335 m_makewritable(struct mbuf **mp, int off, int len, int how) 1336 { 1337 int error; 1338 #if defined(DEBUG) 1339 int origlen = m_length(*mp); 1340 #endif /* defined(DEBUG) */ 1341 1342 error = m_copyback0(mp, off, len, NULL, 1343 M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how); 1344 1345 #if defined(DEBUG) 1346 int reslen = 0; 1347 for (struct mbuf *n = *mp; n; n = n->m_next) 1348 reslen += n->m_len; 1349 if (origlen != reslen) 1350 panic("m_makewritable: length changed"); 1351 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1352 panic("m_makewritable: inconsist"); 1353 #endif /* defined(DEBUG) */ 1354 1355 return error; 1356 } 1357 1358 /* 1359 * Copy the mbuf chain to a new mbuf chain that is as short as possible. 1360 * Return the new mbuf chain on success, NULL on failure. On success, 1361 * free the old mbuf chain. 1362 */ 1363 struct mbuf * 1364 m_defrag(struct mbuf *mold, int flags) 1365 { 1366 struct mbuf *m0, *mn, *n; 1367 size_t sz = mold->m_pkthdr.len; 1368 1369 #ifdef DIAGNOSTIC 1370 if ((mold->m_flags & M_PKTHDR) == 0) 1371 panic("m_defrag: not a mbuf chain header"); 1372 #endif 1373 1374 m0 = m_gethdr(flags, MT_DATA); 1375 if (m0 == NULL) 1376 return NULL; 1377 M_COPY_PKTHDR(m0, mold); 1378 mn = m0; 1379 1380 do { 1381 if (sz > MHLEN) { 1382 MCLGET(mn, M_DONTWAIT); 1383 if ((mn->m_flags & M_EXT) == 0) { 1384 m_freem(m0); 1385 return NULL; 1386 } 1387 } 1388 1389 mn->m_len = MIN(sz, MCLBYTES); 1390 1391 m_copydata(mold, mold->m_pkthdr.len - sz, mn->m_len, 1392 mtod(mn, void *)); 1393 1394 sz -= mn->m_len; 1395 1396 if (sz > 0) { 1397 /* need more mbufs */ 1398 n = m_get(M_NOWAIT, MT_DATA); 1399 if (n == NULL) { 1400 m_freem(m0); 1401 return NULL; 1402 } 1403 1404 mn->m_next = n; 1405 mn = n; 1406 } 1407 } while (sz > 0); 1408 1409 m_freem(mold); 1410 1411 return m0; 1412 } 1413 1414 int 1415 m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags, 1416 int how) 1417 { 1418 int mlen; 1419 struct mbuf *m, *n; 1420 struct mbuf **mp; 1421 int totlen = 0; 1422 const char *cp = vp; 1423 1424 KASSERT(mp0 != NULL); 1425 KASSERT(*mp0 != NULL); 1426 KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL); 1427 KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL); 1428 1429 if (len == M_COPYALL) 1430 len = m_length(*mp0) - off; 1431 1432 /* 1433 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW, 1434 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive. 1435 */ 1436 1437 KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0); 1438 1439 mp = mp0; 1440 m = *mp; 1441 while (off > (mlen = m->m_len)) { 1442 off -= mlen; 1443 totlen += mlen; 1444 if (m->m_next == NULL) { 1445 int tspace; 1446 extend: 1447 if ((flags & M_COPYBACK0_EXTEND) == 0) 1448 goto out; 1449 1450 /* 1451 * try to make some space at the end of "m". 1452 */ 1453 1454 mlen = m->m_len; 1455 if (off + len >= MINCLSIZE && 1456 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1457 MCLGET(m, how); 1458 } 1459 tspace = M_TRAILINGSPACE(m); 1460 if (tspace > 0) { 1461 tspace = min(tspace, off + len); 1462 KASSERT(tspace > 0); 1463 memset(mtod(m, char *) + m->m_len, 0, 1464 min(off, tspace)); 1465 m->m_len += tspace; 1466 off += mlen; 1467 totlen -= mlen; 1468 continue; 1469 } 1470 1471 /* 1472 * need to allocate an mbuf. 1473 */ 1474 1475 if (off + len >= MINCLSIZE) { 1476 n = m_getcl(how, m->m_type, 0); 1477 } else { 1478 n = m_get(how, m->m_type); 1479 } 1480 if (n == NULL) { 1481 goto out; 1482 } 1483 n->m_len = min(M_TRAILINGSPACE(n), off + len); 1484 memset(mtod(n, char *), 0, min(n->m_len, off)); 1485 m->m_next = n; 1486 } 1487 mp = &m->m_next; 1488 m = m->m_next; 1489 } 1490 while (len > 0) { 1491 mlen = m->m_len - off; 1492 if (mlen != 0 && M_READONLY(m)) { 1493 char *datap; 1494 int eatlen; 1495 1496 /* 1497 * this mbuf is read-only. 1498 * allocate a new writable mbuf and try again. 1499 */ 1500 1501 #if defined(DIAGNOSTIC) 1502 if ((flags & M_COPYBACK0_COW) == 0) 1503 panic("m_copyback0: read-only"); 1504 #endif /* defined(DIAGNOSTIC) */ 1505 1506 /* 1507 * if we're going to write into the middle of 1508 * a mbuf, split it first. 1509 */ 1510 if (off > 0) { 1511 n = m_split0(m, off, how, 0); 1512 if (n == NULL) 1513 goto enobufs; 1514 m->m_next = n; 1515 mp = &m->m_next; 1516 m = n; 1517 off = 0; 1518 continue; 1519 } 1520 1521 /* 1522 * XXX TODO coalesce into the trailingspace of 1523 * the previous mbuf when possible. 1524 */ 1525 1526 /* 1527 * allocate a new mbuf. copy packet header if needed. 1528 */ 1529 n = m_get(how, m->m_type); 1530 if (n == NULL) 1531 goto enobufs; 1532 MCLAIM(n, m->m_owner); 1533 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1534 M_MOVE_PKTHDR(n, m); 1535 n->m_len = MHLEN; 1536 } else { 1537 if (len >= MINCLSIZE) 1538 MCLGET(n, M_DONTWAIT); 1539 n->m_len = 1540 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1541 } 1542 if (n->m_len > len) 1543 n->m_len = len; 1544 1545 /* 1546 * free the region which has been overwritten. 1547 * copying data from old mbufs if requested. 1548 */ 1549 if (flags & M_COPYBACK0_PRESERVE) 1550 datap = mtod(n, char *); 1551 else 1552 datap = NULL; 1553 eatlen = n->m_len; 1554 while (m != NULL && M_READONLY(m) && 1555 n->m_type == m->m_type && eatlen > 0) { 1556 mlen = min(eatlen, m->m_len); 1557 if (datap) { 1558 m_copydata(m, 0, mlen, datap); 1559 datap += mlen; 1560 } 1561 m->m_data += mlen; 1562 m->m_len -= mlen; 1563 eatlen -= mlen; 1564 if (m->m_len == 0) 1565 *mp = m = m_free(m); 1566 } 1567 if (eatlen > 0) 1568 n->m_len -= eatlen; 1569 n->m_next = m; 1570 *mp = m = n; 1571 continue; 1572 } 1573 mlen = min(mlen, len); 1574 if (flags & M_COPYBACK0_COPYBACK) { 1575 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1576 cp += mlen; 1577 } 1578 len -= mlen; 1579 mlen += off; 1580 off = 0; 1581 totlen += mlen; 1582 if (len == 0) 1583 break; 1584 if (m->m_next == NULL) { 1585 goto extend; 1586 } 1587 mp = &m->m_next; 1588 m = m->m_next; 1589 } 1590 out: if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1591 KASSERT((flags & M_COPYBACK0_EXTEND) != 0); 1592 m->m_pkthdr.len = totlen; 1593 } 1594 1595 return 0; 1596 1597 enobufs: 1598 return ENOBUFS; 1599 } 1600 1601 void 1602 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1603 { 1604 1605 KASSERT((to->m_flags & M_EXT) == 0); 1606 KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL); 1607 KASSERT((from->m_flags & M_PKTHDR) != 0); 1608 1609 to->m_pkthdr = from->m_pkthdr; 1610 to->m_flags = from->m_flags & M_COPYFLAGS; 1611 to->m_data = to->m_pktdat; 1612 1613 from->m_flags &= ~M_PKTHDR; 1614 } 1615 1616 /* 1617 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1618 * beginning, continuing for "len" bytes. 1619 */ 1620 int 1621 m_apply(struct mbuf *m, int off, int len, 1622 int (*f)(void *, void *, unsigned int), void *arg) 1623 { 1624 unsigned int count; 1625 int rval; 1626 1627 KASSERT(len != M_COPYALL); 1628 KASSERT(len >= 0); 1629 KASSERT(off >= 0); 1630 1631 while (off > 0) { 1632 KASSERT(m != NULL); 1633 if (off < m->m_len) 1634 break; 1635 off -= m->m_len; 1636 m = m->m_next; 1637 } 1638 while (len > 0) { 1639 KASSERT(m != NULL); 1640 count = min(m->m_len - off, len); 1641 1642 rval = (*f)(arg, mtod(m, char *) + off, count); 1643 if (rval) 1644 return (rval); 1645 1646 len -= count; 1647 off = 0; 1648 m = m->m_next; 1649 } 1650 1651 return (0); 1652 } 1653 1654 /* 1655 * Return a pointer to mbuf/offset of location in mbuf chain. 1656 */ 1657 struct mbuf * 1658 m_getptr(struct mbuf *m, int loc, int *off) 1659 { 1660 1661 while (loc >= 0) { 1662 /* Normal end of search */ 1663 if (m->m_len > loc) { 1664 *off = loc; 1665 return (m); 1666 } else { 1667 loc -= m->m_len; 1668 1669 if (m->m_next == NULL) { 1670 if (loc == 0) { 1671 /* Point at the end of valid data */ 1672 *off = m->m_len; 1673 return (m); 1674 } else 1675 return (NULL); 1676 } else 1677 m = m->m_next; 1678 } 1679 } 1680 1681 return (NULL); 1682 } 1683 1684 /* 1685 * m_ext_free: release a reference to the mbuf external storage. 1686 * 1687 * => free the mbuf m itsself as well. 1688 */ 1689 1690 void 1691 m_ext_free(struct mbuf *m) 1692 { 1693 bool embedded = MEXT_ISEMBEDDED(m); 1694 bool dofree = true; 1695 u_int refcnt; 1696 1697 KASSERT((m->m_flags & M_EXT) != 0); 1698 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1699 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1700 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1701 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1702 1703 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1704 refcnt = m->m_ext.ext_refcnt = 0; 1705 } else { 1706 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1707 } 1708 if (refcnt > 0) { 1709 if (embedded) { 1710 /* 1711 * other mbuf's m_ext_ref still points to us. 1712 */ 1713 dofree = false; 1714 } else { 1715 m->m_ext_ref = m; 1716 } 1717 } else { 1718 /* 1719 * dropping the last reference 1720 */ 1721 if (!embedded) { 1722 m->m_ext.ext_refcnt++; /* XXX */ 1723 m_ext_free(m->m_ext_ref); 1724 m->m_ext_ref = m; 1725 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 1726 pool_cache_put_paddr((struct pool_cache *) 1727 m->m_ext.ext_arg, 1728 m->m_ext.ext_buf, m->m_ext.ext_paddr); 1729 } else if (m->m_ext.ext_free) { 1730 (*m->m_ext.ext_free)(m, 1731 m->m_ext.ext_buf, m->m_ext.ext_size, 1732 m->m_ext.ext_arg); 1733 /* 1734 * 'm' is already freed by the ext_free callback. 1735 */ 1736 dofree = false; 1737 } else { 1738 free(m->m_ext.ext_buf, m->m_ext.ext_type); 1739 } 1740 } 1741 if (dofree) { 1742 m->m_type = MT_FREE; 1743 pool_cache_put(mb_cache, m); 1744 } 1745 } 1746 1747 #if defined(DDB) 1748 void 1749 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 1750 { 1751 char ch; 1752 bool opt_c = false; 1753 char buf[512]; 1754 1755 while ((ch = *(modif++)) != '\0') { 1756 switch (ch) { 1757 case 'c': 1758 opt_c = true; 1759 break; 1760 } 1761 } 1762 1763 nextchain: 1764 (*pr)("MBUF %p\n", m); 1765 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 1766 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 1767 m->m_data, m->m_len, m->m_type, buf); 1768 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 1769 m->m_nextpkt); 1770 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 1771 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 1772 (int)M_READONLY(m)); 1773 if ((m->m_flags & M_PKTHDR) != 0) { 1774 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 1775 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%" 1776 PRIx32 ", segsz=%u\n", 1777 m->m_pkthdr.len, m->m_pkthdr.rcvif, 1778 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 1779 } 1780 if ((m->m_flags & M_EXT)) { 1781 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 1782 "ext_free=%p, ext_arg=%p\n", 1783 m->m_ext.ext_refcnt, 1784 m->m_ext.ext_buf, m->m_ext.ext_size, 1785 m->m_ext.ext_free, m->m_ext.ext_arg); 1786 } 1787 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 1788 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 1789 vaddr_t eva = sva + m->m_ext.ext_size; 1790 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 1791 int i; 1792 1793 (*pr)(" pages:"); 1794 for (i = 0; i < n; i ++) { 1795 (*pr)(" %p", m->m_ext.ext_pgs[i]); 1796 } 1797 (*pr)("\n"); 1798 } 1799 1800 if (opt_c) { 1801 m = m->m_next; 1802 if (m != NULL) { 1803 goto nextchain; 1804 } 1805 } 1806 } 1807 #endif /* defined(DDB) */ 1808 1809 void 1810 mbstat_type_add(int type, int diff) 1811 { 1812 struct mbstat_cpu *mb; 1813 int s; 1814 1815 s = splvm(); 1816 mb = percpu_getref(mbstat_percpu); 1817 mb->m_mtypes[type] += diff; 1818 percpu_putref(mbstat_percpu); 1819 splx(s); 1820 } 1821 1822 #if defined(MBUFTRACE) 1823 void 1824 mowner_attach(struct mowner *mo) 1825 { 1826 1827 KASSERT(mo->mo_counters == NULL); 1828 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 1829 1830 /* XXX lock */ 1831 LIST_INSERT_HEAD(&mowners, mo, mo_link); 1832 } 1833 1834 void 1835 mowner_detach(struct mowner *mo) 1836 { 1837 1838 KASSERT(mo->mo_counters != NULL); 1839 1840 /* XXX lock */ 1841 LIST_REMOVE(mo, mo_link); 1842 1843 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 1844 mo->mo_counters = NULL; 1845 } 1846 1847 void 1848 mowner_init(struct mbuf *m, int type) 1849 { 1850 struct mowner_counter *mc; 1851 struct mowner *mo; 1852 int s; 1853 1854 m->m_owner = mo = &unknown_mowners[type]; 1855 s = splvm(); 1856 mc = percpu_getref(mo->mo_counters); 1857 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1858 percpu_putref(mo->mo_counters); 1859 splx(s); 1860 } 1861 1862 void 1863 mowner_ref(struct mbuf *m, int flags) 1864 { 1865 struct mowner *mo = m->m_owner; 1866 struct mowner_counter *mc; 1867 int s; 1868 1869 s = splvm(); 1870 mc = percpu_getref(mo->mo_counters); 1871 if ((flags & M_EXT) != 0) 1872 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1873 if ((flags & M_CLUSTER) != 0) 1874 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1875 percpu_putref(mo->mo_counters); 1876 splx(s); 1877 } 1878 1879 void 1880 mowner_revoke(struct mbuf *m, bool all, int flags) 1881 { 1882 struct mowner *mo = m->m_owner; 1883 struct mowner_counter *mc; 1884 int s; 1885 1886 s = splvm(); 1887 mc = percpu_getref(mo->mo_counters); 1888 if ((flags & M_EXT) != 0) 1889 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 1890 if ((flags & M_CLUSTER) != 0) 1891 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 1892 if (all) 1893 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 1894 percpu_putref(mo->mo_counters); 1895 splx(s); 1896 if (all) 1897 m->m_owner = &revoked_mowner; 1898 } 1899 1900 static void 1901 mowner_claim(struct mbuf *m, struct mowner *mo) 1902 { 1903 struct mowner_counter *mc; 1904 int flags = m->m_flags; 1905 int s; 1906 1907 s = splvm(); 1908 mc = percpu_getref(mo->mo_counters); 1909 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1910 if ((flags & M_EXT) != 0) 1911 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1912 if ((flags & M_CLUSTER) != 0) 1913 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1914 percpu_putref(mo->mo_counters); 1915 splx(s); 1916 m->m_owner = mo; 1917 } 1918 1919 void 1920 m_claim(struct mbuf *m, struct mowner *mo) 1921 { 1922 1923 if (m->m_owner == mo || mo == NULL) 1924 return; 1925 1926 mowner_revoke(m, true, m->m_flags); 1927 mowner_claim(m, mo); 1928 } 1929 #endif /* defined(MBUFTRACE) */ 1930