/*	$NetBSD: uipc_mbuf.c,v 1.173 2017/11/09 22:21:27 christos Exp $	*/

/*-
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.173 2017/11/09 22:21:27 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_mbuftrace.h"
#include "opt_nmbclusters.h"
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

pool_cache_t mb_cache;	/* mbuf cache */
pool_cache_t mcl_cache;	/* mbuf cluster cache */

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static int mb_ctor(void *, void *, int);

static void sysctl_kern_mbuf_setup(void);

static struct sysctllog *mbuf_sysctllog;

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */

static const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

static percpu_t *mbstat_percpu;

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	MOWNER_INIT("unknown", "free"),
	MOWNER_INIT("unknown", "data"),
	MOWNER_INIT("unknown", "header"),
	MOWNER_INIT("unknown", "soname"),
	MOWNER_INIT("unknown", "soopts"),
	MOWNER_INIT("unknown", "ftable"),
	MOWNER_INIT("unknown", "control"),
	MOWNER_INIT("unknown", "oobdata"),
};
struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
#endif

#define	MEXT_ISEMBEDDED(m)	((m)->m_ext_ref == (m))

#define	MCLADDREFERENCE(o, n) \
do { \
	KASSERT(((o)->m_flags & M_EXT) != 0); \
	KASSERT(((n)->m_flags & M_EXT) == 0); \
	KASSERT((o)->m_ext.ext_refcnt >= 1); \
	(n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \
	atomic_inc_uint(&(o)->m_ext.ext_refcnt); \
	(n)->m_ext_ref = (o)->m_ext_ref; \
	mowner_ref((n), (n)->m_flags); \
	MCLREFDEBUGN((n), __FILE__, __LINE__); \
} while (/* CONSTCOND */ 0)

static int
nmbclusters_limit(void)
{
#if defined(PMAP_MAP_POOLPAGE)
	/* direct mapping, doesn't use space in kmem_arena */
	vsize_t max_size = physmem / 4;
#else
	vsize_t max_size = MIN(physmem / 4, nkmempages / 4);
#endif

	max_size = max_size * PAGE_SIZE / MCLBYTES;
#ifdef NMBCLUSTERS_MAX
	max_size = MIN(max_size, NMBCLUSTERS_MAX);
#endif

#ifdef NMBCLUSTERS
	return MIN(max_size, NMBCLUSTERS);
#else
	return max_size;
#endif
}

/*
 * Initialize the mbuf allocator.
175 */ 176 void 177 mbinit(void) 178 { 179 180 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 181 CTASSERT(sizeof(struct mbuf) == MSIZE); 182 183 sysctl_kern_mbuf_setup(); 184 185 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 186 NULL, IPL_VM, mb_ctor, NULL, NULL); 187 KASSERT(mb_cache != NULL); 188 189 mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL, 190 IPL_VM, NULL, NULL, NULL); 191 KASSERT(mcl_cache != NULL); 192 193 pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL); 194 pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL); 195 196 /* 197 * Set an arbitrary default limit on the number of mbuf clusters. 198 */ 199 #ifdef NMBCLUSTERS 200 nmbclusters = nmbclusters_limit(); 201 #else 202 nmbclusters = MAX(1024, 203 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 204 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 205 #endif 206 207 /* 208 * Set the hard limit on the mclpool to the number of 209 * mbuf clusters the kernel is to support. Log the limit 210 * reached message max once a minute. 211 */ 212 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 213 214 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 215 216 /* 217 * Set a low water mark for both mbufs and clusters. This should 218 * help ensure that they can be allocated in a memory starvation 219 * situation. This is important for e.g. diskless systems which 220 * must allocate mbufs in order for the pagedaemon to clean pages. 221 */ 222 pool_cache_setlowat(mb_cache, mblowat); 223 pool_cache_setlowat(mcl_cache, mcllowat); 224 225 #ifdef MBUFTRACE 226 { 227 /* 228 * Attach the unknown mowners. 229 */ 230 int i; 231 MOWNER_ATTACH(&revoked_mowner); 232 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 233 i-- > 0; ) 234 MOWNER_ATTACH(&unknown_mowners[i]); 235 } 236 #endif 237 } 238 239 /* 240 * sysctl helper routine for the kern.mbuf subtree. 241 * nmbclusters, mblowat and mcllowat need range 242 * checking and pool tweaking after being reset. 
243 */ 244 static int 245 sysctl_kern_mbuf(SYSCTLFN_ARGS) 246 { 247 int error, newval; 248 struct sysctlnode node; 249 250 node = *rnode; 251 node.sysctl_data = &newval; 252 switch (rnode->sysctl_num) { 253 case MBUF_NMBCLUSTERS: 254 case MBUF_MBLOWAT: 255 case MBUF_MCLLOWAT: 256 newval = *(int*)rnode->sysctl_data; 257 break; 258 default: 259 return (EOPNOTSUPP); 260 } 261 262 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 263 if (error || newp == NULL) 264 return (error); 265 if (newval < 0) 266 return (EINVAL); 267 268 switch (node.sysctl_num) { 269 case MBUF_NMBCLUSTERS: 270 if (newval < nmbclusters) 271 return (EINVAL); 272 if (newval > nmbclusters_limit()) 273 return (EINVAL); 274 nmbclusters = newval; 275 pool_cache_sethardlimit(mcl_cache, nmbclusters, 276 mclpool_warnmsg, 60); 277 break; 278 case MBUF_MBLOWAT: 279 mblowat = newval; 280 pool_cache_setlowat(mb_cache, mblowat); 281 break; 282 case MBUF_MCLLOWAT: 283 mcllowat = newval; 284 pool_cache_setlowat(mcl_cache, mcllowat); 285 break; 286 } 287 288 return (0); 289 } 290 291 #ifdef MBUFTRACE 292 static void 293 mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 294 { 295 struct mowner_counter *mc = v1; 296 struct mowner_user *mo_user = v2; 297 int i; 298 299 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 300 mo_user->mo_counter[i] += mc->mc_counter[i]; 301 } 302 } 303 304 static void 305 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 306 { 307 308 memset(mo_user, 0, sizeof(*mo_user)); 309 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 310 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 311 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 312 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 313 percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user); 314 } 315 316 static int 317 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 318 { 319 struct mowner *mo; 320 size_t len = 0; 321 int error = 0; 322 323 if (namelen != 0) 324 return (EINVAL); 325 if (newp != NULL) 326 return (EPERM); 327 328 LIST_FOREACH(mo, &mowners, mo_link) { 329 struct mowner_user mo_user; 330 331 mowner_convert_to_user(mo, &mo_user); 332 333 if (oldp != NULL) { 334 if (*oldlenp - len < sizeof(mo_user)) { 335 error = ENOMEM; 336 break; 337 } 338 error = copyout(&mo_user, (char *)oldp + len, 339 sizeof(mo_user)); 340 if (error) 341 break; 342 } 343 len += sizeof(mo_user); 344 } 345 346 if (error == 0) 347 *oldlenp = len; 348 349 return (error); 350 } 351 #endif /* MBUFTRACE */ 352 353 static void 354 mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 355 { 356 struct mbstat_cpu *mbsc = v1; 357 struct mbstat *mbs = v2; 358 int i; 359 360 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 361 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 362 } 363 } 364 365 static void 366 mbstat_convert_to_user(struct mbstat *mbs) 367 { 368 369 memset(mbs, 0, sizeof(*mbs)); 370 mbs->m_drain = mbstat.m_drain; 371 percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs); 372 } 373 374 static int 375 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 376 { 377 struct sysctlnode node; 378 struct mbstat mbs; 379 380 mbstat_convert_to_user(&mbs); 381 node = *rnode; 382 node.sysctl_data = &mbs; 383 node.sysctl_size = sizeof(mbs); 384 return sysctl_lookup(SYSCTLFN_CALL(&node)); 385 } 386 387 static void 388 sysctl_kern_mbuf_setup(void) 389 { 390 391 KASSERT(mbuf_sysctllog == NULL); 392 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 393 CTLFLAG_PERMANENT, 394 CTLTYPE_NODE, "mbuf", 395 
	    SYSCTL_DESCR("mbuf control variables"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "msize",
	    SYSCTL_DESCR("mbuf base size"),
	    NULL, msize, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "mclbytes",
	    SYSCTL_DESCR("mbuf cluster size"),
	    NULL, mclbytes, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "nmbclusters",
	    SYSCTL_DESCR("Limit on the number of mbuf clusters"),
	    sysctl_kern_mbuf, 0, &nmbclusters, 0,
	    CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "mblowat",
	    SYSCTL_DESCR("mbuf low water mark"),
	    sysctl_kern_mbuf, 0, &mblowat, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "mcllowat",
	    SYSCTL_DESCR("mbuf cluster low water mark"),
	    sysctl_kern_mbuf, 0, &mcllowat, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "stats",
	    SYSCTL_DESCR("mbuf allocation statistics"),
	    sysctl_kern_mbuf_stats, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "mowners",
	    SYSCTL_DESCR("Information about mbuf owners"),
	    sysctl_kern_mbuf_mowners, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}

static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return (0);
}

/*
 * Add mbuf to the end of a chain
 */
struct mbuf *
m_add(struct mbuf *c, struct mbuf *m)
{
	struct mbuf *n;

	if (c == NULL)
		return m;

	for (n = c; n->m_next != NULL; n = n->m_next)
		continue;
	n->m_next = m;
	return c;
}

/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int adjust;

	KASSERT(len != M_COPYALL);

	if (m->m_flags & M_EXT)
		adjust = m->m_ext.ext_size - len;
	else if (m->m_flags & M_PKTHDR)
		adjust = MHLEN - len;
	else
		adjust = MLEN - len;
	m->m_data += adjust &~ (sizeof(long)-1);
}
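
/*
 * Editor's illustrative sketch, not part of the original source: a typical
 * way a caller might combine m_gethdr() and m_align() to place a small,
 * longword-aligned reply at the end of a fresh header mbuf.  The length
 * "reply_len" is hypothetical and assumed to fit in MHLEN.
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	m_align(m, reply_len);
 *	m->m_len = m->m_pkthdr.len = reply_len;
 *	memset(mtod(m, void *), 0, reply_len);
 */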
517 */ 518 if (space > remainder) 519 space = remainder; 520 memmove(mtod(m, char *) + m->m_len, cp, space); 521 m->m_len += space; 522 cp = cp + space, remainder -= space; 523 } 524 while (remainder > 0) { 525 /* 526 * Allocate a new mbuf; could check space 527 * and allocate a cluster instead. 528 */ 529 n = m_get(M_DONTWAIT, m->m_type); 530 if (n == NULL) 531 break; 532 n->m_len = min(MLEN, remainder); 533 memmove(mtod(n, void *), cp, n->m_len); 534 cp += n->m_len, remainder -= n->m_len; 535 m->m_next = n; 536 m = n; 537 } 538 if (m0->m_flags & M_PKTHDR) 539 m0->m_pkthdr.len += len - remainder; 540 return (remainder == 0); 541 } 542 543 void 544 m_reclaim(void *arg, int flags) 545 { 546 struct domain *dp; 547 const struct protosw *pr; 548 struct ifnet *ifp; 549 int s; 550 551 KERNEL_LOCK(1, NULL); 552 s = splvm(); 553 DOMAIN_FOREACH(dp) { 554 for (pr = dp->dom_protosw; 555 pr < dp->dom_protoswNPROTOSW; pr++) 556 if (pr->pr_drain) 557 (*pr->pr_drain)(); 558 } 559 /* XXX we cannot use psref in H/W interrupt */ 560 if (!cpu_intr_p()) { 561 int bound = curlwp_bind(); 562 IFNET_READER_FOREACH(ifp) { 563 struct psref psref; 564 565 if_acquire(ifp, &psref); 566 567 if (ifp->if_drain) 568 (*ifp->if_drain)(ifp); 569 570 if_release(ifp, &psref); 571 } 572 curlwp_bindx(bound); 573 } 574 splx(s); 575 mbstat.m_drain++; 576 KERNEL_UNLOCK_ONE(NULL); 577 } 578 579 /* 580 * Space allocation routines. 581 * These are also available as macros 582 * for critical paths. 583 */ 584 struct mbuf * 585 m_get(int nowait, int type) 586 { 587 struct mbuf *m; 588 589 KASSERT(type != MT_FREE); 590 591 m = pool_cache_get(mb_cache, 592 nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT); 593 if (m == NULL) 594 return NULL; 595 596 mbstat_type_add(type, 1); 597 598 m_hdr_init(m, type, NULL, m->m_dat, 0); 599 600 return m; 601 } 602 603 struct mbuf * 604 m_gethdr(int nowait, int type) 605 { 606 struct mbuf *m; 607 608 m = m_get(nowait, type); 609 if (m == NULL) 610 return NULL; 611 612 m_pkthdr_init(m); 613 614 return m; 615 } 616 617 struct mbuf * 618 m_getclr(int nowait, int type) 619 { 620 struct mbuf *m; 621 622 m = m_get(nowait, type); 623 if (m == 0) 624 return (NULL); 625 memset(mtod(m, void *), 0, MLEN); 626 return (m); 627 } 628 629 void 630 m_clget(struct mbuf *m, int nowait) 631 { 632 633 MCLGET(m, nowait); 634 } 635 636 #ifdef MBUFTRACE 637 /* 638 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain. 639 */ 640 void 641 m_claimm(struct mbuf *m, struct mowner *mo) 642 { 643 644 for (; m != NULL; m = m->m_next) 645 MCLAIM(m, mo); 646 } 647 #endif 648 649 /* 650 * Mbuffer utility routines. 651 */ 652 653 /* 654 * Lesser-used path for M_PREPEND: 655 * allocate new mbuf to prepend to chain, 656 * copy junk along. 657 */ 658 struct mbuf * 659 m_prepend(struct mbuf *m, int len, int how) 660 { 661 struct mbuf *mn; 662 663 KASSERT(len != M_COPYALL); 664 mn = m_get(how, m->m_type); 665 if (mn == NULL) { 666 m_freem(m); 667 return (NULL); 668 } 669 if (m->m_flags & M_PKTHDR) { 670 M_MOVE_PKTHDR(mn, m); 671 } else { 672 MCLAIM(mn, m->m_owner); 673 } 674 mn->m_next = m; 675 m = mn; 676 if (len < MHLEN) 677 MH_ALIGN(m, len); 678 m->m_len = len; 679 return (m); 680 } 681 682 /* 683 * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 684 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 685 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. 
686 */ 687 int MCFail; 688 689 struct mbuf * 690 m_copym(struct mbuf *m, int off0, int len, int wait) 691 { 692 693 return m_copym0(m, off0, len, wait, 0); /* shallow copy on M_EXT */ 694 } 695 696 struct mbuf * 697 m_dup(struct mbuf *m, int off0, int len, int wait) 698 { 699 700 return m_copym0(m, off0, len, wait, 1); /* deep copy */ 701 } 702 703 static inline int 704 m_copylen(int len, int copylen) { 705 return len == M_COPYALL ? copylen : min(len, copylen); 706 } 707 708 static struct mbuf * 709 m_copym0(struct mbuf *m, int off0, int len, int wait, int deep) 710 { 711 struct mbuf *n, **np; 712 int off = off0; 713 struct mbuf *top; 714 int copyhdr = 0; 715 716 if (off < 0 || (len != M_COPYALL && len < 0)) 717 panic("m_copym: off %d, len %d", off, len); 718 if (off == 0 && m->m_flags & M_PKTHDR) 719 copyhdr = 1; 720 while (off > 0) { 721 if (m == 0) 722 panic("m_copym: m == 0, off %d", off); 723 if (off < m->m_len) 724 break; 725 off -= m->m_len; 726 m = m->m_next; 727 } 728 np = ⊤ 729 top = 0; 730 while (len == M_COPYALL || len > 0) { 731 if (m == 0) { 732 if (len != M_COPYALL) 733 panic("m_copym: m == 0, len %d [!COPYALL]", 734 len); 735 break; 736 } 737 n = m_get(wait, m->m_type); 738 *np = n; 739 if (n == 0) 740 goto nospace; 741 MCLAIM(n, m->m_owner); 742 if (copyhdr) { 743 M_COPY_PKTHDR(n, m); 744 if (len == M_COPYALL) 745 n->m_pkthdr.len -= off0; 746 else 747 n->m_pkthdr.len = len; 748 copyhdr = 0; 749 } 750 n->m_len = m_copylen(len, m->m_len - off); 751 if (m->m_flags & M_EXT) { 752 if (!deep) { 753 n->m_data = m->m_data + off; 754 MCLADDREFERENCE(m, n); 755 } else { 756 /* 757 * we are unsure about the way m was allocated. 758 * copy into multiple MCLBYTES cluster mbufs. 759 * 760 * recompute m_len, it is no longer valid if MCLGET() 761 * fails to allocate a cluster. Then we try to split 762 * the source into normal sized mbufs. 763 */ 764 MCLGET(n, wait); 765 n->m_len = 0; 766 n->m_len = M_TRAILINGSPACE(n); 767 n->m_len = m_copylen(len, n->m_len); 768 n->m_len = min(n->m_len, m->m_len - off); 769 memcpy(mtod(n, void *), mtod(m, char *) + off, 770 (unsigned)n->m_len); 771 } 772 } else 773 memcpy(mtod(n, void *), mtod(m, char *) + off, 774 (unsigned)n->m_len); 775 if (len != M_COPYALL) 776 len -= n->m_len; 777 off += n->m_len; 778 #ifdef DIAGNOSTIC 779 if (off > m->m_len) 780 panic("m_copym0 overrun %d %d", off, m->m_len); 781 #endif 782 if (off == m->m_len) { 783 m = m->m_next; 784 off = 0; 785 } 786 np = &n->m_next; 787 } 788 if (top == 0) 789 MCFail++; 790 return (top); 791 nospace: 792 m_freem(top); 793 MCFail++; 794 return (NULL); 795 } 796 797 /* 798 * Copy an entire packet, including header (which must be present). 799 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 
800 */ 801 struct mbuf * 802 m_copypacket(struct mbuf *m, int how) 803 { 804 struct mbuf *top, *n, *o; 805 806 n = m_get(how, m->m_type); 807 top = n; 808 if (!n) 809 goto nospace; 810 811 MCLAIM(n, m->m_owner); 812 M_COPY_PKTHDR(n, m); 813 n->m_len = m->m_len; 814 if (m->m_flags & M_EXT) { 815 n->m_data = m->m_data; 816 MCLADDREFERENCE(m, n); 817 } else { 818 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 819 } 820 821 m = m->m_next; 822 while (m) { 823 o = m_get(how, m->m_type); 824 if (!o) 825 goto nospace; 826 827 MCLAIM(o, m->m_owner); 828 n->m_next = o; 829 n = n->m_next; 830 831 n->m_len = m->m_len; 832 if (m->m_flags & M_EXT) { 833 n->m_data = m->m_data; 834 MCLADDREFERENCE(m, n); 835 } else { 836 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 837 } 838 839 m = m->m_next; 840 } 841 return top; 842 nospace: 843 m_freem(top); 844 MCFail++; 845 return NULL; 846 } 847 848 /* 849 * Copy data from an mbuf chain starting "off" bytes from the beginning, 850 * continuing for "len" bytes, into the indicated buffer. 851 */ 852 void 853 m_copydata(struct mbuf *m, int off, int len, void *vp) 854 { 855 unsigned count; 856 void * cp = vp; 857 struct mbuf *m0 = m; 858 int len0 = len; 859 int off0 = off; 860 void *vp0 = vp; 861 862 KASSERT(len != M_COPYALL); 863 if (off < 0 || len < 0) 864 panic("m_copydata: off %d, len %d", off, len); 865 while (off > 0) { 866 if (m == NULL) 867 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", 868 m0, len0, off0, vp0, off, off0 - off); 869 if (off < m->m_len) 870 break; 871 off -= m->m_len; 872 m = m->m_next; 873 } 874 while (len > 0) { 875 if (m == NULL) 876 panic("m_copydata(%p,%d,%d,%p): " 877 "m=NULL, off=%d (%d), len=%d (%d)", 878 m0, len0, off0, vp0, 879 off, off0 - off, len, len0 - len); 880 count = min(m->m_len - off, len); 881 memcpy(cp, mtod(m, char *) + off, count); 882 len -= count; 883 cp = (char *)cp + count; 884 off = 0; 885 m = m->m_next; 886 } 887 } 888 889 /* 890 * Concatenate mbuf chain n to m. 891 * n might be copied into m (when n->m_len is small), therefore data portion of 892 * n could be copied into an mbuf of different mbuf type. 893 * Any m_pkthdr is not updated. 894 */ 895 void 896 m_cat(struct mbuf *m, struct mbuf *n) 897 { 898 899 while (m->m_next) 900 m = m->m_next; 901 while (n) { 902 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 903 /* just join the two chains */ 904 m->m_next = n; 905 return; 906 } 907 /* splat the data from one into the other */ 908 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 909 (u_int)n->m_len); 910 m->m_len += n->m_len; 911 n = m_free(n); 912 } 913 } 914 915 void 916 m_adj(struct mbuf *mp, int req_len) 917 { 918 int len = req_len; 919 struct mbuf *m; 920 int count; 921 922 if ((m = mp) == NULL) 923 return; 924 if (len >= 0) { 925 /* 926 * Trim from head. 927 */ 928 while (m != NULL && len > 0) { 929 if (m->m_len <= len) { 930 len -= m->m_len; 931 m->m_len = 0; 932 m = m->m_next; 933 } else { 934 m->m_len -= len; 935 m->m_data += len; 936 len = 0; 937 } 938 } 939 m = mp; 940 if (mp->m_flags & M_PKTHDR) 941 m->m_pkthdr.len -= (req_len - len); 942 } else { 943 /* 944 * Trim from tail. Scan the mbuf chain, 945 * calculating its length and finding the last mbuf. 946 * If the adjustment only affects this mbuf, then just 947 * adjust and return. Otherwise, rescan and truncate 948 * after the remaining size. 
949 */ 950 len = -len; 951 count = 0; 952 for (;;) { 953 count += m->m_len; 954 if (m->m_next == (struct mbuf *)0) 955 break; 956 m = m->m_next; 957 } 958 if (m->m_len >= len) { 959 m->m_len -= len; 960 if (mp->m_flags & M_PKTHDR) 961 mp->m_pkthdr.len -= len; 962 return; 963 } 964 count -= len; 965 if (count < 0) 966 count = 0; 967 /* 968 * Correct length for chain is "count". 969 * Find the mbuf with last data, adjust its length, 970 * and toss data from remaining mbufs on chain. 971 */ 972 m = mp; 973 if (m->m_flags & M_PKTHDR) 974 m->m_pkthdr.len = count; 975 for (; m; m = m->m_next) { 976 if (m->m_len >= count) { 977 m->m_len = count; 978 break; 979 } 980 count -= m->m_len; 981 } 982 if (m) 983 while (m->m_next) 984 (m = m->m_next)->m_len = 0; 985 } 986 } 987 988 /* 989 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 990 * would be contiguous and in the data area of an mbuf (therefore, mtod() 991 * would work for a structure of given length). 992 * 993 * => On success, returns true and the resulting mbuf chain; false otherwise. 994 * => The mbuf chain may change, but is always preserved valid. 995 */ 996 bool 997 m_ensure_contig(struct mbuf **m0, int len) 998 { 999 struct mbuf *n = *m0, *m; 1000 size_t count, space; 1001 1002 KASSERT(len != M_COPYALL); 1003 /* 1004 * If first mbuf has no cluster, and has room for len bytes 1005 * without shifting current data, pullup into it, 1006 * otherwise allocate a new mbuf to prepend to the chain. 1007 */ 1008 if ((n->m_flags & M_EXT) == 0 && 1009 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 1010 if (n->m_len >= len) { 1011 return true; 1012 } 1013 m = n; 1014 n = n->m_next; 1015 len -= m->m_len; 1016 } else { 1017 if (len > MHLEN) { 1018 return false; 1019 } 1020 m = m_get(M_DONTWAIT, n->m_type); 1021 if (m == NULL) { 1022 return false; 1023 } 1024 MCLAIM(m, n->m_owner); 1025 if (n->m_flags & M_PKTHDR) { 1026 M_MOVE_PKTHDR(m, n); 1027 } 1028 } 1029 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1030 do { 1031 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 1032 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1033 (unsigned)count); 1034 len -= count; 1035 m->m_len += count; 1036 n->m_len -= count; 1037 space -= count; 1038 if (n->m_len) 1039 n->m_data += count; 1040 else 1041 n = m_free(n); 1042 } while (len > 0 && n); 1043 1044 m->m_next = n; 1045 *m0 = m; 1046 1047 return len <= 0; 1048 } 1049 1050 /* 1051 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 1052 */ 1053 int MPFail; 1054 1055 struct mbuf * 1056 m_pullup(struct mbuf *n, int len) 1057 { 1058 struct mbuf *m = n; 1059 1060 KASSERT(len != M_COPYALL); 1061 if (!m_ensure_contig(&m, len)) { 1062 KASSERT(m != NULL); 1063 m_freem(m); 1064 MPFail++; 1065 m = NULL; 1066 } 1067 return m; 1068 } 1069 1070 /* 1071 * Like m_pullup(), except a new mbuf is always allocated, and we allow 1072 * the amount of empty space before the data in the new mbuf to be specified 1073 * (in the event that the caller expects to prepend later). 
1074 */ 1075 int MSFail; 1076 1077 struct mbuf * 1078 m_copyup(struct mbuf *n, int len, int dstoff) 1079 { 1080 struct mbuf *m; 1081 int count, space; 1082 1083 KASSERT(len != M_COPYALL); 1084 if (len > (MHLEN - dstoff)) 1085 goto bad; 1086 m = m_get(M_DONTWAIT, n->m_type); 1087 if (m == NULL) 1088 goto bad; 1089 MCLAIM(m, n->m_owner); 1090 if (n->m_flags & M_PKTHDR) { 1091 M_MOVE_PKTHDR(m, n); 1092 } 1093 m->m_data += dstoff; 1094 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1095 do { 1096 count = min(min(max(len, max_protohdr), space), n->m_len); 1097 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1098 (unsigned)count); 1099 len -= count; 1100 m->m_len += count; 1101 n->m_len -= count; 1102 space -= count; 1103 if (n->m_len) 1104 n->m_data += count; 1105 else 1106 n = m_free(n); 1107 } while (len > 0 && n); 1108 if (len > 0) { 1109 (void) m_free(m); 1110 goto bad; 1111 } 1112 m->m_next = n; 1113 return (m); 1114 bad: 1115 m_freem(n); 1116 MSFail++; 1117 return (NULL); 1118 } 1119 1120 /* 1121 * Partition an mbuf chain in two pieces, returning the tail -- 1122 * all but the first len0 bytes. In case of failure, it returns NULL and 1123 * attempts to restore the chain to its original state. 1124 */ 1125 struct mbuf * 1126 m_split(struct mbuf *m0, int len0, int wait) 1127 { 1128 1129 return m_split0(m0, len0, wait, 1); 1130 } 1131 1132 static struct mbuf * 1133 m_split0(struct mbuf *m0, int len0, int wait, int copyhdr) 1134 { 1135 struct mbuf *m, *n; 1136 unsigned len = len0, remain, len_save; 1137 1138 KASSERT(len0 != M_COPYALL); 1139 for (m = m0; m && len > m->m_len; m = m->m_next) 1140 len -= m->m_len; 1141 if (m == 0) 1142 return (NULL); 1143 remain = m->m_len - len; 1144 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1145 n = m_gethdr(wait, m0->m_type); 1146 if (n == NULL) 1147 return NULL; 1148 MCLAIM(n, m0->m_owner); 1149 m_copy_rcvif(n, m0); 1150 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1151 len_save = m0->m_pkthdr.len; 1152 m0->m_pkthdr.len = len0; 1153 if (m->m_flags & M_EXT) 1154 goto extpacket; 1155 if (remain > MHLEN) { 1156 /* m can't be the lead packet */ 1157 MH_ALIGN(n, 0); 1158 n->m_len = 0; 1159 n->m_next = m_split(m, len, wait); 1160 if (n->m_next == 0) { 1161 (void) m_free(n); 1162 m0->m_pkthdr.len = len_save; 1163 return (NULL); 1164 } else 1165 return (n); 1166 } else 1167 MH_ALIGN(n, remain); 1168 } else if (remain == 0) { 1169 n = m->m_next; 1170 m->m_next = 0; 1171 return (n); 1172 } else { 1173 n = m_get(wait, m->m_type); 1174 if (n == 0) 1175 return (NULL); 1176 MCLAIM(n, m->m_owner); 1177 M_ALIGN(n, remain); 1178 } 1179 extpacket: 1180 if (m->m_flags & M_EXT) { 1181 n->m_data = m->m_data + len; 1182 MCLADDREFERENCE(m, n); 1183 } else { 1184 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1185 } 1186 n->m_len = remain; 1187 m->m_len = len; 1188 n->m_next = m->m_next; 1189 m->m_next = 0; 1190 return (n); 1191 } 1192 /* 1193 * Routine to copy from device local memory into mbufs. 1194 */ 1195 struct mbuf * 1196 m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 1197 void (*copy)(const void *from, void *to, size_t len)) 1198 { 1199 struct mbuf *m; 1200 struct mbuf *top = 0, **mp = ⊤ 1201 int off = off0, len; 1202 char *cp; 1203 char *epkt; 1204 1205 cp = buf; 1206 epkt = cp + totlen; 1207 if (off) { 1208 /* 1209 * If 'off' is non-zero, packet is trailer-encapsulated, 1210 * so we have to skip the type and length fields. 
1211 */ 1212 cp += off + 2 * sizeof(uint16_t); 1213 totlen -= 2 * sizeof(uint16_t); 1214 } 1215 m = m_gethdr(M_DONTWAIT, MT_DATA); 1216 if (m == NULL) 1217 return NULL; 1218 m_set_rcvif(m, ifp); 1219 m->m_pkthdr.len = totlen; 1220 m->m_len = MHLEN; 1221 1222 while (totlen > 0) { 1223 if (top) { 1224 m = m_get(M_DONTWAIT, MT_DATA); 1225 if (m == 0) { 1226 m_freem(top); 1227 return (NULL); 1228 } 1229 m->m_len = MLEN; 1230 } 1231 len = min(totlen, epkt - cp); 1232 if (len >= MINCLSIZE) { 1233 MCLGET(m, M_DONTWAIT); 1234 if ((m->m_flags & M_EXT) == 0) { 1235 m_free(m); 1236 m_freem(top); 1237 return (NULL); 1238 } 1239 m->m_len = len = min(len, MCLBYTES); 1240 } else { 1241 /* 1242 * Place initial small packet/header at end of mbuf. 1243 */ 1244 if (len < m->m_len) { 1245 if (top == 0 && len + max_linkhdr <= m->m_len) 1246 m->m_data += max_linkhdr; 1247 m->m_len = len; 1248 } else 1249 len = m->m_len; 1250 } 1251 if (copy) 1252 copy(cp, mtod(m, void *), (size_t)len); 1253 else 1254 memcpy(mtod(m, void *), cp, (size_t)len); 1255 cp += len; 1256 *mp = m; 1257 mp = &m->m_next; 1258 totlen -= len; 1259 if (cp == epkt) 1260 cp = buf; 1261 } 1262 return (top); 1263 } 1264 1265 /* 1266 * Copy data from a buffer back into the indicated mbuf chain, 1267 * starting "off" bytes from the beginning, extending the mbuf 1268 * chain if necessary. 1269 */ 1270 void 1271 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1272 { 1273 #if defined(DEBUG) 1274 struct mbuf *origm = m0; 1275 int error; 1276 #endif /* defined(DEBUG) */ 1277 1278 if (m0 == NULL) 1279 return; 1280 1281 #if defined(DEBUG) 1282 error = 1283 #endif /* defined(DEBUG) */ 1284 m_copyback0(&m0, off, len, cp, 1285 M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT); 1286 1287 #if defined(DEBUG) 1288 if (error != 0 || (m0 != NULL && origm != m0)) 1289 panic("m_copyback"); 1290 #endif /* defined(DEBUG) */ 1291 } 1292 1293 struct mbuf * 1294 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1295 { 1296 int error; 1297 1298 /* don't support chain expansion */ 1299 KASSERT(len != M_COPYALL); 1300 KDASSERT(off + len <= m_length(m0)); 1301 1302 error = m_copyback0(&m0, off, len, cp, 1303 M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how); 1304 if (error) { 1305 /* 1306 * no way to recover from partial success. 1307 * just free the chain. 1308 */ 1309 m_freem(m0); 1310 return NULL; 1311 } 1312 return m0; 1313 } 1314 1315 /* 1316 * m_makewritable: ensure the specified range writable. 1317 */ 1318 int 1319 m_makewritable(struct mbuf **mp, int off, int len, int how) 1320 { 1321 int error; 1322 #if defined(DEBUG) 1323 int origlen = m_length(*mp); 1324 #endif /* defined(DEBUG) */ 1325 1326 error = m_copyback0(mp, off, len, NULL, 1327 M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how); 1328 1329 if (error) 1330 return error; 1331 1332 #if defined(DEBUG) 1333 int reslen = 0; 1334 for (struct mbuf *n = *mp; n; n = n->m_next) 1335 reslen += n->m_len; 1336 if (origlen != reslen) 1337 panic("m_makewritable: length changed"); 1338 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1339 panic("m_makewritable: inconsist"); 1340 #endif /* defined(DEBUG) */ 1341 1342 return 0; 1343 } 1344 1345 /* 1346 * Copy the mbuf chain to a new mbuf chain that is as short as possible. 1347 * Return the new mbuf chain on success, NULL on failure. On success, 1348 * free the old mbuf chain. 
1349 */ 1350 struct mbuf * 1351 m_defrag(struct mbuf *mold, int flags) 1352 { 1353 struct mbuf *m0, *mn, *n; 1354 size_t sz = mold->m_pkthdr.len; 1355 1356 #ifdef DIAGNOSTIC 1357 if ((mold->m_flags & M_PKTHDR) == 0) 1358 panic("m_defrag: not a mbuf chain header"); 1359 #endif 1360 1361 m0 = m_gethdr(flags, MT_DATA); 1362 if (m0 == NULL) 1363 return NULL; 1364 M_COPY_PKTHDR(m0, mold); 1365 mn = m0; 1366 1367 do { 1368 if (sz > MHLEN) { 1369 MCLGET(mn, M_DONTWAIT); 1370 if ((mn->m_flags & M_EXT) == 0) { 1371 m_freem(m0); 1372 return NULL; 1373 } 1374 } 1375 1376 mn->m_len = MIN(sz, MCLBYTES); 1377 1378 m_copydata(mold, mold->m_pkthdr.len - sz, mn->m_len, 1379 mtod(mn, void *)); 1380 1381 sz -= mn->m_len; 1382 1383 if (sz > 0) { 1384 /* need more mbufs */ 1385 n = m_get(M_NOWAIT, MT_DATA); 1386 if (n == NULL) { 1387 m_freem(m0); 1388 return NULL; 1389 } 1390 1391 mn->m_next = n; 1392 mn = n; 1393 } 1394 } while (sz > 0); 1395 1396 m_freem(mold); 1397 1398 return m0; 1399 } 1400 1401 int 1402 m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags, 1403 int how) 1404 { 1405 int mlen; 1406 struct mbuf *m, *n; 1407 struct mbuf **mp; 1408 int totlen = 0; 1409 const char *cp = vp; 1410 1411 KASSERT(mp0 != NULL); 1412 KASSERT(*mp0 != NULL); 1413 KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL); 1414 KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL); 1415 1416 if (len == M_COPYALL) 1417 len = m_length(*mp0) - off; 1418 1419 /* 1420 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW, 1421 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive. 1422 */ 1423 1424 KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0); 1425 1426 mp = mp0; 1427 m = *mp; 1428 while (off > (mlen = m->m_len)) { 1429 off -= mlen; 1430 totlen += mlen; 1431 if (m->m_next == NULL) { 1432 int tspace; 1433 extend: 1434 if ((flags & M_COPYBACK0_EXTEND) == 0) 1435 goto out; 1436 1437 /* 1438 * try to make some space at the end of "m". 1439 */ 1440 1441 mlen = m->m_len; 1442 if (off + len >= MINCLSIZE && 1443 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1444 MCLGET(m, how); 1445 } 1446 tspace = M_TRAILINGSPACE(m); 1447 if (tspace > 0) { 1448 tspace = min(tspace, off + len); 1449 KASSERT(tspace > 0); 1450 memset(mtod(m, char *) + m->m_len, 0, 1451 min(off, tspace)); 1452 m->m_len += tspace; 1453 off += mlen; 1454 totlen -= mlen; 1455 continue; 1456 } 1457 1458 /* 1459 * need to allocate an mbuf. 1460 */ 1461 1462 if (off + len >= MINCLSIZE) { 1463 n = m_getcl(how, m->m_type, 0); 1464 } else { 1465 n = m_get(how, m->m_type); 1466 } 1467 if (n == NULL) { 1468 goto out; 1469 } 1470 n->m_len = min(M_TRAILINGSPACE(n), off + len); 1471 memset(mtod(n, char *), 0, min(n->m_len, off)); 1472 m->m_next = n; 1473 } 1474 mp = &m->m_next; 1475 m = m->m_next; 1476 } 1477 while (len > 0) { 1478 mlen = m->m_len - off; 1479 if (mlen != 0 && M_READONLY(m)) { 1480 char *datap; 1481 int eatlen; 1482 1483 /* 1484 * this mbuf is read-only. 1485 * allocate a new writable mbuf and try again. 1486 */ 1487 1488 #if defined(DIAGNOSTIC) 1489 if ((flags & M_COPYBACK0_COW) == 0) 1490 panic("m_copyback0: read-only"); 1491 #endif /* defined(DIAGNOSTIC) */ 1492 1493 /* 1494 * if we're going to write into the middle of 1495 * a mbuf, split it first. 
1496 */ 1497 if (off > 0) { 1498 n = m_split0(m, off, how, 0); 1499 if (n == NULL) 1500 goto enobufs; 1501 m->m_next = n; 1502 mp = &m->m_next; 1503 m = n; 1504 off = 0; 1505 continue; 1506 } 1507 1508 /* 1509 * XXX TODO coalesce into the trailingspace of 1510 * the previous mbuf when possible. 1511 */ 1512 1513 /* 1514 * allocate a new mbuf. copy packet header if needed. 1515 */ 1516 n = m_get(how, m->m_type); 1517 if (n == NULL) 1518 goto enobufs; 1519 MCLAIM(n, m->m_owner); 1520 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1521 M_MOVE_PKTHDR(n, m); 1522 n->m_len = MHLEN; 1523 } else { 1524 if (len >= MINCLSIZE) 1525 MCLGET(n, M_DONTWAIT); 1526 n->m_len = 1527 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1528 } 1529 if (n->m_len > len) 1530 n->m_len = len; 1531 1532 /* 1533 * free the region which has been overwritten. 1534 * copying data from old mbufs if requested. 1535 */ 1536 if (flags & M_COPYBACK0_PRESERVE) 1537 datap = mtod(n, char *); 1538 else 1539 datap = NULL; 1540 eatlen = n->m_len; 1541 while (m != NULL && M_READONLY(m) && 1542 n->m_type == m->m_type && eatlen > 0) { 1543 mlen = min(eatlen, m->m_len); 1544 if (datap) { 1545 m_copydata(m, 0, mlen, datap); 1546 datap += mlen; 1547 } 1548 m->m_data += mlen; 1549 m->m_len -= mlen; 1550 eatlen -= mlen; 1551 if (m->m_len == 0) 1552 *mp = m = m_free(m); 1553 } 1554 if (eatlen > 0) 1555 n->m_len -= eatlen; 1556 n->m_next = m; 1557 *mp = m = n; 1558 continue; 1559 } 1560 mlen = min(mlen, len); 1561 if (flags & M_COPYBACK0_COPYBACK) { 1562 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1563 cp += mlen; 1564 } 1565 len -= mlen; 1566 mlen += off; 1567 off = 0; 1568 totlen += mlen; 1569 if (len == 0) 1570 break; 1571 if (m->m_next == NULL) { 1572 goto extend; 1573 } 1574 mp = &m->m_next; 1575 m = m->m_next; 1576 } 1577 out: if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1578 KASSERT((flags & M_COPYBACK0_EXTEND) != 0); 1579 m->m_pkthdr.len = totlen; 1580 } 1581 1582 return 0; 1583 1584 enobufs: 1585 return ENOBUFS; 1586 } 1587 1588 void 1589 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1590 { 1591 1592 KASSERT((to->m_flags & M_EXT) == 0); 1593 KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL); 1594 KASSERT((from->m_flags & M_PKTHDR) != 0); 1595 1596 to->m_pkthdr = from->m_pkthdr; 1597 to->m_flags = from->m_flags & M_COPYFLAGS; 1598 to->m_data = to->m_pktdat; 1599 1600 from->m_flags &= ~M_PKTHDR; 1601 } 1602 1603 /* 1604 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1605 * beginning, continuing for "len" bytes. 1606 */ 1607 int 1608 m_apply(struct mbuf *m, int off, int len, 1609 int (*f)(void *, void *, unsigned int), void *arg) 1610 { 1611 unsigned int count; 1612 int rval; 1613 1614 KASSERT(len != M_COPYALL); 1615 KASSERT(len >= 0); 1616 KASSERT(off >= 0); 1617 1618 while (off > 0) { 1619 KASSERT(m != NULL); 1620 if (off < m->m_len) 1621 break; 1622 off -= m->m_len; 1623 m = m->m_next; 1624 } 1625 while (len > 0) { 1626 KASSERT(m != NULL); 1627 count = min(m->m_len - off, len); 1628 1629 rval = (*f)(arg, mtod(m, char *) + off, count); 1630 if (rval) 1631 return (rval); 1632 1633 len -= count; 1634 off = 0; 1635 m = m->m_next; 1636 } 1637 1638 return (0); 1639 } 1640 1641 /* 1642 * Return a pointer to mbuf/offset of location in mbuf chain. 
1643 */ 1644 struct mbuf * 1645 m_getptr(struct mbuf *m, int loc, int *off) 1646 { 1647 1648 while (loc >= 0) { 1649 /* Normal end of search */ 1650 if (m->m_len > loc) { 1651 *off = loc; 1652 return (m); 1653 } else { 1654 loc -= m->m_len; 1655 1656 if (m->m_next == NULL) { 1657 if (loc == 0) { 1658 /* Point at the end of valid data */ 1659 *off = m->m_len; 1660 return (m); 1661 } else 1662 return (NULL); 1663 } else 1664 m = m->m_next; 1665 } 1666 } 1667 1668 return (NULL); 1669 } 1670 1671 /* 1672 * m_ext_free: release a reference to the mbuf external storage. 1673 * 1674 * => free the mbuf m itself as well. 1675 */ 1676 1677 void 1678 m_ext_free(struct mbuf *m) 1679 { 1680 bool embedded = MEXT_ISEMBEDDED(m); 1681 bool dofree = true; 1682 u_int refcnt; 1683 1684 KASSERT((m->m_flags & M_EXT) != 0); 1685 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1686 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1687 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1688 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1689 1690 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1691 refcnt = m->m_ext.ext_refcnt = 0; 1692 } else { 1693 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1694 } 1695 if (refcnt > 0) { 1696 if (embedded) { 1697 /* 1698 * other mbuf's m_ext_ref still points to us. 1699 */ 1700 dofree = false; 1701 } else { 1702 m->m_ext_ref = m; 1703 } 1704 } else { 1705 /* 1706 * dropping the last reference 1707 */ 1708 if (!embedded) { 1709 m->m_ext.ext_refcnt++; /* XXX */ 1710 m_ext_free(m->m_ext_ref); 1711 m->m_ext_ref = m; 1712 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 1713 pool_cache_put_paddr((struct pool_cache *) 1714 m->m_ext.ext_arg, 1715 m->m_ext.ext_buf, m->m_ext.ext_paddr); 1716 } else if (m->m_ext.ext_free) { 1717 (*m->m_ext.ext_free)(m, 1718 m->m_ext.ext_buf, m->m_ext.ext_size, 1719 m->m_ext.ext_arg); 1720 /* 1721 * 'm' is already freed by the ext_free callback. 
1722 */ 1723 dofree = false; 1724 } else { 1725 free(m->m_ext.ext_buf, m->m_ext.ext_type); 1726 } 1727 } 1728 if (dofree) { 1729 m->m_type = MT_FREE; 1730 pool_cache_put(mb_cache, m); 1731 } 1732 } 1733 1734 #if defined(DDB) 1735 void 1736 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 1737 { 1738 char ch; 1739 bool opt_c = false; 1740 char buf[512]; 1741 1742 while ((ch = *(modif++)) != '\0') { 1743 switch (ch) { 1744 case 'c': 1745 opt_c = true; 1746 break; 1747 } 1748 } 1749 1750 nextchain: 1751 (*pr)("MBUF %p\n", m); 1752 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 1753 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 1754 m->m_data, m->m_len, m->m_type, buf); 1755 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 1756 m->m_nextpkt); 1757 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 1758 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 1759 (int)M_READONLY(m)); 1760 if ((m->m_flags & M_PKTHDR) != 0) { 1761 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 1762 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%" 1763 PRIx32 ", segsz=%u\n", 1764 m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m), 1765 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 1766 } 1767 if ((m->m_flags & M_EXT)) { 1768 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 1769 "ext_free=%p, ext_arg=%p\n", 1770 m->m_ext.ext_refcnt, 1771 m->m_ext.ext_buf, m->m_ext.ext_size, 1772 m->m_ext.ext_free, m->m_ext.ext_arg); 1773 } 1774 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 1775 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 1776 vaddr_t eva = sva + m->m_ext.ext_size; 1777 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 1778 int i; 1779 1780 (*pr)(" pages:"); 1781 for (i = 0; i < n; i ++) { 1782 (*pr)(" %p", m->m_ext.ext_pgs[i]); 1783 } 1784 (*pr)("\n"); 1785 } 1786 1787 if (opt_c) { 1788 m = m->m_next; 1789 if (m != NULL) { 1790 goto nextchain; 1791 } 1792 } 1793 } 1794 #endif /* defined(DDB) */ 1795 1796 void 1797 mbstat_type_add(int type, int diff) 1798 { 1799 struct mbstat_cpu *mb; 1800 int s; 1801 1802 s = splvm(); 1803 mb = percpu_getref(mbstat_percpu); 1804 mb->m_mtypes[type] += diff; 1805 percpu_putref(mbstat_percpu); 1806 splx(s); 1807 } 1808 1809 #if defined(MBUFTRACE) 1810 void 1811 mowner_attach(struct mowner *mo) 1812 { 1813 1814 KASSERT(mo->mo_counters == NULL); 1815 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 1816 1817 /* XXX lock */ 1818 LIST_INSERT_HEAD(&mowners, mo, mo_link); 1819 } 1820 1821 void 1822 mowner_detach(struct mowner *mo) 1823 { 1824 1825 KASSERT(mo->mo_counters != NULL); 1826 1827 /* XXX lock */ 1828 LIST_REMOVE(mo, mo_link); 1829 1830 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 1831 mo->mo_counters = NULL; 1832 } 1833 1834 void 1835 mowner_init(struct mbuf *m, int type) 1836 { 1837 struct mowner_counter *mc; 1838 struct mowner *mo; 1839 int s; 1840 1841 m->m_owner = mo = &unknown_mowners[type]; 1842 s = splvm(); 1843 mc = percpu_getref(mo->mo_counters); 1844 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1845 percpu_putref(mo->mo_counters); 1846 splx(s); 1847 } 1848 1849 void 1850 mowner_ref(struct mbuf *m, int flags) 1851 { 1852 struct mowner *mo = m->m_owner; 1853 struct mowner_counter *mc; 1854 int s; 1855 1856 s = splvm(); 1857 mc = percpu_getref(mo->mo_counters); 1858 if ((flags & M_EXT) != 0) 1859 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1860 if ((flags & M_CLUSTER) != 0) 1861 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1862 
	percpu_putref(mo->mo_counters);
	splx(s);
}

void
mowner_revoke(struct mbuf *m, bool all, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
	if (all)
		mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	if (all)
		m->m_owner = &revoked_mowner;
}

static void
mowner_claim(struct mbuf *m, struct mowner *mo)
{
	struct mowner_counter *mc;
	int flags = m->m_flags;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	m->m_owner = mo;
}

void
m_claim(struct mbuf *m, struct mowner *mo)
{

	if (m->m_owner == mo || mo == NULL)
		return;

	mowner_revoke(m, true, m->m_flags);
	mowner_claim(m, mo);
}
#endif /* defined(MBUFTRACE) */

/*
 * MFREE(struct mbuf *m, struct mbuf *n)
 *	Free a single mbuf and associated external storage.
 *	Place the successor, if any, in n.
 */
#define	MFREE(f, l, m, n) \
	mowner_revoke((m), 1, (m)->m_flags); \
	mbstat_type_add((m)->m_type, -1); \
	if ((m)->m_flags & M_PKTHDR) \
		m_tag_delete_chain((m), NULL); \
	(n) = (m)->m_next; \
	if ((m)->m_flags & M_EXT) { \
		m_ext_free((m)); \
	} else { \
		MBUFFREE(f, l, m); \
	} \

#ifdef DEBUG
#define	MBUFFREE(f, l, m) \
	do { \
		if ((m)->m_type == MT_FREE) \
			/* m_data/m_len hold the file/line recorded below */ \
			panic("mbuf was already freed at %s,%d", \
			    m->m_data, m->m_len); \
		(m)->m_type = MT_FREE; \
		(m)->m_data = __UNCONST(f); \
		(m)->m_len = l; \
		pool_cache_put(mb_cache, (m)); \
	} while (/*CONSTCOND*/0)

#else
#define	MBUFFREE(f, l, m) \
	do { \
		KASSERT((m)->m_type != MT_FREE); \
		(m)->m_type = MT_FREE; \
		pool_cache_put(mb_cache, (m)); \
	} while (/*CONSTCOND*/0)
#endif

struct mbuf *
m__free(const char *f, int l, struct mbuf *m)
{
	struct mbuf *n;

	MFREE(f, l, m, n);
	return (n);
}

void
m__freem(const char *f, int l, struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(f, l, m, n);
		m = n;
	} while (m);
}

#undef m_free
struct mbuf *m_free(struct mbuf *);
struct mbuf *
m_free(struct mbuf *m)
{
	return m__free(__func__, __LINE__, m);
}

#undef m_freem
void m_freem(struct mbuf *);
void
m_freem(struct mbuf *m)
{
	m__freem(__func__, __LINE__, m);
}