1 /* $NetBSD: uipc_mbuf.c,v 1.214 2018/05/03 07:46:17 maxv Exp $ */ 2 3 /* 4 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.214 2018/05/03 07:46:17 maxv Exp $"); 66 67 #ifdef _KERNEL_OPT 68 #include "opt_mbuftrace.h" 69 #include "opt_nmbclusters.h" 70 #include "opt_ddb.h" 71 #endif 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/atomic.h> 76 #include <sys/cpu.h> 77 #include <sys/proc.h> 78 #include <sys/mbuf.h> 79 #include <sys/kernel.h> 80 #include <sys/syslog.h> 81 #include <sys/domain.h> 82 #include <sys/protosw.h> 83 #include <sys/percpu.h> 84 #include <sys/pool.h> 85 #include <sys/socket.h> 86 #include <sys/sysctl.h> 87 88 #include <net/if.h> 89 90 pool_cache_t mb_cache; /* mbuf cache */ 91 pool_cache_t mcl_cache; /* mbuf cluster cache */ 92 93 struct mbstat mbstat; 94 int max_linkhdr; 95 int max_protohdr; 96 int max_hdr; 97 int max_datalen; 98 99 static void mb_drain(void *, int); 100 static int mb_ctor(void *, void *, int); 101 102 static void sysctl_kern_mbuf_setup(void); 103 104 static struct sysctllog *mbuf_sysctllog; 105 106 static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool); 107 static struct mbuf *m_split_internal(struct mbuf *, int, int, bool); 108 static int m_copyback_internal(struct mbuf **, int, int, const void *, 109 int, int); 110 111 /* Flags for m_copyback_internal. */ 112 #define CB_COPYBACK 0x0001 /* copyback from cp */ 113 #define CB_PRESERVE 0x0002 /* preserve original data */ 114 #define CB_COW 0x0004 /* do copy-on-write */ 115 #define CB_EXTEND 0x0008 /* extend chain */ 116 117 static const char mclpool_warnmsg[] = 118 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; 119 120 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 121 122 static percpu_t *mbstat_percpu; 123 124 #ifdef MBUFTRACE 125 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 126 struct mowner unknown_mowners[] = { 127 MOWNER_INIT("unknown", "free"), 128 MOWNER_INIT("unknown", "data"), 129 MOWNER_INIT("unknown", "header"), 130 MOWNER_INIT("unknown", "soname"), 131 MOWNER_INIT("unknown", "soopts"), 132 MOWNER_INIT("unknown", "ftable"), 133 MOWNER_INIT("unknown", "control"), 134 MOWNER_INIT("unknown", "oobdata"), 135 }; 136 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 137 #endif 138 139 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 140 141 #define MCLADDREFERENCE(o, n) \ 142 do { \ 143 KASSERT(((o)->m_flags & M_EXT) != 0); \ 144 KASSERT(((n)->m_flags & M_EXT) == 0); \ 145 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 146 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 147 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 148 (n)->m_ext_ref = (o)->m_ext_ref; \ 149 mowner_ref((n), (n)->m_flags); \ 150 } while (/* CONSTCOND */ 0) 151 152 static int 153 nmbclusters_limit(void) 154 { 155 #if defined(PMAP_MAP_POOLPAGE) 156 /* direct mapping, doesn't use space in kmem_arena */ 157 vsize_t max_size = physmem / 4; 158 #else 159 vsize_t max_size = MIN(physmem / 4, nkmempages / 4); 160 #endif 161 162 max_size = max_size * PAGE_SIZE / MCLBYTES; 163 #ifdef NMBCLUSTERS_MAX 164 max_size = MIN(max_size, NMBCLUSTERS_MAX); 165 #endif 166 167 #ifdef NMBCLUSTERS 168 return MIN(max_size, NMBCLUSTERS); 169 #else 170 return max_size; 171 #endif 172 } 173 174 /* 175 * Initialize the mbuf allocator. 176 */ 177 void 178 mbinit(void) 179 { 180 181 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 182 CTASSERT(sizeof(struct mbuf) == MSIZE); 183 184 sysctl_kern_mbuf_setup(); 185 186 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 187 NULL, IPL_VM, mb_ctor, NULL, NULL); 188 KASSERT(mb_cache != NULL); 189 190 mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL, 191 IPL_VM, NULL, NULL, NULL); 192 KASSERT(mcl_cache != NULL); 193 194 pool_cache_set_drain_hook(mb_cache, mb_drain, NULL); 195 pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL); 196 197 /* 198 * Set an arbitrary default limit on the number of mbuf clusters. 199 */ 200 #ifdef NMBCLUSTERS 201 nmbclusters = nmbclusters_limit(); 202 #else 203 nmbclusters = MAX(1024, 204 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 205 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 206 #endif 207 208 /* 209 * Set the hard limit on the mclpool to the number of 210 * mbuf clusters the kernel is to support. Log the limit 211 * reached message max once a minute. 212 */ 213 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 214 215 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 216 217 /* 218 * Set a low water mark for both mbufs and clusters. This should 219 * help ensure that they can be allocated in a memory starvation 220 * situation. This is important for e.g. diskless systems which 221 * must allocate mbufs in order for the pagedaemon to clean pages. 222 */ 223 pool_cache_setlowat(mb_cache, mblowat); 224 pool_cache_setlowat(mcl_cache, mcllowat); 225 226 #ifdef MBUFTRACE 227 { 228 /* 229 * Attach the unknown mowners. 230 */ 231 int i; 232 MOWNER_ATTACH(&revoked_mowner); 233 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 234 i-- > 0; ) 235 MOWNER_ATTACH(&unknown_mowners[i]); 236 } 237 #endif 238 } 239 240 static void 241 mb_drain(void *arg, int flags) 242 { 243 struct domain *dp; 244 const struct protosw *pr; 245 struct ifnet *ifp; 246 int s; 247 248 KERNEL_LOCK(1, NULL); 249 s = splvm(); 250 DOMAIN_FOREACH(dp) { 251 for (pr = dp->dom_protosw; 252 pr < dp->dom_protoswNPROTOSW; pr++) 253 if (pr->pr_drain) 254 (*pr->pr_drain)(); 255 } 256 /* XXX we cannot use psref in H/W interrupt */ 257 if (!cpu_intr_p()) { 258 int bound = curlwp_bind(); 259 IFNET_READER_FOREACH(ifp) { 260 struct psref psref; 261 262 if_acquire(ifp, &psref); 263 264 if (ifp->if_drain) 265 (*ifp->if_drain)(ifp); 266 267 if_release(ifp, &psref); 268 } 269 curlwp_bindx(bound); 270 } 271 splx(s); 272 mbstat.m_drain++; 273 KERNEL_UNLOCK_ONE(NULL); 274 } 275 276 /* 277 * sysctl helper routine for the kern.mbuf subtree. 278 * nmbclusters, mblowat and mcllowat need range 279 * checking and pool tweaking after being reset. 280 */ 281 static int 282 sysctl_kern_mbuf(SYSCTLFN_ARGS) 283 { 284 int error, newval; 285 struct sysctlnode node; 286 287 node = *rnode; 288 node.sysctl_data = &newval; 289 switch (rnode->sysctl_num) { 290 case MBUF_NMBCLUSTERS: 291 case MBUF_MBLOWAT: 292 case MBUF_MCLLOWAT: 293 newval = *(int*)rnode->sysctl_data; 294 break; 295 default: 296 return EOPNOTSUPP; 297 } 298 299 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 300 if (error || newp == NULL) 301 return error; 302 if (newval < 0) 303 return EINVAL; 304 305 switch (node.sysctl_num) { 306 case MBUF_NMBCLUSTERS: 307 if (newval < nmbclusters) 308 return EINVAL; 309 if (newval > nmbclusters_limit()) 310 return EINVAL; 311 nmbclusters = newval; 312 pool_cache_sethardlimit(mcl_cache, nmbclusters, 313 mclpool_warnmsg, 60); 314 break; 315 case MBUF_MBLOWAT: 316 mblowat = newval; 317 pool_cache_setlowat(mb_cache, mblowat); 318 break; 319 case MBUF_MCLLOWAT: 320 mcllowat = newval; 321 pool_cache_setlowat(mcl_cache, mcllowat); 322 break; 323 } 324 325 return 0; 326 } 327 328 #ifdef MBUFTRACE 329 static void 330 mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 331 { 332 struct mowner_counter *mc = v1; 333 struct mowner_user *mo_user = v2; 334 int i; 335 336 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 337 mo_user->mo_counter[i] += mc->mc_counter[i]; 338 } 339 } 340 341 static void 342 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 343 { 344 345 memset(mo_user, 0, sizeof(*mo_user)); 346 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 347 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 348 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 349 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 350 percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user); 351 } 352 353 static int 354 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 355 { 356 struct mowner *mo; 357 size_t len = 0; 358 int error = 0; 359 360 if (namelen != 0) 361 return EINVAL; 362 if (newp != NULL) 363 return EPERM; 364 365 LIST_FOREACH(mo, &mowners, mo_link) { 366 struct mowner_user mo_user; 367 368 mowner_convert_to_user(mo, &mo_user); 369 370 if (oldp != NULL) { 371 if (*oldlenp - len < sizeof(mo_user)) { 372 error = ENOMEM; 373 break; 374 } 375 error = copyout(&mo_user, (char *)oldp + len, 376 sizeof(mo_user)); 377 if (error) 378 break; 379 } 380 len += sizeof(mo_user); 381 } 382 383 if (error == 0) 384 *oldlenp = len; 385 386 return error; 387 } 388 #endif /* MBUFTRACE */ 389 390 void 391 mbstat_type_add(int type, int diff) 392 { 393 struct mbstat_cpu *mb; 394 int s; 395 396 s = splvm(); 397 mb = percpu_getref(mbstat_percpu); 398 mb->m_mtypes[type] += diff; 399 percpu_putref(mbstat_percpu); 400 splx(s); 401 } 402 403 static void 404 mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 405 { 406 struct mbstat_cpu *mbsc = v1; 407 struct mbstat *mbs = v2; 408 int i; 409 410 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 411 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 412 } 413 } 414 415 static void 416 mbstat_convert_to_user(struct mbstat *mbs) 417 { 418 419 memset(mbs, 0, sizeof(*mbs)); 420 mbs->m_drain = mbstat.m_drain; 421 percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs); 422 } 423 424 static int 425 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 426 { 427 struct sysctlnode node; 428 struct mbstat mbs; 429 430 mbstat_convert_to_user(&mbs); 431 node = *rnode; 432 node.sysctl_data = &mbs; 433 node.sysctl_size = sizeof(mbs); 434 return sysctl_lookup(SYSCTLFN_CALL(&node)); 435 } 436 437 static void 438 sysctl_kern_mbuf_setup(void) 439 { 440 441 KASSERT(mbuf_sysctllog == NULL); 442 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 443 CTLFLAG_PERMANENT, 444 CTLTYPE_NODE, "mbuf", 445 SYSCTL_DESCR("mbuf control variables"), 446 NULL, 0, NULL, 0, 447 CTL_KERN, KERN_MBUF, CTL_EOL); 448 449 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 450 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 451 CTLTYPE_INT, "msize", 452 SYSCTL_DESCR("mbuf base size"), 453 NULL, msize, NULL, 0, 454 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 455 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 456 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 457 CTLTYPE_INT, "mclbytes", 458 SYSCTL_DESCR("mbuf cluster size"), 459 NULL, mclbytes, NULL, 0, 460 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 461 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 462 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 463 CTLTYPE_INT, "nmbclusters", 464 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 465 sysctl_kern_mbuf, 0, &nmbclusters, 0, 466 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 467 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 468 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 469 CTLTYPE_INT, "mblowat", 470 SYSCTL_DESCR("mbuf low water mark"), 471 sysctl_kern_mbuf, 0, &mblowat, 0, 472 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 473 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 474 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 475 CTLTYPE_INT, "mcllowat", 476 SYSCTL_DESCR("mbuf cluster low water mark"), 477 sysctl_kern_mbuf, 0, &mcllowat, 0, 478 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 479 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 480 CTLFLAG_PERMANENT, 481 CTLTYPE_STRUCT, "stats", 482 SYSCTL_DESCR("mbuf allocation statistics"), 483 sysctl_kern_mbuf_stats, 0, NULL, 0, 484 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 485 #ifdef MBUFTRACE 486 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 487 CTLFLAG_PERMANENT, 488 CTLTYPE_STRUCT, "mowners", 489 SYSCTL_DESCR("Information about mbuf owners"), 490 sysctl_kern_mbuf_mowners, 0, NULL, 0, 491 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 492 #endif 493 } 494 495 static int 496 mb_ctor(void *arg, void *object, int flags) 497 { 498 struct mbuf *m = object; 499 500 #ifdef POOL_VTOPHYS 501 m->m_paddr = POOL_VTOPHYS(m); 502 #else 503 m->m_paddr = M_PADDR_INVALID; 504 #endif 505 return 0; 506 } 507 508 /* 509 * Add mbuf to the end of a chain 510 */ 511 struct mbuf * 512 m_add(struct mbuf *c, struct mbuf *m) 513 { 514 struct mbuf *n; 515 516 if (c == NULL) 517 return m; 518 519 for (n = c; n->m_next != NULL; n = n->m_next) 520 continue; 521 n->m_next = m; 522 return c; 523 } 524 525 struct mbuf * 526 m_get(int how, int type) 527 { 528 struct mbuf *m; 529 530 KASSERT(type != MT_FREE); 531 532 m = pool_cache_get(mb_cache, 533 how == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT); 534 if (m == NULL) 535 return NULL; 536 537 mbstat_type_add(type, 1); 538 539 mowner_init(m, type); 540 m->m_ext_ref = m; /* default */ 541 m->m_type = type; 542 m->m_len = 0; 543 m->m_next = NULL; 544 m->m_nextpkt = NULL; /* default */ 545 m->m_data = m->m_dat; 546 m->m_flags = 0; /* default */ 547 548 return m; 549 } 550 551 struct mbuf * 552 m_gethdr(int how, int type) 553 { 554 struct mbuf *m; 555 556 m = m_get(how, type); 557 if (m == NULL) 558 return NULL; 559 560 m->m_data = m->m_pktdat; 561 m->m_flags = M_PKTHDR; 562 563 m_reset_rcvif(m); 564 m->m_pkthdr.len = 0; 565 m->m_pkthdr.csum_flags = 0; 566 m->m_pkthdr.csum_data = 0; 567 SLIST_INIT(&m->m_pkthdr.tags); 568 569 m->m_pkthdr.pattr_class = NULL; 570 m->m_pkthdr.pattr_af = AF_UNSPEC; 571 m->m_pkthdr.pattr_hdr = NULL; 572 573 return m; 574 } 575 576 void 577 m_clget(struct mbuf *m, int how) 578 { 579 m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache, 580 how == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT, 581 &m->m_ext_storage.ext_paddr); 582 583 if (m->m_ext_storage.ext_buf == NULL) 584 return; 585 586 MCLINITREFERENCE(m); 587 m->m_data = m->m_ext.ext_buf; 588 m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) | 589 M_EXT|M_EXT_CLUSTER|M_EXT_RW; 590 m->m_ext.ext_size = MCLBYTES; 591 m->m_ext.ext_free = NULL; 592 m->m_ext.ext_arg = NULL; 593 /* ext_paddr initialized above */ 594 595 mowner_ref(m, M_EXT|M_EXT_CLUSTER); 596 } 597 598 /* 599 * Utility function for M_PREPEND. Do *NOT* use it directly. 600 */ 601 struct mbuf * 602 m_prepend(struct mbuf *m, int len, int how) 603 { 604 struct mbuf *mn; 605 606 if (__predict_false(len > MHLEN)) { 607 panic("%s: len > MHLEN", __func__); 608 } 609 610 KASSERT(len != M_COPYALL); 611 mn = m_get(how, m->m_type); 612 if (mn == NULL) { 613 m_freem(m); 614 return NULL; 615 } 616 617 if (m->m_flags & M_PKTHDR) { 618 M_MOVE_PKTHDR(mn, m); 619 } else { 620 MCLAIM(mn, m->m_owner); 621 } 622 mn->m_next = m; 623 m = mn; 624 625 if (m->m_flags & M_PKTHDR) { 626 if (len < MHLEN) 627 MH_ALIGN(m, len); 628 } else { 629 if (len < MLEN) 630 M_ALIGN(m, len); 631 } 632 633 m->m_len = len; 634 return m; 635 } 636 637 struct mbuf * 638 m_copym(struct mbuf *m, int off, int len, int wait) 639 { 640 /* Shallow copy on M_EXT. */ 641 return m_copy_internal(m, off, len, wait, false); 642 } 643 644 struct mbuf * 645 m_dup(struct mbuf *m, int off, int len, int wait) 646 { 647 /* Deep copy. */ 648 return m_copy_internal(m, off, len, wait, true); 649 } 650 651 static inline int 652 m_copylen(int len, int copylen) 653 { 654 return (len == M_COPYALL) ? copylen : min(len, copylen); 655 } 656 657 static struct mbuf * 658 m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep) 659 { 660 struct mbuf *n, **np; 661 int off = off0; 662 struct mbuf *top; 663 int copyhdr = 0; 664 665 if (off < 0 || (len != M_COPYALL && len < 0)) 666 panic("%s: off %d, len %d", __func__, off, len); 667 if (off == 0 && m->m_flags & M_PKTHDR) 668 copyhdr = 1; 669 while (off > 0) { 670 if (m == NULL) 671 panic("%s: m == NULL, off %d", __func__, off); 672 if (off < m->m_len) 673 break; 674 off -= m->m_len; 675 m = m->m_next; 676 } 677 678 np = ⊤ 679 top = NULL; 680 while (len == M_COPYALL || len > 0) { 681 if (m == NULL) { 682 if (len != M_COPYALL) 683 panic("%s: m == NULL, len %d [!COPYALL]", 684 __func__, len); 685 break; 686 } 687 688 n = m_get(wait, m->m_type); 689 *np = n; 690 if (n == NULL) 691 goto nospace; 692 MCLAIM(n, m->m_owner); 693 694 if (copyhdr) { 695 M_COPY_PKTHDR(n, m); 696 if (len == M_COPYALL) 697 n->m_pkthdr.len -= off0; 698 else 699 n->m_pkthdr.len = len; 700 copyhdr = 0; 701 } 702 n->m_len = m_copylen(len, m->m_len - off); 703 704 if (m->m_flags & M_EXT) { 705 if (!deep) { 706 n->m_data = m->m_data + off; 707 MCLADDREFERENCE(m, n); 708 } else { 709 /* 710 * We don't care if MCLGET fails. n->m_len is 711 * recomputed and handles that. 712 */ 713 MCLGET(n, wait); 714 n->m_len = 0; 715 n->m_len = M_TRAILINGSPACE(n); 716 n->m_len = m_copylen(len, n->m_len); 717 n->m_len = min(n->m_len, m->m_len - off); 718 memcpy(mtod(n, void *), mtod(m, char *) + off, 719 (unsigned)n->m_len); 720 } 721 } else { 722 memcpy(mtod(n, void *), mtod(m, char *) + off, 723 (unsigned)n->m_len); 724 } 725 726 if (len != M_COPYALL) 727 len -= n->m_len; 728 off += n->m_len; 729 730 KASSERT(off <= m->m_len); 731 732 if (off == m->m_len) { 733 m = m->m_next; 734 off = 0; 735 } 736 np = &n->m_next; 737 } 738 739 return top; 740 741 nospace: 742 m_freem(top); 743 return NULL; 744 } 745 746 /* 747 * Copy an entire packet, including header (which must be present). 748 * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'. 749 */ 750 struct mbuf * 751 m_copypacket(struct mbuf *m, int how) 752 { 753 struct mbuf *top, *n, *o; 754 755 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 756 panic("%s: no header (m = %p)", __func__, m); 757 } 758 759 n = m_get(how, m->m_type); 760 top = n; 761 if (!n) 762 goto nospace; 763 764 MCLAIM(n, m->m_owner); 765 M_COPY_PKTHDR(n, m); 766 n->m_len = m->m_len; 767 if (m->m_flags & M_EXT) { 768 n->m_data = m->m_data; 769 MCLADDREFERENCE(m, n); 770 } else { 771 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 772 } 773 774 m = m->m_next; 775 while (m) { 776 o = m_get(how, m->m_type); 777 if (!o) 778 goto nospace; 779 780 MCLAIM(o, m->m_owner); 781 n->m_next = o; 782 n = n->m_next; 783 784 n->m_len = m->m_len; 785 if (m->m_flags & M_EXT) { 786 n->m_data = m->m_data; 787 MCLADDREFERENCE(m, n); 788 } else { 789 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 790 } 791 792 m = m->m_next; 793 } 794 return top; 795 796 nospace: 797 m_freem(top); 798 return NULL; 799 } 800 801 void 802 m_copydata(struct mbuf *m, int off, int len, void *cp) 803 { 804 unsigned int count; 805 struct mbuf *m0 = m; 806 int len0 = len; 807 int off0 = off; 808 void *cp0 = cp; 809 810 KASSERT(len != M_COPYALL); 811 if (off < 0 || len < 0) 812 panic("m_copydata: off %d, len %d", off, len); 813 while (off > 0) { 814 if (m == NULL) 815 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", 816 m0, len0, off0, cp0, off, off0 - off); 817 if (off < m->m_len) 818 break; 819 off -= m->m_len; 820 m = m->m_next; 821 } 822 while (len > 0) { 823 if (m == NULL) 824 panic("m_copydata(%p,%d,%d,%p): " 825 "m=NULL, off=%d (%d), len=%d (%d)", 826 m0, len0, off0, cp0, 827 off, off0 - off, len, len0 - len); 828 count = min(m->m_len - off, len); 829 memcpy(cp, mtod(m, char *) + off, count); 830 len -= count; 831 cp = (char *)cp + count; 832 off = 0; 833 m = m->m_next; 834 } 835 } 836 837 /* 838 * Concatenate mbuf chain n to m. 839 * n might be copied into m (when n->m_len is small), therefore data portion of 840 * n could be copied into an mbuf of different mbuf type. 841 * Any m_pkthdr is not updated. 842 */ 843 void 844 m_cat(struct mbuf *m, struct mbuf *n) 845 { 846 847 while (m->m_next) 848 m = m->m_next; 849 while (n) { 850 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 851 /* just join the two chains */ 852 m->m_next = n; 853 return; 854 } 855 /* splat the data from one into the other */ 856 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 857 (u_int)n->m_len); 858 m->m_len += n->m_len; 859 n = m_free(n); 860 } 861 } 862 863 void 864 m_adj(struct mbuf *mp, int req_len) 865 { 866 int len = req_len; 867 struct mbuf *m; 868 int count; 869 870 if ((m = mp) == NULL) 871 return; 872 if (len >= 0) { 873 /* 874 * Trim from head. 875 */ 876 while (m != NULL && len > 0) { 877 if (m->m_len <= len) { 878 len -= m->m_len; 879 m->m_len = 0; 880 m = m->m_next; 881 } else { 882 m->m_len -= len; 883 m->m_data += len; 884 len = 0; 885 } 886 } 887 if (mp->m_flags & M_PKTHDR) 888 mp->m_pkthdr.len -= (req_len - len); 889 } else { 890 /* 891 * Trim from tail. Scan the mbuf chain, 892 * calculating its length and finding the last mbuf. 893 * If the adjustment only affects this mbuf, then just 894 * adjust and return. Otherwise, rescan and truncate 895 * after the remaining size. 896 */ 897 len = -len; 898 count = 0; 899 for (;;) { 900 count += m->m_len; 901 if (m->m_next == NULL) 902 break; 903 m = m->m_next; 904 } 905 if (m->m_len >= len) { 906 m->m_len -= len; 907 if (mp->m_flags & M_PKTHDR) 908 mp->m_pkthdr.len -= len; 909 return; 910 } 911 912 count -= len; 913 if (count < 0) 914 count = 0; 915 916 /* 917 * Correct length for chain is "count". 918 * Find the mbuf with last data, adjust its length, 919 * and toss data from remaining mbufs on chain. 920 */ 921 m = mp; 922 if (m->m_flags & M_PKTHDR) 923 m->m_pkthdr.len = count; 924 for (; m; m = m->m_next) { 925 if (m->m_len >= count) { 926 m->m_len = count; 927 break; 928 } 929 count -= m->m_len; 930 } 931 if (m) { 932 while (m->m_next) 933 (m = m->m_next)->m_len = 0; 934 } 935 } 936 } 937 938 /* 939 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 940 * would be contiguous and in the data area of an mbuf (therefore, mtod() 941 * would work for a structure of given length). 942 * 943 * => On success, returns true and the resulting mbuf chain; false otherwise. 944 * => The mbuf chain may change, but is always preserved valid. 945 */ 946 bool 947 m_ensure_contig(struct mbuf **m0, int len) 948 { 949 struct mbuf *n = *m0, *m; 950 size_t count, space; 951 952 KASSERT(len != M_COPYALL); 953 /* 954 * If first mbuf has no cluster, and has room for len bytes 955 * without shifting current data, pullup into it, 956 * otherwise allocate a new mbuf to prepend to the chain. 957 */ 958 if ((n->m_flags & M_EXT) == 0 && 959 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 960 if (n->m_len >= len) { 961 return true; 962 } 963 m = n; 964 n = n->m_next; 965 len -= m->m_len; 966 } else { 967 if (len > MHLEN) { 968 return false; 969 } 970 m = m_get(M_DONTWAIT, n->m_type); 971 if (m == NULL) { 972 return false; 973 } 974 MCLAIM(m, n->m_owner); 975 if (n->m_flags & M_PKTHDR) { 976 M_MOVE_PKTHDR(m, n); 977 } 978 } 979 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 980 do { 981 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 982 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 983 (unsigned)count); 984 len -= count; 985 m->m_len += count; 986 n->m_len -= count; 987 space -= count; 988 if (n->m_len) 989 n->m_data += count; 990 else 991 n = m_free(n); 992 } while (len > 0 && n); 993 994 m->m_next = n; 995 *m0 = m; 996 997 return len <= 0; 998 } 999 1000 /* 1001 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 1002 */ 1003 struct mbuf * 1004 m_pullup(struct mbuf *n, int len) 1005 { 1006 struct mbuf *m = n; 1007 1008 KASSERT(len != M_COPYALL); 1009 if (!m_ensure_contig(&m, len)) { 1010 KASSERT(m != NULL); 1011 m_freem(m); 1012 m = NULL; 1013 } 1014 return m; 1015 } 1016 1017 /* 1018 * Like m_pullup(), except a new mbuf is always allocated, and we allow 1019 * the amount of empty space before the data in the new mbuf to be specified 1020 * (in the event that the caller expects to prepend later). 1021 */ 1022 struct mbuf * 1023 m_copyup(struct mbuf *n, int len, int dstoff) 1024 { 1025 struct mbuf *m; 1026 int count, space; 1027 1028 KASSERT(len != M_COPYALL); 1029 if (len > ((int)MHLEN - dstoff)) 1030 goto bad; 1031 m = m_get(M_DONTWAIT, n->m_type); 1032 if (m == NULL) 1033 goto bad; 1034 MCLAIM(m, n->m_owner); 1035 if (n->m_flags & M_PKTHDR) { 1036 M_MOVE_PKTHDR(m, n); 1037 } 1038 m->m_data += dstoff; 1039 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1040 do { 1041 count = min(min(max(len, max_protohdr), space), n->m_len); 1042 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1043 (unsigned)count); 1044 len -= count; 1045 m->m_len += count; 1046 n->m_len -= count; 1047 space -= count; 1048 if (n->m_len) 1049 n->m_data += count; 1050 else 1051 n = m_free(n); 1052 } while (len > 0 && n); 1053 if (len > 0) { 1054 (void) m_free(m); 1055 goto bad; 1056 } 1057 m->m_next = n; 1058 return m; 1059 bad: 1060 m_freem(n); 1061 return NULL; 1062 } 1063 1064 struct mbuf * 1065 m_split(struct mbuf *m0, int len, int wait) 1066 { 1067 return m_split_internal(m0, len, wait, true); 1068 } 1069 1070 static struct mbuf * 1071 m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr) 1072 { 1073 struct mbuf *m, *n; 1074 unsigned len = len0, remain, len_save; 1075 1076 KASSERT(len0 != M_COPYALL); 1077 for (m = m0; m && len > m->m_len; m = m->m_next) 1078 len -= m->m_len; 1079 if (m == NULL) 1080 return NULL; 1081 1082 remain = m->m_len - len; 1083 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1084 n = m_gethdr(wait, m0->m_type); 1085 if (n == NULL) 1086 return NULL; 1087 1088 MCLAIM(n, m0->m_owner); 1089 m_copy_rcvif(n, m0); 1090 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1091 len_save = m0->m_pkthdr.len; 1092 m0->m_pkthdr.len = len0; 1093 1094 if (m->m_flags & M_EXT) 1095 goto extpacket; 1096 1097 if (remain > MHLEN) { 1098 /* m can't be the lead packet */ 1099 MH_ALIGN(n, 0); 1100 n->m_len = 0; 1101 n->m_next = m_split(m, len, wait); 1102 if (n->m_next == NULL) { 1103 (void)m_free(n); 1104 m0->m_pkthdr.len = len_save; 1105 return NULL; 1106 } 1107 return n; 1108 } else { 1109 MH_ALIGN(n, remain); 1110 } 1111 } else if (remain == 0) { 1112 n = m->m_next; 1113 m->m_next = NULL; 1114 return n; 1115 } else { 1116 n = m_get(wait, m->m_type); 1117 if (n == NULL) 1118 return NULL; 1119 MCLAIM(n, m->m_owner); 1120 M_ALIGN(n, remain); 1121 } 1122 1123 extpacket: 1124 if (m->m_flags & M_EXT) { 1125 n->m_data = m->m_data + len; 1126 MCLADDREFERENCE(m, n); 1127 } else { 1128 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1129 } 1130 1131 n->m_len = remain; 1132 m->m_len = len; 1133 n->m_next = m->m_next; 1134 m->m_next = NULL; 1135 return n; 1136 } 1137 1138 /* 1139 * Routine to copy from device local memory into mbufs. 1140 */ 1141 struct mbuf * 1142 m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, 1143 void (*copy)(const void *from, void *to, size_t len)) 1144 { 1145 struct mbuf *m; 1146 struct mbuf *top = NULL, **mp = ⊤ 1147 int off = off0, len; 1148 char *cp, *epkt; 1149 1150 cp = buf; 1151 epkt = cp + totlen; 1152 if (off) { 1153 /* 1154 * If 'off' is non-zero, packet is trailer-encapsulated, 1155 * so we have to skip the type and length fields. 1156 */ 1157 cp += off + 2 * sizeof(uint16_t); 1158 totlen -= 2 * sizeof(uint16_t); 1159 } 1160 1161 m = m_gethdr(M_DONTWAIT, MT_DATA); 1162 if (m == NULL) 1163 return NULL; 1164 m_set_rcvif(m, ifp); 1165 m->m_pkthdr.len = totlen; 1166 m->m_len = MHLEN; 1167 1168 while (totlen > 0) { 1169 if (top) { 1170 m = m_get(M_DONTWAIT, MT_DATA); 1171 if (m == NULL) { 1172 m_freem(top); 1173 return NULL; 1174 } 1175 m->m_len = MLEN; 1176 } 1177 1178 len = min(totlen, epkt - cp); 1179 1180 if (len >= MINCLSIZE) { 1181 MCLGET(m, M_DONTWAIT); 1182 if ((m->m_flags & M_EXT) == 0) { 1183 m_free(m); 1184 m_freem(top); 1185 return NULL; 1186 } 1187 m->m_len = len = min(len, MCLBYTES); 1188 } else { 1189 /* 1190 * Place initial small packet/header at end of mbuf. 1191 */ 1192 if (len < m->m_len) { 1193 if (top == 0 && len + max_linkhdr <= m->m_len) 1194 m->m_data += max_linkhdr; 1195 m->m_len = len; 1196 } else 1197 len = m->m_len; 1198 } 1199 1200 if (copy) 1201 copy(cp, mtod(m, void *), (size_t)len); 1202 else 1203 memcpy(mtod(m, void *), cp, (size_t)len); 1204 1205 cp += len; 1206 *mp = m; 1207 mp = &m->m_next; 1208 totlen -= len; 1209 if (cp == epkt) 1210 cp = buf; 1211 } 1212 1213 return top; 1214 } 1215 1216 /* 1217 * Copy data from a buffer back into the indicated mbuf chain, 1218 * starting "off" bytes from the beginning, extending the mbuf 1219 * chain if necessary. 1220 */ 1221 void 1222 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1223 { 1224 #if defined(DEBUG) 1225 struct mbuf *origm = m0; 1226 int error; 1227 #endif 1228 1229 if (m0 == NULL) 1230 return; 1231 1232 #if defined(DEBUG) 1233 error = 1234 #endif 1235 m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND, 1236 M_DONTWAIT); 1237 1238 #if defined(DEBUG) 1239 if (error != 0 || (m0 != NULL && origm != m0)) 1240 panic("m_copyback"); 1241 #endif 1242 } 1243 1244 struct mbuf * 1245 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1246 { 1247 int error; 1248 1249 /* don't support chain expansion */ 1250 KASSERT(len != M_COPYALL); 1251 KDASSERT(off + len <= m_length(m0)); 1252 1253 error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW, 1254 how); 1255 if (error) { 1256 /* 1257 * no way to recover from partial success. 1258 * just free the chain. 1259 */ 1260 m_freem(m0); 1261 return NULL; 1262 } 1263 return m0; 1264 } 1265 1266 int 1267 m_makewritable(struct mbuf **mp, int off, int len, int how) 1268 { 1269 int error; 1270 #if defined(DEBUG) 1271 int origlen = m_length(*mp); 1272 #endif 1273 1274 error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW, 1275 how); 1276 if (error) 1277 return error; 1278 1279 #if defined(DEBUG) 1280 int reslen = 0; 1281 for (struct mbuf *n = *mp; n; n = n->m_next) 1282 reslen += n->m_len; 1283 if (origlen != reslen) 1284 panic("m_makewritable: length changed"); 1285 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1286 panic("m_makewritable: inconsist"); 1287 #endif 1288 1289 return 0; 1290 } 1291 1292 static int 1293 m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp, 1294 int flags, int how) 1295 { 1296 int mlen; 1297 struct mbuf *m, *n; 1298 struct mbuf **mp; 1299 int totlen = 0; 1300 const char *cp = vp; 1301 1302 KASSERT(mp0 != NULL); 1303 KASSERT(*mp0 != NULL); 1304 KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL); 1305 KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL); 1306 1307 if (len == M_COPYALL) 1308 len = m_length(*mp0) - off; 1309 1310 /* 1311 * we don't bother to update "totlen" in the case of CB_COW, 1312 * assuming that CB_EXTEND and CB_COW are exclusive. 1313 */ 1314 1315 KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0); 1316 1317 mp = mp0; 1318 m = *mp; 1319 while (off > (mlen = m->m_len)) { 1320 off -= mlen; 1321 totlen += mlen; 1322 if (m->m_next == NULL) { 1323 int tspace; 1324 extend: 1325 if ((flags & CB_EXTEND) == 0) 1326 goto out; 1327 1328 /* 1329 * try to make some space at the end of "m". 1330 */ 1331 1332 mlen = m->m_len; 1333 if (off + len >= MINCLSIZE && 1334 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1335 MCLGET(m, how); 1336 } 1337 tspace = M_TRAILINGSPACE(m); 1338 if (tspace > 0) { 1339 tspace = min(tspace, off + len); 1340 KASSERT(tspace > 0); 1341 memset(mtod(m, char *) + m->m_len, 0, 1342 min(off, tspace)); 1343 m->m_len += tspace; 1344 off += mlen; 1345 totlen -= mlen; 1346 continue; 1347 } 1348 1349 /* 1350 * need to allocate an mbuf. 1351 */ 1352 1353 if (off + len >= MINCLSIZE) { 1354 n = m_getcl(how, m->m_type, 0); 1355 } else { 1356 n = m_get(how, m->m_type); 1357 } 1358 if (n == NULL) { 1359 goto out; 1360 } 1361 n->m_len = min(M_TRAILINGSPACE(n), off + len); 1362 memset(mtod(n, char *), 0, min(n->m_len, off)); 1363 m->m_next = n; 1364 } 1365 mp = &m->m_next; 1366 m = m->m_next; 1367 } 1368 while (len > 0) { 1369 mlen = m->m_len - off; 1370 if (mlen != 0 && M_READONLY(m)) { 1371 /* 1372 * This mbuf is read-only. Allocate a new writable 1373 * mbuf and try again. 1374 */ 1375 char *datap; 1376 int eatlen; 1377 1378 KASSERT((flags & CB_COW) != 0); 1379 1380 /* 1381 * if we're going to write into the middle of 1382 * a mbuf, split it first. 1383 */ 1384 if (off > 0) { 1385 n = m_split_internal(m, off, how, false); 1386 if (n == NULL) 1387 goto enobufs; 1388 m->m_next = n; 1389 mp = &m->m_next; 1390 m = n; 1391 off = 0; 1392 continue; 1393 } 1394 1395 /* 1396 * XXX TODO coalesce into the trailingspace of 1397 * the previous mbuf when possible. 1398 */ 1399 1400 /* 1401 * allocate a new mbuf. copy packet header if needed. 1402 */ 1403 n = m_get(how, m->m_type); 1404 if (n == NULL) 1405 goto enobufs; 1406 MCLAIM(n, m->m_owner); 1407 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1408 M_MOVE_PKTHDR(n, m); 1409 n->m_len = MHLEN; 1410 } else { 1411 if (len >= MINCLSIZE) 1412 MCLGET(n, M_DONTWAIT); 1413 n->m_len = 1414 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1415 } 1416 if (n->m_len > len) 1417 n->m_len = len; 1418 1419 /* 1420 * free the region which has been overwritten. 1421 * copying data from old mbufs if requested. 1422 */ 1423 if (flags & CB_PRESERVE) 1424 datap = mtod(n, char *); 1425 else 1426 datap = NULL; 1427 eatlen = n->m_len; 1428 while (m != NULL && M_READONLY(m) && 1429 n->m_type == m->m_type && eatlen > 0) { 1430 mlen = min(eatlen, m->m_len); 1431 if (datap) { 1432 m_copydata(m, 0, mlen, datap); 1433 datap += mlen; 1434 } 1435 m->m_data += mlen; 1436 m->m_len -= mlen; 1437 eatlen -= mlen; 1438 if (m->m_len == 0) 1439 *mp = m = m_free(m); 1440 } 1441 if (eatlen > 0) 1442 n->m_len -= eatlen; 1443 n->m_next = m; 1444 *mp = m = n; 1445 continue; 1446 } 1447 mlen = min(mlen, len); 1448 if (flags & CB_COPYBACK) { 1449 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1450 cp += mlen; 1451 } 1452 len -= mlen; 1453 mlen += off; 1454 off = 0; 1455 totlen += mlen; 1456 if (len == 0) 1457 break; 1458 if (m->m_next == NULL) { 1459 goto extend; 1460 } 1461 mp = &m->m_next; 1462 m = m->m_next; 1463 } 1464 1465 out: 1466 if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1467 KASSERT((flags & CB_EXTEND) != 0); 1468 m->m_pkthdr.len = totlen; 1469 } 1470 1471 return 0; 1472 1473 enobufs: 1474 return ENOBUFS; 1475 } 1476 1477 /* 1478 * Compress the mbuf chain. Return the new mbuf chain on success, NULL on 1479 * failure. The first mbuf is preserved, and on success the pointer returned 1480 * is the same as the one passed. 1481 */ 1482 struct mbuf * 1483 m_defrag(struct mbuf *m, int how) 1484 { 1485 struct mbuf *m0, *mn, *n; 1486 int sz; 1487 1488 KASSERT((m->m_flags & M_PKTHDR) != 0); 1489 1490 if (m->m_next == NULL) 1491 return m; 1492 1493 m0 = m_get(how, MT_DATA); 1494 if (m0 == NULL) 1495 return NULL; 1496 mn = m0; 1497 1498 sz = m->m_pkthdr.len - m->m_len; 1499 KASSERT(sz >= 0); 1500 1501 do { 1502 if (sz > MLEN) { 1503 MCLGET(mn, how); 1504 if ((mn->m_flags & M_EXT) == 0) { 1505 m_freem(m0); 1506 return NULL; 1507 } 1508 } 1509 1510 mn->m_len = MIN(sz, MCLBYTES); 1511 1512 m_copydata(m, m->m_pkthdr.len - sz, mn->m_len, 1513 mtod(mn, void *)); 1514 1515 sz -= mn->m_len; 1516 1517 if (sz > 0) { 1518 /* need more mbufs */ 1519 n = m_get(how, MT_DATA); 1520 if (n == NULL) { 1521 m_freem(m0); 1522 return NULL; 1523 } 1524 1525 mn->m_next = n; 1526 mn = n; 1527 } 1528 } while (sz > 0); 1529 1530 m_freem(m->m_next); 1531 m->m_next = m0; 1532 1533 return m; 1534 } 1535 1536 void 1537 m_remove_pkthdr(struct mbuf *m) 1538 { 1539 KASSERT(m->m_flags & M_PKTHDR); 1540 1541 m_tag_delete_chain(m, NULL); 1542 m->m_flags &= ~M_PKTHDR; 1543 memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); 1544 } 1545 1546 void 1547 m_copy_pkthdr(struct mbuf *to, struct mbuf *from) 1548 { 1549 KASSERT((from->m_flags & M_PKTHDR) != 0); 1550 1551 to->m_pkthdr = from->m_pkthdr; 1552 to->m_flags = from->m_flags & M_COPYFLAGS; 1553 SLIST_INIT(&to->m_pkthdr.tags); 1554 m_tag_copy_chain(to, from); 1555 to->m_data = to->m_pktdat; 1556 } 1557 1558 void 1559 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1560 { 1561 1562 KASSERT((to->m_flags & M_EXT) == 0); 1563 KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL); 1564 KASSERT((from->m_flags & M_PKTHDR) != 0); 1565 1566 to->m_pkthdr = from->m_pkthdr; 1567 to->m_flags = from->m_flags & M_COPYFLAGS; 1568 to->m_data = to->m_pktdat; 1569 1570 from->m_flags &= ~M_PKTHDR; 1571 } 1572 1573 /* 1574 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1575 * beginning, continuing for "len" bytes. 1576 */ 1577 int 1578 m_apply(struct mbuf *m, int off, int len, 1579 int (*f)(void *, void *, unsigned int), void *arg) 1580 { 1581 unsigned int count; 1582 int rval; 1583 1584 KASSERT(len != M_COPYALL); 1585 KASSERT(len >= 0); 1586 KASSERT(off >= 0); 1587 1588 while (off > 0) { 1589 KASSERT(m != NULL); 1590 if (off < m->m_len) 1591 break; 1592 off -= m->m_len; 1593 m = m->m_next; 1594 } 1595 while (len > 0) { 1596 KASSERT(m != NULL); 1597 count = min(m->m_len - off, len); 1598 1599 rval = (*f)(arg, mtod(m, char *) + off, count); 1600 if (rval) 1601 return rval; 1602 1603 len -= count; 1604 off = 0; 1605 m = m->m_next; 1606 } 1607 1608 return 0; 1609 } 1610 1611 /* 1612 * Return a pointer to mbuf/offset of location in mbuf chain. 1613 */ 1614 struct mbuf * 1615 m_getptr(struct mbuf *m, int loc, int *off) 1616 { 1617 1618 while (loc >= 0) { 1619 /* Normal end of search */ 1620 if (m->m_len > loc) { 1621 *off = loc; 1622 return m; 1623 } 1624 1625 loc -= m->m_len; 1626 1627 if (m->m_next == NULL) { 1628 if (loc == 0) { 1629 /* Point at the end of valid data */ 1630 *off = m->m_len; 1631 return m; 1632 } 1633 return NULL; 1634 } else { 1635 m = m->m_next; 1636 } 1637 } 1638 1639 return NULL; 1640 } 1641 1642 #if defined(DDB) 1643 void 1644 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 1645 { 1646 char ch; 1647 bool opt_c = false; 1648 char buf[512]; 1649 1650 while ((ch = *(modif++)) != '\0') { 1651 switch (ch) { 1652 case 'c': 1653 opt_c = true; 1654 break; 1655 } 1656 } 1657 1658 nextchain: 1659 (*pr)("MBUF %p\n", m); 1660 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 1661 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 1662 m->m_data, m->m_len, m->m_type, buf); 1663 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 1664 m->m_nextpkt); 1665 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 1666 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 1667 (int)M_READONLY(m)); 1668 if ((m->m_flags & M_PKTHDR) != 0) { 1669 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 1670 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%" 1671 PRIx32 ", segsz=%u\n", 1672 m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m), 1673 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 1674 } 1675 if ((m->m_flags & M_EXT)) { 1676 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 1677 "ext_free=%p, ext_arg=%p\n", 1678 m->m_ext.ext_refcnt, 1679 m->m_ext.ext_buf, m->m_ext.ext_size, 1680 m->m_ext.ext_free, m->m_ext.ext_arg); 1681 } 1682 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 1683 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 1684 vaddr_t eva = sva + m->m_ext.ext_size; 1685 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 1686 int i; 1687 1688 (*pr)(" pages:"); 1689 for (i = 0; i < n; i ++) { 1690 (*pr)(" %p", m->m_ext.ext_pgs[i]); 1691 } 1692 (*pr)("\n"); 1693 } 1694 1695 if (opt_c) { 1696 m = m->m_next; 1697 if (m != NULL) { 1698 goto nextchain; 1699 } 1700 } 1701 } 1702 #endif /* defined(DDB) */ 1703 1704 #if defined(MBUFTRACE) 1705 void 1706 mowner_attach(struct mowner *mo) 1707 { 1708 1709 KASSERT(mo->mo_counters == NULL); 1710 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 1711 1712 /* XXX lock */ 1713 LIST_INSERT_HEAD(&mowners, mo, mo_link); 1714 } 1715 1716 void 1717 mowner_detach(struct mowner *mo) 1718 { 1719 1720 KASSERT(mo->mo_counters != NULL); 1721 1722 /* XXX lock */ 1723 LIST_REMOVE(mo, mo_link); 1724 1725 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 1726 mo->mo_counters = NULL; 1727 } 1728 1729 void 1730 mowner_init(struct mbuf *m, int type) 1731 { 1732 struct mowner_counter *mc; 1733 struct mowner *mo; 1734 int s; 1735 1736 m->m_owner = mo = &unknown_mowners[type]; 1737 s = splvm(); 1738 mc = percpu_getref(mo->mo_counters); 1739 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1740 percpu_putref(mo->mo_counters); 1741 splx(s); 1742 } 1743 1744 void 1745 mowner_ref(struct mbuf *m, int flags) 1746 { 1747 struct mowner *mo = m->m_owner; 1748 struct mowner_counter *mc; 1749 int s; 1750 1751 s = splvm(); 1752 mc = percpu_getref(mo->mo_counters); 1753 if ((flags & M_EXT) != 0) 1754 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1755 if ((flags & M_EXT_CLUSTER) != 0) 1756 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1757 percpu_putref(mo->mo_counters); 1758 splx(s); 1759 } 1760 1761 void 1762 mowner_revoke(struct mbuf *m, bool all, int flags) 1763 { 1764 struct mowner *mo = m->m_owner; 1765 struct mowner_counter *mc; 1766 int s; 1767 1768 s = splvm(); 1769 mc = percpu_getref(mo->mo_counters); 1770 if ((flags & M_EXT) != 0) 1771 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 1772 if ((flags & M_EXT_CLUSTER) != 0) 1773 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 1774 if (all) 1775 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 1776 percpu_putref(mo->mo_counters); 1777 splx(s); 1778 if (all) 1779 m->m_owner = &revoked_mowner; 1780 } 1781 1782 static void 1783 mowner_claim(struct mbuf *m, struct mowner *mo) 1784 { 1785 struct mowner_counter *mc; 1786 int flags = m->m_flags; 1787 int s; 1788 1789 s = splvm(); 1790 mc = percpu_getref(mo->mo_counters); 1791 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 1792 if ((flags & M_EXT) != 0) 1793 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 1794 if ((flags & M_EXT_CLUSTER) != 0) 1795 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 1796 percpu_putref(mo->mo_counters); 1797 splx(s); 1798 m->m_owner = mo; 1799 } 1800 1801 void 1802 m_claim(struct mbuf *m, struct mowner *mo) 1803 { 1804 1805 if (m->m_owner == mo || mo == NULL) 1806 return; 1807 1808 mowner_revoke(m, true, m->m_flags); 1809 mowner_claim(m, mo); 1810 } 1811 1812 void 1813 m_claimm(struct mbuf *m, struct mowner *mo) 1814 { 1815 1816 for (; m != NULL; m = m->m_next) 1817 m_claim(m, mo); 1818 } 1819 #endif /* defined(MBUFTRACE) */ 1820 1821 #ifdef DIAGNOSTIC 1822 /* 1823 * Verify that the mbuf chain is not malformed. Used only for diagnostic. 1824 * Panics on error. 1825 */ 1826 void 1827 m_verify_packet(struct mbuf *m) 1828 { 1829 struct mbuf *n = m; 1830 char *low, *high, *dat; 1831 int totlen = 0, len; 1832 1833 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 1834 panic("%s: mbuf doesn't have M_PKTHDR", __func__); 1835 } 1836 1837 while (n != NULL) { 1838 if (__predict_false(n->m_type == MT_FREE)) { 1839 panic("%s: mbuf already freed (n = %p)", __func__, n); 1840 } 1841 #if 0 1842 /* 1843 * This ought to be a rule of the mbuf API. Unfortunately, 1844 * many places don't respect that rule. 1845 */ 1846 if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) { 1847 panic("%s: M_PKTHDR set on secondary mbuf", __func__); 1848 } 1849 #endif 1850 if (__predict_false(n->m_nextpkt != NULL)) { 1851 panic("%s: m_nextpkt not null (m_nextpkt = %p)", 1852 __func__, n->m_nextpkt); 1853 } 1854 1855 dat = n->m_data; 1856 len = n->m_len; 1857 1858 if (n->m_flags & M_EXT) { 1859 low = n->m_ext.ext_buf; 1860 high = low + n->m_ext.ext_size; 1861 } else if (n->m_flags & M_PKTHDR) { 1862 low = n->m_pktdat; 1863 high = low + MHLEN; 1864 } else { 1865 low = n->m_dat; 1866 high = low + MLEN; 1867 } 1868 if (__predict_false(dat + len < dat)) { 1869 panic("%s: incorrect length (len = %d)", __func__, len); 1870 } 1871 if (__predict_false((dat < low) || (dat + len > high))) { 1872 panic("%s: m_data not in packet" 1873 "(dat = %p, len = %d, low = %p, high = %p)", 1874 __func__, dat, len, low, high); 1875 } 1876 1877 totlen += len; 1878 n = n->m_next; 1879 } 1880 1881 if (__predict_false(totlen != m->m_pkthdr.len)) { 1882 panic("%s: inconsistent mbuf length (%d != %d)", __func__, 1883 totlen, m->m_pkthdr.len); 1884 } 1885 } 1886 #endif 1887 1888 /* 1889 * Release a reference to the mbuf external storage. 1890 * 1891 * => free the mbuf m itself as well. 1892 */ 1893 static void 1894 m_ext_free(struct mbuf *m) 1895 { 1896 const bool embedded = MEXT_ISEMBEDDED(m); 1897 bool dofree = true; 1898 u_int refcnt; 1899 1900 KASSERT((m->m_flags & M_EXT) != 0); 1901 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1902 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1903 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1904 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1905 1906 if (__predict_false(m->m_type == MT_FREE)) { 1907 panic("mbuf %p already freed", m); 1908 } 1909 1910 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1911 refcnt = m->m_ext.ext_refcnt = 0; 1912 } else { 1913 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1914 } 1915 1916 if (refcnt > 0) { 1917 if (embedded) { 1918 /* 1919 * other mbuf's m_ext_ref still points to us. 1920 */ 1921 dofree = false; 1922 } else { 1923 m->m_ext_ref = m; 1924 } 1925 } else { 1926 /* 1927 * dropping the last reference 1928 */ 1929 if (!embedded) { 1930 m->m_ext.ext_refcnt++; /* XXX */ 1931 m_ext_free(m->m_ext_ref); 1932 m->m_ext_ref = m; 1933 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 1934 pool_cache_put_paddr(mcl_cache, 1935 m->m_ext.ext_buf, m->m_ext.ext_paddr); 1936 } else if (m->m_ext.ext_free) { 1937 (*m->m_ext.ext_free)(m, 1938 m->m_ext.ext_buf, m->m_ext.ext_size, 1939 m->m_ext.ext_arg); 1940 /* 1941 * 'm' is already freed by the ext_free callback. 1942 */ 1943 dofree = false; 1944 } else { 1945 free(m->m_ext.ext_buf, 0); 1946 } 1947 } 1948 1949 if (dofree) { 1950 m->m_type = MT_FREE; 1951 m->m_data = NULL; 1952 pool_cache_put(mb_cache, m); 1953 } 1954 } 1955 1956 /* 1957 * Free a single mbuf and associated external storage. Return the 1958 * successor, if any. 1959 */ 1960 struct mbuf * 1961 m_free(struct mbuf *m) 1962 { 1963 struct mbuf *n; 1964 1965 mowner_revoke(m, 1, m->m_flags); 1966 mbstat_type_add(m->m_type, -1); 1967 1968 if (m->m_flags & M_PKTHDR) 1969 m_tag_delete_chain(m, NULL); 1970 1971 n = m->m_next; 1972 1973 if (m->m_flags & M_EXT) { 1974 m_ext_free(m); 1975 } else { 1976 if (__predict_false(m->m_type == MT_FREE)) { 1977 panic("mbuf %p already freed", m); 1978 } 1979 m->m_type = MT_FREE; 1980 m->m_data = NULL; 1981 pool_cache_put(mb_cache, m); 1982 } 1983 1984 return n; 1985 } 1986 1987 void 1988 m_freem(struct mbuf *m) 1989 { 1990 if (m == NULL) 1991 return; 1992 do { 1993 m = m_free(m); 1994 } while (m); 1995 } 1996