1 /* $NetBSD: uipc_mbuf.c,v 1.241 2020/05/05 20:36:48 jdolecek Exp $ */ 2 3 /* 4 * Copyright (c) 1999, 2001, 2018 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and Maxime Villard. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.241 2020/05/05 20:36:48 jdolecek Exp $"); 66 67 #ifdef _KERNEL_OPT 68 #include "opt_mbuftrace.h" 69 #include "opt_nmbclusters.h" 70 #include "opt_ddb.h" 71 #include "ether.h" 72 #endif 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/atomic.h> 77 #include <sys/cpu.h> 78 #include <sys/proc.h> 79 #include <sys/mbuf.h> 80 #include <sys/kernel.h> 81 #include <sys/syslog.h> 82 #include <sys/domain.h> 83 #include <sys/protosw.h> 84 #include <sys/percpu.h> 85 #include <sys/pool.h> 86 #include <sys/socket.h> 87 #include <sys/sysctl.h> 88 89 #include <net/if.h> 90 91 pool_cache_t mb_cache; /* mbuf cache */ 92 static pool_cache_t mcl_cache; /* mbuf cluster cache */ 93 94 struct mbstat mbstat; 95 int max_linkhdr; 96 int max_protohdr; 97 int max_hdr; 98 int max_datalen; 99 100 static void mb_drain(void *, int); 101 static int mb_ctor(void *, void *, int); 102 103 static void sysctl_kern_mbuf_setup(void); 104 105 static struct sysctllog *mbuf_sysctllog; 106 107 static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool); 108 static struct mbuf *m_split_internal(struct mbuf *, int, int, bool); 109 static int m_copyback_internal(struct mbuf **, int, int, const void *, 110 int, int); 111 112 /* Flags for m_copyback_internal. */ 113 #define CB_COPYBACK 0x0001 /* copyback from cp */ 114 #define CB_PRESERVE 0x0002 /* preserve original data */ 115 #define CB_COW 0x0004 /* do copy-on-write */ 116 #define CB_EXTEND 0x0008 /* extend chain */ 117 118 static const char mclpool_warnmsg[] = 119 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; 120 121 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 122 123 static percpu_t *mbstat_percpu; 124 125 #ifdef MBUFTRACE 126 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 127 struct mowner unknown_mowners[] = { 128 MOWNER_INIT("unknown", "free"), 129 MOWNER_INIT("unknown", "data"), 130 MOWNER_INIT("unknown", "header"), 131 MOWNER_INIT("unknown", "soname"), 132 MOWNER_INIT("unknown", "soopts"), 133 MOWNER_INIT("unknown", "ftable"), 134 MOWNER_INIT("unknown", "control"), 135 MOWNER_INIT("unknown", "oobdata"), 136 }; 137 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 138 #endif 139 140 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 141 142 #define MCLADDREFERENCE(o, n) \ 143 do { \ 144 KASSERT(((o)->m_flags & M_EXT) != 0); \ 145 KASSERT(((n)->m_flags & M_EXT) == 0); \ 146 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 147 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 148 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 149 (n)->m_ext_ref = (o)->m_ext_ref; \ 150 mowner_ref((n), (n)->m_flags); \ 151 } while (/* CONSTCOND */ 0) 152 153 static int 154 nmbclusters_limit(void) 155 { 156 #if defined(PMAP_MAP_POOLPAGE) 157 /* direct mapping, doesn't use space in kmem_arena */ 158 vsize_t max_size = physmem / 4; 159 #else 160 vsize_t max_size = MIN(physmem / 4, nkmempages / 4); 161 #endif 162 163 max_size = max_size * PAGE_SIZE / MCLBYTES; 164 #ifdef NMBCLUSTERS_MAX 165 max_size = MIN(max_size, NMBCLUSTERS_MAX); 166 #endif 167 168 #ifdef NMBCLUSTERS 169 return MIN(max_size, NMBCLUSTERS); 170 #else 171 return max_size; 172 #endif 173 } 174 175 /* 176 * Initialize the mbuf allocator. 
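 *
 * A rough worked example of the sizing arithmetic in nmbclusters_limit()
 * above and the default computed below (illustrative figures, assuming
 * 4 GiB of RAM, PAGE_SIZE = 4096, MCLBYTES = 2048 and a direct-mapped
 * pool page backend): physmem = 1048576 pages, so the limit is
 * (1048576 / 4) * 4096 / 2048 = 524288 clusters, while the default
 * works out to MAX(1024, 1048576 * 4096 / 2048 / 16) = 131072.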
177 */ 178 void 179 mbinit(void) 180 { 181 182 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 183 CTASSERT(sizeof(struct mbuf) == MSIZE); 184 185 sysctl_kern_mbuf_setup(); 186 187 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 188 NULL, IPL_VM, mb_ctor, NULL, NULL); 189 KASSERT(mb_cache != NULL); 190 191 mcl_cache = pool_cache_init(mclbytes, COHERENCY_UNIT, 0, 0, "mclpl", 192 NULL, IPL_VM, NULL, NULL, NULL); 193 KASSERT(mcl_cache != NULL); 194 195 pool_cache_set_drain_hook(mb_cache, mb_drain, NULL); 196 pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL); 197 198 /* 199 * Set an arbitrary default limit on the number of mbuf clusters. 200 */ 201 #ifdef NMBCLUSTERS 202 nmbclusters = nmbclusters_limit(); 203 #else 204 nmbclusters = MAX(1024, 205 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 206 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 207 #endif 208 209 /* 210 * Set the hard limit on the mclpool to the number of 211 * mbuf clusters the kernel is to support. Log the limit 212 * reached message max once a minute. 213 */ 214 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 215 216 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 217 218 /* 219 * Set a low water mark for both mbufs and clusters. This should 220 * help ensure that they can be allocated in a memory starvation 221 * situation. This is important for e.g. diskless systems which 222 * must allocate mbufs in order for the pagedaemon to clean pages. 223 */ 224 pool_cache_setlowat(mb_cache, mblowat); 225 pool_cache_setlowat(mcl_cache, mcllowat); 226 227 #ifdef MBUFTRACE 228 { 229 /* 230 * Attach the unknown mowners. 231 */ 232 int i; 233 MOWNER_ATTACH(&revoked_mowner); 234 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 235 i-- > 0; ) 236 MOWNER_ATTACH(&unknown_mowners[i]); 237 } 238 #endif 239 } 240 241 static void 242 mb_drain(void *arg, int flags) 243 { 244 struct domain *dp; 245 const struct protosw *pr; 246 struct ifnet *ifp; 247 int s; 248 249 KERNEL_LOCK(1, NULL); 250 s = splvm(); 251 DOMAIN_FOREACH(dp) { 252 for (pr = dp->dom_protosw; 253 pr < dp->dom_protoswNPROTOSW; pr++) 254 if (pr->pr_drain) 255 (*pr->pr_drain)(); 256 } 257 /* XXX we cannot use psref in H/W interrupt */ 258 if (!cpu_intr_p()) { 259 int bound = curlwp_bind(); 260 IFNET_READER_FOREACH(ifp) { 261 struct psref psref; 262 263 if_acquire(ifp, &psref); 264 265 if (ifp->if_drain) 266 (*ifp->if_drain)(ifp); 267 268 if_release(ifp, &psref); 269 } 270 curlwp_bindx(bound); 271 } 272 splx(s); 273 mbstat.m_drain++; 274 KERNEL_UNLOCK_ONE(NULL); 275 } 276 277 /* 278 * sysctl helper routine for the kern.mbuf subtree. 279 * nmbclusters, mblowat and mcllowat need range 280 * checking and pool tweaking after being reset. 
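 *
 * From userland these knobs are reached through sysctl(8); for example
 * (illustrative values only):
 *
 *	sysctl kern.mbuf.nmbclusters		   show the current limit
 *	sysctl -w kern.mbuf.nmbclusters=65536	   raise the cluster limit
 *	sysctl -w kern.mbuf.mblowat=32		   set the mbuf low water mark
 *
 * A new nmbclusters value is accepted only if it is no smaller than the
 * current value and no larger than nmbclusters_limit().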
281 */ 282 static int 283 sysctl_kern_mbuf(SYSCTLFN_ARGS) 284 { 285 int error, newval; 286 struct sysctlnode node; 287 288 node = *rnode; 289 node.sysctl_data = &newval; 290 switch (rnode->sysctl_num) { 291 case MBUF_NMBCLUSTERS: 292 case MBUF_MBLOWAT: 293 case MBUF_MCLLOWAT: 294 newval = *(int*)rnode->sysctl_data; 295 break; 296 default: 297 return EOPNOTSUPP; 298 } 299 300 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 301 if (error || newp == NULL) 302 return error; 303 if (newval < 0) 304 return EINVAL; 305 306 switch (node.sysctl_num) { 307 case MBUF_NMBCLUSTERS: 308 if (newval < nmbclusters) 309 return EINVAL; 310 if (newval > nmbclusters_limit()) 311 return EINVAL; 312 nmbclusters = newval; 313 pool_cache_sethardlimit(mcl_cache, nmbclusters, 314 mclpool_warnmsg, 60); 315 break; 316 case MBUF_MBLOWAT: 317 mblowat = newval; 318 pool_cache_setlowat(mb_cache, mblowat); 319 break; 320 case MBUF_MCLLOWAT: 321 mcllowat = newval; 322 pool_cache_setlowat(mcl_cache, mcllowat); 323 break; 324 } 325 326 return 0; 327 } 328 329 #ifdef MBUFTRACE 330 static void 331 mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 332 { 333 struct mowner_counter *mc = v1; 334 struct mowner_user *mo_user = v2; 335 int i; 336 337 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 338 mo_user->mo_counter[i] += mc->mc_counter[i]; 339 } 340 } 341 342 static void 343 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 344 { 345 346 memset(mo_user, 0, sizeof(*mo_user)); 347 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 348 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 349 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 350 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 351 percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user); 352 } 353 354 static int 355 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 356 { 357 struct mowner *mo; 358 size_t len = 0; 359 int error = 0; 360 361 if (namelen != 0) 362 return EINVAL; 363 if (newp != NULL) 364 return EPERM; 365 366 LIST_FOREACH(mo, &mowners, mo_link) { 367 struct mowner_user mo_user; 368 369 mowner_convert_to_user(mo, &mo_user); 370 371 if (oldp != NULL) { 372 if (*oldlenp - len < sizeof(mo_user)) { 373 error = ENOMEM; 374 break; 375 } 376 error = copyout(&mo_user, (char *)oldp + len, 377 sizeof(mo_user)); 378 if (error) 379 break; 380 } 381 len += sizeof(mo_user); 382 } 383 384 if (error == 0) 385 *oldlenp = len; 386 387 return error; 388 } 389 #endif /* MBUFTRACE */ 390 391 void 392 mbstat_type_add(int type, int diff) 393 { 394 struct mbstat_cpu *mb; 395 int s; 396 397 s = splvm(); 398 mb = percpu_getref(mbstat_percpu); 399 mb->m_mtypes[type] += diff; 400 percpu_putref(mbstat_percpu); 401 splx(s); 402 } 403 404 static void 405 mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 406 { 407 struct mbstat_cpu *mbsc = v1; 408 struct mbstat *mbs = v2; 409 int i; 410 411 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 412 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 413 } 414 } 415 416 static void 417 mbstat_convert_to_user(struct mbstat *mbs) 418 { 419 420 memset(mbs, 0, sizeof(*mbs)); 421 mbs->m_drain = mbstat.m_drain; 422 percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs); 423 } 424 425 static int 426 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 427 { 428 struct sysctlnode node; 429 struct mbstat mbs; 430 431 mbstat_convert_to_user(&mbs); 432 node = *rnode; 433 node.sysctl_data = &mbs; 434 node.sysctl_size = sizeof(mbs); 435 return sysctl_lookup(SYSCTLFN_CALL(&node)); 
436 } 437 438 static void 439 sysctl_kern_mbuf_setup(void) 440 { 441 442 KASSERT(mbuf_sysctllog == NULL); 443 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 444 CTLFLAG_PERMANENT, 445 CTLTYPE_NODE, "mbuf", 446 SYSCTL_DESCR("mbuf control variables"), 447 NULL, 0, NULL, 0, 448 CTL_KERN, KERN_MBUF, CTL_EOL); 449 450 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 451 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 452 CTLTYPE_INT, "msize", 453 SYSCTL_DESCR("mbuf base size"), 454 NULL, msize, NULL, 0, 455 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 456 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 457 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 458 CTLTYPE_INT, "mclbytes", 459 SYSCTL_DESCR("mbuf cluster size"), 460 NULL, mclbytes, NULL, 0, 461 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 462 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 463 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 464 CTLTYPE_INT, "nmbclusters", 465 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 466 sysctl_kern_mbuf, 0, &nmbclusters, 0, 467 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 468 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 469 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 470 CTLTYPE_INT, "mblowat", 471 SYSCTL_DESCR("mbuf low water mark"), 472 sysctl_kern_mbuf, 0, &mblowat, 0, 473 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 474 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 475 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 476 CTLTYPE_INT, "mcllowat", 477 SYSCTL_DESCR("mbuf cluster low water mark"), 478 sysctl_kern_mbuf, 0, &mcllowat, 0, 479 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 480 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 481 CTLFLAG_PERMANENT, 482 CTLTYPE_STRUCT, "stats", 483 SYSCTL_DESCR("mbuf allocation statistics"), 484 sysctl_kern_mbuf_stats, 0, NULL, 0, 485 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 486 #ifdef MBUFTRACE 487 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 488 CTLFLAG_PERMANENT, 489 CTLTYPE_STRUCT, "mowners", 490 SYSCTL_DESCR("Information about mbuf owners"), 491 sysctl_kern_mbuf_mowners, 0, NULL, 0, 492 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 493 #endif 494 } 495 496 static int 497 mb_ctor(void *arg, void *object, int flags) 498 { 499 struct mbuf *m = object; 500 501 #ifdef POOL_VTOPHYS 502 m->m_paddr = POOL_VTOPHYS(m); 503 #else 504 m->m_paddr = M_PADDR_INVALID; 505 #endif 506 return 0; 507 } 508 509 /* 510 * Add mbuf to the end of a chain 511 */ 512 struct mbuf * 513 m_add(struct mbuf *c, struct mbuf *m) 514 { 515 struct mbuf *n; 516 517 if (c == NULL) 518 return m; 519 520 for (n = c; n->m_next != NULL; n = n->m_next) 521 continue; 522 n->m_next = m; 523 return c; 524 } 525 526 struct mbuf * 527 m_get(int how, int type) 528 { 529 struct mbuf *m; 530 531 KASSERT(type != MT_FREE); 532 533 m = pool_cache_get(mb_cache, 534 how == M_WAIT ? 
PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT); 535 if (m == NULL) 536 return NULL; 537 KASSERT(((vaddr_t)m->m_dat & PAGE_MASK) + MLEN <= PAGE_SIZE); 538 539 mbstat_type_add(type, 1); 540 541 mowner_init(m, type); 542 m->m_ext_ref = m; /* default */ 543 m->m_type = type; 544 m->m_len = 0; 545 m->m_next = NULL; 546 m->m_nextpkt = NULL; /* default */ 547 m->m_data = m->m_dat; 548 m->m_flags = 0; /* default */ 549 550 return m; 551 } 552 553 struct mbuf * 554 m_gethdr(int how, int type) 555 { 556 struct mbuf *m; 557 558 m = m_get(how, type); 559 if (m == NULL) 560 return NULL; 561 562 m->m_data = m->m_pktdat; 563 m->m_flags = M_PKTHDR; 564 565 m_reset_rcvif(m); 566 m->m_pkthdr.len = 0; 567 m->m_pkthdr.csum_flags = 0; 568 m->m_pkthdr.csum_data = 0; 569 m->m_pkthdr.segsz = 0; 570 m->m_pkthdr.ether_vtag = 0; 571 m->m_pkthdr.pkthdr_flags = 0; 572 SLIST_INIT(&m->m_pkthdr.tags); 573 574 m->m_pkthdr.pattr_class = NULL; 575 m->m_pkthdr.pattr_af = AF_UNSPEC; 576 m->m_pkthdr.pattr_hdr = NULL; 577 578 return m; 579 } 580 581 void 582 m_clget(struct mbuf *m, int how) 583 { 584 m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache, 585 how == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT, 586 &m->m_ext_storage.ext_paddr); 587 588 if (m->m_ext_storage.ext_buf == NULL) 589 return; 590 591 KASSERT(((vaddr_t)m->m_ext_storage.ext_buf & PAGE_MASK) + mclbytes 592 <= PAGE_SIZE); 593 594 MCLINITREFERENCE(m); 595 m->m_data = m->m_ext.ext_buf; 596 m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) | 597 M_EXT|M_EXT_CLUSTER|M_EXT_RW; 598 m->m_ext.ext_size = MCLBYTES; 599 m->m_ext.ext_free = NULL; 600 m->m_ext.ext_arg = NULL; 601 /* ext_paddr initialized above */ 602 603 mowner_ref(m, M_EXT|M_EXT_CLUSTER); 604 } 605 606 struct mbuf * 607 m_getcl(int how, int type, int flags) 608 { 609 struct mbuf *mp; 610 611 if ((flags & M_PKTHDR) != 0) 612 mp = m_gethdr(how, type); 613 else 614 mp = m_get(how, type); 615 616 if (mp == NULL) 617 return NULL; 618 619 MCLGET(mp, how); 620 if ((mp->m_flags & M_EXT) != 0) 621 return mp; 622 623 m_free(mp); 624 return NULL; 625 } 626 627 /* 628 * Utility function for M_PREPEND. Do *NOT* use it directly. 629 */ 630 struct mbuf * 631 m_prepend(struct mbuf *m, int len, int how) 632 { 633 struct mbuf *mn; 634 635 if (__predict_false(len > MHLEN)) { 636 panic("%s: len > MHLEN", __func__); 637 } 638 639 KASSERT(len != M_COPYALL); 640 mn = m_get(how, m->m_type); 641 if (mn == NULL) { 642 m_freem(m); 643 return NULL; 644 } 645 646 if (m->m_flags & M_PKTHDR) { 647 m_move_pkthdr(mn, m); 648 } else { 649 MCLAIM(mn, m->m_owner); 650 } 651 mn->m_next = m; 652 m = mn; 653 654 if (m->m_flags & M_PKTHDR) { 655 if (len < MHLEN) 656 m_align(m, len); 657 } else { 658 if (len < MLEN) 659 m_align(m, len); 660 } 661 662 m->m_len = len; 663 return m; 664 } 665 666 struct mbuf * 667 m_copym(struct mbuf *m, int off, int len, int wait) 668 { 669 /* Shallow copy on M_EXT. */ 670 return m_copy_internal(m, off, len, wait, false); 671 } 672 673 struct mbuf * 674 m_dup(struct mbuf *m, int off, int len, int wait) 675 { 676 /* Deep copy. */ 677 return m_copy_internal(m, off, len, wait, true); 678 } 679 680 static inline int 681 m_copylen(int len, int copylen) 682 { 683 return (len == M_COPYALL) ? 
copylen : uimin(len, copylen); 684 } 685 686 static struct mbuf * 687 m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep) 688 { 689 struct mbuf *n, **np; 690 int off = off0; 691 struct mbuf *top; 692 int copyhdr = 0; 693 694 if (off < 0 || (len != M_COPYALL && len < 0)) 695 panic("%s: off %d, len %d", __func__, off, len); 696 if (off == 0 && m->m_flags & M_PKTHDR) 697 copyhdr = 1; 698 while (off > 0) { 699 if (m == NULL) 700 panic("%s: m == NULL, off %d", __func__, off); 701 if (off < m->m_len) 702 break; 703 off -= m->m_len; 704 m = m->m_next; 705 } 706 707 np = ⊤ 708 top = NULL; 709 while (len == M_COPYALL || len > 0) { 710 if (m == NULL) { 711 if (len != M_COPYALL) 712 panic("%s: m == NULL, len %d [!COPYALL]", 713 __func__, len); 714 break; 715 } 716 717 n = m_get(wait, m->m_type); 718 *np = n; 719 if (n == NULL) 720 goto nospace; 721 MCLAIM(n, m->m_owner); 722 723 if (copyhdr) { 724 m_copy_pkthdr(n, m); 725 if (len == M_COPYALL) 726 n->m_pkthdr.len -= off0; 727 else 728 n->m_pkthdr.len = len; 729 copyhdr = 0; 730 } 731 n->m_len = m_copylen(len, m->m_len - off); 732 733 if (m->m_flags & M_EXT) { 734 if (!deep) { 735 n->m_data = m->m_data + off; 736 MCLADDREFERENCE(m, n); 737 } else { 738 /* 739 * We don't care if MCLGET fails. n->m_len is 740 * recomputed and handles that. 741 */ 742 MCLGET(n, wait); 743 n->m_len = 0; 744 n->m_len = M_TRAILINGSPACE(n); 745 n->m_len = m_copylen(len, n->m_len); 746 n->m_len = uimin(n->m_len, m->m_len - off); 747 memcpy(mtod(n, void *), mtod(m, char *) + off, 748 (unsigned)n->m_len); 749 } 750 } else { 751 memcpy(mtod(n, void *), mtod(m, char *) + off, 752 (unsigned)n->m_len); 753 } 754 755 if (len != M_COPYALL) 756 len -= n->m_len; 757 off += n->m_len; 758 759 KASSERT(off <= m->m_len); 760 761 if (off == m->m_len) { 762 m = m->m_next; 763 off = 0; 764 } 765 np = &n->m_next; 766 } 767 768 return top; 769 770 nospace: 771 m_freem(top); 772 return NULL; 773 } 774 775 /* 776 * Copy an entire packet, including header (which must be present). 777 * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'. 
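 *
 * As a rough guide to the copy routines in this file (sketch only, names
 * as defined above):
 *
 *	n = m_copym(m, off, len, M_DONTWAIT);	shallow; clusters are shared
 *	n = m_dup(m, off, len, M_DONTWAIT);	deep; data is copied
 *	n = m_copypacket(m, M_DONTWAIT);	whole packet, shallow
 *
 * The shallow variants only bump ext_refcnt on clustered storage, so the
 * shared data must be treated as read-only (cf. M_READONLY).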
778 */ 779 struct mbuf * 780 m_copypacket(struct mbuf *m, int how) 781 { 782 struct mbuf *top, *n, *o; 783 784 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 785 panic("%s: no header (m = %p)", __func__, m); 786 } 787 788 n = m_get(how, m->m_type); 789 top = n; 790 if (!n) 791 goto nospace; 792 793 MCLAIM(n, m->m_owner); 794 m_copy_pkthdr(n, m); 795 n->m_len = m->m_len; 796 if (m->m_flags & M_EXT) { 797 n->m_data = m->m_data; 798 MCLADDREFERENCE(m, n); 799 } else { 800 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 801 } 802 803 m = m->m_next; 804 while (m) { 805 o = m_get(how, m->m_type); 806 if (!o) 807 goto nospace; 808 809 MCLAIM(o, m->m_owner); 810 n->m_next = o; 811 n = n->m_next; 812 813 n->m_len = m->m_len; 814 if (m->m_flags & M_EXT) { 815 n->m_data = m->m_data; 816 MCLADDREFERENCE(m, n); 817 } else { 818 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 819 } 820 821 m = m->m_next; 822 } 823 return top; 824 825 nospace: 826 m_freem(top); 827 return NULL; 828 } 829 830 void 831 m_copydata(struct mbuf *m, int off, int len, void *cp) 832 { 833 unsigned int count; 834 struct mbuf *m0 = m; 835 int len0 = len; 836 int off0 = off; 837 void *cp0 = cp; 838 839 KASSERT(len != M_COPYALL); 840 if (off < 0 || len < 0) 841 panic("m_copydata: off %d, len %d", off, len); 842 while (off > 0) { 843 if (m == NULL) 844 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", 845 m0, len0, off0, cp0, off, off0 - off); 846 if (off < m->m_len) 847 break; 848 off -= m->m_len; 849 m = m->m_next; 850 } 851 while (len > 0) { 852 if (m == NULL) 853 panic("m_copydata(%p,%d,%d,%p): " 854 "m=NULL, off=%d (%d), len=%d (%d)", 855 m0, len0, off0, cp0, 856 off, off0 - off, len, len0 - len); 857 count = uimin(m->m_len - off, len); 858 memcpy(cp, mtod(m, char *) + off, count); 859 len -= count; 860 cp = (char *)cp + count; 861 off = 0; 862 m = m->m_next; 863 } 864 } 865 866 /* 867 * Concatenate mbuf chain n to m. 868 * n might be copied into m (when n->m_len is small), therefore data portion of 869 * n could be copied into an mbuf of different mbuf type. 870 * Any m_pkthdr is not updated. 871 */ 872 void 873 m_cat(struct mbuf *m, struct mbuf *n) 874 { 875 876 while (m->m_next) 877 m = m->m_next; 878 while (n) { 879 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 880 /* just join the two chains */ 881 m->m_next = n; 882 return; 883 } 884 /* splat the data from one into the other */ 885 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 886 (u_int)n->m_len); 887 m->m_len += n->m_len; 888 n = m_free(n); 889 } 890 } 891 892 void 893 m_adj(struct mbuf *mp, int req_len) 894 { 895 int len = req_len; 896 struct mbuf *m; 897 int count; 898 899 if ((m = mp) == NULL) 900 return; 901 if (len >= 0) { 902 /* 903 * Trim from head. 904 */ 905 while (m != NULL && len > 0) { 906 if (m->m_len <= len) { 907 len -= m->m_len; 908 m->m_len = 0; 909 m = m->m_next; 910 } else { 911 m->m_len -= len; 912 m->m_data += len; 913 len = 0; 914 } 915 } 916 if (mp->m_flags & M_PKTHDR) 917 mp->m_pkthdr.len -= (req_len - len); 918 } else { 919 /* 920 * Trim from tail. Scan the mbuf chain, 921 * calculating its length and finding the last mbuf. 922 * If the adjustment only affects this mbuf, then just 923 * adjust and return. Otherwise, rescan and truncate 924 * after the remaining size. 
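 *
 * (Usage sketch: a positive request trims from the head, a negative one
 * from the tail, e.g.
 *
 *	m_adj(m, sizeof(struct ether_header));	   strip the link header
 *	m_adj(m, -ETHER_CRC_LEN);		   strip a trailing FCS
 *
 * assuming the usual definitions from net/if_ether.h.)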
925 */ 926 len = -len; 927 count = 0; 928 for (;;) { 929 count += m->m_len; 930 if (m->m_next == NULL) 931 break; 932 m = m->m_next; 933 } 934 if (m->m_len >= len) { 935 m->m_len -= len; 936 if (mp->m_flags & M_PKTHDR) 937 mp->m_pkthdr.len -= len; 938 return; 939 } 940 941 count -= len; 942 if (count < 0) 943 count = 0; 944 945 /* 946 * Correct length for chain is "count". 947 * Find the mbuf with last data, adjust its length, 948 * and toss data from remaining mbufs on chain. 949 */ 950 m = mp; 951 if (m->m_flags & M_PKTHDR) 952 m->m_pkthdr.len = count; 953 for (; m; m = m->m_next) { 954 if (m->m_len >= count) { 955 m->m_len = count; 956 break; 957 } 958 count -= m->m_len; 959 } 960 if (m) { 961 while (m->m_next) 962 (m = m->m_next)->m_len = 0; 963 } 964 } 965 } 966 967 /* 968 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 969 * would be contiguous and in the data area of an mbuf (therefore, mtod() 970 * would work for a structure of given length). 971 * 972 * => On success, returns true and the resulting mbuf chain; false otherwise. 973 * => The mbuf chain may change, but is always preserved valid. 974 */ 975 bool 976 m_ensure_contig(struct mbuf **m0, int len) 977 { 978 struct mbuf *n = *m0, *m; 979 size_t count, space; 980 981 KASSERT(len != M_COPYALL); 982 /* 983 * If first mbuf has no cluster, and has room for len bytes 984 * without shifting current data, pullup into it, 985 * otherwise allocate a new mbuf to prepend to the chain. 986 */ 987 if ((n->m_flags & M_EXT) == 0 && 988 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 989 if (n->m_len >= len) { 990 return true; 991 } 992 m = n; 993 n = n->m_next; 994 len -= m->m_len; 995 } else { 996 if (len > MHLEN) { 997 return false; 998 } 999 m = m_get(M_DONTWAIT, n->m_type); 1000 if (m == NULL) { 1001 return false; 1002 } 1003 MCLAIM(m, n->m_owner); 1004 if (n->m_flags & M_PKTHDR) { 1005 m_move_pkthdr(m, n); 1006 } 1007 } 1008 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1009 do { 1010 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 1011 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1012 (unsigned)count); 1013 len -= count; 1014 m->m_len += count; 1015 n->m_len -= count; 1016 space -= count; 1017 if (n->m_len) 1018 n->m_data += count; 1019 else 1020 n = m_free(n); 1021 } while (len > 0 && n); 1022 1023 m->m_next = n; 1024 *m0 = m; 1025 1026 return len <= 0; 1027 } 1028 1029 /* 1030 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 1031 */ 1032 struct mbuf * 1033 m_pullup(struct mbuf *n, int len) 1034 { 1035 struct mbuf *m = n; 1036 1037 KASSERT(len != M_COPYALL); 1038 if (!m_ensure_contig(&m, len)) { 1039 KASSERT(m != NULL); 1040 m_freem(m); 1041 m = NULL; 1042 } 1043 return m; 1044 } 1045 1046 /* 1047 * ensure that [off, off + len) is contiguous on the mbuf chain "m". 1048 * packet chain before "off" is kept untouched. 1049 * if offp == NULL, the target will start at <retval, 0> on resulting chain. 1050 * if offp != NULL, the target will start at <retval, *offp> on resulting chain. 1051 * 1052 * on error return (NULL return value), original "m" will be freed. 1053 * 1054 * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster) 1055 */ 1056 struct mbuf * 1057 m_pulldown(struct mbuf *m, int off, int len, int *offp) 1058 { 1059 struct mbuf *n, *o; 1060 int hlen, tlen, olen; 1061 int sharedcluster; 1062 1063 /* Check invalid arguments. 
*/ 1064 if (m == NULL) 1065 panic("%s: m == NULL", __func__); 1066 if (len > MCLBYTES) { 1067 m_freem(m); 1068 return NULL; 1069 } 1070 1071 n = m; 1072 while (n != NULL && off > 0) { 1073 if (n->m_len > off) 1074 break; 1075 off -= n->m_len; 1076 n = n->m_next; 1077 } 1078 /* Be sure to point non-empty mbuf. */ 1079 while (n != NULL && n->m_len == 0) 1080 n = n->m_next; 1081 if (!n) { 1082 m_freem(m); 1083 return NULL; /* mbuf chain too short */ 1084 } 1085 1086 sharedcluster = M_READONLY(n); 1087 1088 /* 1089 * The target data is on <n, off>. If we got enough data on the mbuf 1090 * "n", we're done. 1091 */ 1092 #ifdef __NO_STRICT_ALIGNMENT 1093 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster) 1094 #else 1095 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster && 1096 ALIGNED_POINTER((mtod(n, char *) + off), uint32_t)) 1097 #endif 1098 goto ok; 1099 1100 /* 1101 * When (len <= n->m_len - off) and (off != 0), it is a special case. 1102 * Len bytes from <n, off> sit in single mbuf, but the caller does 1103 * not like the starting position (off). 1104 * 1105 * Chop the current mbuf into two pieces, set off to 0. 1106 */ 1107 if (len <= n->m_len - off) { 1108 struct mbuf *mlast; 1109 1110 o = m_dup(n, off, n->m_len - off, M_DONTWAIT); 1111 if (o == NULL) { 1112 m_freem(m); 1113 return NULL; /* ENOBUFS */ 1114 } 1115 KASSERT(o->m_len >= len); 1116 for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next) 1117 ; 1118 n->m_len = off; 1119 mlast->m_next = n->m_next; 1120 n->m_next = o; 1121 n = o; 1122 off = 0; 1123 goto ok; 1124 } 1125 1126 /* 1127 * We need to take hlen from <n, off> and tlen from <n->m_next, 0>, 1128 * and construct contiguous mbuf with m_len == len. 1129 * 1130 * Note that hlen + tlen == len, and tlen > 0. 1131 */ 1132 hlen = n->m_len - off; 1133 tlen = len - hlen; 1134 1135 /* 1136 * Ensure that we have enough trailing data on mbuf chain. If not, 1137 * we can do nothing about the chain. 1138 */ 1139 olen = 0; 1140 for (o = n->m_next; o != NULL; o = o->m_next) 1141 olen += o->m_len; 1142 if (hlen + olen < len) { 1143 m_freem(m); 1144 return NULL; /* mbuf chain too short */ 1145 } 1146 1147 /* 1148 * Easy cases first. We need to use m_copydata() to get data from 1149 * <n->m_next, 0>. 1150 */ 1151 if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen && 1152 !sharedcluster) { 1153 m_copydata(n->m_next, 0, tlen, mtod(n, char *) + n->m_len); 1154 n->m_len += tlen; 1155 m_adj(n->m_next, tlen); 1156 goto ok; 1157 } 1158 if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen && 1159 #ifndef __NO_STRICT_ALIGNMENT 1160 ALIGNED_POINTER((n->m_next->m_data - hlen), uint32_t) && 1161 #endif 1162 !sharedcluster && n->m_next->m_len >= tlen) { 1163 n->m_next->m_data -= hlen; 1164 n->m_next->m_len += hlen; 1165 memcpy(mtod(n->m_next, void *), mtod(n, char *) + off, hlen); 1166 n->m_len -= hlen; 1167 n = n->m_next; 1168 off = 0; 1169 goto ok; 1170 } 1171 1172 /* 1173 * Now, we need to do the hard way. Don't copy as there's no room 1174 * on both ends. 
1175 */ 1176 o = m_get(M_DONTWAIT, m->m_type); 1177 if (o && len > MLEN) { 1178 MCLGET(o, M_DONTWAIT); 1179 if ((o->m_flags & M_EXT) == 0) { 1180 m_free(o); 1181 o = NULL; 1182 } 1183 } 1184 if (!o) { 1185 m_freem(m); 1186 return NULL; /* ENOBUFS */ 1187 } 1188 /* get hlen from <n, off> into <o, 0> */ 1189 o->m_len = hlen; 1190 memcpy(mtod(o, void *), mtod(n, char *) + off, hlen); 1191 n->m_len -= hlen; 1192 /* get tlen from <n->m_next, 0> into <o, hlen> */ 1193 m_copydata(n->m_next, 0, tlen, mtod(o, char *) + o->m_len); 1194 o->m_len += tlen; 1195 m_adj(n->m_next, tlen); 1196 o->m_next = n->m_next; 1197 n->m_next = o; 1198 n = o; 1199 off = 0; 1200 1201 ok: 1202 if (offp) 1203 *offp = off; 1204 return n; 1205 } 1206 1207 /* 1208 * Like m_pullup(), except a new mbuf is always allocated, and we allow 1209 * the amount of empty space before the data in the new mbuf to be specified 1210 * (in the event that the caller expects to prepend later). 1211 */ 1212 struct mbuf * 1213 m_copyup(struct mbuf *n, int len, int dstoff) 1214 { 1215 struct mbuf *m; 1216 int count, space; 1217 1218 KASSERT(len != M_COPYALL); 1219 if (len > ((int)MHLEN - dstoff)) 1220 goto bad; 1221 m = m_get(M_DONTWAIT, n->m_type); 1222 if (m == NULL) 1223 goto bad; 1224 MCLAIM(m, n->m_owner); 1225 if (n->m_flags & M_PKTHDR) { 1226 m_move_pkthdr(m, n); 1227 } 1228 m->m_data += dstoff; 1229 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1230 do { 1231 count = uimin(uimin(uimax(len, max_protohdr), space), n->m_len); 1232 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1233 (unsigned)count); 1234 len -= count; 1235 m->m_len += count; 1236 n->m_len -= count; 1237 space -= count; 1238 if (n->m_len) 1239 n->m_data += count; 1240 else 1241 n = m_free(n); 1242 } while (len > 0 && n); 1243 if (len > 0) { 1244 (void) m_free(m); 1245 goto bad; 1246 } 1247 m->m_next = n; 1248 return m; 1249 bad: 1250 m_freem(n); 1251 return NULL; 1252 } 1253 1254 struct mbuf * 1255 m_split(struct mbuf *m0, int len, int wait) 1256 { 1257 return m_split_internal(m0, len, wait, true); 1258 } 1259 1260 static struct mbuf * 1261 m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr) 1262 { 1263 struct mbuf *m, *n; 1264 unsigned len = len0, remain, len_save; 1265 1266 KASSERT(len0 != M_COPYALL); 1267 for (m = m0; m && len > m->m_len; m = m->m_next) 1268 len -= m->m_len; 1269 if (m == NULL) 1270 return NULL; 1271 1272 remain = m->m_len - len; 1273 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1274 n = m_gethdr(wait, m0->m_type); 1275 if (n == NULL) 1276 return NULL; 1277 1278 MCLAIM(n, m0->m_owner); 1279 m_copy_rcvif(n, m0); 1280 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1281 len_save = m0->m_pkthdr.len; 1282 m0->m_pkthdr.len = len0; 1283 1284 if (m->m_flags & M_EXT) 1285 goto extpacket; 1286 1287 if (remain > MHLEN) { 1288 /* m can't be the lead packet */ 1289 m_align(n, 0); 1290 n->m_len = 0; 1291 n->m_next = m_split(m, len, wait); 1292 if (n->m_next == NULL) { 1293 (void)m_free(n); 1294 m0->m_pkthdr.len = len_save; 1295 return NULL; 1296 } 1297 return n; 1298 } else { 1299 m_align(n, remain); 1300 } 1301 } else if (remain == 0) { 1302 n = m->m_next; 1303 m->m_next = NULL; 1304 return n; 1305 } else { 1306 n = m_get(wait, m->m_type); 1307 if (n == NULL) 1308 return NULL; 1309 MCLAIM(n, m->m_owner); 1310 m_align(n, remain); 1311 } 1312 1313 extpacket: 1314 if (m->m_flags & M_EXT) { 1315 n->m_data = m->m_data + len; 1316 MCLADDREFERENCE(m, n); 1317 } else { 1318 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1319 } 1320 1321 n->m_len = 
remain; 1322 m->m_len = len; 1323 n->m_next = m->m_next; 1324 m->m_next = NULL; 1325 return n; 1326 } 1327 1328 /* 1329 * Routine to copy from device local memory into mbufs. 1330 */ 1331 struct mbuf * 1332 m_devget(char *buf, int totlen, int off, struct ifnet *ifp) 1333 { 1334 struct mbuf *m; 1335 struct mbuf *top = NULL, **mp = ⊤ 1336 char *cp, *epkt; 1337 int len; 1338 1339 cp = buf; 1340 epkt = cp + totlen; 1341 if (off) { 1342 /* 1343 * If 'off' is non-zero, packet is trailer-encapsulated, 1344 * so we have to skip the type and length fields. 1345 */ 1346 cp += off + 2 * sizeof(uint16_t); 1347 totlen -= 2 * sizeof(uint16_t); 1348 } 1349 1350 m = m_gethdr(M_DONTWAIT, MT_DATA); 1351 if (m == NULL) 1352 return NULL; 1353 m_set_rcvif(m, ifp); 1354 m->m_pkthdr.len = totlen; 1355 m->m_len = MHLEN; 1356 1357 while (totlen > 0) { 1358 if (top) { 1359 m = m_get(M_DONTWAIT, MT_DATA); 1360 if (m == NULL) { 1361 m_freem(top); 1362 return NULL; 1363 } 1364 m->m_len = MLEN; 1365 } 1366 1367 len = uimin(totlen, epkt - cp); 1368 1369 if (len >= MINCLSIZE) { 1370 MCLGET(m, M_DONTWAIT); 1371 if ((m->m_flags & M_EXT) == 0) { 1372 m_free(m); 1373 m_freem(top); 1374 return NULL; 1375 } 1376 m->m_len = len = uimin(len, MCLBYTES); 1377 } else { 1378 /* 1379 * Place initial small packet/header at end of mbuf. 1380 */ 1381 if (len < m->m_len) { 1382 if (top == 0 && len + max_linkhdr <= m->m_len) 1383 m->m_data += max_linkhdr; 1384 m->m_len = len; 1385 } else 1386 len = m->m_len; 1387 } 1388 1389 memcpy(mtod(m, void *), cp, (size_t)len); 1390 1391 cp += len; 1392 *mp = m; 1393 mp = &m->m_next; 1394 totlen -= len; 1395 if (cp == epkt) 1396 cp = buf; 1397 } 1398 1399 return top; 1400 } 1401 1402 /* 1403 * Copy data from a buffer back into the indicated mbuf chain, 1404 * starting "off" bytes from the beginning, extending the mbuf 1405 * chain if necessary. 1406 */ 1407 void 1408 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1409 { 1410 #if defined(DEBUG) 1411 struct mbuf *origm = m0; 1412 int error; 1413 #endif 1414 1415 if (m0 == NULL) 1416 return; 1417 1418 #if defined(DEBUG) 1419 error = 1420 #endif 1421 m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND, 1422 M_DONTWAIT); 1423 1424 #if defined(DEBUG) 1425 if (error != 0 || (m0 != NULL && origm != m0)) 1426 panic("m_copyback"); 1427 #endif 1428 } 1429 1430 struct mbuf * 1431 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1432 { 1433 int error; 1434 1435 /* don't support chain expansion */ 1436 KASSERT(len != M_COPYALL); 1437 KDASSERT(off + len <= m_length(m0)); 1438 1439 error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW, 1440 how); 1441 if (error) { 1442 /* 1443 * no way to recover from partial success. 1444 * just free the chain. 
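 *
 * (Hence the usual calling convention, sketched here with illustrative
 * names: the caller hands in the chain and keeps only what comes back,
 *
 *	m0 = m_copyback_cow(m0, off, len, buf, M_DONTWAIT);
 *	if (m0 == NULL)
 *		return ENOBUFS;
 * )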
1445 */ 1446 m_freem(m0); 1447 return NULL; 1448 } 1449 return m0; 1450 } 1451 1452 int 1453 m_makewritable(struct mbuf **mp, int off, int len, int how) 1454 { 1455 int error; 1456 #if defined(DEBUG) 1457 int origlen = m_length(*mp); 1458 #endif 1459 1460 error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW, 1461 how); 1462 if (error) 1463 return error; 1464 1465 #if defined(DEBUG) 1466 int reslen = 0; 1467 for (struct mbuf *n = *mp; n; n = n->m_next) 1468 reslen += n->m_len; 1469 if (origlen != reslen) 1470 panic("m_makewritable: length changed"); 1471 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1472 panic("m_makewritable: inconsist"); 1473 #endif 1474 1475 return 0; 1476 } 1477 1478 static int 1479 m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp, 1480 int flags, int how) 1481 { 1482 int mlen; 1483 struct mbuf *m, *n; 1484 struct mbuf **mp; 1485 int totlen = 0; 1486 const char *cp = vp; 1487 1488 KASSERT(mp0 != NULL); 1489 KASSERT(*mp0 != NULL); 1490 KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL); 1491 KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL); 1492 1493 if (len == M_COPYALL) 1494 len = m_length(*mp0) - off; 1495 1496 /* 1497 * we don't bother to update "totlen" in the case of CB_COW, 1498 * assuming that CB_EXTEND and CB_COW are exclusive. 1499 */ 1500 1501 KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0); 1502 1503 mp = mp0; 1504 m = *mp; 1505 while (off > (mlen = m->m_len)) { 1506 off -= mlen; 1507 totlen += mlen; 1508 if (m->m_next == NULL) { 1509 int tspace; 1510 extend: 1511 if ((flags & CB_EXTEND) == 0) 1512 goto out; 1513 1514 /* 1515 * try to make some space at the end of "m". 1516 */ 1517 1518 mlen = m->m_len; 1519 if (off + len >= MINCLSIZE && 1520 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1521 MCLGET(m, how); 1522 } 1523 tspace = M_TRAILINGSPACE(m); 1524 if (tspace > 0) { 1525 tspace = uimin(tspace, off + len); 1526 KASSERT(tspace > 0); 1527 memset(mtod(m, char *) + m->m_len, 0, 1528 uimin(off, tspace)); 1529 m->m_len += tspace; 1530 off += mlen; 1531 totlen -= mlen; 1532 continue; 1533 } 1534 1535 /* 1536 * need to allocate an mbuf. 1537 */ 1538 1539 if (off + len >= MINCLSIZE) { 1540 n = m_getcl(how, m->m_type, 0); 1541 } else { 1542 n = m_get(how, m->m_type); 1543 } 1544 if (n == NULL) { 1545 goto out; 1546 } 1547 n->m_len = uimin(M_TRAILINGSPACE(n), off + len); 1548 memset(mtod(n, char *), 0, uimin(n->m_len, off)); 1549 m->m_next = n; 1550 } 1551 mp = &m->m_next; 1552 m = m->m_next; 1553 } 1554 while (len > 0) { 1555 mlen = m->m_len - off; 1556 if (mlen != 0 && M_READONLY(m)) { 1557 /* 1558 * This mbuf is read-only. Allocate a new writable 1559 * mbuf and try again. 1560 */ 1561 char *datap; 1562 int eatlen; 1563 1564 KASSERT((flags & CB_COW) != 0); 1565 1566 /* 1567 * if we're going to write into the middle of 1568 * a mbuf, split it first. 1569 */ 1570 if (off > 0) { 1571 n = m_split_internal(m, off, how, false); 1572 if (n == NULL) 1573 goto enobufs; 1574 m->m_next = n; 1575 mp = &m->m_next; 1576 m = n; 1577 off = 0; 1578 continue; 1579 } 1580 1581 /* 1582 * XXX TODO coalesce into the trailingspace of 1583 * the previous mbuf when possible. 1584 */ 1585 1586 /* 1587 * allocate a new mbuf. copy packet header if needed. 
1588 */ 1589 n = m_get(how, m->m_type); 1590 if (n == NULL) 1591 goto enobufs; 1592 MCLAIM(n, m->m_owner); 1593 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1594 m_move_pkthdr(n, m); 1595 n->m_len = MHLEN; 1596 } else { 1597 if (len >= MINCLSIZE) 1598 MCLGET(n, M_DONTWAIT); 1599 n->m_len = 1600 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1601 } 1602 if (n->m_len > len) 1603 n->m_len = len; 1604 1605 /* 1606 * free the region which has been overwritten. 1607 * copying data from old mbufs if requested. 1608 */ 1609 if (flags & CB_PRESERVE) 1610 datap = mtod(n, char *); 1611 else 1612 datap = NULL; 1613 eatlen = n->m_len; 1614 while (m != NULL && M_READONLY(m) && 1615 n->m_type == m->m_type && eatlen > 0) { 1616 mlen = uimin(eatlen, m->m_len); 1617 if (datap) { 1618 m_copydata(m, 0, mlen, datap); 1619 datap += mlen; 1620 } 1621 m->m_data += mlen; 1622 m->m_len -= mlen; 1623 eatlen -= mlen; 1624 if (m->m_len == 0) 1625 *mp = m = m_free(m); 1626 } 1627 if (eatlen > 0) 1628 n->m_len -= eatlen; 1629 n->m_next = m; 1630 *mp = m = n; 1631 continue; 1632 } 1633 mlen = uimin(mlen, len); 1634 if (flags & CB_COPYBACK) { 1635 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1636 cp += mlen; 1637 } 1638 len -= mlen; 1639 mlen += off; 1640 off = 0; 1641 totlen += mlen; 1642 if (len == 0) 1643 break; 1644 if (m->m_next == NULL) { 1645 goto extend; 1646 } 1647 mp = &m->m_next; 1648 m = m->m_next; 1649 } 1650 1651 out: 1652 if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1653 KASSERT((flags & CB_EXTEND) != 0); 1654 m->m_pkthdr.len = totlen; 1655 } 1656 1657 return 0; 1658 1659 enobufs: 1660 return ENOBUFS; 1661 } 1662 1663 /* 1664 * Compress the mbuf chain. Return the new mbuf chain on success, NULL on 1665 * failure. The first mbuf is preserved, and on success the pointer returned 1666 * is the same as the one passed. 1667 */ 1668 struct mbuf * 1669 m_defrag(struct mbuf *m, int how) 1670 { 1671 struct mbuf *m0, *mn, *n; 1672 int sz; 1673 1674 KASSERT((m->m_flags & M_PKTHDR) != 0); 1675 1676 if (m->m_next == NULL) 1677 return m; 1678 1679 /* Defrag to single mbuf if at all possible */ 1680 if ((m->m_flags & M_EXT) == 0 && m->m_pkthdr.len <= MCLBYTES) { 1681 if (m->m_pkthdr.len <= MHLEN) { 1682 if (M_TRAILINGSPACE(m) < (m->m_pkthdr.len - m->m_len)) { 1683 KASSERTMSG(M_LEADINGSPACE(m) + 1684 M_TRAILINGSPACE(m) >= 1685 (m->m_pkthdr.len - m->m_len), 1686 "too small leading %d trailing %d ro? 
%d" 1687 " pkthdr.len %d mlen %d", 1688 (int)M_LEADINGSPACE(m), 1689 (int)M_TRAILINGSPACE(m), 1690 M_READONLY(m), 1691 m->m_pkthdr.len, m->m_len); 1692 1693 memmove(m->m_pktdat, m->m_data, m->m_len); 1694 m->m_data = m->m_pktdat; 1695 1696 KASSERT(M_TRAILINGSPACE(m) >= 1697 (m->m_pkthdr.len - m->m_len)); 1698 } 1699 } else { 1700 /* Must copy data before adding cluster */ 1701 m0 = m_get(how, MT_DATA); 1702 if (m0 == NULL) 1703 return NULL; 1704 KASSERT(m->m_len <= MHLEN); 1705 m_copydata(m, 0, m->m_len, mtod(m0, void *)); 1706 1707 MCLGET(m, how); 1708 if ((m->m_flags & M_EXT) == 0) { 1709 m_free(m0); 1710 return NULL; 1711 } 1712 memcpy(m->m_data, mtod(m0, void *), m->m_len); 1713 m_free(m0); 1714 } 1715 KASSERT(M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len)); 1716 m_copydata(m->m_next, 0, m->m_pkthdr.len - m->m_len, 1717 mtod(m, char *) + m->m_len); 1718 m->m_len = m->m_pkthdr.len; 1719 m_freem(m->m_next); 1720 m->m_next = NULL; 1721 return m; 1722 } 1723 1724 m0 = m_get(how, MT_DATA); 1725 if (m0 == NULL) 1726 return NULL; 1727 mn = m0; 1728 1729 sz = m->m_pkthdr.len - m->m_len; 1730 KASSERT(sz >= 0); 1731 1732 do { 1733 if (sz > MLEN) { 1734 MCLGET(mn, how); 1735 if ((mn->m_flags & M_EXT) == 0) { 1736 m_freem(m0); 1737 return NULL; 1738 } 1739 } 1740 1741 mn->m_len = MIN(sz, MCLBYTES); 1742 1743 m_copydata(m, m->m_pkthdr.len - sz, mn->m_len, 1744 mtod(mn, void *)); 1745 1746 sz -= mn->m_len; 1747 1748 if (sz > 0) { 1749 /* need more mbufs */ 1750 n = m_get(how, MT_DATA); 1751 if (n == NULL) { 1752 m_freem(m0); 1753 return NULL; 1754 } 1755 1756 mn->m_next = n; 1757 mn = n; 1758 } 1759 } while (sz > 0); 1760 1761 m_freem(m->m_next); 1762 m->m_next = m0; 1763 1764 return m; 1765 } 1766 1767 void 1768 m_remove_pkthdr(struct mbuf *m) 1769 { 1770 KASSERT(m->m_flags & M_PKTHDR); 1771 1772 m_tag_delete_chain(m); 1773 m->m_flags &= ~M_PKTHDR; 1774 memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); 1775 } 1776 1777 void 1778 m_copy_pkthdr(struct mbuf *to, struct mbuf *from) 1779 { 1780 KASSERT((to->m_flags & M_EXT) == 0); 1781 KASSERT((to->m_flags & M_PKTHDR) == 0 || 1782 SLIST_FIRST(&to->m_pkthdr.tags) == NULL); 1783 KASSERT((from->m_flags & M_PKTHDR) != 0); 1784 1785 to->m_pkthdr = from->m_pkthdr; 1786 to->m_flags = from->m_flags & M_COPYFLAGS; 1787 to->m_data = to->m_pktdat; 1788 1789 SLIST_INIT(&to->m_pkthdr.tags); 1790 m_tag_copy_chain(to, from); 1791 } 1792 1793 void 1794 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1795 { 1796 KASSERT((to->m_flags & M_EXT) == 0); 1797 KASSERT((to->m_flags & M_PKTHDR) == 0 || 1798 SLIST_FIRST(&to->m_pkthdr.tags) == NULL); 1799 KASSERT((from->m_flags & M_PKTHDR) != 0); 1800 1801 to->m_pkthdr = from->m_pkthdr; 1802 to->m_flags = from->m_flags & M_COPYFLAGS; 1803 to->m_data = to->m_pktdat; 1804 1805 from->m_flags &= ~M_PKTHDR; 1806 } 1807 1808 /* 1809 * Set the m_data pointer of a newly-allocated mbuf to place an object of the 1810 * specified size at the end of the mbuf, longword aligned. 1811 */ 1812 void 1813 m_align(struct mbuf *m, int len) 1814 { 1815 int buflen, adjust; 1816 1817 KASSERT(len != M_COPYALL); 1818 KASSERT(M_LEADINGSPACE(m) == 0); 1819 1820 buflen = M_BUFSIZE(m); 1821 1822 KASSERT(len <= buflen); 1823 adjust = buflen - len; 1824 m->m_data += adjust &~ (sizeof(long)-1); 1825 } 1826 1827 /* 1828 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1829 * beginning, continuing for "len" bytes. 
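 *
 * A minimal usage sketch (names are illustrative): walking the chain
 * without forcing it contiguous, e.g. to accumulate a byte sum:
 *
 *	static int
 *	sum_cb(void *arg, void *data, unsigned int len)
 *	{
 *		uint32_t *sum = arg;
 *		const uint8_t *p = data;
 *
 *		while (len-- > 0)
 *			*sum += *p++;
 *		return 0;
 *	}
 *
 *	uint32_t sum = 0;
 *	(void)m_apply(m, 0, m->m_pkthdr.len, sum_cb, &sum);
 *
 * A non-zero return value from the callback aborts the walk and is
 * passed back to the caller.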
1830 */ 1831 int 1832 m_apply(struct mbuf *m, int off, int len, 1833 int (*f)(void *, void *, unsigned int), void *arg) 1834 { 1835 unsigned int count; 1836 int rval; 1837 1838 KASSERT(len != M_COPYALL); 1839 KASSERT(len >= 0); 1840 KASSERT(off >= 0); 1841 1842 while (off > 0) { 1843 KASSERT(m != NULL); 1844 if (off < m->m_len) 1845 break; 1846 off -= m->m_len; 1847 m = m->m_next; 1848 } 1849 while (len > 0) { 1850 KASSERT(m != NULL); 1851 count = uimin(m->m_len - off, len); 1852 1853 rval = (*f)(arg, mtod(m, char *) + off, count); 1854 if (rval) 1855 return rval; 1856 1857 len -= count; 1858 off = 0; 1859 m = m->m_next; 1860 } 1861 1862 return 0; 1863 } 1864 1865 /* 1866 * Return a pointer to mbuf/offset of location in mbuf chain. 1867 */ 1868 struct mbuf * 1869 m_getptr(struct mbuf *m, int loc, int *off) 1870 { 1871 1872 while (loc >= 0) { 1873 /* Normal end of search */ 1874 if (m->m_len > loc) { 1875 *off = loc; 1876 return m; 1877 } 1878 1879 loc -= m->m_len; 1880 1881 if (m->m_next == NULL) { 1882 if (loc == 0) { 1883 /* Point at the end of valid data */ 1884 *off = m->m_len; 1885 return m; 1886 } 1887 return NULL; 1888 } else { 1889 m = m->m_next; 1890 } 1891 } 1892 1893 return NULL; 1894 } 1895 1896 /* 1897 * Release a reference to the mbuf external storage. 1898 * 1899 * => free the mbuf m itself as well. 1900 */ 1901 static void 1902 m_ext_free(struct mbuf *m) 1903 { 1904 const bool embedded = MEXT_ISEMBEDDED(m); 1905 bool dofree = true; 1906 u_int refcnt; 1907 1908 KASSERT((m->m_flags & M_EXT) != 0); 1909 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1910 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1911 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1912 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1913 1914 if (__predict_false(m->m_type == MT_FREE)) { 1915 panic("mbuf %p already freed", m); 1916 } 1917 1918 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1919 refcnt = m->m_ext.ext_refcnt = 0; 1920 } else { 1921 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1922 } 1923 1924 if (refcnt > 0) { 1925 if (embedded) { 1926 /* 1927 * other mbuf's m_ext_ref still points to us. 1928 */ 1929 dofree = false; 1930 } else { 1931 m->m_ext_ref = m; 1932 } 1933 } else { 1934 /* 1935 * dropping the last reference 1936 */ 1937 if (!embedded) { 1938 m->m_ext.ext_refcnt++; /* XXX */ 1939 m_ext_free(m->m_ext_ref); 1940 m->m_ext_ref = m; 1941 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 1942 pool_cache_put_paddr(mcl_cache, 1943 m->m_ext.ext_buf, m->m_ext.ext_paddr); 1944 } else if (m->m_ext.ext_free) { 1945 (*m->m_ext.ext_free)(m, 1946 m->m_ext.ext_buf, m->m_ext.ext_size, 1947 m->m_ext.ext_arg); 1948 /* 1949 * 'm' is already freed by the ext_free callback. 1950 */ 1951 dofree = false; 1952 } else { 1953 free(m->m_ext.ext_buf, 0); 1954 } 1955 } 1956 1957 if (dofree) { 1958 m->m_type = MT_FREE; 1959 m->m_data = NULL; 1960 pool_cache_put(mb_cache, m); 1961 } 1962 } 1963 1964 /* 1965 * Free a single mbuf and associated external storage. Return the 1966 * successor, if any. 
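 *
 * (For example, releasing only the leading mbuf while keeping the rest
 * of the chain is simply "m = m_free(m);", the same step m_freem() below
 * repeats until the chain is exhausted.)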
1967 */ 1968 struct mbuf * 1969 m_free(struct mbuf *m) 1970 { 1971 struct mbuf *n; 1972 1973 mowner_revoke(m, 1, m->m_flags); 1974 mbstat_type_add(m->m_type, -1); 1975 1976 if (m->m_flags & M_PKTHDR) 1977 m_tag_delete_chain(m); 1978 1979 n = m->m_next; 1980 1981 if (m->m_flags & M_EXT) { 1982 m_ext_free(m); 1983 } else { 1984 if (__predict_false(m->m_type == MT_FREE)) { 1985 panic("mbuf %p already freed", m); 1986 } 1987 m->m_type = MT_FREE; 1988 m->m_data = NULL; 1989 pool_cache_put(mb_cache, m); 1990 } 1991 1992 return n; 1993 } 1994 1995 void 1996 m_freem(struct mbuf *m) 1997 { 1998 if (m == NULL) 1999 return; 2000 do { 2001 m = m_free(m); 2002 } while (m); 2003 } 2004 2005 #if defined(DDB) 2006 void 2007 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 2008 { 2009 char ch; 2010 bool opt_c = false; 2011 bool opt_d = false; 2012 #if NETHER > 0 2013 bool opt_v = false; 2014 const struct mbuf *m0 = NULL; 2015 #endif 2016 int no = 0; 2017 char buf[512]; 2018 2019 while ((ch = *(modif++)) != '\0') { 2020 switch (ch) { 2021 case 'c': 2022 opt_c = true; 2023 break; 2024 case 'd': 2025 opt_d = true; 2026 break; 2027 #if NETHER > 0 2028 case 'v': 2029 opt_v = true; 2030 m0 = m; 2031 break; 2032 #endif 2033 default: 2034 break; 2035 } 2036 } 2037 2038 nextchain: 2039 (*pr)("MBUF(%d) %p\n", no, m); 2040 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 2041 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 2042 m->m_data, m->m_len, m->m_type, buf); 2043 if (opt_d) { 2044 int i; 2045 unsigned char *p = m->m_data; 2046 2047 (*pr)(" data:"); 2048 2049 for (i = 0; i < m->m_len; i++) { 2050 if (i % 16 == 0) 2051 (*pr)("\n"); 2052 (*pr)(" %02x", p[i]); 2053 } 2054 2055 (*pr)("\n"); 2056 } 2057 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 2058 m->m_nextpkt); 2059 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 2060 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 2061 (int)M_READONLY(m)); 2062 if ((m->m_flags & M_PKTHDR) != 0) { 2063 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 2064 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%" 2065 PRIx32 ", segsz=%u\n", 2066 m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m), 2067 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 2068 } 2069 if ((m->m_flags & M_EXT)) { 2070 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 2071 "ext_free=%p, ext_arg=%p\n", 2072 m->m_ext.ext_refcnt, 2073 m->m_ext.ext_buf, m->m_ext.ext_size, 2074 m->m_ext.ext_free, m->m_ext.ext_arg); 2075 } 2076 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 2077 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 2078 vaddr_t eva = sva + m->m_ext.ext_size; 2079 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 2080 int i; 2081 2082 (*pr)(" pages:"); 2083 for (i = 0; i < n; i ++) { 2084 (*pr)(" %p", m->m_ext.ext_pgs[i]); 2085 } 2086 (*pr)("\n"); 2087 } 2088 2089 if (opt_c) { 2090 m = m->m_next; 2091 if (m != NULL) { 2092 no++; 2093 goto nextchain; 2094 } 2095 } 2096 2097 #if NETHER > 0 2098 if (opt_v && m0) 2099 m_examine(m0, AF_ETHER, modif, pr); 2100 #endif 2101 } 2102 #endif /* defined(DDB) */ 2103 2104 #if defined(MBUFTRACE) 2105 void 2106 mowner_init_owner(struct mowner *mo, const char *name, const char *descr) 2107 { 2108 memset(mo, 0, sizeof(*mo)); 2109 strlcpy(mo->mo_name, name, sizeof(mo->mo_name)); 2110 strlcpy(mo->mo_descr, descr, sizeof(mo->mo_descr)); 2111 } 2112 2113 void 2114 mowner_attach(struct mowner *mo) 2115 { 2116 2117 KASSERT(mo->mo_counters == NULL); 2118 mo->mo_counters = 
percpu_alloc(sizeof(struct mowner_counter)); 2119 2120 /* XXX lock */ 2121 LIST_INSERT_HEAD(&mowners, mo, mo_link); 2122 } 2123 2124 void 2125 mowner_detach(struct mowner *mo) 2126 { 2127 2128 KASSERT(mo->mo_counters != NULL); 2129 2130 /* XXX lock */ 2131 LIST_REMOVE(mo, mo_link); 2132 2133 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 2134 mo->mo_counters = NULL; 2135 } 2136 2137 void 2138 mowner_init(struct mbuf *m, int type) 2139 { 2140 struct mowner_counter *mc; 2141 struct mowner *mo; 2142 int s; 2143 2144 m->m_owner = mo = &unknown_mowners[type]; 2145 s = splvm(); 2146 mc = percpu_getref(mo->mo_counters); 2147 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 2148 percpu_putref(mo->mo_counters); 2149 splx(s); 2150 } 2151 2152 void 2153 mowner_ref(struct mbuf *m, int flags) 2154 { 2155 struct mowner *mo = m->m_owner; 2156 struct mowner_counter *mc; 2157 int s; 2158 2159 s = splvm(); 2160 mc = percpu_getref(mo->mo_counters); 2161 if ((flags & M_EXT) != 0) 2162 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 2163 if ((flags & M_EXT_CLUSTER) != 0) 2164 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 2165 percpu_putref(mo->mo_counters); 2166 splx(s); 2167 } 2168 2169 void 2170 mowner_revoke(struct mbuf *m, bool all, int flags) 2171 { 2172 struct mowner *mo = m->m_owner; 2173 struct mowner_counter *mc; 2174 int s; 2175 2176 s = splvm(); 2177 mc = percpu_getref(mo->mo_counters); 2178 if ((flags & M_EXT) != 0) 2179 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 2180 if ((flags & M_EXT_CLUSTER) != 0) 2181 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 2182 if (all) 2183 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 2184 percpu_putref(mo->mo_counters); 2185 splx(s); 2186 if (all) 2187 m->m_owner = &revoked_mowner; 2188 } 2189 2190 static void 2191 mowner_claim(struct mbuf *m, struct mowner *mo) 2192 { 2193 struct mowner_counter *mc; 2194 int flags = m->m_flags; 2195 int s; 2196 2197 s = splvm(); 2198 mc = percpu_getref(mo->mo_counters); 2199 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 2200 if ((flags & M_EXT) != 0) 2201 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 2202 if ((flags & M_EXT_CLUSTER) != 0) 2203 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 2204 percpu_putref(mo->mo_counters); 2205 splx(s); 2206 m->m_owner = mo; 2207 } 2208 2209 void 2210 m_claim(struct mbuf *m, struct mowner *mo) 2211 { 2212 2213 if (m->m_owner == mo || mo == NULL) 2214 return; 2215 2216 mowner_revoke(m, true, m->m_flags); 2217 mowner_claim(m, mo); 2218 } 2219 2220 void 2221 m_claimm(struct mbuf *m, struct mowner *mo) 2222 { 2223 2224 for (; m != NULL; m = m->m_next) 2225 m_claim(m, mo); 2226 } 2227 #endif /* defined(MBUFTRACE) */ 2228 2229 #ifdef DIAGNOSTIC 2230 /* 2231 * Verify that the mbuf chain is not malformed. Used only for diagnostic. 2232 * Panics on error. 2233 */ 2234 void 2235 m_verify_packet(struct mbuf *m) 2236 { 2237 struct mbuf *n = m; 2238 char *low, *high, *dat; 2239 int totlen = 0, len; 2240 2241 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 2242 panic("%s: mbuf doesn't have M_PKTHDR", __func__); 2243 } 2244 2245 while (n != NULL) { 2246 if (__predict_false(n->m_type == MT_FREE)) { 2247 panic("%s: mbuf already freed (n = %p)", __func__, n); 2248 } 2249 #if 0 2250 /* 2251 * This ought to be a rule of the mbuf API. Unfortunately, 2252 * many places don't respect that rule. 
2253 */ 2254 if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) { 2255 panic("%s: M_PKTHDR set on secondary mbuf", __func__); 2256 } 2257 #endif 2258 if (__predict_false(n->m_nextpkt != NULL)) { 2259 panic("%s: m_nextpkt not null (m_nextpkt = %p)", 2260 __func__, n->m_nextpkt); 2261 } 2262 2263 dat = n->m_data; 2264 len = n->m_len; 2265 if (__predict_false(len < 0)) { 2266 panic("%s: incorrect length (len = %d)", __func__, len); 2267 } 2268 2269 low = M_BUFADDR(n); 2270 high = low + M_BUFSIZE(n); 2271 if (__predict_false((dat < low) || (dat + len > high))) { 2272 panic("%s: m_data not in packet" 2273 "(dat = %p, len = %d, low = %p, high = %p)", 2274 __func__, dat, len, low, high); 2275 } 2276 2277 totlen += len; 2278 n = n->m_next; 2279 } 2280 2281 if (__predict_false(totlen != m->m_pkthdr.len)) { 2282 panic("%s: inconsistent mbuf length (%d != %d)", __func__, 2283 totlen, m->m_pkthdr.len); 2284 } 2285 } 2286 #endif 2287 2288 struct m_tag * 2289 m_tag_get(int type, int len, int wait) 2290 { 2291 struct m_tag *t; 2292 2293 if (len < 0) 2294 return NULL; 2295 t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait); 2296 if (t == NULL) 2297 return NULL; 2298 t->m_tag_id = type; 2299 t->m_tag_len = len; 2300 return t; 2301 } 2302 2303 void 2304 m_tag_free(struct m_tag *t) 2305 { 2306 free(t, M_PACKET_TAGS); 2307 } 2308 2309 void 2310 m_tag_prepend(struct mbuf *m, struct m_tag *t) 2311 { 2312 KASSERT((m->m_flags & M_PKTHDR) != 0); 2313 SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); 2314 } 2315 2316 void 2317 m_tag_unlink(struct mbuf *m, struct m_tag *t) 2318 { 2319 KASSERT((m->m_flags & M_PKTHDR) != 0); 2320 SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); 2321 } 2322 2323 void 2324 m_tag_delete(struct mbuf *m, struct m_tag *t) 2325 { 2326 m_tag_unlink(m, t); 2327 m_tag_free(t); 2328 } 2329 2330 void 2331 m_tag_delete_chain(struct mbuf *m) 2332 { 2333 struct m_tag *p, *q; 2334 2335 KASSERT((m->m_flags & M_PKTHDR) != 0); 2336 2337 p = SLIST_FIRST(&m->m_pkthdr.tags); 2338 if (p == NULL) 2339 return; 2340 while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) 2341 m_tag_delete(m, q); 2342 m_tag_delete(m, p); 2343 } 2344 2345 struct m_tag * 2346 m_tag_find(const struct mbuf *m, int type) 2347 { 2348 struct m_tag *p; 2349 2350 KASSERT((m->m_flags & M_PKTHDR) != 0); 2351 2352 p = SLIST_FIRST(&m->m_pkthdr.tags); 2353 while (p != NULL) { 2354 if (p->m_tag_id == type) 2355 return p; 2356 p = SLIST_NEXT(p, m_tag_link); 2357 } 2358 return NULL; 2359 } 2360 2361 struct m_tag * 2362 m_tag_copy(struct m_tag *t) 2363 { 2364 struct m_tag *p; 2365 2366 p = m_tag_get(t->m_tag_id, t->m_tag_len, M_NOWAIT); 2367 if (p == NULL) 2368 return NULL; 2369 memcpy(p + 1, t + 1, t->m_tag_len); 2370 return p; 2371 } 2372 2373 /* 2374 * Copy two tag chains. The destination mbuf (to) loses any attached 2375 * tags even if the operation fails. This should not be a problem, as 2376 * m_tag_copy_chain() is typically called with a newly-allocated 2377 * destination mbuf. 
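 *
 * (General tag usage, sketched with an illustrative tag type: the payload
 * lives immediately after the m_tag header, hence the "t + 1" idiom used
 * by m_tag_copy() above.
 *
 *	struct m_tag *t;
 *
 *	t = m_tag_get(PACKET_TAG_EXAMPLE, sizeof(uint32_t), M_NOWAIT);
 *	if (t != NULL) {
 *		*(uint32_t *)(t + 1) = value;
 *		m_tag_prepend(m, t);
 *	}
 *	t = m_tag_find(m, PACKET_TAG_EXAMPLE);
 * )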
2378 */ 2379 int 2380 m_tag_copy_chain(struct mbuf *to, struct mbuf *from) 2381 { 2382 struct m_tag *p, *t, *tprev = NULL; 2383 2384 KASSERT((from->m_flags & M_PKTHDR) != 0); 2385 2386 m_tag_delete_chain(to); 2387 SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { 2388 t = m_tag_copy(p); 2389 if (t == NULL) { 2390 m_tag_delete_chain(to); 2391 return 0; 2392 } 2393 if (tprev == NULL) 2394 SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); 2395 else 2396 SLIST_INSERT_AFTER(tprev, t, m_tag_link); 2397 tprev = t; 2398 } 2399 return 1; 2400 } 2401
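
/*
 * A compact usage sketch of the allocation and free primitives defined in
 * this file (illustrative names and sizes; the payload is assumed to fit
 * in a single cluster):
 *
 *	struct mbuf *m;
 *
 *	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	memcpy(mtod(m, void *), payload, paylen);
 *	m->m_len = m->m_pkthdr.len = paylen;
 *	M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
 *	if (m == NULL)
 *		return ENOBUFS;
 *
 * On any later error the whole chain is released with m_freem(m), while
 * m_free(m) releases only the first mbuf and returns its successor.
 */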