1 /* $NetBSD: uipc_mbuf.c,v 1.248 2023/02/24 11:02:27 riastradh Exp $ */ 2 3 /* 4 * Copyright (c) 1999, 2001, 2018 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and Maxime Villard. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.248 2023/02/24 11:02:27 riastradh Exp $"); 66 67 #ifdef _KERNEL_OPT 68 #include "opt_mbuftrace.h" 69 #include "opt_nmbclusters.h" 70 #include "opt_ddb.h" 71 #include "ether.h" 72 #endif 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/atomic.h> 77 #include <sys/cpu.h> 78 #include <sys/proc.h> 79 #include <sys/mbuf.h> 80 #include <sys/kernel.h> 81 #include <sys/syslog.h> 82 #include <sys/domain.h> 83 #include <sys/protosw.h> 84 #include <sys/percpu.h> 85 #include <sys/pool.h> 86 #include <sys/socket.h> 87 #include <sys/sysctl.h> 88 89 #include <net/if.h> 90 91 pool_cache_t mb_cache; /* mbuf cache */ 92 static pool_cache_t mcl_cache; /* mbuf cluster cache */ 93 94 struct mbstat mbstat; 95 int max_linkhdr; 96 int max_protohdr; 97 int max_hdr; 98 int max_datalen; 99 100 static void mb_drain(void *, int); 101 static int mb_ctor(void *, void *, int); 102 103 static void sysctl_kern_mbuf_setup(void); 104 105 static struct sysctllog *mbuf_sysctllog; 106 107 static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool); 108 static struct mbuf *m_split_internal(struct mbuf *, int, int, bool); 109 static int m_copyback_internal(struct mbuf **, int, int, const void *, 110 int, int); 111 112 /* Flags for m_copyback_internal. 
*/ 113 #define CB_COPYBACK 0x0001 /* copyback from cp */ 114 #define CB_PRESERVE 0x0002 /* preserve original data */ 115 #define CB_COW 0x0004 /* do copy-on-write */ 116 #define CB_EXTEND 0x0008 /* extend chain */ 117 118 static const char mclpool_warnmsg[] = 119 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; 120 121 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 122 123 static percpu_t *mbstat_percpu; 124 125 #ifdef MBUFTRACE 126 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 127 struct mowner unknown_mowners[] = { 128 MOWNER_INIT("unknown", "free"), 129 MOWNER_INIT("unknown", "data"), 130 MOWNER_INIT("unknown", "header"), 131 MOWNER_INIT("unknown", "soname"), 132 MOWNER_INIT("unknown", "soopts"), 133 MOWNER_INIT("unknown", "ftable"), 134 MOWNER_INIT("unknown", "control"), 135 MOWNER_INIT("unknown", "oobdata"), 136 }; 137 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 138 #endif 139 140 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 141 142 #define MCLADDREFERENCE(o, n) \ 143 do { \ 144 KASSERT(((o)->m_flags & M_EXT) != 0); \ 145 KASSERT(((n)->m_flags & M_EXT) == 0); \ 146 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 147 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 148 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 149 (n)->m_ext_ref = (o)->m_ext_ref; \ 150 mowner_ref((n), (n)->m_flags); \ 151 } while (/* CONSTCOND */ 0) 152 153 static int 154 nmbclusters_limit(void) 155 { 156 #if defined(PMAP_MAP_POOLPAGE) 157 /* direct mapping, doesn't use space in kmem_arena */ 158 vsize_t max_size = physmem / 4; 159 #else 160 vsize_t max_size = MIN(physmem / 4, nkmempages / 4); 161 #endif 162 163 max_size = max_size * PAGE_SIZE / MCLBYTES; 164 #ifdef NMBCLUSTERS_MAX 165 max_size = MIN(max_size, NMBCLUSTERS_MAX); 166 #endif 167 168 return max_size; 169 } 170 171 /* 172 * Initialize the mbuf allocator. 
173 */ 174 void 175 mbinit(void) 176 { 177 178 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 179 CTASSERT(sizeof(struct mbuf) == MSIZE); 180 181 sysctl_kern_mbuf_setup(); 182 183 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 184 NULL, IPL_VM, mb_ctor, NULL, NULL); 185 KASSERT(mb_cache != NULL); 186 187 mcl_cache = pool_cache_init(mclbytes, COHERENCY_UNIT, 0, 0, "mclpl", 188 NULL, IPL_VM, NULL, NULL, NULL); 189 KASSERT(mcl_cache != NULL); 190 191 pool_cache_set_drain_hook(mb_cache, mb_drain, NULL); 192 pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL); 193 194 /* 195 * Set an arbitrary default limit on the number of mbuf clusters. 196 */ 197 #ifdef NMBCLUSTERS 198 nmbclusters = MIN(NMBCLUSTERS, nmbclusters_limit()); 199 #else 200 nmbclusters = MAX(1024, 201 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 202 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 203 #endif 204 205 /* 206 * Set the hard limit on the mclpool to the number of 207 * mbuf clusters the kernel is to support. Log the limit 208 * reached message max once a minute. 209 */ 210 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 211 212 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 213 214 /* 215 * Set a low water mark for both mbufs and clusters. This should 216 * help ensure that they can be allocated in a memory starvation 217 * situation. This is important for e.g. diskless systems which 218 * must allocate mbufs in order for the pagedaemon to clean pages. 219 */ 220 pool_cache_setlowat(mb_cache, mblowat); 221 pool_cache_setlowat(mcl_cache, mcllowat); 222 223 #ifdef MBUFTRACE 224 { 225 /* 226 * Attach the unknown mowners. 
227 */ 228 int i; 229 MOWNER_ATTACH(&revoked_mowner); 230 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 231 i-- > 0; ) 232 MOWNER_ATTACH(&unknown_mowners[i]); 233 } 234 #endif 235 } 236 237 static void 238 mb_drain(void *arg, int flags) 239 { 240 struct domain *dp; 241 const struct protosw *pr; 242 struct ifnet *ifp; 243 int s; 244 245 KERNEL_LOCK(1, NULL); 246 s = splvm(); 247 DOMAIN_FOREACH(dp) { 248 for (pr = dp->dom_protosw; 249 pr < dp->dom_protoswNPROTOSW; pr++) 250 if (pr->pr_drain) 251 (*pr->pr_drain)(); 252 } 253 /* XXX we cannot use psref in H/W interrupt */ 254 if (!cpu_intr_p()) { 255 int bound = curlwp_bind(); 256 IFNET_READER_FOREACH(ifp) { 257 struct psref psref; 258 259 if_acquire(ifp, &psref); 260 261 if (ifp->if_drain) 262 (*ifp->if_drain)(ifp); 263 264 if_release(ifp, &psref); 265 } 266 curlwp_bindx(bound); 267 } 268 splx(s); 269 mbstat.m_drain++; 270 KERNEL_UNLOCK_ONE(NULL); 271 } 272 273 /* 274 * sysctl helper routine for the kern.mbuf subtree. 275 * nmbclusters, mblowat and mcllowat need range 276 * checking and pool tweaking after being reset. 
277 */ 278 static int 279 sysctl_kern_mbuf(SYSCTLFN_ARGS) 280 { 281 int error, newval; 282 struct sysctlnode node; 283 284 node = *rnode; 285 node.sysctl_data = &newval; 286 switch (rnode->sysctl_num) { 287 case MBUF_NMBCLUSTERS: 288 case MBUF_MBLOWAT: 289 case MBUF_MCLLOWAT: 290 newval = *(int*)rnode->sysctl_data; 291 break; 292 case MBUF_NMBCLUSTERS_LIMIT: 293 newval = nmbclusters_limit(); 294 break; 295 default: 296 return EOPNOTSUPP; 297 } 298 299 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 300 if (error || newp == NULL) 301 return error; 302 if (newval < 0) 303 return EINVAL; 304 305 switch (node.sysctl_num) { 306 case MBUF_NMBCLUSTERS: 307 if (newval < nmbclusters) 308 return EINVAL; 309 if (newval > nmbclusters_limit()) 310 return EINVAL; 311 nmbclusters = newval; 312 pool_cache_sethardlimit(mcl_cache, nmbclusters, 313 mclpool_warnmsg, 60); 314 break; 315 case MBUF_MBLOWAT: 316 mblowat = newval; 317 pool_cache_setlowat(mb_cache, mblowat); 318 break; 319 case MBUF_MCLLOWAT: 320 mcllowat = newval; 321 pool_cache_setlowat(mcl_cache, mcllowat); 322 break; 323 } 324 325 return 0; 326 } 327 328 #ifdef MBUFTRACE 329 static void 330 mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 331 { 332 struct mowner_counter *mc = v1; 333 struct mowner_user *mo_user = v2; 334 int i; 335 336 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 337 mo_user->mo_counter[i] += mc->mc_counter[i]; 338 } 339 } 340 341 static void 342 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 343 { 344 345 memset(mo_user, 0, sizeof(*mo_user)); 346 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 347 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 348 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 349 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 350 percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user); 351 } 352 353 static int 354 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 355 { 356 struct mowner 
*mo; 357 size_t len = 0; 358 int error = 0; 359 360 if (namelen != 0) 361 return EINVAL; 362 if (newp != NULL) 363 return EPERM; 364 365 LIST_FOREACH(mo, &mowners, mo_link) { 366 struct mowner_user mo_user; 367 368 mowner_convert_to_user(mo, &mo_user); 369 370 if (oldp != NULL) { 371 if (*oldlenp - len < sizeof(mo_user)) { 372 error = ENOMEM; 373 break; 374 } 375 error = copyout(&mo_user, (char *)oldp + len, 376 sizeof(mo_user)); 377 if (error) 378 break; 379 } 380 len += sizeof(mo_user); 381 } 382 383 if (error == 0) 384 *oldlenp = len; 385 386 return error; 387 } 388 #endif /* MBUFTRACE */ 389 390 void 391 mbstat_type_add(int type, int diff) 392 { 393 struct mbstat_cpu *mb; 394 int s; 395 396 s = splvm(); 397 mb = percpu_getref(mbstat_percpu); 398 mb->m_mtypes[type] += diff; 399 percpu_putref(mbstat_percpu); 400 splx(s); 401 } 402 403 static void 404 mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 405 { 406 struct mbstat_cpu *mbsc = v1; 407 struct mbstat *mbs = v2; 408 int i; 409 410 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 411 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 412 } 413 } 414 415 static void 416 mbstat_convert_to_user(struct mbstat *mbs) 417 { 418 419 memset(mbs, 0, sizeof(*mbs)); 420 mbs->m_drain = mbstat.m_drain; 421 percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs); 422 } 423 424 static int 425 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 426 { 427 struct sysctlnode node; 428 struct mbstat mbs; 429 430 mbstat_convert_to_user(&mbs); 431 node = *rnode; 432 node.sysctl_data = &mbs; 433 node.sysctl_size = sizeof(mbs); 434 return sysctl_lookup(SYSCTLFN_CALL(&node)); 435 } 436 437 static void 438 sysctl_kern_mbuf_setup(void) 439 { 440 441 KASSERT(mbuf_sysctllog == NULL); 442 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 443 CTLFLAG_PERMANENT, 444 CTLTYPE_NODE, "mbuf", 445 SYSCTL_DESCR("mbuf control variables"), 446 NULL, 0, NULL, 0, 447 CTL_KERN, KERN_MBUF, CTL_EOL); 448 449 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 450 
CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 451 CTLTYPE_INT, "msize", 452 SYSCTL_DESCR("mbuf base size"), 453 NULL, msize, NULL, 0, 454 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 455 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 456 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 457 CTLTYPE_INT, "mclbytes", 458 SYSCTL_DESCR("mbuf cluster size"), 459 NULL, mclbytes, NULL, 0, 460 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 461 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 462 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 463 CTLTYPE_INT, "nmbclusters", 464 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 465 sysctl_kern_mbuf, 0, &nmbclusters, 0, 466 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 467 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 468 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 469 CTLTYPE_INT, "mblowat", 470 SYSCTL_DESCR("mbuf low water mark"), 471 sysctl_kern_mbuf, 0, &mblowat, 0, 472 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 473 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 474 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 475 CTLTYPE_INT, "mcllowat", 476 SYSCTL_DESCR("mbuf cluster low water mark"), 477 sysctl_kern_mbuf, 0, &mcllowat, 0, 478 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 479 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 480 CTLFLAG_PERMANENT, 481 CTLTYPE_STRUCT, "stats", 482 SYSCTL_DESCR("mbuf allocation statistics"), 483 sysctl_kern_mbuf_stats, 0, NULL, 0, 484 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 485 #ifdef MBUFTRACE 486 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 487 CTLFLAG_PERMANENT, 488 CTLTYPE_STRUCT, "mowners", 489 SYSCTL_DESCR("Information about mbuf owners"), 490 sysctl_kern_mbuf_mowners, 0, NULL, 0, 491 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 492 #endif 493 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 494 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 495 CTLTYPE_INT, "nmbclusters_limit", 496 SYSCTL_DESCR("Limit of nmbclusters"), 497 sysctl_kern_mbuf, 0, NULL, 0, 498 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS_LIMIT, CTL_EOL); 499 } 500 501 static 
int 502 mb_ctor(void *arg, void *object, int flags) 503 { 504 struct mbuf *m = object; 505 506 #ifdef POOL_VTOPHYS 507 m->m_paddr = POOL_VTOPHYS(m); 508 #else 509 m->m_paddr = M_PADDR_INVALID; 510 #endif 511 return 0; 512 } 513 514 /* 515 * Add mbuf to the end of a chain 516 */ 517 struct mbuf * 518 m_add(struct mbuf *c, struct mbuf *m) 519 { 520 struct mbuf *n; 521 522 if (c == NULL) 523 return m; 524 525 for (n = c; n->m_next != NULL; n = n->m_next) 526 continue; 527 n->m_next = m; 528 return c; 529 } 530 531 struct mbuf * 532 m_get(int how, int type) 533 { 534 struct mbuf *m; 535 536 KASSERT(type != MT_FREE); 537 538 m = pool_cache_get(mb_cache, 539 how == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT); 540 if (m == NULL) 541 return NULL; 542 KASSERT(((vaddr_t)m->m_dat & PAGE_MASK) + MLEN <= PAGE_SIZE); 543 544 mbstat_type_add(type, 1); 545 546 mowner_init(m, type); 547 m->m_ext_ref = m; /* default */ 548 m->m_type = type; 549 m->m_len = 0; 550 m->m_next = NULL; 551 m->m_nextpkt = NULL; /* default */ 552 m->m_data = m->m_dat; 553 m->m_flags = 0; /* default */ 554 555 return m; 556 } 557 558 struct mbuf * 559 m_gethdr(int how, int type) 560 { 561 struct mbuf *m; 562 563 m = m_get(how, type); 564 if (m == NULL) 565 return NULL; 566 567 m->m_data = m->m_pktdat; 568 m->m_flags = M_PKTHDR; 569 570 m_reset_rcvif(m); 571 m->m_pkthdr.len = 0; 572 m->m_pkthdr.csum_flags = 0; 573 m->m_pkthdr.csum_data = 0; 574 m->m_pkthdr.segsz = 0; 575 m->m_pkthdr.ether_vtag = 0; 576 m->m_pkthdr.pkthdr_flags = 0; 577 SLIST_INIT(&m->m_pkthdr.tags); 578 579 m->m_pkthdr.pattr_class = NULL; 580 m->m_pkthdr.pattr_af = AF_UNSPEC; 581 m->m_pkthdr.pattr_hdr = NULL; 582 583 return m; 584 } 585 586 void 587 m_clget(struct mbuf *m, int how) 588 { 589 m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache, 590 how == M_WAIT ? 
(PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT, 591 &m->m_ext_storage.ext_paddr); 592 593 if (m->m_ext_storage.ext_buf == NULL) 594 return; 595 596 KASSERT(((vaddr_t)m->m_ext_storage.ext_buf & PAGE_MASK) + mclbytes 597 <= PAGE_SIZE); 598 599 MCLINITREFERENCE(m); 600 m->m_data = m->m_ext.ext_buf; 601 m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) | 602 M_EXT|M_EXT_CLUSTER|M_EXT_RW; 603 m->m_ext.ext_size = MCLBYTES; 604 m->m_ext.ext_free = NULL; 605 m->m_ext.ext_arg = NULL; 606 /* ext_paddr initialized above */ 607 608 mowner_ref(m, M_EXT|M_EXT_CLUSTER); 609 } 610 611 struct mbuf * 612 m_getcl(int how, int type, int flags) 613 { 614 struct mbuf *mp; 615 616 if ((flags & M_PKTHDR) != 0) 617 mp = m_gethdr(how, type); 618 else 619 mp = m_get(how, type); 620 621 if (mp == NULL) 622 return NULL; 623 624 MCLGET(mp, how); 625 if ((mp->m_flags & M_EXT) != 0) 626 return mp; 627 628 m_free(mp); 629 return NULL; 630 } 631 632 /* 633 * Utility function for M_PREPEND. Do *NOT* use it directly. 634 */ 635 struct mbuf * 636 m_prepend(struct mbuf *m, int len, int how) 637 { 638 struct mbuf *mn; 639 640 if (__predict_false(len > MHLEN)) { 641 panic("%s: len > MHLEN", __func__); 642 } 643 644 KASSERT(len != M_COPYALL); 645 mn = m_get(how, m->m_type); 646 if (mn == NULL) { 647 m_freem(m); 648 return NULL; 649 } 650 651 if (m->m_flags & M_PKTHDR) { 652 m_move_pkthdr(mn, m); 653 } else { 654 MCLAIM(mn, m->m_owner); 655 } 656 mn->m_next = m; 657 m = mn; 658 659 if (m->m_flags & M_PKTHDR) { 660 if (len < MHLEN) 661 m_align(m, len); 662 } else { 663 if (len < MLEN) 664 m_align(m, len); 665 } 666 667 m->m_len = len; 668 return m; 669 } 670 671 struct mbuf * 672 m_copym(struct mbuf *m, int off, int len, int wait) 673 { 674 /* Shallow copy on M_EXT. */ 675 return m_copy_internal(m, off, len, wait, false); 676 } 677 678 struct mbuf * 679 m_dup(struct mbuf *m, int off, int len, int wait) 680 { 681 /* Deep copy. 
*/ 682 return m_copy_internal(m, off, len, wait, true); 683 } 684 685 static inline int 686 m_copylen(int len, int copylen) 687 { 688 return (len == M_COPYALL) ? copylen : uimin(len, copylen); 689 } 690 691 static struct mbuf * 692 m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep) 693 { 694 struct mbuf *n, **np; 695 int off = off0; 696 struct mbuf *top; 697 int copyhdr = 0; 698 699 if (off < 0 || (len != M_COPYALL && len < 0)) 700 panic("%s: off %d, len %d", __func__, off, len); 701 if (off == 0 && m->m_flags & M_PKTHDR) 702 copyhdr = 1; 703 while (off > 0) { 704 if (m == NULL) 705 panic("%s: m == NULL, off %d", __func__, off); 706 if (off < m->m_len) 707 break; 708 off -= m->m_len; 709 m = m->m_next; 710 } 711 712 np = ⊤ 713 top = NULL; 714 while (len == M_COPYALL || len > 0) { 715 if (m == NULL) { 716 if (len != M_COPYALL) 717 panic("%s: m == NULL, len %d [!COPYALL]", 718 __func__, len); 719 break; 720 } 721 722 n = m_get(wait, m->m_type); 723 *np = n; 724 if (n == NULL) 725 goto nospace; 726 MCLAIM(n, m->m_owner); 727 728 if (copyhdr) { 729 m_copy_pkthdr(n, m); 730 if (len == M_COPYALL) 731 n->m_pkthdr.len -= off0; 732 else 733 n->m_pkthdr.len = len; 734 copyhdr = 0; 735 } 736 n->m_len = m_copylen(len, m->m_len - off); 737 738 if (m->m_flags & M_EXT) { 739 if (!deep) { 740 n->m_data = m->m_data + off; 741 MCLADDREFERENCE(m, n); 742 } else { 743 /* 744 * We don't care if MCLGET fails. n->m_len is 745 * recomputed and handles that. 
746 */ 747 MCLGET(n, wait); 748 n->m_len = 0; 749 n->m_len = M_TRAILINGSPACE(n); 750 n->m_len = m_copylen(len, n->m_len); 751 n->m_len = uimin(n->m_len, m->m_len - off); 752 memcpy(mtod(n, void *), mtod(m, char *) + off, 753 (unsigned)n->m_len); 754 } 755 } else { 756 memcpy(mtod(n, void *), mtod(m, char *) + off, 757 (unsigned)n->m_len); 758 } 759 760 if (len != M_COPYALL) 761 len -= n->m_len; 762 off += n->m_len; 763 764 KASSERT(off <= m->m_len); 765 766 if (off == m->m_len) { 767 m = m->m_next; 768 off = 0; 769 } 770 np = &n->m_next; 771 } 772 773 return top; 774 775 nospace: 776 m_freem(top); 777 return NULL; 778 } 779 780 /* 781 * Copy an entire packet, including header (which must be present). 782 * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'. 783 */ 784 struct mbuf * 785 m_copypacket(struct mbuf *m, int how) 786 { 787 struct mbuf *top, *n, *o; 788 789 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 790 panic("%s: no header (m = %p)", __func__, m); 791 } 792 793 n = m_get(how, m->m_type); 794 top = n; 795 if (!n) 796 goto nospace; 797 798 MCLAIM(n, m->m_owner); 799 m_copy_pkthdr(n, m); 800 n->m_len = m->m_len; 801 if (m->m_flags & M_EXT) { 802 n->m_data = m->m_data; 803 MCLADDREFERENCE(m, n); 804 } else { 805 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 806 } 807 808 m = m->m_next; 809 while (m) { 810 o = m_get(how, m->m_type); 811 if (!o) 812 goto nospace; 813 814 MCLAIM(o, m->m_owner); 815 n->m_next = o; 816 n = n->m_next; 817 818 n->m_len = m->m_len; 819 if (m->m_flags & M_EXT) { 820 n->m_data = m->m_data; 821 MCLADDREFERENCE(m, n); 822 } else { 823 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 824 } 825 826 m = m->m_next; 827 } 828 return top; 829 830 nospace: 831 m_freem(top); 832 return NULL; 833 } 834 835 void 836 m_copydata(struct mbuf *m, int off, int len, void *cp) 837 { 838 unsigned int count; 839 struct mbuf *m0 = m; 840 int len0 = len; 841 int off0 = off; 842 void *cp0 = cp; 843 844 KASSERT(len != 
M_COPYALL); 845 if (off < 0 || len < 0) 846 panic("m_copydata: off %d, len %d", off, len); 847 while (off > 0) { 848 if (m == NULL) 849 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", 850 m0, len0, off0, cp0, off, off0 - off); 851 if (off < m->m_len) 852 break; 853 off -= m->m_len; 854 m = m->m_next; 855 } 856 while (len > 0) { 857 if (m == NULL) 858 panic("m_copydata(%p,%d,%d,%p): " 859 "m=NULL, off=%d (%d), len=%d (%d)", 860 m0, len0, off0, cp0, 861 off, off0 - off, len, len0 - len); 862 count = uimin(m->m_len - off, len); 863 memcpy(cp, mtod(m, char *) + off, count); 864 len -= count; 865 cp = (char *)cp + count; 866 off = 0; 867 m = m->m_next; 868 } 869 } 870 871 /* 872 * Concatenate mbuf chain n to m. 873 * n might be copied into m (when n->m_len is small), therefore data portion of 874 * n could be copied into an mbuf of different mbuf type. 875 * Any m_pkthdr is not updated. 876 */ 877 void 878 m_cat(struct mbuf *m, struct mbuf *n) 879 { 880 881 while (m->m_next) 882 m = m->m_next; 883 while (n) { 884 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 885 /* just join the two chains */ 886 m->m_next = n; 887 return; 888 } 889 /* splat the data from one into the other */ 890 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 891 (u_int)n->m_len); 892 m->m_len += n->m_len; 893 n = m_free(n); 894 } 895 } 896 897 void 898 m_adj(struct mbuf *mp, int req_len) 899 { 900 int len = req_len; 901 struct mbuf *m; 902 int count; 903 904 if ((m = mp) == NULL) 905 return; 906 if (len >= 0) { 907 /* 908 * Trim from head. 909 */ 910 while (m != NULL && len > 0) { 911 if (m->m_len <= len) { 912 len -= m->m_len; 913 m->m_len = 0; 914 m = m->m_next; 915 } else { 916 m->m_len -= len; 917 m->m_data += len; 918 len = 0; 919 } 920 } 921 if (mp->m_flags & M_PKTHDR) 922 mp->m_pkthdr.len -= (req_len - len); 923 } else { 924 /* 925 * Trim from tail. Scan the mbuf chain, 926 * calculating its length and finding the last mbuf. 
927 * If the adjustment only affects this mbuf, then just 928 * adjust and return. Otherwise, rescan and truncate 929 * after the remaining size. 930 */ 931 len = -len; 932 count = 0; 933 for (;;) { 934 count += m->m_len; 935 if (m->m_next == NULL) 936 break; 937 m = m->m_next; 938 } 939 if (m->m_len >= len) { 940 m->m_len -= len; 941 if (mp->m_flags & M_PKTHDR) 942 mp->m_pkthdr.len -= len; 943 return; 944 } 945 946 count -= len; 947 if (count < 0) 948 count = 0; 949 950 /* 951 * Correct length for chain is "count". 952 * Find the mbuf with last data, adjust its length, 953 * and toss data from remaining mbufs on chain. 954 */ 955 m = mp; 956 if (m->m_flags & M_PKTHDR) 957 m->m_pkthdr.len = count; 958 for (; m; m = m->m_next) { 959 if (m->m_len >= count) { 960 m->m_len = count; 961 break; 962 } 963 count -= m->m_len; 964 } 965 if (m) { 966 while (m->m_next) 967 (m = m->m_next)->m_len = 0; 968 } 969 } 970 } 971 972 /* 973 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 974 * would be contiguous and in the data area of an mbuf (therefore, mtod() 975 * would work for a structure of given length). 976 * 977 * => On success, returns true and the resulting mbuf chain; false otherwise. 978 * => The mbuf chain may change, but is always preserved valid. 979 */ 980 bool 981 m_ensure_contig(struct mbuf **m0, int len) 982 { 983 struct mbuf *n = *m0, *m; 984 size_t count, space; 985 986 KASSERT(len != M_COPYALL); 987 /* 988 * If first mbuf has no cluster, and has room for len bytes 989 * without shifting current data, pullup into it, 990 * otherwise allocate a new mbuf to prepend to the chain. 
991 */ 992 if ((n->m_flags & M_EXT) == 0 && 993 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 994 if (n->m_len >= len) { 995 return true; 996 } 997 m = n; 998 n = n->m_next; 999 len -= m->m_len; 1000 } else { 1001 if (len > MHLEN) { 1002 return false; 1003 } 1004 m = m_get(M_DONTWAIT, n->m_type); 1005 if (m == NULL) { 1006 return false; 1007 } 1008 MCLAIM(m, n->m_owner); 1009 if (n->m_flags & M_PKTHDR) { 1010 m_move_pkthdr(m, n); 1011 } 1012 } 1013 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1014 do { 1015 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 1016 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1017 (unsigned)count); 1018 len -= count; 1019 m->m_len += count; 1020 n->m_len -= count; 1021 space -= count; 1022 if (n->m_len) 1023 n->m_data += count; 1024 else 1025 n = m_free(n); 1026 } while (len > 0 && n); 1027 1028 m->m_next = n; 1029 *m0 = m; 1030 1031 return len <= 0; 1032 } 1033 1034 /* 1035 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 1036 */ 1037 struct mbuf * 1038 m_pullup(struct mbuf *n, int len) 1039 { 1040 struct mbuf *m = n; 1041 1042 KASSERT(len != M_COPYALL); 1043 if (!m_ensure_contig(&m, len)) { 1044 KASSERT(m != NULL); 1045 m_freem(m); 1046 m = NULL; 1047 } 1048 return m; 1049 } 1050 1051 /* 1052 * ensure that [off, off + len) is contiguous on the mbuf chain "m". 1053 * packet chain before "off" is kept untouched. 1054 * if offp == NULL, the target will start at <retval, 0> on resulting chain. 1055 * if offp != NULL, the target will start at <retval, *offp> on resulting chain. 1056 * 1057 * on error return (NULL return value), original "m" will be freed. 1058 * 1059 * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster) 1060 */ 1061 struct mbuf * 1062 m_pulldown(struct mbuf *m, int off, int len, int *offp) 1063 { 1064 struct mbuf *n, *o; 1065 int hlen, tlen, olen; 1066 int sharedcluster; 1067 1068 /* Check invalid arguments. 
*/ 1069 if (m == NULL) 1070 panic("%s: m == NULL", __func__); 1071 if (len > MCLBYTES) { 1072 m_freem(m); 1073 return NULL; 1074 } 1075 1076 n = m; 1077 while (n != NULL && off > 0) { 1078 if (n->m_len > off) 1079 break; 1080 off -= n->m_len; 1081 n = n->m_next; 1082 } 1083 /* Be sure to point non-empty mbuf. */ 1084 while (n != NULL && n->m_len == 0) 1085 n = n->m_next; 1086 if (!n) { 1087 m_freem(m); 1088 return NULL; /* mbuf chain too short */ 1089 } 1090 1091 sharedcluster = M_READONLY(n); 1092 1093 /* 1094 * The target data is on <n, off>. If we got enough data on the mbuf 1095 * "n", we're done. 1096 */ 1097 #ifdef __NO_STRICT_ALIGNMENT 1098 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster) 1099 #else 1100 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster && 1101 ALIGNED_POINTER((mtod(n, char *) + off), uint32_t)) 1102 #endif 1103 goto ok; 1104 1105 /* 1106 * When (len <= n->m_len - off) and (off != 0), it is a special case. 1107 * Len bytes from <n, off> sit in single mbuf, but the caller does 1108 * not like the starting position (off). 1109 * 1110 * Chop the current mbuf into two pieces, set off to 0. 1111 */ 1112 if (len <= n->m_len - off) { 1113 struct mbuf *mlast; 1114 1115 o = m_dup(n, off, n->m_len - off, M_DONTWAIT); 1116 if (o == NULL) { 1117 m_freem(m); 1118 return NULL; /* ENOBUFS */ 1119 } 1120 KASSERT(o->m_len >= len); 1121 for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next) 1122 ; 1123 n->m_len = off; 1124 mlast->m_next = n->m_next; 1125 n->m_next = o; 1126 n = o; 1127 off = 0; 1128 goto ok; 1129 } 1130 1131 /* 1132 * We need to take hlen from <n, off> and tlen from <n->m_next, 0>, 1133 * and construct contiguous mbuf with m_len == len. 1134 * 1135 * Note that hlen + tlen == len, and tlen > 0. 1136 */ 1137 hlen = n->m_len - off; 1138 tlen = len - hlen; 1139 1140 /* 1141 * Ensure that we have enough trailing data on mbuf chain. If not, 1142 * we can do nothing about the chain. 
1143 */ 1144 olen = 0; 1145 for (o = n->m_next; o != NULL; o = o->m_next) 1146 olen += o->m_len; 1147 if (hlen + olen < len) { 1148 m_freem(m); 1149 return NULL; /* mbuf chain too short */ 1150 } 1151 1152 /* 1153 * Easy cases first. We need to use m_copydata() to get data from 1154 * <n->m_next, 0>. 1155 */ 1156 if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen && 1157 !sharedcluster) { 1158 m_copydata(n->m_next, 0, tlen, mtod(n, char *) + n->m_len); 1159 n->m_len += tlen; 1160 m_adj(n->m_next, tlen); 1161 goto ok; 1162 } 1163 if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen && 1164 #ifndef __NO_STRICT_ALIGNMENT 1165 ALIGNED_POINTER((n->m_next->m_data - hlen), uint32_t) && 1166 #endif 1167 !sharedcluster && n->m_next->m_len >= tlen) { 1168 n->m_next->m_data -= hlen; 1169 n->m_next->m_len += hlen; 1170 memcpy(mtod(n->m_next, void *), mtod(n, char *) + off, hlen); 1171 n->m_len -= hlen; 1172 n = n->m_next; 1173 off = 0; 1174 goto ok; 1175 } 1176 1177 /* 1178 * Now, we need to do the hard way. Don't copy as there's no room 1179 * on both ends. 
1180 */ 1181 o = m_get(M_DONTWAIT, m->m_type); 1182 if (o && len > MLEN) { 1183 MCLGET(o, M_DONTWAIT); 1184 if ((o->m_flags & M_EXT) == 0) { 1185 m_free(o); 1186 o = NULL; 1187 } 1188 } 1189 if (!o) { 1190 m_freem(m); 1191 return NULL; /* ENOBUFS */ 1192 } 1193 /* get hlen from <n, off> into <o, 0> */ 1194 o->m_len = hlen; 1195 memcpy(mtod(o, void *), mtod(n, char *) + off, hlen); 1196 n->m_len -= hlen; 1197 /* get tlen from <n->m_next, 0> into <o, hlen> */ 1198 m_copydata(n->m_next, 0, tlen, mtod(o, char *) + o->m_len); 1199 o->m_len += tlen; 1200 m_adj(n->m_next, tlen); 1201 o->m_next = n->m_next; 1202 n->m_next = o; 1203 n = o; 1204 off = 0; 1205 1206 ok: 1207 if (offp) 1208 *offp = off; 1209 return n; 1210 } 1211 1212 /* 1213 * Like m_pullup(), except a new mbuf is always allocated, and we allow 1214 * the amount of empty space before the data in the new mbuf to be specified 1215 * (in the event that the caller expects to prepend later). 1216 */ 1217 struct mbuf * 1218 m_copyup(struct mbuf *n, int len, int dstoff) 1219 { 1220 struct mbuf *m; 1221 int count, space; 1222 1223 KASSERT(len != M_COPYALL); 1224 if (len > ((int)MHLEN - dstoff)) 1225 goto bad; 1226 m = m_get(M_DONTWAIT, n->m_type); 1227 if (m == NULL) 1228 goto bad; 1229 MCLAIM(m, n->m_owner); 1230 if (n->m_flags & M_PKTHDR) { 1231 m_move_pkthdr(m, n); 1232 } 1233 m->m_data += dstoff; 1234 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1235 do { 1236 count = uimin(uimin(uimax(len, max_protohdr), space), n->m_len); 1237 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1238 (unsigned)count); 1239 len -= count; 1240 m->m_len += count; 1241 n->m_len -= count; 1242 space -= count; 1243 if (n->m_len) 1244 n->m_data += count; 1245 else 1246 n = m_free(n); 1247 } while (len > 0 && n); 1248 if (len > 0) { 1249 (void) m_free(m); 1250 goto bad; 1251 } 1252 m->m_next = n; 1253 return m; 1254 bad: 1255 m_freem(n); 1256 return NULL; 1257 } 1258 1259 struct mbuf * 1260 m_split(struct mbuf *m0, int len, int 
wait) 1261 { 1262 return m_split_internal(m0, len, wait, true); 1263 } 1264 1265 static struct mbuf * 1266 m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr) 1267 { 1268 struct mbuf *m, *n; 1269 unsigned len = len0, remain, len_save; 1270 1271 KASSERT(len0 != M_COPYALL); 1272 for (m = m0; m && len > m->m_len; m = m->m_next) 1273 len -= m->m_len; 1274 if (m == NULL) 1275 return NULL; 1276 1277 remain = m->m_len - len; 1278 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1279 n = m_gethdr(wait, m0->m_type); 1280 if (n == NULL) 1281 return NULL; 1282 1283 MCLAIM(n, m0->m_owner); 1284 m_copy_rcvif(n, m0); 1285 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1286 len_save = m0->m_pkthdr.len; 1287 m0->m_pkthdr.len = len0; 1288 1289 if (m->m_flags & M_EXT) 1290 goto extpacket; 1291 1292 if (remain > MHLEN) { 1293 /* m can't be the lead packet */ 1294 m_align(n, 0); 1295 n->m_len = 0; 1296 n->m_next = m_split(m, len, wait); 1297 if (n->m_next == NULL) { 1298 (void)m_free(n); 1299 m0->m_pkthdr.len = len_save; 1300 return NULL; 1301 } 1302 return n; 1303 } else { 1304 m_align(n, remain); 1305 } 1306 } else if (remain == 0) { 1307 n = m->m_next; 1308 m->m_next = NULL; 1309 return n; 1310 } else { 1311 n = m_get(wait, m->m_type); 1312 if (n == NULL) 1313 return NULL; 1314 MCLAIM(n, m->m_owner); 1315 m_align(n, remain); 1316 } 1317 1318 extpacket: 1319 if (m->m_flags & M_EXT) { 1320 n->m_data = m->m_data + len; 1321 MCLADDREFERENCE(m, n); 1322 } else { 1323 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1324 } 1325 1326 n->m_len = remain; 1327 m->m_len = len; 1328 n->m_next = m->m_next; 1329 m->m_next = NULL; 1330 return n; 1331 } 1332 1333 /* 1334 * Routine to copy from device local memory into mbufs. 
1335 */ 1336 struct mbuf * 1337 m_devget(char *buf, int totlen, int off, struct ifnet *ifp) 1338 { 1339 struct mbuf *m; 1340 struct mbuf *top = NULL, **mp = ⊤ 1341 char *cp, *epkt; 1342 int len; 1343 1344 cp = buf; 1345 epkt = cp + totlen; 1346 if (off) { 1347 /* 1348 * If 'off' is non-zero, packet is trailer-encapsulated, 1349 * so we have to skip the type and length fields. 1350 */ 1351 cp += off + 2 * sizeof(uint16_t); 1352 totlen -= 2 * sizeof(uint16_t); 1353 } 1354 1355 m = m_gethdr(M_DONTWAIT, MT_DATA); 1356 if (m == NULL) 1357 return NULL; 1358 m_set_rcvif(m, ifp); 1359 m->m_pkthdr.len = totlen; 1360 m->m_len = MHLEN; 1361 1362 while (totlen > 0) { 1363 if (top) { 1364 m = m_get(M_DONTWAIT, MT_DATA); 1365 if (m == NULL) { 1366 m_freem(top); 1367 return NULL; 1368 } 1369 m->m_len = MLEN; 1370 } 1371 1372 len = uimin(totlen, epkt - cp); 1373 1374 if (len >= MINCLSIZE) { 1375 MCLGET(m, M_DONTWAIT); 1376 if ((m->m_flags & M_EXT) == 0) { 1377 m_free(m); 1378 m_freem(top); 1379 return NULL; 1380 } 1381 m->m_len = len = uimin(len, MCLBYTES); 1382 } else { 1383 /* 1384 * Place initial small packet/header at end of mbuf. 1385 */ 1386 if (len < m->m_len) { 1387 if (top == 0 && len + max_linkhdr <= m->m_len) 1388 m->m_data += max_linkhdr; 1389 m->m_len = len; 1390 } else 1391 len = m->m_len; 1392 } 1393 1394 memcpy(mtod(m, void *), cp, (size_t)len); 1395 1396 cp += len; 1397 *mp = m; 1398 mp = &m->m_next; 1399 totlen -= len; 1400 if (cp == epkt) 1401 cp = buf; 1402 } 1403 1404 return top; 1405 } 1406 1407 /* 1408 * Copy data from a buffer back into the indicated mbuf chain, 1409 * starting "off" bytes from the beginning, extending the mbuf 1410 * chain if necessary. 
*/
/*
 * m_copyback: write "len" bytes from "cp" into the chain m0 at offset
 * "off".  Thin wrapper around m_copyback_internal() with
 * CB_COPYBACK|CB_EXTEND and M_DONTWAIT.  A NULL m0 is silently ignored.
 * Nothing is returned; under DEBUG a non-zero result from the worker, or
 * a changed head pointer, panics.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif

	if (m0 == NULL)
		return;

	/* The result is only captured (and checked) in DEBUG kernels. */
#if defined(DEBUG)
	error =
#endif
	m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND,
	    M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif
}

/*
 * m_copyback_cow: like m_copyback(), but read-only mbufs in the written
 * range are replaced by writable ones (CB_COW) and the chain is never
 * extended.  Returns the (possibly new) head of the chain, or NULL after
 * freeing the whole chain if the worker reports an error.
 */
struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KASSERT(len != M_COPYALL);
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW,
	    how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}

/*
 * m_makewritable: make the range <off, len> of the chain *mp writable
 * while preserving its contents (CB_PRESERVE|CB_COW).  *mp may be
 * updated.  Returns 0 on success or the worker's error code.  Under
 * DEBUG the total length is verified to be unchanged afterwards.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	int origlen = m_length(*mp);
#endif

	error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW,
	    how);
	if (error)
		return error;

#if defined(DEBUG)
	int reslen = 0;
	for (struct mbuf *n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsist");
#endif

	return 0;
}

/*
 * m_copyback_internal: common worker for m_copyback(), m_copyback_cow()
 * and m_makewritable().
 *
 *	mp0	in/out pointer to the head of the chain
 *	off	byte offset of the range within the chain
 *	len	length of the range; M_COPYALL means "to the end"
 *	vp	source bytes (required with CB_COPYBACK, NULL with
 *		CB_PRESERVE)
 *	flags	CB_COPYBACK: copy from vp into the range
 *		CB_PRESERVE: carry old contents into replacement mbufs
 *		CB_EXTEND:   grow the chain when the range runs past it
 *		CB_COW:      replace read-only mbufs by writable ones
 *		CB_EXTEND and CB_COW must not both be set.
 *	how	wait flag passed to the allocators
 *
 * Returns 0, or ENOBUFS when an allocation in the copy-on-write path
 * fails.  Note that running off the end without CB_EXTEND, or failing to
 * allocate while extending, jumps to "out" and also returns 0.
 */
static int
m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp,
    int flags, int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;	/* link that points at "m" */
	int totlen = 0;		/* bytes of chain accounted for so far */
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL);

	if (len == M_COPYALL)
		len = m_length(*mp0) - off;

	/*
	 * we don't bother to update "totlen" in the case of CB_COW,
	 * assuming that CB_EXTEND and CB_COW are exclusive.
	 */

	KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0);

	/* Phase 1: skip "off" bytes, extending the chain if permitted. */
	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			int tspace;
			/* Also entered from phase 2 when the chain ends. */
extend:
			if ((flags & CB_EXTEND) == 0)
				goto out;

			/*
			 * try to make some space at the end of "m".
			 */

			mlen = m->m_len;
			if (off + len >= MINCLSIZE &&
			    (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
				MCLGET(m, how);
			}
			tspace = M_TRAILINGSPACE(m);
			if (tspace > 0) {
				tspace = uimin(tspace, off + len);
				KASSERT(tspace > 0);
				/* Zero only the gap in front of the range. */
				memset(mtod(m, char *) + m->m_len, 0,
				    uimin(off, tspace));
				m->m_len += tspace;
				/* Re-examine "m": undo this pass's accounting. */
				off += mlen;
				totlen -= mlen;
				continue;
			}

			/*
			 * need to allocate an mbuf.
			 */

			if (off + len >= MINCLSIZE) {
				n = m_getcl(how, m->m_type, 0);
			} else {
				n = m_get(how, m->m_type);
			}
			if (n == NULL) {
				goto out;
			}
			n->m_len = uimin(M_TRAILINGSPACE(n), off + len);
			memset(mtod(n, char *), 0, uimin(n->m_len, off));
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}

	/* Phase 2: walk the range itself, one mbuf at a time. */
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			/*
			 * This mbuf is read-only.  Allocate a new writable
			 * mbuf and try again.
			 */
			char *datap;
			int eatlen;

			KASSERT((flags & CB_COW) != 0);

			/*
			 * if we're going to write into the middle of
			 * a mbuf, split it first.
			 */
			if (off > 0) {
				n = m_split_internal(m, off, how, false);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf.  copy packet header if needed.
			 */
			n = m_get(how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				m_move_pkthdr(n, m);
				n->m_len = MHLEN;
			} else {
				/*
				 * The cluster is requested with M_DONTWAIT
				 * regardless of "how"; without one the plain
				 * mbuf size is used.
				 */
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * free the region which has been overwritten.
			 * copying data from old mbufs if requested.
			 */
			if (flags & CB_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = uimin(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				/* Unlink and free fully consumed mbufs. */
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			/* Shrink "n" to what was actually consumed. */
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = uimin(mlen, len);
		if (flags & CB_COPYBACK) {
			memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			goto extend;
		}
		mp = &m->m_next;
		m = m->m_next;
	}

out:
	/* Grow the packet header length if the chain was extended. */
	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
		KASSERT((flags & CB_EXTEND) != 0);
		m->m_pkthdr.len = totlen;
	}

	return 0;

enobufs:
	return ENOBUFS;
}

/*
 * Compress the mbuf chain.  Return the new mbuf chain on success, NULL on
 * failure.  The first mbuf is preserved, and on success the pointer returned
 * is the same as the one passed.
 *
 * "m" must carry a packet header.  On failure the passed chain is not
 * freed here.  "how" is handed to every allocation.
 */
struct mbuf *
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0, *mn, *n;
	int sz;

	KASSERT((m->m_flags & M_PKTHDR) != 0);

	/* A single mbuf is already as compact as it gets. */
	if (m->m_next == NULL)
		return m;

	/* Defrag to single mbuf if at all possible */
	if ((m->m_flags & M_EXT) == 0 && m->m_pkthdr.len <= MCLBYTES) {
		if (m->m_pkthdr.len <= MHLEN) {
			/*
			 * Everything fits in the header mbuf itself; slide
			 * the data to the front if the tail is too short.
			 */
			if (M_TRAILINGSPACE(m) < (m->m_pkthdr.len - m->m_len)) {
				KASSERTMSG(M_LEADINGSPACE(m) +
				    M_TRAILINGSPACE(m) >=
				    (m->m_pkthdr.len - m->m_len),
				    "too small leading %d trailing %d ro? %d"
				    " pkthdr.len %d mlen %d",
				    (int)M_LEADINGSPACE(m),
				    (int)M_TRAILINGSPACE(m),
				    M_READONLY(m),
				    m->m_pkthdr.len, m->m_len);

				memmove(m->m_pktdat, m->m_data, m->m_len);
				m->m_data = m->m_pktdat;

				KASSERT(M_TRAILINGSPACE(m) >=
				    (m->m_pkthdr.len - m->m_len));
			}
		} else {
			/* Must copy data before adding cluster */
			m0 = m_get(how, MT_DATA);
			if (m0 == NULL)
				return NULL;
			KASSERT(m->m_len <= MHLEN);
			m_copydata(m, 0, m->m_len, mtod(m0, void *));

			MCLGET(m, how);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m0);
				return NULL;
			}
			/* Restore the saved bytes into the new cluster. */
			memcpy(m->m_data, mtod(m0, void *), m->m_len);
			m_free(m0);
		}
		/* Pull the rest of the chain in behind the existing data. */
		KASSERT(M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len));
		m_copydata(m->m_next, 0, m->m_pkthdr.len - m->m_len,
		    mtod(m, char *) + m->m_len);
		m->m_len = m->m_pkthdr.len;
		m_freem(m->m_next);
		m->m_next = NULL;
		return m;
	}

	/*
	 * Otherwise keep the first mbuf as it is and repack everything
	 * behind it into a new chain "m0", filled through "mn".
	 */
	m0 = m_get(how, MT_DATA);
	if (m0 == NULL)
		return NULL;
	mn = m0;

	sz = m->m_pkthdr.len - m->m_len;
	KASSERT(sz >= 0);

	do {
		if (sz > MLEN) {
			MCLGET(mn, how);
			if ((mn->m_flags & M_EXT) == 0) {
				m_freem(m0);
				return NULL;
			}
		}

		mn->m_len = MIN(sz, MCLBYTES);

		/* "pkthdr.len - sz" is the offset of the next uncopied byte. */
		m_copydata(m, m->m_pkthdr.len - sz, mn->m_len,
		    mtod(mn, void *));

		sz -= mn->m_len;

		if (sz > 0) {
			/* need more mbufs */
			n = m_get(how, MT_DATA);
			if (n == NULL) {
				m_freem(m0);
				return NULL;
			}

			mn->m_next = n;
			mn = n;
		}
	} while (sz > 0);

	/* Swap the old tail for the compacted one. */
	m_freem(m->m_next);
	m->m_next = m0;

	return m;
}

/*
 * m_remove_pkthdr: strip the packet header from "m": delete its tags,
 * clear M_PKTHDR and zero the m_pkthdr area.
 */
void
m_remove_pkthdr(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);

	m_tag_delete_chain(m);
	m->m_flags &= ~M_PKTHDR;
	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
}

/*
 * m_copy_pkthdr: duplicate the packet header of "from" into "to",
 * including a copy of the tag chain.  "to" must not have external
 * storage and must not already carry tags.  "to"'s flags are replaced by
 * from's M_COPYFLAGS subset and its data pointer is reset to m_pktdat.
 * The result of m_tag_copy_chain() is not checked here.
 */
void
m_copy_pkthdr(struct mbuf *to, struct mbuf *from)
{
	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 ||
	    SLIST_FIRST(&to->m_pkthdr.tags) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;

	/* The struct copy aliased from's tag list; start over and clone. */
	SLIST_INIT(&to->m_pkthdr.tags);
	m_tag_copy_chain(to, from);
}

/*
 * m_move_pkthdr: transfer the packet header from "from" to "to".  The
 * tag list moves along with the struct copy, and "from" loses M_PKTHDR.
 * Same preconditions as m_copy_pkthdr().
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{
	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 ||
	    SLIST_FIRST(&to->m_pkthdr.tags) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;

	from->m_flags &= ~M_PKTHDR;
}

/*
 * Set the m_data pointer of a newly-allocated mbuf to place an object of the
 * specified size at the end of the mbuf, longword aligned.
1816 */ 1817 void 1818 m_align(struct mbuf *m, int len) 1819 { 1820 int buflen, adjust; 1821 1822 KASSERT(len != M_COPYALL); 1823 KASSERT(M_LEADINGSPACE(m) == 0); 1824 1825 buflen = M_BUFSIZE(m); 1826 1827 KASSERT(len <= buflen); 1828 adjust = buflen - len; 1829 m->m_data += adjust &~ (sizeof(long)-1); 1830 } 1831 1832 /* 1833 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1834 * beginning, continuing for "len" bytes. 1835 */ 1836 int 1837 m_apply(struct mbuf *m, int off, int len, 1838 int (*f)(void *, void *, unsigned int), void *arg) 1839 { 1840 unsigned int count; 1841 int rval; 1842 1843 KASSERT(len != M_COPYALL); 1844 KASSERT(len >= 0); 1845 KASSERT(off >= 0); 1846 1847 while (off > 0) { 1848 KASSERT(m != NULL); 1849 if (off < m->m_len) 1850 break; 1851 off -= m->m_len; 1852 m = m->m_next; 1853 } 1854 while (len > 0) { 1855 KASSERT(m != NULL); 1856 count = uimin(m->m_len - off, len); 1857 1858 rval = (*f)(arg, mtod(m, char *) + off, count); 1859 if (rval) 1860 return rval; 1861 1862 len -= count; 1863 off = 0; 1864 m = m->m_next; 1865 } 1866 1867 return 0; 1868 } 1869 1870 /* 1871 * Return a pointer to mbuf/offset of location in mbuf chain. 1872 */ 1873 struct mbuf * 1874 m_getptr(struct mbuf *m, int loc, int *off) 1875 { 1876 1877 while (loc >= 0) { 1878 /* Normal end of search */ 1879 if (m->m_len > loc) { 1880 *off = loc; 1881 return m; 1882 } 1883 1884 loc -= m->m_len; 1885 1886 if (m->m_next == NULL) { 1887 if (loc == 0) { 1888 /* Point at the end of valid data */ 1889 *off = m->m_len; 1890 return m; 1891 } 1892 return NULL; 1893 } else { 1894 m = m->m_next; 1895 } 1896 } 1897 1898 return NULL; 1899 } 1900 1901 /* 1902 * Release a reference to the mbuf external storage. 1903 * 1904 * => free the mbuf m itself as well. 
*/
/*
 * m_ext_free: "m" must have M_EXT set, and m->m_ext_ref must point at an
 * mbuf for which MEXT_ISEMBEDDED() holds and which has the same
 * M_EXT_CLUSTER setting.  Panics if "m" is already marked MT_FREE.
 *
 * The reference count is dropped first.  Then:
 *  - references remain: a non-embedded "m" is detached and freed, an
 *    embedded "m" is kept because other mbufs still point at it;
 *  - last reference: a non-embedded "m" recurses on its m_ext_ref, an
 *    embedded "m" releases the buffer to mcl_cache, through the
 *    ext_free callback, or with free().
 */
static void
m_ext_free(struct mbuf *m)
{
	const bool embedded = MEXT_ISEMBEDDED(m);
	bool dofree = true;	/* return "m" to mb_cache at the end? */
	u_int refcnt;

	KASSERT((m->m_flags & M_EXT) != 0);
	KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
	KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
	KASSERT((m->m_flags & M_EXT_CLUSTER) ==
	    (m->m_ext_ref->m_flags & M_EXT_CLUSTER));

	if (__predict_false(m->m_type == MT_FREE)) {
		panic("mbuf %p already freed", m);
	}

	/*
	 * Fast path: a count of 1 is reset without an atomic operation.
	 * Otherwise decrement atomically, with a release barrier before
	 * it.
	 */
	if (__predict_true(m->m_ext.ext_refcnt == 1)) {
		refcnt = m->m_ext.ext_refcnt = 0;
	} else {
		membar_release();
		refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
	}

	if (refcnt > 0) {
		if (embedded) {
			/*
			 * other mbuf's m_ext_ref still points to us.
			 */
			dofree = false;
		} else {
			/* Detach from the shared storage before freeing. */
			m->m_ext_ref = m;
		}
	} else {
		/*
		 * dropping the last reference
		 */
		membar_acquire();
		if (!embedded) {
			/*
			 * Re-take one reference so that the recursive call
			 * on the referenced mbuf performs the final drop.
			 */
			m->m_ext.ext_refcnt++; /* XXX */
			m_ext_free(m->m_ext_ref);
			m->m_ext_ref = m;
		} else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
			pool_cache_put_paddr(mcl_cache,
			    m->m_ext.ext_buf, m->m_ext.ext_paddr);
		} else if (m->m_ext.ext_free) {
			(*m->m_ext.ext_free)(m,
			    m->m_ext.ext_buf, m->m_ext.ext_size,
			    m->m_ext.ext_arg);
			/*
			 * 'm' is already freed by the ext_free callback.
			 */
			dofree = false;
		} else {
			free(m->m_ext.ext_buf, 0);
		}
	}

	if (dofree) {
		/* Mark as freed so a later double free is caught above. */
		m->m_type = MT_FREE;
		m->m_data = NULL;
		pool_cache_put(mb_cache, m);
	}
}

/*
 * Free a single mbuf and associated external storage.  Return the
 * successor, if any.
1974 */ 1975 struct mbuf * 1976 m_free(struct mbuf *m) 1977 { 1978 struct mbuf *n; 1979 1980 mowner_revoke(m, 1, m->m_flags); 1981 mbstat_type_add(m->m_type, -1); 1982 1983 if (m->m_flags & M_PKTHDR) 1984 m_tag_delete_chain(m); 1985 1986 n = m->m_next; 1987 1988 if (m->m_flags & M_EXT) { 1989 m_ext_free(m); 1990 } else { 1991 if (__predict_false(m->m_type == MT_FREE)) { 1992 panic("mbuf %p already freed", m); 1993 } 1994 m->m_type = MT_FREE; 1995 m->m_data = NULL; 1996 pool_cache_put(mb_cache, m); 1997 } 1998 1999 return n; 2000 } 2001 2002 void 2003 m_freem(struct mbuf *m) 2004 { 2005 if (m == NULL) 2006 return; 2007 do { 2008 m = m_free(m); 2009 } while (m); 2010 } 2011 2012 #if defined(DDB) 2013 void 2014 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 2015 { 2016 char ch; 2017 bool opt_c = false; 2018 bool opt_d = false; 2019 #if NETHER > 0 2020 bool opt_v = false; 2021 const struct mbuf *m0 = NULL; 2022 #endif 2023 int no = 0; 2024 char buf[512]; 2025 2026 while ((ch = *(modif++)) != '\0') { 2027 switch (ch) { 2028 case 'c': 2029 opt_c = true; 2030 break; 2031 case 'd': 2032 opt_d = true; 2033 break; 2034 #if NETHER > 0 2035 case 'v': 2036 opt_v = true; 2037 m0 = m; 2038 break; 2039 #endif 2040 default: 2041 break; 2042 } 2043 } 2044 2045 nextchain: 2046 (*pr)("MBUF(%d) %p\n", no, m); 2047 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 2048 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 2049 m->m_data, m->m_len, m->m_type, buf); 2050 if (opt_d) { 2051 int i; 2052 unsigned char *p = m->m_data; 2053 2054 (*pr)(" data:"); 2055 2056 for (i = 0; i < m->m_len; i++) { 2057 if (i % 16 == 0) 2058 (*pr)("\n"); 2059 (*pr)(" %02x", p[i]); 2060 } 2061 2062 (*pr)("\n"); 2063 } 2064 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 2065 m->m_nextpkt); 2066 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 2067 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 2068 (int)M_READONLY(m)); 2069 if ((m->m_flags & 
M_PKTHDR) != 0) { 2070 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 2071 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%" 2072 PRIx32 ", segsz=%u\n", 2073 m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m), 2074 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 2075 } 2076 if ((m->m_flags & M_EXT)) { 2077 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 2078 "ext_free=%p, ext_arg=%p\n", 2079 m->m_ext.ext_refcnt, 2080 m->m_ext.ext_buf, m->m_ext.ext_size, 2081 m->m_ext.ext_free, m->m_ext.ext_arg); 2082 } 2083 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 2084 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 2085 vaddr_t eva = sva + m->m_ext.ext_size; 2086 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 2087 int i; 2088 2089 (*pr)(" pages:"); 2090 for (i = 0; i < n; i ++) { 2091 (*pr)(" %p", m->m_ext.ext_pgs[i]); 2092 } 2093 (*pr)("\n"); 2094 } 2095 2096 if (opt_c) { 2097 m = m->m_next; 2098 if (m != NULL) { 2099 no++; 2100 goto nextchain; 2101 } 2102 } 2103 2104 #if NETHER > 0 2105 if (opt_v && m0) 2106 m_examine(m0, AF_ETHER, modif, pr); 2107 #endif 2108 } 2109 #endif /* defined(DDB) */ 2110 2111 #if defined(MBUFTRACE) 2112 void 2113 mowner_init_owner(struct mowner *mo, const char *name, const char *descr) 2114 { 2115 memset(mo, 0, sizeof(*mo)); 2116 strlcpy(mo->mo_name, name, sizeof(mo->mo_name)); 2117 strlcpy(mo->mo_descr, descr, sizeof(mo->mo_descr)); 2118 } 2119 2120 void 2121 mowner_attach(struct mowner *mo) 2122 { 2123 2124 KASSERT(mo->mo_counters == NULL); 2125 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 2126 2127 /* XXX lock */ 2128 LIST_INSERT_HEAD(&mowners, mo, mo_link); 2129 } 2130 2131 void 2132 mowner_detach(struct mowner *mo) 2133 { 2134 2135 KASSERT(mo->mo_counters != NULL); 2136 2137 /* XXX lock */ 2138 LIST_REMOVE(mo, mo_link); 2139 2140 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 2141 mo->mo_counters = NULL; 2142 } 2143 2144 void 2145 mowner_init(struct mbuf *m, int type) 2146 { 2147 
struct mowner_counter *mc; 2148 struct mowner *mo; 2149 int s; 2150 2151 m->m_owner = mo = &unknown_mowners[type]; 2152 s = splvm(); 2153 mc = percpu_getref(mo->mo_counters); 2154 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 2155 percpu_putref(mo->mo_counters); 2156 splx(s); 2157 } 2158 2159 void 2160 mowner_ref(struct mbuf *m, int flags) 2161 { 2162 struct mowner *mo = m->m_owner; 2163 struct mowner_counter *mc; 2164 int s; 2165 2166 s = splvm(); 2167 mc = percpu_getref(mo->mo_counters); 2168 if ((flags & M_EXT) != 0) 2169 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 2170 if ((flags & M_EXT_CLUSTER) != 0) 2171 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 2172 percpu_putref(mo->mo_counters); 2173 splx(s); 2174 } 2175 2176 void 2177 mowner_revoke(struct mbuf *m, bool all, int flags) 2178 { 2179 struct mowner *mo = m->m_owner; 2180 struct mowner_counter *mc; 2181 int s; 2182 2183 s = splvm(); 2184 mc = percpu_getref(mo->mo_counters); 2185 if ((flags & M_EXT) != 0) 2186 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 2187 if ((flags & M_EXT_CLUSTER) != 0) 2188 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 2189 if (all) 2190 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 2191 percpu_putref(mo->mo_counters); 2192 splx(s); 2193 if (all) 2194 m->m_owner = &revoked_mowner; 2195 } 2196 2197 static void 2198 mowner_claim(struct mbuf *m, struct mowner *mo) 2199 { 2200 struct mowner_counter *mc; 2201 int flags = m->m_flags; 2202 int s; 2203 2204 s = splvm(); 2205 mc = percpu_getref(mo->mo_counters); 2206 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 2207 if ((flags & M_EXT) != 0) 2208 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 2209 if ((flags & M_EXT_CLUSTER) != 0) 2210 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 2211 percpu_putref(mo->mo_counters); 2212 splx(s); 2213 m->m_owner = mo; 2214 } 2215 2216 void 2217 m_claim(struct mbuf *m, struct mowner *mo) 2218 { 2219 2220 if (m->m_owner == mo || mo == NULL) 2221 return; 2222 2223 mowner_revoke(m, true, m->m_flags); 2224 
mowner_claim(m, mo); 2225 } 2226 2227 void 2228 m_claimm(struct mbuf *m, struct mowner *mo) 2229 { 2230 2231 for (; m != NULL; m = m->m_next) 2232 m_claim(m, mo); 2233 } 2234 #endif /* defined(MBUFTRACE) */ 2235 2236 #ifdef DIAGNOSTIC 2237 /* 2238 * Verify that the mbuf chain is not malformed. Used only for diagnostic. 2239 * Panics on error. 2240 */ 2241 void 2242 m_verify_packet(struct mbuf *m) 2243 { 2244 struct mbuf *n = m; 2245 char *low, *high, *dat; 2246 int totlen = 0, len; 2247 2248 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 2249 panic("%s: mbuf doesn't have M_PKTHDR", __func__); 2250 } 2251 2252 while (n != NULL) { 2253 if (__predict_false(n->m_type == MT_FREE)) { 2254 panic("%s: mbuf already freed (n = %p)", __func__, n); 2255 } 2256 #if 0 2257 /* 2258 * This ought to be a rule of the mbuf API. Unfortunately, 2259 * many places don't respect that rule. 2260 */ 2261 if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) { 2262 panic("%s: M_PKTHDR set on secondary mbuf", __func__); 2263 } 2264 #endif 2265 if (__predict_false(n->m_nextpkt != NULL)) { 2266 panic("%s: m_nextpkt not null (m_nextpkt = %p)", 2267 __func__, n->m_nextpkt); 2268 } 2269 2270 dat = n->m_data; 2271 len = n->m_len; 2272 if (__predict_false(len < 0)) { 2273 panic("%s: incorrect length (len = %d)", __func__, len); 2274 } 2275 2276 low = M_BUFADDR(n); 2277 high = low + M_BUFSIZE(n); 2278 if (__predict_false((dat < low) || (dat + len > high))) { 2279 panic("%s: m_data not in packet" 2280 "(dat = %p, len = %d, low = %p, high = %p)", 2281 __func__, dat, len, low, high); 2282 } 2283 2284 totlen += len; 2285 n = n->m_next; 2286 } 2287 2288 if (__predict_false(totlen != m->m_pkthdr.len)) { 2289 panic("%s: inconsistent mbuf length (%d != %d)", __func__, 2290 totlen, m->m_pkthdr.len); 2291 } 2292 } 2293 #endif 2294 2295 struct m_tag * 2296 m_tag_get(int type, int len, int wait) 2297 { 2298 struct m_tag *t; 2299 2300 if (len < 0) 2301 return NULL; 2302 t = malloc(len + 
sizeof(struct m_tag), M_PACKET_TAGS, wait); 2303 if (t == NULL) 2304 return NULL; 2305 t->m_tag_id = type; 2306 t->m_tag_len = len; 2307 return t; 2308 } 2309 2310 void 2311 m_tag_free(struct m_tag *t) 2312 { 2313 free(t, M_PACKET_TAGS); 2314 } 2315 2316 void 2317 m_tag_prepend(struct mbuf *m, struct m_tag *t) 2318 { 2319 KASSERT((m->m_flags & M_PKTHDR) != 0); 2320 SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); 2321 } 2322 2323 void 2324 m_tag_unlink(struct mbuf *m, struct m_tag *t) 2325 { 2326 KASSERT((m->m_flags & M_PKTHDR) != 0); 2327 SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); 2328 } 2329 2330 void 2331 m_tag_delete(struct mbuf *m, struct m_tag *t) 2332 { 2333 m_tag_unlink(m, t); 2334 m_tag_free(t); 2335 } 2336 2337 void 2338 m_tag_delete_chain(struct mbuf *m) 2339 { 2340 struct m_tag *p, *q; 2341 2342 KASSERT((m->m_flags & M_PKTHDR) != 0); 2343 2344 p = SLIST_FIRST(&m->m_pkthdr.tags); 2345 if (p == NULL) 2346 return; 2347 while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) 2348 m_tag_delete(m, q); 2349 m_tag_delete(m, p); 2350 } 2351 2352 struct m_tag * 2353 m_tag_find(const struct mbuf *m, int type) 2354 { 2355 struct m_tag *p; 2356 2357 KASSERT((m->m_flags & M_PKTHDR) != 0); 2358 2359 p = SLIST_FIRST(&m->m_pkthdr.tags); 2360 while (p != NULL) { 2361 if (p->m_tag_id == type) 2362 return p; 2363 p = SLIST_NEXT(p, m_tag_link); 2364 } 2365 return NULL; 2366 } 2367 2368 struct m_tag * 2369 m_tag_copy(struct m_tag *t) 2370 { 2371 struct m_tag *p; 2372 2373 p = m_tag_get(t->m_tag_id, t->m_tag_len, M_NOWAIT); 2374 if (p == NULL) 2375 return NULL; 2376 memcpy(p + 1, t + 1, t->m_tag_len); 2377 return p; 2378 } 2379 2380 /* 2381 * Copy two tag chains. The destination mbuf (to) loses any attached 2382 * tags even if the operation fails. This should not be a problem, as 2383 * m_tag_copy_chain() is typically called with a newly-allocated 2384 * destination mbuf. 
2385 */ 2386 int 2387 m_tag_copy_chain(struct mbuf *to, struct mbuf *from) 2388 { 2389 struct m_tag *p, *t, *tprev = NULL; 2390 2391 KASSERT((from->m_flags & M_PKTHDR) != 0); 2392 2393 m_tag_delete_chain(to); 2394 SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { 2395 t = m_tag_copy(p); 2396 if (t == NULL) { 2397 m_tag_delete_chain(to); 2398 return 0; 2399 } 2400 if (tprev == NULL) 2401 SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); 2402 else 2403 SLIST_INSERT_AFTER(tprev, t, m_tag_link); 2404 tprev = t; 2405 } 2406 return 1; 2407 } 2408