1 /* $NetBSD: uipc_mbuf.c,v 1.251 2023/04/12 06:48:08 riastradh Exp $ */ 2 3 /* 4 * Copyright (c) 1999, 2001, 2018 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and Maxime Villard. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.251 2023/04/12 06:48:08 riastradh Exp $"); 66 67 #ifdef _KERNEL_OPT 68 #include "opt_mbuftrace.h" 69 #include "opt_nmbclusters.h" 70 #include "opt_ddb.h" 71 #include "ether.h" 72 #endif 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/atomic.h> 77 #include <sys/cpu.h> 78 #include <sys/proc.h> 79 #include <sys/mbuf.h> 80 #include <sys/kernel.h> 81 #include <sys/syslog.h> 82 #include <sys/domain.h> 83 #include <sys/protosw.h> 84 #include <sys/percpu.h> 85 #include <sys/pool.h> 86 #include <sys/socket.h> 87 #include <sys/sysctl.h> 88 89 #include <net/if.h> 90 91 pool_cache_t mb_cache; /* mbuf cache */ 92 static pool_cache_t mcl_cache; /* mbuf cluster cache */ 93 94 struct mbstat mbstat; 95 int max_linkhdr; 96 int max_protohdr; 97 int max_hdr; 98 int max_datalen; 99 100 static void mb_drain(void *, int); 101 static int mb_ctor(void *, void *, int); 102 103 static void sysctl_kern_mbuf_setup(void); 104 105 static struct sysctllog *mbuf_sysctllog; 106 107 static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool); 108 static struct mbuf *m_split_internal(struct mbuf *, int, int, bool); 109 static int m_copyback_internal(struct mbuf **, int, int, const void *, 110 int, int); 111 112 /* Flags for m_copyback_internal. */ 113 #define CB_COPYBACK 0x0001 /* copyback from cp */ 114 #define CB_PRESERVE 0x0002 /* preserve original data */ 115 #define CB_COW 0x0004 /* do copy-on-write */ 116 #define CB_EXTEND 0x0008 /* extend chain */ 117 118 static const char mclpool_warnmsg[] = 119 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; 120 121 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 122 123 static percpu_t *mbstat_percpu; 124 125 #ifdef MBUFTRACE 126 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 127 struct mowner unknown_mowners[] = { 128 MOWNER_INIT("unknown", "free"), 129 MOWNER_INIT("unknown", "data"), 130 MOWNER_INIT("unknown", "header"), 131 MOWNER_INIT("unknown", "soname"), 132 MOWNER_INIT("unknown", "soopts"), 133 MOWNER_INIT("unknown", "ftable"), 134 MOWNER_INIT("unknown", "control"), 135 MOWNER_INIT("unknown", "oobdata"), 136 }; 137 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 138 #endif 139 140 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 141 142 #define MCLADDREFERENCE(o, n) \ 143 do { \ 144 KASSERT(((o)->m_flags & M_EXT) != 0); \ 145 KASSERT(((n)->m_flags & M_EXT) == 0); \ 146 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 147 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 148 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 149 (n)->m_ext_ref = (o)->m_ext_ref; \ 150 mowner_ref((n), (n)->m_flags); \ 151 } while (/* CONSTCOND */ 0) 152 153 static int 154 nmbclusters_limit(void) 155 { 156 #if defined(PMAP_MAP_POOLPAGE) 157 /* direct mapping, doesn't use space in kmem_arena */ 158 vsize_t max_size = physmem / 4; 159 #else 160 vsize_t max_size = MIN(physmem / 4, nkmempages / 4); 161 #endif 162 163 max_size = max_size * PAGE_SIZE / MCLBYTES; 164 #ifdef NMBCLUSTERS_MAX 165 max_size = MIN(max_size, NMBCLUSTERS_MAX); 166 #endif 167 168 return max_size; 169 } 170 171 /* 172 * Initialize the mbuf allocator. 173 */ 174 void 175 mbinit(void) 176 { 177 178 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 179 CTASSERT(sizeof(struct mbuf) == MSIZE); 180 181 sysctl_kern_mbuf_setup(); 182 183 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 184 NULL, IPL_VM, mb_ctor, NULL, NULL); 185 KASSERT(mb_cache != NULL); 186 187 mcl_cache = pool_cache_init(mclbytes, COHERENCY_UNIT, 0, 0, "mclpl", 188 NULL, IPL_VM, NULL, NULL, NULL); 189 KASSERT(mcl_cache != NULL); 190 191 pool_cache_set_drain_hook(mb_cache, mb_drain, NULL); 192 pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL); 193 194 /* 195 * Set an arbitrary default limit on the number of mbuf clusters. 196 */ 197 #ifdef NMBCLUSTERS 198 nmbclusters = MIN(NMBCLUSTERS, nmbclusters_limit()); 199 #else 200 nmbclusters = MAX(1024, 201 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 202 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 203 #endif 204 205 /* 206 * Set the hard limit on the mclpool to the number of 207 * mbuf clusters the kernel is to support. Log the limit 208 * reached message max once a minute. 209 */ 210 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 211 212 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 213 214 /* 215 * Set a low water mark for both mbufs and clusters. This should 216 * help ensure that they can be allocated in a memory starvation 217 * situation. This is important for e.g. diskless systems which 218 * must allocate mbufs in order for the pagedaemon to clean pages. 219 */ 220 pool_cache_setlowat(mb_cache, mblowat); 221 pool_cache_setlowat(mcl_cache, mcllowat); 222 223 #ifdef MBUFTRACE 224 { 225 /* 226 * Attach the unknown mowners. 227 */ 228 int i; 229 MOWNER_ATTACH(&revoked_mowner); 230 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 231 i-- > 0; ) 232 MOWNER_ATTACH(&unknown_mowners[i]); 233 } 234 #endif 235 } 236 237 static void 238 mb_drain(void *arg, int flags) 239 { 240 struct domain *dp; 241 const struct protosw *pr; 242 struct ifnet *ifp; 243 int s; 244 245 KERNEL_LOCK(1, NULL); 246 s = splvm(); 247 DOMAIN_FOREACH(dp) { 248 for (pr = dp->dom_protosw; 249 pr < dp->dom_protoswNPROTOSW; pr++) 250 if (pr->pr_drain) 251 (*pr->pr_drain)(); 252 } 253 /* XXX we cannot use psref in H/W interrupt */ 254 if (!cpu_intr_p()) { 255 int bound = curlwp_bind(); 256 IFNET_READER_FOREACH(ifp) { 257 struct psref psref; 258 259 if_acquire(ifp, &psref); 260 261 if (ifp->if_drain) 262 (*ifp->if_drain)(ifp); 263 264 if_release(ifp, &psref); 265 } 266 curlwp_bindx(bound); 267 } 268 splx(s); 269 mbstat.m_drain++; 270 KERNEL_UNLOCK_ONE(NULL); 271 } 272 273 /* 274 * sysctl helper routine for the kern.mbuf subtree. 275 * nmbclusters, mblowat and mcllowat need range 276 * checking and pool tweaking after being reset. 277 */ 278 static int 279 sysctl_kern_mbuf(SYSCTLFN_ARGS) 280 { 281 int error, newval; 282 struct sysctlnode node; 283 284 node = *rnode; 285 node.sysctl_data = &newval; 286 switch (rnode->sysctl_num) { 287 case MBUF_NMBCLUSTERS: 288 case MBUF_MBLOWAT: 289 case MBUF_MCLLOWAT: 290 newval = *(int*)rnode->sysctl_data; 291 break; 292 case MBUF_NMBCLUSTERS_LIMIT: 293 newval = nmbclusters_limit(); 294 break; 295 default: 296 return EOPNOTSUPP; 297 } 298 299 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 300 if (error || newp == NULL) 301 return error; 302 if (newval < 0) 303 return EINVAL; 304 305 switch (node.sysctl_num) { 306 case MBUF_NMBCLUSTERS: 307 if (newval < nmbclusters) 308 return EINVAL; 309 if (newval > nmbclusters_limit()) 310 return EINVAL; 311 nmbclusters = newval; 312 pool_cache_sethardlimit(mcl_cache, nmbclusters, 313 mclpool_warnmsg, 60); 314 break; 315 case MBUF_MBLOWAT: 316 mblowat = newval; 317 pool_cache_setlowat(mb_cache, mblowat); 318 break; 319 case MBUF_MCLLOWAT: 320 mcllowat = newval; 321 pool_cache_setlowat(mcl_cache, mcllowat); 322 break; 323 } 324 325 return 0; 326 } 327 328 #ifdef MBUFTRACE 329 static void 330 mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 331 { 332 struct mowner_counter *mc = v1; 333 struct mowner_user *mo_user = v2; 334 int i; 335 336 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 337 mo_user->mo_counter[i] += mc->mc_counter[i]; 338 } 339 } 340 341 static void 342 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 343 { 344 345 memset(mo_user, 0, sizeof(*mo_user)); 346 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 347 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 348 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 349 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 350 percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user); 351 } 352 353 static int 354 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 355 { 356 struct mowner *mo; 357 size_t len = 0; 358 int error = 0; 359 360 if (namelen != 0) 361 return EINVAL; 362 if (newp != NULL) 363 return EPERM; 364 365 LIST_FOREACH(mo, &mowners, mo_link) { 366 struct mowner_user mo_user; 367 368 mowner_convert_to_user(mo, &mo_user); 369 370 if (oldp != NULL) { 371 if (*oldlenp - len < sizeof(mo_user)) { 372 error = ENOMEM; 373 break; 374 } 375 error = copyout(&mo_user, (char *)oldp + len, 376 sizeof(mo_user)); 377 if (error) 378 break; 379 } 380 len += sizeof(mo_user); 381 } 382 383 if (error == 0) 384 *oldlenp = len; 385 386 return error; 387 } 388 #endif /* MBUFTRACE */ 389 390 void 391 mbstat_type_add(int type, int diff) 392 { 393 struct mbstat_cpu *mb; 394 int s; 395 396 s = splvm(); 397 mb = percpu_getref(mbstat_percpu); 398 mb->m_mtypes[type] += diff; 399 percpu_putref(mbstat_percpu); 400 splx(s); 401 } 402 403 static void 404 mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 405 { 406 struct mbstat_cpu *mbsc = v1; 407 struct mbstat *mbs = v2; 408 int i; 409 410 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 411 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 412 } 413 } 414 415 static void 416 mbstat_convert_to_user(struct mbstat *mbs) 417 { 418 419 memset(mbs, 0, sizeof(*mbs)); 420 mbs->m_drain = mbstat.m_drain; 421 percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs); 422 } 423 424 static int 425 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 426 { 427 struct sysctlnode node; 428 struct mbstat mbs; 429 430 mbstat_convert_to_user(&mbs); 431 node = *rnode; 432 node.sysctl_data = &mbs; 433 node.sysctl_size = sizeof(mbs); 434 return sysctl_lookup(SYSCTLFN_CALL(&node)); 435 } 436 437 static void 438 sysctl_kern_mbuf_setup(void) 439 { 440 441 KASSERT(mbuf_sysctllog == NULL); 442 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 443 CTLFLAG_PERMANENT, 444 CTLTYPE_NODE, "mbuf", 445 SYSCTL_DESCR("mbuf control variables"), 446 NULL, 0, NULL, 0, 447 CTL_KERN, KERN_MBUF, CTL_EOL); 448 449 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 450 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 451 CTLTYPE_INT, "msize", 452 SYSCTL_DESCR("mbuf base size"), 453 NULL, msize, NULL, 0, 454 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 455 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 456 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 457 CTLTYPE_INT, "mclbytes", 458 SYSCTL_DESCR("mbuf cluster size"), 459 NULL, mclbytes, NULL, 0, 460 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 461 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 462 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 463 CTLTYPE_INT, "nmbclusters", 464 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 465 sysctl_kern_mbuf, 0, &nmbclusters, 0, 466 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 467 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 468 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 469 CTLTYPE_INT, "mblowat", 470 SYSCTL_DESCR("mbuf low water mark"), 471 sysctl_kern_mbuf, 0, &mblowat, 0, 472 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 473 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 474 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 475 CTLTYPE_INT, "mcllowat", 476 SYSCTL_DESCR("mbuf cluster low water mark"), 477 sysctl_kern_mbuf, 0, &mcllowat, 0, 478 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 479 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 480 CTLFLAG_PERMANENT, 481 CTLTYPE_STRUCT, "stats", 482 SYSCTL_DESCR("mbuf allocation statistics"), 483 sysctl_kern_mbuf_stats, 0, NULL, 0, 484 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 485 #ifdef MBUFTRACE 486 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 487 CTLFLAG_PERMANENT, 488 CTLTYPE_STRUCT, "mowners", 489 SYSCTL_DESCR("Information about mbuf owners"), 490 sysctl_kern_mbuf_mowners, 0, NULL, 0, 491 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 492 #endif 493 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 494 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 495 CTLTYPE_INT, "nmbclusters_limit", 496 SYSCTL_DESCR("Limit of nmbclusters"), 497 sysctl_kern_mbuf, 0, NULL, 0, 498 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS_LIMIT, CTL_EOL); 499 } 500 501 static int 502 mb_ctor(void *arg, void *object, int flags) 503 { 504 struct mbuf *m = object; 505 506 #ifdef POOL_VTOPHYS 507 m->m_paddr = POOL_VTOPHYS(m); 508 #else 509 m->m_paddr = M_PADDR_INVALID; 510 #endif 511 return 0; 512 } 513 514 /* 515 * Add mbuf to the end of a chain 516 */ 517 struct mbuf * 518 m_add(struct mbuf *c, struct mbuf *m) 519 { 520 struct mbuf *n; 521 522 if (c == NULL) 523 return m; 524 525 for (n = c; n->m_next != NULL; n = n->m_next) 526 continue; 527 n->m_next = m; 528 return c; 529 } 530 531 struct mbuf * 532 m_get(int how, int type) 533 { 534 struct mbuf *m; 535 536 KASSERT(type != MT_FREE); 537 538 m = pool_cache_get(mb_cache, 539 how == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT); 540 if (m == NULL) 541 return NULL; 542 KASSERTMSG(((vaddr_t)m->m_dat & PAGE_MASK) + MLEN <= PAGE_SIZE, 543 "m=%p m->m_dat=%p" 544 " MLEN=%u PAGE_MASK=0x%x PAGE_SIZE=%u", 545 m, m->m_dat, 546 (unsigned)MLEN, (unsigned)PAGE_MASK, (unsigned)PAGE_SIZE); 547 548 mbstat_type_add(type, 1); 549 550 mowner_init(m, type); 551 m->m_ext_ref = m; /* default */ 552 m->m_type = type; 553 m->m_len = 0; 554 m->m_next = NULL; 555 m->m_nextpkt = NULL; /* default */ 556 m->m_data = m->m_dat; 557 m->m_flags = 0; /* default */ 558 559 return m; 560 } 561 562 struct mbuf * 563 m_gethdr(int how, int type) 564 { 565 struct mbuf *m; 566 567 m = m_get(how, type); 568 if (m == NULL) 569 return NULL; 570 571 m->m_data = m->m_pktdat; 572 m->m_flags = M_PKTHDR; 573 574 m_reset_rcvif(m); 575 m->m_pkthdr.len = 0; 576 m->m_pkthdr.csum_flags = 0; 577 m->m_pkthdr.csum_data = 0; 578 m->m_pkthdr.segsz = 0; 579 m->m_pkthdr.ether_vtag = 0; 580 m->m_pkthdr.pkthdr_flags = 0; 581 SLIST_INIT(&m->m_pkthdr.tags); 582 583 m->m_pkthdr.pattr_class = NULL; 584 m->m_pkthdr.pattr_af = AF_UNSPEC; 585 m->m_pkthdr.pattr_hdr = NULL; 586 587 return m; 588 } 589 590 struct mbuf * 591 m_get_n(int how, int type, size_t alignbytes, size_t nbytes) 592 { 593 struct mbuf *m; 594 595 if (alignbytes > MCLBYTES || nbytes > MCLBYTES - alignbytes) 596 return NULL; 597 if ((m = m_get(how, type)) == NULL) 598 return NULL; 599 if (nbytes + alignbytes > MLEN) { 600 m_clget(m, how); 601 if ((m->m_flags & M_EXT) == 0) { 602 m_free(m); 603 return NULL; 604 } 605 } 606 m->m_len = alignbytes + nbytes; 607 m_adj(m, alignbytes); 608 609 return m; 610 } 611 612 struct mbuf * 613 m_gethdr_n(int how, int type, size_t alignbytes, size_t nbytes) 614 { 615 struct mbuf *m; 616 617 if (nbytes > MCLBYTES || nbytes > MCLBYTES - alignbytes) 618 return NULL; 619 if ((m = m_gethdr(how, type)) == NULL) 620 return NULL; 621 if (alignbytes + nbytes > MHLEN) { 622 m_clget(m, how); 623 if ((m->m_flags & M_EXT) == 0) { 624 m_free(m); 625 return NULL; 626 } 627 } 628 m->m_len = m->m_pkthdr.len = alignbytes + nbytes; 629 m_adj(m, alignbytes); 630 631 return m; 632 } 633 634 void 635 m_clget(struct mbuf *m, int how) 636 { 637 m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache, 638 how == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT, 639 &m->m_ext_storage.ext_paddr); 640 641 if (m->m_ext_storage.ext_buf == NULL) 642 return; 643 644 KASSERTMSG((((vaddr_t)m->m_ext_storage.ext_buf & PAGE_MASK) + mclbytes 645 <= PAGE_SIZE), 646 "m=%p m->m_ext_storage.ext_buf=%p" 647 " mclbytes=%u PAGE_MASK=0x%x PAGE_SIZE=%u", 648 m, m->m_dat, 649 (unsigned)mclbytes, (unsigned)PAGE_MASK, (unsigned)PAGE_SIZE); 650 651 MCLINITREFERENCE(m); 652 m->m_data = m->m_ext.ext_buf; 653 m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) | 654 M_EXT|M_EXT_CLUSTER|M_EXT_RW; 655 m->m_ext.ext_size = MCLBYTES; 656 m->m_ext.ext_free = NULL; 657 m->m_ext.ext_arg = NULL; 658 /* ext_paddr initialized above */ 659 660 mowner_ref(m, M_EXT|M_EXT_CLUSTER); 661 } 662 663 struct mbuf * 664 m_getcl(int how, int type, int flags) 665 { 666 struct mbuf *mp; 667 668 if ((flags & M_PKTHDR) != 0) 669 mp = m_gethdr(how, type); 670 else 671 mp = m_get(how, type); 672 673 if (mp == NULL) 674 return NULL; 675 676 MCLGET(mp, how); 677 if ((mp->m_flags & M_EXT) != 0) 678 return mp; 679 680 m_free(mp); 681 return NULL; 682 } 683 684 /* 685 * Utility function for M_PREPEND. Do *NOT* use it directly. 686 */ 687 struct mbuf * 688 m_prepend(struct mbuf *m, int len, int how) 689 { 690 struct mbuf *mn; 691 692 if (__predict_false(len > MHLEN)) { 693 panic("%s: len > MHLEN", __func__); 694 } 695 696 KASSERT(len != M_COPYALL); 697 mn = m_get(how, m->m_type); 698 if (mn == NULL) { 699 m_freem(m); 700 return NULL; 701 } 702 703 if (m->m_flags & M_PKTHDR) { 704 m_move_pkthdr(mn, m); 705 } else { 706 MCLAIM(mn, m->m_owner); 707 } 708 mn->m_next = m; 709 m = mn; 710 711 if (m->m_flags & M_PKTHDR) { 712 if (len < MHLEN) 713 m_align(m, len); 714 } else { 715 if (len < MLEN) 716 m_align(m, len); 717 } 718 719 m->m_len = len; 720 return m; 721 } 722 723 struct mbuf * 724 m_copym(struct mbuf *m, int off, int len, int wait) 725 { 726 /* Shallow copy on M_EXT. */ 727 return m_copy_internal(m, off, len, wait, false); 728 } 729 730 struct mbuf * 731 m_dup(struct mbuf *m, int off, int len, int wait) 732 { 733 /* Deep copy. */ 734 return m_copy_internal(m, off, len, wait, true); 735 } 736 737 static inline int 738 m_copylen(int len, int copylen) 739 { 740 return (len == M_COPYALL) ? copylen : uimin(len, copylen); 741 } 742 743 static struct mbuf * 744 m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep) 745 { 746 struct mbuf *m0 __diagused = m; 747 int len0 __diagused = len; 748 struct mbuf *n, **np; 749 int off = off0; 750 struct mbuf *top; 751 int copyhdr = 0; 752 753 if (off < 0 || (len != M_COPYALL && len < 0)) 754 panic("%s: off %d, len %d", __func__, off, len); 755 if (off == 0 && m->m_flags & M_PKTHDR) 756 copyhdr = 1; 757 while (off > 0) { 758 if (m == NULL) 759 panic("%s: m == NULL, off %d", __func__, off); 760 if (off < m->m_len) 761 break; 762 off -= m->m_len; 763 m = m->m_next; 764 } 765 766 np = ⊤ 767 top = NULL; 768 while (len == M_COPYALL || len > 0) { 769 if (m == NULL) { 770 if (len != M_COPYALL) 771 panic("%s: m == NULL, len %d [!COPYALL]", 772 __func__, len); 773 break; 774 } 775 776 n = m_get(wait, m->m_type); 777 *np = n; 778 if (n == NULL) 779 goto nospace; 780 MCLAIM(n, m->m_owner); 781 782 if (copyhdr) { 783 m_copy_pkthdr(n, m); 784 if (len == M_COPYALL) 785 n->m_pkthdr.len -= off0; 786 else 787 n->m_pkthdr.len = len; 788 copyhdr = 0; 789 } 790 n->m_len = m_copylen(len, m->m_len - off); 791 792 if (m->m_flags & M_EXT) { 793 if (!deep) { 794 n->m_data = m->m_data + off; 795 MCLADDREFERENCE(m, n); 796 } else { 797 /* 798 * We don't care if MCLGET fails. n->m_len is 799 * recomputed and handles that. 800 */ 801 MCLGET(n, wait); 802 n->m_len = 0; 803 n->m_len = M_TRAILINGSPACE(n); 804 n->m_len = m_copylen(len, n->m_len); 805 n->m_len = uimin(n->m_len, m->m_len - off); 806 memcpy(mtod(n, void *), mtod(m, char *) + off, 807 (unsigned)n->m_len); 808 } 809 } else { 810 memcpy(mtod(n, void *), mtod(m, char *) + off, 811 (unsigned)n->m_len); 812 } 813 814 if (len != M_COPYALL) 815 len -= n->m_len; 816 off += n->m_len; 817 818 KASSERTMSG(off <= m->m_len, 819 "m=%p m->m_len=%d off=%d len=%d m0=%p off0=%d len0=%d", 820 m, m->m_len, off, len, m0, off0, len0); 821 822 if (off == m->m_len) { 823 m = m->m_next; 824 off = 0; 825 } 826 np = &n->m_next; 827 } 828 829 return top; 830 831 nospace: 832 m_freem(top); 833 return NULL; 834 } 835 836 /* 837 * Copy an entire packet, including header (which must be present). 838 * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'. 839 */ 840 struct mbuf * 841 m_copypacket(struct mbuf *m, int how) 842 { 843 struct mbuf *top, *n, *o; 844 845 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 846 panic("%s: no header (m = %p)", __func__, m); 847 } 848 849 n = m_get(how, m->m_type); 850 top = n; 851 if (!n) 852 goto nospace; 853 854 MCLAIM(n, m->m_owner); 855 m_copy_pkthdr(n, m); 856 n->m_len = m->m_len; 857 if (m->m_flags & M_EXT) { 858 n->m_data = m->m_data; 859 MCLADDREFERENCE(m, n); 860 } else { 861 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 862 } 863 864 m = m->m_next; 865 while (m) { 866 o = m_get(how, m->m_type); 867 if (!o) 868 goto nospace; 869 870 MCLAIM(o, m->m_owner); 871 n->m_next = o; 872 n = n->m_next; 873 874 n->m_len = m->m_len; 875 if (m->m_flags & M_EXT) { 876 n->m_data = m->m_data; 877 MCLADDREFERENCE(m, n); 878 } else { 879 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 880 } 881 882 m = m->m_next; 883 } 884 return top; 885 886 nospace: 887 m_freem(top); 888 return NULL; 889 } 890 891 void 892 m_copydata(struct mbuf *m, int off, int len, void *cp) 893 { 894 unsigned int count; 895 struct mbuf *m0 = m; 896 int len0 = len; 897 int off0 = off; 898 void *cp0 = cp; 899 900 KASSERT(len != M_COPYALL); 901 if (off < 0 || len < 0) 902 panic("m_copydata: off %d, len %d", off, len); 903 while (off > 0) { 904 if (m == NULL) 905 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", 906 m0, len0, off0, cp0, off, off0 - off); 907 if (off < m->m_len) 908 break; 909 off -= m->m_len; 910 m = m->m_next; 911 } 912 while (len > 0) { 913 if (m == NULL) 914 panic("m_copydata(%p,%d,%d,%p): " 915 "m=NULL, off=%d (%d), len=%d (%d)", 916 m0, len0, off0, cp0, 917 off, off0 - off, len, len0 - len); 918 count = uimin(m->m_len - off, len); 919 memcpy(cp, mtod(m, char *) + off, count); 920 len -= count; 921 cp = (char *)cp + count; 922 off = 0; 923 m = m->m_next; 924 } 925 } 926 927 /* 928 * Concatenate mbuf chain n to m. 929 * n might be copied into m (when n->m_len is small), therefore data portion of 930 * n could be copied into an mbuf of different mbuf type. 931 * Any m_pkthdr is not updated. 932 */ 933 void 934 m_cat(struct mbuf *m, struct mbuf *n) 935 { 936 937 while (m->m_next) 938 m = m->m_next; 939 while (n) { 940 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 941 /* just join the two chains */ 942 m->m_next = n; 943 return; 944 } 945 /* splat the data from one into the other */ 946 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 947 (u_int)n->m_len); 948 m->m_len += n->m_len; 949 n = m_free(n); 950 } 951 } 952 953 void 954 m_adj(struct mbuf *mp, int req_len) 955 { 956 int len = req_len; 957 struct mbuf *m; 958 int count; 959 960 if ((m = mp) == NULL) 961 return; 962 if (len >= 0) { 963 /* 964 * Trim from head. 965 */ 966 while (m != NULL && len > 0) { 967 if (m->m_len <= len) { 968 len -= m->m_len; 969 m->m_len = 0; 970 m = m->m_next; 971 } else { 972 m->m_len -= len; 973 m->m_data += len; 974 len = 0; 975 } 976 } 977 if (mp->m_flags & M_PKTHDR) 978 mp->m_pkthdr.len -= (req_len - len); 979 } else { 980 /* 981 * Trim from tail. Scan the mbuf chain, 982 * calculating its length and finding the last mbuf. 983 * If the adjustment only affects this mbuf, then just 984 * adjust and return. Otherwise, rescan and truncate 985 * after the remaining size. 986 */ 987 len = -len; 988 count = 0; 989 for (;;) { 990 count += m->m_len; 991 if (m->m_next == NULL) 992 break; 993 m = m->m_next; 994 } 995 if (m->m_len >= len) { 996 m->m_len -= len; 997 if (mp->m_flags & M_PKTHDR) 998 mp->m_pkthdr.len -= len; 999 return; 1000 } 1001 1002 count -= len; 1003 if (count < 0) 1004 count = 0; 1005 1006 /* 1007 * Correct length for chain is "count". 1008 * Find the mbuf with last data, adjust its length, 1009 * and toss data from remaining mbufs on chain. 1010 */ 1011 m = mp; 1012 if (m->m_flags & M_PKTHDR) 1013 m->m_pkthdr.len = count; 1014 for (; m; m = m->m_next) { 1015 if (m->m_len >= count) { 1016 m->m_len = count; 1017 break; 1018 } 1019 count -= m->m_len; 1020 } 1021 if (m) { 1022 while (m->m_next) 1023 (m = m->m_next)->m_len = 0; 1024 } 1025 } 1026 } 1027 1028 /* 1029 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 1030 * would be contiguous and in the data area of an mbuf (therefore, mtod() 1031 * would work for a structure of given length). 1032 * 1033 * => On success, returns true and the resulting mbuf chain; false otherwise. 1034 * => The mbuf chain may change, but is always preserved valid. 1035 */ 1036 bool 1037 m_ensure_contig(struct mbuf **m0, int len) 1038 { 1039 struct mbuf *n = *m0, *m; 1040 size_t count, space; 1041 1042 KASSERT(len != M_COPYALL); 1043 /* 1044 * If first mbuf has no cluster, and has room for len bytes 1045 * without shifting current data, pullup into it, 1046 * otherwise allocate a new mbuf to prepend to the chain. 1047 */ 1048 if ((n->m_flags & M_EXT) == 0 && 1049 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 1050 if (n->m_len >= len) { 1051 return true; 1052 } 1053 m = n; 1054 n = n->m_next; 1055 len -= m->m_len; 1056 } else { 1057 if (len > MHLEN) { 1058 return false; 1059 } 1060 m = m_get(M_DONTWAIT, n->m_type); 1061 if (m == NULL) { 1062 return false; 1063 } 1064 MCLAIM(m, n->m_owner); 1065 if (n->m_flags & M_PKTHDR) { 1066 m_move_pkthdr(m, n); 1067 } 1068 } 1069 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1070 do { 1071 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 1072 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1073 (unsigned)count); 1074 len -= count; 1075 m->m_len += count; 1076 n->m_len -= count; 1077 space -= count; 1078 if (n->m_len) 1079 n->m_data += count; 1080 else 1081 n = m_free(n); 1082 } while (len > 0 && n); 1083 1084 m->m_next = n; 1085 *m0 = m; 1086 1087 return len <= 0; 1088 } 1089 1090 /* 1091 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 1092 */ 1093 struct mbuf * 1094 m_pullup(struct mbuf *n, int len) 1095 { 1096 struct mbuf *m = n; 1097 1098 KASSERT(len != M_COPYALL); 1099 if (!m_ensure_contig(&m, len)) { 1100 KASSERT(m != NULL); 1101 m_freem(m); 1102 m = NULL; 1103 } 1104 return m; 1105 } 1106 1107 /* 1108 * ensure that [off, off + len) is contiguous on the mbuf chain "m". 1109 * packet chain before "off" is kept untouched. 1110 * if offp == NULL, the target will start at <retval, 0> on resulting chain. 1111 * if offp != NULL, the target will start at <retval, *offp> on resulting chain. 1112 * 1113 * on error return (NULL return value), original "m" will be freed. 1114 * 1115 * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster) 1116 */ 1117 struct mbuf * 1118 m_pulldown(struct mbuf *m, int off, int len, int *offp) 1119 { 1120 struct mbuf *n, *o; 1121 int hlen, tlen, olen; 1122 int sharedcluster; 1123 1124 /* Check invalid arguments. */ 1125 if (m == NULL) 1126 panic("%s: m == NULL", __func__); 1127 if (len > MCLBYTES) { 1128 m_freem(m); 1129 return NULL; 1130 } 1131 1132 n = m; 1133 while (n != NULL && off > 0) { 1134 if (n->m_len > off) 1135 break; 1136 off -= n->m_len; 1137 n = n->m_next; 1138 } 1139 /* Be sure to point non-empty mbuf. */ 1140 while (n != NULL && n->m_len == 0) 1141 n = n->m_next; 1142 if (!n) { 1143 m_freem(m); 1144 return NULL; /* mbuf chain too short */ 1145 } 1146 1147 sharedcluster = M_READONLY(n); 1148 1149 /* 1150 * The target data is on <n, off>. If we got enough data on the mbuf 1151 * "n", we're done. 1152 */ 1153 #ifdef __NO_STRICT_ALIGNMENT 1154 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster) 1155 #else 1156 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster && 1157 ALIGNED_POINTER((mtod(n, char *) + off), uint32_t)) 1158 #endif 1159 goto ok; 1160 1161 /* 1162 * When (len <= n->m_len - off) and (off != 0), it is a special case. 1163 * Len bytes from <n, off> sit in single mbuf, but the caller does 1164 * not like the starting position (off). 1165 * 1166 * Chop the current mbuf into two pieces, set off to 0. 1167 */ 1168 if (len <= n->m_len - off) { 1169 struct mbuf *mlast; 1170 1171 o = m_dup(n, off, n->m_len - off, M_DONTWAIT); 1172 if (o == NULL) { 1173 m_freem(m); 1174 return NULL; /* ENOBUFS */ 1175 } 1176 KASSERTMSG(o->m_len >= len, "o=%p o->m_len=%d len=%d", 1177 o, o->m_len, len); 1178 for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next) 1179 ; 1180 n->m_len = off; 1181 mlast->m_next = n->m_next; 1182 n->m_next = o; 1183 n = o; 1184 off = 0; 1185 goto ok; 1186 } 1187 1188 /* 1189 * We need to take hlen from <n, off> and tlen from <n->m_next, 0>, 1190 * and construct contiguous mbuf with m_len == len. 1191 * 1192 * Note that hlen + tlen == len, and tlen > 0. 1193 */ 1194 hlen = n->m_len - off; 1195 tlen = len - hlen; 1196 1197 /* 1198 * Ensure that we have enough trailing data on mbuf chain. If not, 1199 * we can do nothing about the chain. 1200 */ 1201 olen = 0; 1202 for (o = n->m_next; o != NULL; o = o->m_next) 1203 olen += o->m_len; 1204 if (hlen + olen < len) { 1205 m_freem(m); 1206 return NULL; /* mbuf chain too short */ 1207 } 1208 1209 /* 1210 * Easy cases first. We need to use m_copydata() to get data from 1211 * <n->m_next, 0>. 1212 */ 1213 if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen && 1214 !sharedcluster) { 1215 m_copydata(n->m_next, 0, tlen, mtod(n, char *) + n->m_len); 1216 n->m_len += tlen; 1217 m_adj(n->m_next, tlen); 1218 goto ok; 1219 } 1220 if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen && 1221 #ifndef __NO_STRICT_ALIGNMENT 1222 ALIGNED_POINTER((n->m_next->m_data - hlen), uint32_t) && 1223 #endif 1224 !sharedcluster && n->m_next->m_len >= tlen) { 1225 n->m_next->m_data -= hlen; 1226 n->m_next->m_len += hlen; 1227 memcpy(mtod(n->m_next, void *), mtod(n, char *) + off, hlen); 1228 n->m_len -= hlen; 1229 n = n->m_next; 1230 off = 0; 1231 goto ok; 1232 } 1233 1234 /* 1235 * Now, we need to do the hard way. Don't copy as there's no room 1236 * on both ends. 1237 */ 1238 o = m_get(M_DONTWAIT, m->m_type); 1239 if (o && len > MLEN) { 1240 MCLGET(o, M_DONTWAIT); 1241 if ((o->m_flags & M_EXT) == 0) { 1242 m_free(o); 1243 o = NULL; 1244 } 1245 } 1246 if (!o) { 1247 m_freem(m); 1248 return NULL; /* ENOBUFS */ 1249 } 1250 /* get hlen from <n, off> into <o, 0> */ 1251 o->m_len = hlen; 1252 memcpy(mtod(o, void *), mtod(n, char *) + off, hlen); 1253 n->m_len -= hlen; 1254 /* get tlen from <n->m_next, 0> into <o, hlen> */ 1255 m_copydata(n->m_next, 0, tlen, mtod(o, char *) + o->m_len); 1256 o->m_len += tlen; 1257 m_adj(n->m_next, tlen); 1258 o->m_next = n->m_next; 1259 n->m_next = o; 1260 n = o; 1261 off = 0; 1262 1263 ok: 1264 if (offp) 1265 *offp = off; 1266 return n; 1267 } 1268 1269 /* 1270 * Like m_pullup(), except a new mbuf is always allocated, and we allow 1271 * the amount of empty space before the data in the new mbuf to be specified 1272 * (in the event that the caller expects to prepend later). 1273 */ 1274 struct mbuf * 1275 m_copyup(struct mbuf *n, int len, int dstoff) 1276 { 1277 struct mbuf *m; 1278 int count, space; 1279 1280 KASSERT(len != M_COPYALL); 1281 if (len > ((int)MHLEN - dstoff)) 1282 goto bad; 1283 m = m_get(M_DONTWAIT, n->m_type); 1284 if (m == NULL) 1285 goto bad; 1286 MCLAIM(m, n->m_owner); 1287 if (n->m_flags & M_PKTHDR) { 1288 m_move_pkthdr(m, n); 1289 } 1290 m->m_data += dstoff; 1291 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1292 do { 1293 count = uimin(uimin(uimax(len, max_protohdr), space), n->m_len); 1294 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1295 (unsigned)count); 1296 len -= count; 1297 m->m_len += count; 1298 n->m_len -= count; 1299 space -= count; 1300 if (n->m_len) 1301 n->m_data += count; 1302 else 1303 n = m_free(n); 1304 } while (len > 0 && n); 1305 if (len > 0) { 1306 (void) m_free(m); 1307 goto bad; 1308 } 1309 m->m_next = n; 1310 return m; 1311 bad: 1312 m_freem(n); 1313 return NULL; 1314 } 1315 1316 struct mbuf * 1317 m_split(struct mbuf *m0, int len, int wait) 1318 { 1319 return m_split_internal(m0, len, wait, true); 1320 } 1321 1322 static struct mbuf * 1323 m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr) 1324 { 1325 struct mbuf *m, *n; 1326 unsigned len = len0, remain, len_save; 1327 1328 KASSERT(len0 != M_COPYALL); 1329 for (m = m0; m && len > m->m_len; m = m->m_next) 1330 len -= m->m_len; 1331 if (m == NULL) 1332 return NULL; 1333 1334 remain = m->m_len - len; 1335 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1336 n = m_gethdr(wait, m0->m_type); 1337 if (n == NULL) 1338 return NULL; 1339 1340 MCLAIM(n, m0->m_owner); 1341 m_copy_rcvif(n, m0); 1342 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1343 len_save = m0->m_pkthdr.len; 1344 m0->m_pkthdr.len = len0; 1345 1346 if (m->m_flags & M_EXT) 1347 goto extpacket; 1348 1349 if (remain > MHLEN) { 1350 /* m can't be the lead packet */ 1351 m_align(n, 0); 1352 n->m_len = 0; 1353 n->m_next = m_split(m, len, wait); 1354 if (n->m_next == NULL) { 1355 (void)m_free(n); 1356 m0->m_pkthdr.len = len_save; 1357 return NULL; 1358 } 1359 return n; 1360 } else { 1361 m_align(n, remain); 1362 } 1363 } else if (remain == 0) { 1364 n = m->m_next; 1365 m->m_next = NULL; 1366 return n; 1367 } else { 1368 n = m_get(wait, m->m_type); 1369 if (n == NULL) 1370 return NULL; 1371 MCLAIM(n, m->m_owner); 1372 m_align(n, remain); 1373 } 1374 1375 extpacket: 1376 if (m->m_flags & M_EXT) { 1377 n->m_data = m->m_data + len; 1378 MCLADDREFERENCE(m, n); 1379 } else { 1380 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1381 } 1382 1383 n->m_len = remain; 1384 m->m_len = len; 1385 n->m_next = m->m_next; 1386 m->m_next = NULL; 1387 return n; 1388 } 1389 1390 /* 1391 * Routine to copy from device local memory into mbufs. 1392 */ 1393 struct mbuf * 1394 m_devget(char *buf, int totlen, int off, struct ifnet *ifp) 1395 { 1396 struct mbuf *m; 1397 struct mbuf *top = NULL, **mp = ⊤ 1398 char *cp, *epkt; 1399 int len; 1400 1401 cp = buf; 1402 epkt = cp + totlen; 1403 if (off) { 1404 /* 1405 * If 'off' is non-zero, packet is trailer-encapsulated, 1406 * so we have to skip the type and length fields. 1407 */ 1408 cp += off + 2 * sizeof(uint16_t); 1409 totlen -= 2 * sizeof(uint16_t); 1410 } 1411 1412 m = m_gethdr(M_DONTWAIT, MT_DATA); 1413 if (m == NULL) 1414 return NULL; 1415 m_set_rcvif(m, ifp); 1416 m->m_pkthdr.len = totlen; 1417 m->m_len = MHLEN; 1418 1419 while (totlen > 0) { 1420 if (top) { 1421 m = m_get(M_DONTWAIT, MT_DATA); 1422 if (m == NULL) { 1423 m_freem(top); 1424 return NULL; 1425 } 1426 m->m_len = MLEN; 1427 } 1428 1429 len = uimin(totlen, epkt - cp); 1430 1431 if (len >= MINCLSIZE) { 1432 MCLGET(m, M_DONTWAIT); 1433 if ((m->m_flags & M_EXT) == 0) { 1434 m_free(m); 1435 m_freem(top); 1436 return NULL; 1437 } 1438 m->m_len = len = uimin(len, MCLBYTES); 1439 } else { 1440 /* 1441 * Place initial small packet/header at end of mbuf. 1442 */ 1443 if (len < m->m_len) { 1444 if (top == 0 && len + max_linkhdr <= m->m_len) 1445 m->m_data += max_linkhdr; 1446 m->m_len = len; 1447 } else 1448 len = m->m_len; 1449 } 1450 1451 memcpy(mtod(m, void *), cp, (size_t)len); 1452 1453 cp += len; 1454 *mp = m; 1455 mp = &m->m_next; 1456 totlen -= len; 1457 if (cp == epkt) 1458 cp = buf; 1459 } 1460 1461 return top; 1462 } 1463 1464 /* 1465 * Copy data from a buffer back into the indicated mbuf chain, 1466 * starting "off" bytes from the beginning, extending the mbuf 1467 * chain if necessary. 1468 */ 1469 void 1470 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1471 { 1472 #if defined(DEBUG) 1473 struct mbuf *origm = m0; 1474 int error; 1475 #endif 1476 1477 if (m0 == NULL) 1478 return; 1479 1480 #if defined(DEBUG) 1481 error = 1482 #endif 1483 m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND, 1484 M_DONTWAIT); 1485 1486 #if defined(DEBUG) 1487 if (error != 0 || (m0 != NULL && origm != m0)) 1488 panic("m_copyback"); 1489 #endif 1490 } 1491 1492 struct mbuf * 1493 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1494 { 1495 int error; 1496 1497 /* don't support chain expansion */ 1498 KASSERT(len != M_COPYALL); 1499 KDASSERT(off + len <= m_length(m0)); 1500 1501 error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW, 1502 how); 1503 if (error) { 1504 /* 1505 * no way to recover from partial success. 1506 * just free the chain. 1507 */ 1508 m_freem(m0); 1509 return NULL; 1510 } 1511 return m0; 1512 } 1513 1514 int 1515 m_makewritable(struct mbuf **mp, int off, int len, int how) 1516 { 1517 int error; 1518 #if defined(DEBUG) 1519 int origlen = m_length(*mp); 1520 #endif 1521 1522 error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW, 1523 how); 1524 if (error) 1525 return error; 1526 1527 #if defined(DEBUG) 1528 int reslen = 0; 1529 for (struct mbuf *n = *mp; n; n = n->m_next) 1530 reslen += n->m_len; 1531 if (origlen != reslen) 1532 panic("m_makewritable: length changed"); 1533 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1534 panic("m_makewritable: inconsist"); 1535 #endif 1536 1537 return 0; 1538 } 1539 1540 static int 1541 m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp, 1542 int flags, int how) 1543 { 1544 int mlen; 1545 struct mbuf *m, *n; 1546 struct mbuf **mp; 1547 int totlen = 0; 1548 const char *cp = vp; 1549 1550 KASSERT(mp0 != NULL); 1551 KASSERT(*mp0 != NULL); 1552 KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL); 1553 KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL); 1554 1555 if (len == M_COPYALL) 1556 len = m_length(*mp0) - off; 1557 1558 /* 1559 * we don't bother to update "totlen" in the case of CB_COW, 1560 * assuming that CB_EXTEND and CB_COW are exclusive. 1561 */ 1562 1563 KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0); 1564 1565 mp = mp0; 1566 m = *mp; 1567 while (off > (mlen = m->m_len)) { 1568 off -= mlen; 1569 totlen += mlen; 1570 if (m->m_next == NULL) { 1571 int tspace; 1572 extend: 1573 if ((flags & CB_EXTEND) == 0) 1574 goto out; 1575 1576 /* 1577 * try to make some space at the end of "m". 1578 */ 1579 1580 mlen = m->m_len; 1581 if (off + len >= MINCLSIZE && 1582 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1583 MCLGET(m, how); 1584 } 1585 tspace = M_TRAILINGSPACE(m); 1586 if (tspace > 0) { 1587 tspace = uimin(tspace, off + len); 1588 KASSERT(tspace > 0); 1589 memset(mtod(m, char *) + m->m_len, 0, 1590 uimin(off, tspace)); 1591 m->m_len += tspace; 1592 off += mlen; 1593 totlen -= mlen; 1594 continue; 1595 } 1596 1597 /* 1598 * need to allocate an mbuf. 1599 */ 1600 1601 if (off + len >= MINCLSIZE) { 1602 n = m_getcl(how, m->m_type, 0); 1603 } else { 1604 n = m_get(how, m->m_type); 1605 } 1606 if (n == NULL) { 1607 goto out; 1608 } 1609 n->m_len = uimin(M_TRAILINGSPACE(n), off + len); 1610 memset(mtod(n, char *), 0, uimin(n->m_len, off)); 1611 m->m_next = n; 1612 } 1613 mp = &m->m_next; 1614 m = m->m_next; 1615 } 1616 while (len > 0) { 1617 mlen = m->m_len - off; 1618 if (mlen != 0 && M_READONLY(m)) { 1619 /* 1620 * This mbuf is read-only. Allocate a new writable 1621 * mbuf and try again. 1622 */ 1623 char *datap; 1624 int eatlen; 1625 1626 KASSERT((flags & CB_COW) != 0); 1627 1628 /* 1629 * if we're going to write into the middle of 1630 * a mbuf, split it first. 1631 */ 1632 if (off > 0) { 1633 n = m_split_internal(m, off, how, false); 1634 if (n == NULL) 1635 goto enobufs; 1636 m->m_next = n; 1637 mp = &m->m_next; 1638 m = n; 1639 off = 0; 1640 continue; 1641 } 1642 1643 /* 1644 * XXX TODO coalesce into the trailingspace of 1645 * the previous mbuf when possible. 1646 */ 1647 1648 /* 1649 * allocate a new mbuf. copy packet header if needed. 1650 */ 1651 n = m_get(how, m->m_type); 1652 if (n == NULL) 1653 goto enobufs; 1654 MCLAIM(n, m->m_owner); 1655 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1656 m_move_pkthdr(n, m); 1657 n->m_len = MHLEN; 1658 } else { 1659 if (len >= MINCLSIZE) 1660 MCLGET(n, M_DONTWAIT); 1661 n->m_len = 1662 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1663 } 1664 if (n->m_len > len) 1665 n->m_len = len; 1666 1667 /* 1668 * free the region which has been overwritten. 1669 * copying data from old mbufs if requested. 1670 */ 1671 if (flags & CB_PRESERVE) 1672 datap = mtod(n, char *); 1673 else 1674 datap = NULL; 1675 eatlen = n->m_len; 1676 while (m != NULL && M_READONLY(m) && 1677 n->m_type == m->m_type && eatlen > 0) { 1678 mlen = uimin(eatlen, m->m_len); 1679 if (datap) { 1680 m_copydata(m, 0, mlen, datap); 1681 datap += mlen; 1682 } 1683 m->m_data += mlen; 1684 m->m_len -= mlen; 1685 eatlen -= mlen; 1686 if (m->m_len == 0) 1687 *mp = m = m_free(m); 1688 } 1689 if (eatlen > 0) 1690 n->m_len -= eatlen; 1691 n->m_next = m; 1692 *mp = m = n; 1693 continue; 1694 } 1695 mlen = uimin(mlen, len); 1696 if (flags & CB_COPYBACK) { 1697 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1698 cp += mlen; 1699 } 1700 len -= mlen; 1701 mlen += off; 1702 off = 0; 1703 totlen += mlen; 1704 if (len == 0) 1705 break; 1706 if (m->m_next == NULL) { 1707 goto extend; 1708 } 1709 mp = &m->m_next; 1710 m = m->m_next; 1711 } 1712 1713 out: 1714 if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1715 KASSERT((flags & CB_EXTEND) != 0); 1716 m->m_pkthdr.len = totlen; 1717 } 1718 1719 return 0; 1720 1721 enobufs: 1722 return ENOBUFS; 1723 } 1724 1725 /* 1726 * Compress the mbuf chain. Return the new mbuf chain on success, NULL on 1727 * failure. The first mbuf is preserved, and on success the pointer returned 1728 * is the same as the one passed. 1729 */ 1730 struct mbuf * 1731 m_defrag(struct mbuf *m, int how) 1732 { 1733 struct mbuf *m0, *mn, *n; 1734 int sz; 1735 1736 KASSERT((m->m_flags & M_PKTHDR) != 0); 1737 1738 if (m->m_next == NULL) 1739 return m; 1740 1741 /* Defrag to single mbuf if at all possible */ 1742 if ((m->m_flags & M_EXT) == 0 && m->m_pkthdr.len <= MCLBYTES) { 1743 if (m->m_pkthdr.len <= MHLEN) { 1744 if (M_TRAILINGSPACE(m) < (m->m_pkthdr.len - m->m_len)) { 1745 KASSERTMSG(M_LEADINGSPACE(m) + 1746 M_TRAILINGSPACE(m) >= 1747 (m->m_pkthdr.len - m->m_len), 1748 "too small leading %d trailing %d ro? %d" 1749 " pkthdr.len %d mlen %d", 1750 (int)M_LEADINGSPACE(m), 1751 (int)M_TRAILINGSPACE(m), 1752 M_READONLY(m), 1753 m->m_pkthdr.len, m->m_len); 1754 1755 memmove(m->m_pktdat, m->m_data, m->m_len); 1756 m->m_data = m->m_pktdat; 1757 1758 KASSERT(M_TRAILINGSPACE(m) >= 1759 (m->m_pkthdr.len - m->m_len)); 1760 } 1761 } else { 1762 /* Must copy data before adding cluster */ 1763 m0 = m_get(how, MT_DATA); 1764 if (m0 == NULL) 1765 return NULL; 1766 KASSERTMSG(m->m_len <= MHLEN, 1767 "m=%p m->m_len=%d MHLEN=%u", 1768 m, m->m_len, (unsigned)MHLEN); 1769 m_copydata(m, 0, m->m_len, mtod(m0, void *)); 1770 1771 MCLGET(m, how); 1772 if ((m->m_flags & M_EXT) == 0) { 1773 m_free(m0); 1774 return NULL; 1775 } 1776 memcpy(m->m_data, mtod(m0, void *), m->m_len); 1777 m_free(m0); 1778 } 1779 KASSERTMSG(M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len), 1780 "m=%p M_TRAILINGSPACE(m)=%zd m->m_pkthdr.len=%d" 1781 " m->m_len=%d", 1782 m, M_TRAILINGSPACE(m), m->m_pkthdr.len, m->m_len); 1783 m_copydata(m->m_next, 0, m->m_pkthdr.len - m->m_len, 1784 mtod(m, char *) + m->m_len); 1785 m->m_len = m->m_pkthdr.len; 1786 m_freem(m->m_next); 1787 m->m_next = NULL; 1788 return m; 1789 } 1790 1791 m0 = m_get(how, MT_DATA); 1792 if (m0 == NULL) 1793 return NULL; 1794 mn = m0; 1795 1796 sz = m->m_pkthdr.len - m->m_len; 1797 KASSERT(sz >= 0); 1798 1799 do { 1800 if (sz > MLEN) { 1801 MCLGET(mn, how); 1802 if ((mn->m_flags & M_EXT) == 0) { 1803 m_freem(m0); 1804 return NULL; 1805 } 1806 } 1807 1808 mn->m_len = MIN(sz, MCLBYTES); 1809 1810 m_copydata(m, m->m_pkthdr.len - sz, mn->m_len, 1811 mtod(mn, void *)); 1812 1813 sz -= mn->m_len; 1814 1815 if (sz > 0) { 1816 /* need more mbufs */ 1817 n = m_get(how, MT_DATA); 1818 if (n == NULL) { 1819 m_freem(m0); 1820 return NULL; 1821 } 1822 1823 mn->m_next = n; 1824 mn = n; 1825 } 1826 } while (sz > 0); 1827 1828 m_freem(m->m_next); 1829 m->m_next = m0; 1830 1831 return m; 1832 } 1833 1834 void 1835 m_remove_pkthdr(struct mbuf *m) 1836 { 1837 KASSERT(m->m_flags & M_PKTHDR); 1838 1839 m_tag_delete_chain(m); 1840 m->m_flags &= ~M_PKTHDR; 1841 memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); 1842 } 1843 1844 void 1845 m_copy_pkthdr(struct mbuf *to, struct mbuf *from) 1846 { 1847 KASSERT((to->m_flags & M_EXT) == 0); 1848 KASSERT((to->m_flags & M_PKTHDR) == 0 || 1849 SLIST_FIRST(&to->m_pkthdr.tags) == NULL); 1850 KASSERT((from->m_flags & M_PKTHDR) != 0); 1851 1852 to->m_pkthdr = from->m_pkthdr; 1853 to->m_flags = from->m_flags & M_COPYFLAGS; 1854 to->m_data = to->m_pktdat; 1855 1856 SLIST_INIT(&to->m_pkthdr.tags); 1857 m_tag_copy_chain(to, from); 1858 } 1859 1860 void 1861 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1862 { 1863 KASSERT((to->m_flags & M_EXT) == 0); 1864 KASSERT((to->m_flags & M_PKTHDR) == 0 || 1865 SLIST_FIRST(&to->m_pkthdr.tags) == NULL); 1866 KASSERT((from->m_flags & M_PKTHDR) != 0); 1867 1868 to->m_pkthdr = from->m_pkthdr; 1869 to->m_flags = from->m_flags & M_COPYFLAGS; 1870 to->m_data = to->m_pktdat; 1871 1872 from->m_flags &= ~M_PKTHDR; 1873 } 1874 1875 /* 1876 * Set the m_data pointer of a newly-allocated mbuf to place an object of the 1877 * specified size at the end of the mbuf, longword aligned. 1878 */ 1879 void 1880 m_align(struct mbuf *m, int len) 1881 { 1882 int buflen, adjust; 1883 1884 KASSERT(len != M_COPYALL); 1885 KASSERTMSG(M_LEADINGSPACE(m) == 0, "m=%p M_LEADINGSPACE(m)=%zd", 1886 m, M_LEADINGSPACE(m)); 1887 1888 buflen = M_BUFSIZE(m); 1889 1890 KASSERTMSG(len <= buflen, "m=%p len=%d buflen=%d", m, len, buflen); 1891 adjust = buflen - len; 1892 m->m_data += adjust &~ (sizeof(long)-1); 1893 } 1894 1895 /* 1896 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1897 * beginning, continuing for "len" bytes. 1898 */ 1899 int 1900 m_apply(struct mbuf *m, int off, int len, 1901 int (*f)(void *, void *, unsigned int), void *arg) 1902 { 1903 unsigned int count; 1904 int rval; 1905 1906 KASSERT(len != M_COPYALL); 1907 KASSERT(len >= 0); 1908 KASSERT(off >= 0); 1909 1910 while (off > 0) { 1911 KASSERT(m != NULL); 1912 if (off < m->m_len) 1913 break; 1914 off -= m->m_len; 1915 m = m->m_next; 1916 } 1917 while (len > 0) { 1918 KASSERT(m != NULL); 1919 count = uimin(m->m_len - off, len); 1920 1921 rval = (*f)(arg, mtod(m, char *) + off, count); 1922 if (rval) 1923 return rval; 1924 1925 len -= count; 1926 off = 0; 1927 m = m->m_next; 1928 } 1929 1930 return 0; 1931 } 1932 1933 /* 1934 * Return a pointer to mbuf/offset of location in mbuf chain. 1935 */ 1936 struct mbuf * 1937 m_getptr(struct mbuf *m, int loc, int *off) 1938 { 1939 1940 while (loc >= 0) { 1941 /* Normal end of search */ 1942 if (m->m_len > loc) { 1943 *off = loc; 1944 return m; 1945 } 1946 1947 loc -= m->m_len; 1948 1949 if (m->m_next == NULL) { 1950 if (loc == 0) { 1951 /* Point at the end of valid data */ 1952 *off = m->m_len; 1953 return m; 1954 } 1955 return NULL; 1956 } else { 1957 m = m->m_next; 1958 } 1959 } 1960 1961 return NULL; 1962 } 1963 1964 /* 1965 * Release a reference to the mbuf external storage. 1966 * 1967 * => free the mbuf m itself as well. 1968 */ 1969 static void 1970 m_ext_free(struct mbuf *m) 1971 { 1972 const bool embedded = MEXT_ISEMBEDDED(m); 1973 bool dofree = true; 1974 u_int refcnt; 1975 1976 KASSERT((m->m_flags & M_EXT) != 0); 1977 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1978 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1979 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1980 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1981 1982 if (__predict_false(m->m_type == MT_FREE)) { 1983 panic("mbuf %p already freed", m); 1984 } 1985 1986 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1987 refcnt = m->m_ext.ext_refcnt = 0; 1988 } else { 1989 membar_release(); 1990 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1991 } 1992 1993 if (refcnt > 0) { 1994 if (embedded) { 1995 /* 1996 * other mbuf's m_ext_ref still points to us. 1997 */ 1998 dofree = false; 1999 } else { 2000 m->m_ext_ref = m; 2001 } 2002 } else { 2003 /* 2004 * dropping the last reference 2005 */ 2006 membar_acquire(); 2007 if (!embedded) { 2008 m->m_ext.ext_refcnt++; /* XXX */ 2009 m_ext_free(m->m_ext_ref); 2010 m->m_ext_ref = m; 2011 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 2012 pool_cache_put_paddr(mcl_cache, 2013 m->m_ext.ext_buf, m->m_ext.ext_paddr); 2014 } else if (m->m_ext.ext_free) { 2015 (*m->m_ext.ext_free)(m, 2016 m->m_ext.ext_buf, m->m_ext.ext_size, 2017 m->m_ext.ext_arg); 2018 /* 2019 * 'm' is already freed by the ext_free callback. 2020 */ 2021 dofree = false; 2022 } else { 2023 free(m->m_ext.ext_buf, 0); 2024 } 2025 } 2026 2027 if (dofree) { 2028 m->m_type = MT_FREE; 2029 m->m_data = NULL; 2030 pool_cache_put(mb_cache, m); 2031 } 2032 } 2033 2034 /* 2035 * Free a single mbuf and associated external storage. Return the 2036 * successor, if any. 2037 */ 2038 struct mbuf * 2039 m_free(struct mbuf *m) 2040 { 2041 struct mbuf *n; 2042 2043 mowner_revoke(m, 1, m->m_flags); 2044 mbstat_type_add(m->m_type, -1); 2045 2046 if (m->m_flags & M_PKTHDR) 2047 m_tag_delete_chain(m); 2048 2049 n = m->m_next; 2050 2051 if (m->m_flags & M_EXT) { 2052 m_ext_free(m); 2053 } else { 2054 if (__predict_false(m->m_type == MT_FREE)) { 2055 panic("mbuf %p already freed", m); 2056 } 2057 m->m_type = MT_FREE; 2058 m->m_data = NULL; 2059 pool_cache_put(mb_cache, m); 2060 } 2061 2062 return n; 2063 } 2064 2065 void 2066 m_freem(struct mbuf *m) 2067 { 2068 if (m == NULL) 2069 return; 2070 do { 2071 m = m_free(m); 2072 } while (m); 2073 } 2074 2075 #if defined(DDB) 2076 void 2077 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 2078 { 2079 char ch; 2080 bool opt_c = false; 2081 bool opt_d = false; 2082 #if NETHER > 0 2083 bool opt_v = false; 2084 const struct mbuf *m0 = NULL; 2085 #endif 2086 int no = 0; 2087 char buf[512]; 2088 2089 while ((ch = *(modif++)) != '\0') { 2090 switch (ch) { 2091 case 'c': 2092 opt_c = true; 2093 break; 2094 case 'd': 2095 opt_d = true; 2096 break; 2097 #if NETHER > 0 2098 case 'v': 2099 opt_v = true; 2100 m0 = m; 2101 break; 2102 #endif 2103 default: 2104 break; 2105 } 2106 } 2107 2108 nextchain: 2109 (*pr)("MBUF(%d) %p\n", no, m); 2110 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 2111 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 2112 m->m_data, m->m_len, m->m_type, buf); 2113 if (opt_d) { 2114 int i; 2115 unsigned char *p = m->m_data; 2116 2117 (*pr)(" data:"); 2118 2119 for (i = 0; i < m->m_len; i++) { 2120 if (i % 16 == 0) 2121 (*pr)("\n"); 2122 (*pr)(" %02x", p[i]); 2123 } 2124 2125 (*pr)("\n"); 2126 } 2127 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 2128 m->m_nextpkt); 2129 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 2130 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 2131 (int)M_READONLY(m)); 2132 if ((m->m_flags & M_PKTHDR) != 0) { 2133 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 2134 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%" 2135 PRIx32 ", segsz=%u\n", 2136 m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m), 2137 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 2138 } 2139 if ((m->m_flags & M_EXT)) { 2140 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 2141 "ext_free=%p, ext_arg=%p\n", 2142 m->m_ext.ext_refcnt, 2143 m->m_ext.ext_buf, m->m_ext.ext_size, 2144 m->m_ext.ext_free, m->m_ext.ext_arg); 2145 } 2146 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 2147 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 2148 vaddr_t eva = sva + m->m_ext.ext_size; 2149 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 2150 int i; 2151 2152 (*pr)(" pages:"); 2153 for (i = 0; i < n; i ++) { 2154 (*pr)(" %p", m->m_ext.ext_pgs[i]); 2155 } 2156 (*pr)("\n"); 2157 } 2158 2159 if (opt_c) { 2160 m = m->m_next; 2161 if (m != NULL) { 2162 no++; 2163 goto nextchain; 2164 } 2165 } 2166 2167 #if NETHER > 0 2168 if (opt_v && m0) 2169 m_examine(m0, AF_ETHER, modif, pr); 2170 #endif 2171 } 2172 #endif /* defined(DDB) */ 2173 2174 #if defined(MBUFTRACE) 2175 void 2176 mowner_init_owner(struct mowner *mo, const char *name, const char *descr) 2177 { 2178 memset(mo, 0, sizeof(*mo)); 2179 strlcpy(mo->mo_name, name, sizeof(mo->mo_name)); 2180 strlcpy(mo->mo_descr, descr, sizeof(mo->mo_descr)); 2181 } 2182 2183 void 2184 mowner_attach(struct mowner *mo) 2185 { 2186 2187 KASSERT(mo->mo_counters == NULL); 2188 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 2189 2190 /* XXX lock */ 2191 LIST_INSERT_HEAD(&mowners, mo, mo_link); 2192 } 2193 2194 void 2195 mowner_detach(struct mowner *mo) 2196 { 2197 2198 KASSERT(mo->mo_counters != NULL); 2199 2200 /* XXX lock */ 2201 LIST_REMOVE(mo, mo_link); 2202 2203 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 2204 mo->mo_counters = NULL; 2205 } 2206 2207 void 2208 mowner_init(struct mbuf *m, int type) 2209 { 2210 struct mowner_counter *mc; 2211 struct mowner *mo; 2212 int s; 2213 2214 m->m_owner = mo = &unknown_mowners[type]; 2215 s = splvm(); 2216 mc = percpu_getref(mo->mo_counters); 2217 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 2218 percpu_putref(mo->mo_counters); 2219 splx(s); 2220 } 2221 2222 void 2223 mowner_ref(struct mbuf *m, int flags) 2224 { 2225 struct mowner *mo = m->m_owner; 2226 struct mowner_counter *mc; 2227 int s; 2228 2229 s = splvm(); 2230 mc = percpu_getref(mo->mo_counters); 2231 if ((flags & M_EXT) != 0) 2232 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 2233 if ((flags & M_EXT_CLUSTER) != 0) 2234 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 2235 percpu_putref(mo->mo_counters); 2236 splx(s); 2237 } 2238 2239 void 2240 mowner_revoke(struct mbuf *m, bool all, int flags) 2241 { 2242 struct mowner *mo = m->m_owner; 2243 struct mowner_counter *mc; 2244 int s; 2245 2246 s = splvm(); 2247 mc = percpu_getref(mo->mo_counters); 2248 if ((flags & M_EXT) != 0) 2249 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 2250 if ((flags & M_EXT_CLUSTER) != 0) 2251 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 2252 if (all) 2253 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 2254 percpu_putref(mo->mo_counters); 2255 splx(s); 2256 if (all) 2257 m->m_owner = &revoked_mowner; 2258 } 2259 2260 static void 2261 mowner_claim(struct mbuf *m, struct mowner *mo) 2262 { 2263 struct mowner_counter *mc; 2264 int flags = m->m_flags; 2265 int s; 2266 2267 s = splvm(); 2268 mc = percpu_getref(mo->mo_counters); 2269 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 2270 if ((flags & M_EXT) != 0) 2271 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 2272 if ((flags & M_EXT_CLUSTER) != 0) 2273 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 2274 percpu_putref(mo->mo_counters); 2275 splx(s); 2276 m->m_owner = mo; 2277 } 2278 2279 void 2280 m_claim(struct mbuf *m, struct mowner *mo) 2281 { 2282 2283 if (m->m_owner == mo || mo == NULL) 2284 return; 2285 2286 mowner_revoke(m, true, m->m_flags); 2287 mowner_claim(m, mo); 2288 } 2289 2290 void 2291 m_claimm(struct mbuf *m, struct mowner *mo) 2292 { 2293 2294 for (; m != NULL; m = m->m_next) 2295 m_claim(m, mo); 2296 } 2297 #endif /* defined(MBUFTRACE) */ 2298 2299 #ifdef DIAGNOSTIC 2300 /* 2301 * Verify that the mbuf chain is not malformed. Used only for diagnostic. 2302 * Panics on error. 2303 */ 2304 void 2305 m_verify_packet(struct mbuf *m) 2306 { 2307 struct mbuf *n = m; 2308 char *low, *high, *dat; 2309 int totlen = 0, len; 2310 2311 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 2312 panic("%s: mbuf doesn't have M_PKTHDR", __func__); 2313 } 2314 2315 while (n != NULL) { 2316 if (__predict_false(n->m_type == MT_FREE)) { 2317 panic("%s: mbuf already freed (n = %p)", __func__, n); 2318 } 2319 #if 0 2320 /* 2321 * This ought to be a rule of the mbuf API. Unfortunately, 2322 * many places don't respect that rule. 2323 */ 2324 if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) { 2325 panic("%s: M_PKTHDR set on secondary mbuf", __func__); 2326 } 2327 #endif 2328 if (__predict_false(n->m_nextpkt != NULL)) { 2329 panic("%s: m_nextpkt not null (m_nextpkt = %p)", 2330 __func__, n->m_nextpkt); 2331 } 2332 2333 dat = n->m_data; 2334 len = n->m_len; 2335 if (__predict_false(len < 0)) { 2336 panic("%s: incorrect length (len = %d)", __func__, len); 2337 } 2338 2339 low = M_BUFADDR(n); 2340 high = low + M_BUFSIZE(n); 2341 if (__predict_false((dat < low) || (dat + len > high))) { 2342 panic("%s: m_data not in packet" 2343 "(dat = %p, len = %d, low = %p, high = %p)", 2344 __func__, dat, len, low, high); 2345 } 2346 2347 totlen += len; 2348 n = n->m_next; 2349 } 2350 2351 if (__predict_false(totlen != m->m_pkthdr.len)) { 2352 panic("%s: inconsistent mbuf length (%d != %d)", __func__, 2353 totlen, m->m_pkthdr.len); 2354 } 2355 } 2356 #endif 2357 2358 struct m_tag * 2359 m_tag_get(int type, int len, int wait) 2360 { 2361 struct m_tag *t; 2362 2363 if (len < 0) 2364 return NULL; 2365 t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait); 2366 if (t == NULL) 2367 return NULL; 2368 t->m_tag_id = type; 2369 t->m_tag_len = len; 2370 return t; 2371 } 2372 2373 void 2374 m_tag_free(struct m_tag *t) 2375 { 2376 free(t, M_PACKET_TAGS); 2377 } 2378 2379 void 2380 m_tag_prepend(struct mbuf *m, struct m_tag *t) 2381 { 2382 KASSERT((m->m_flags & M_PKTHDR) != 0); 2383 SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); 2384 } 2385 2386 void 2387 m_tag_unlink(struct mbuf *m, struct m_tag *t) 2388 { 2389 KASSERT((m->m_flags & M_PKTHDR) != 0); 2390 SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); 2391 } 2392 2393 void 2394 m_tag_delete(struct mbuf *m, struct m_tag *t) 2395 { 2396 m_tag_unlink(m, t); 2397 m_tag_free(t); 2398 } 2399 2400 void 2401 m_tag_delete_chain(struct mbuf *m) 2402 { 2403 struct m_tag *p, *q; 2404 2405 KASSERT((m->m_flags & M_PKTHDR) != 0); 2406 2407 p = SLIST_FIRST(&m->m_pkthdr.tags); 2408 if (p == NULL) 2409 return; 2410 while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) 2411 m_tag_delete(m, q); 2412 m_tag_delete(m, p); 2413 } 2414 2415 struct m_tag * 2416 m_tag_find(const struct mbuf *m, int type) 2417 { 2418 struct m_tag *p; 2419 2420 KASSERT((m->m_flags & M_PKTHDR) != 0); 2421 2422 p = SLIST_FIRST(&m->m_pkthdr.tags); 2423 while (p != NULL) { 2424 if (p->m_tag_id == type) 2425 return p; 2426 p = SLIST_NEXT(p, m_tag_link); 2427 } 2428 return NULL; 2429 } 2430 2431 struct m_tag * 2432 m_tag_copy(struct m_tag *t) 2433 { 2434 struct m_tag *p; 2435 2436 p = m_tag_get(t->m_tag_id, t->m_tag_len, M_NOWAIT); 2437 if (p == NULL) 2438 return NULL; 2439 memcpy(p + 1, t + 1, t->m_tag_len); 2440 return p; 2441 } 2442 2443 /* 2444 * Copy two tag chains. The destination mbuf (to) loses any attached 2445 * tags even if the operation fails. This should not be a problem, as 2446 * m_tag_copy_chain() is typically called with a newly-allocated 2447 * destination mbuf. 2448 */ 2449 int 2450 m_tag_copy_chain(struct mbuf *to, struct mbuf *from) 2451 { 2452 struct m_tag *p, *t, *tprev = NULL; 2453 2454 KASSERT((from->m_flags & M_PKTHDR) != 0); 2455 2456 m_tag_delete_chain(to); 2457 SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { 2458 t = m_tag_copy(p); 2459 if (t == NULL) { 2460 m_tag_delete_chain(to); 2461 return 0; 2462 } 2463 if (tprev == NULL) 2464 SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); 2465 else 2466 SLIST_INSERT_AFTER(tprev, t, m_tag_link); 2467 tprev = t; 2468 } 2469 return 1; 2470 } 2471