1 /* $NetBSD: uipc_mbuf.c,v 1.255 2024/12/15 11:07:10 skrll Exp $ */ 2 3 /* 4 * Copyright (c) 1999, 2001, 2018 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and Maxime Villard. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.255 2024/12/15 11:07:10 skrll Exp $"); 66 67 #ifdef _KERNEL_OPT 68 #include "ether.h" 69 #include "opt_ddb.h" 70 #include "opt_mbuftrace.h" 71 #include "opt_nmbclusters.h" 72 #endif 73 74 #include <sys/param.h> 75 #include <sys/types.h> 76 77 #include <sys/atomic.h> 78 #include <sys/cpu.h> 79 #include <sys/domain.h> 80 #include <sys/kernel.h> 81 #include <sys/mbuf.h> 82 #include <sys/percpu.h> 83 #include <sys/pool.h> 84 #include <sys/proc.h> 85 #include <sys/protosw.h> 86 #include <sys/sdt.h> 87 #include <sys/socket.h> 88 #include <sys/sysctl.h> 89 #include <sys/syslog.h> 90 #include <sys/systm.h> 91 92 #include <net/if.h> 93 94 pool_cache_t mb_cache; /* mbuf cache */ 95 static pool_cache_t mcl_cache; /* mbuf cluster cache */ 96 97 struct mbstat mbstat; 98 int max_linkhdr; 99 int max_protohdr; 100 int max_hdr; 101 int max_datalen; 102 103 static void mb_drain(void *, int); 104 static int mb_ctor(void *, void *, int); 105 106 static void sysctl_kern_mbuf_setup(void); 107 108 static struct sysctllog *mbuf_sysctllog; 109 110 static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool); 111 static struct mbuf *m_split_internal(struct mbuf *, int, int, bool); 112 static int m_copyback_internal(struct mbuf **, int, int, const void *, 113 int, int); 114 115 /* Flags for m_copyback_internal. */ 116 #define CB_COPYBACK 0x0001 /* copyback from cp */ 117 #define CB_PRESERVE 0x0002 /* preserve original data */ 118 #define CB_COW 0x0004 /* do copy-on-write */ 119 #define CB_EXTEND 0x0008 /* extend chain */ 120 121 static const char mclpool_warnmsg[] = 122 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters"; 123 124 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 125 126 static percpu_t *mbstat_percpu; 127 128 #ifdef MBUFTRACE 129 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners); 130 struct mowner unknown_mowners[] = { 131 MOWNER_INIT("unknown", "free"), 132 MOWNER_INIT("unknown", "data"), 133 MOWNER_INIT("unknown", "header"), 134 MOWNER_INIT("unknown", "soname"), 135 MOWNER_INIT("unknown", "soopts"), 136 MOWNER_INIT("unknown", "ftable"), 137 MOWNER_INIT("unknown", "control"), 138 MOWNER_INIT("unknown", "oobdata"), 139 }; 140 struct mowner revoked_mowner = MOWNER_INIT("revoked", ""); 141 #endif 142 143 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m)) 144 145 #define MCLADDREFERENCE(o, n) \ 146 do { \ 147 KASSERT(((o)->m_flags & M_EXT) != 0); \ 148 KASSERT(((n)->m_flags & M_EXT) == 0); \ 149 KASSERT((o)->m_ext.ext_refcnt >= 1); \ 150 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \ 151 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \ 152 (n)->m_ext_ref = (o)->m_ext_ref; \ 153 mowner_ref((n), (n)->m_flags); \ 154 } while (/* CONSTCOND */ 0) 155 156 static int 157 nmbclusters_limit(void) 158 { 159 #if defined(PMAP_MAP_POOLPAGE) 160 /* direct mapping, doesn't use space in kmem_arena */ 161 vsize_t max_size = physmem / 4; 162 #else 163 vsize_t max_size = MIN(physmem / 4, nkmempages / 4); 164 #endif 165 166 max_size = max_size * PAGE_SIZE / MCLBYTES; 167 #ifdef NMBCLUSTERS_MAX 168 max_size = MIN(max_size, NMBCLUSTERS_MAX); 169 #endif 170 171 return max_size; 172 } 173 174 /* 175 * Initialize the mbuf allocator. 
176 */ 177 void 178 mbinit(void) 179 { 180 181 CTASSERT(sizeof(struct _m_ext) <= MHLEN); 182 CTASSERT(sizeof(struct mbuf) == MSIZE); 183 184 sysctl_kern_mbuf_setup(); 185 186 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl", 187 NULL, IPL_VM, mb_ctor, NULL, NULL); 188 KASSERT(mb_cache != NULL); 189 190 mcl_cache = pool_cache_init(mclbytes, COHERENCY_UNIT, 0, 0, "mclpl", 191 NULL, IPL_VM, NULL, NULL, NULL); 192 KASSERT(mcl_cache != NULL); 193 194 pool_cache_set_drain_hook(mb_cache, mb_drain, NULL); 195 pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL); 196 197 /* 198 * Set an arbitrary default limit on the number of mbuf clusters. 199 */ 200 #ifdef NMBCLUSTERS 201 nmbclusters = MIN(NMBCLUSTERS, nmbclusters_limit()); 202 #else 203 nmbclusters = MAX(1024, 204 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16); 205 nmbclusters = MIN(nmbclusters, nmbclusters_limit()); 206 #endif 207 208 /* 209 * Set the hard limit on the mclpool to the number of 210 * mbuf clusters the kernel is to support. Log the limit 211 * reached message max once a minute. 212 */ 213 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60); 214 215 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu)); 216 217 /* 218 * Set a low water mark for both mbufs and clusters. This should 219 * help ensure that they can be allocated in a memory starvation 220 * situation. This is important for e.g. diskless systems which 221 * must allocate mbufs in order for the pagedaemon to clean pages. 222 */ 223 pool_cache_setlowat(mb_cache, mblowat); 224 pool_cache_setlowat(mcl_cache, mcllowat); 225 226 #ifdef MBUFTRACE 227 { 228 /* 229 * Attach the unknown mowners. 230 */ 231 int i; 232 MOWNER_ATTACH(&revoked_mowner); 233 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]); 234 i-- > 0; ) 235 MOWNER_ATTACH(&unknown_mowners[i]); 236 } 237 #endif 238 } 239 240 static void 241 mb_drain(void *arg, int flags) 242 { 243 struct domain *dp; 244 const struct protosw *pr; 245 struct ifnet *ifp; 246 int s; 247 248 KERNEL_LOCK(1, NULL); 249 s = splvm(); 250 DOMAIN_FOREACH(dp) { 251 for (pr = dp->dom_protosw; 252 pr < dp->dom_protoswNPROTOSW; pr++) 253 if (pr->pr_drain) 254 (*pr->pr_drain)(); 255 } 256 /* XXX we cannot use psref in H/W interrupt */ 257 if (!cpu_intr_p()) { 258 int bound = curlwp_bind(); 259 IFNET_READER_FOREACH(ifp) { 260 struct psref psref; 261 262 if_acquire(ifp, &psref); 263 264 if (ifp->if_drain) 265 (*ifp->if_drain)(ifp); 266 267 if_release(ifp, &psref); 268 } 269 curlwp_bindx(bound); 270 } 271 splx(s); 272 mbstat.m_drain++; 273 KERNEL_UNLOCK_ONE(NULL); 274 } 275 276 /* 277 * sysctl helper routine for the kern.mbuf subtree. 278 * nmbclusters, mblowat and mcllowat need range 279 * checking and pool tweaking after being reset. 
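 *
 * These knobs are normally tuned from userland; an illustrative
 * invocation (the value is an example only) would be:
 *
 *	sysctl -w kern.mbuf.nmbclusters=65536
 *
 * The handler below rejects a new nmbclusters value that is smaller
 * than the current one or larger than nmbclusters_limit().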
280 */ 281 static int 282 sysctl_kern_mbuf(SYSCTLFN_ARGS) 283 { 284 int error, newval; 285 struct sysctlnode node; 286 287 node = *rnode; 288 node.sysctl_data = &newval; 289 switch (rnode->sysctl_num) { 290 case MBUF_NMBCLUSTERS: 291 case MBUF_MBLOWAT: 292 case MBUF_MCLLOWAT: 293 newval = *(int*)rnode->sysctl_data; 294 break; 295 case MBUF_NMBCLUSTERS_LIMIT: 296 newval = nmbclusters_limit(); 297 break; 298 default: 299 return SET_ERROR(EOPNOTSUPP); 300 } 301 302 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 303 if (error || newp == NULL) 304 return error; 305 if (newval < 0) 306 return SET_ERROR(EINVAL); 307 308 switch (node.sysctl_num) { 309 case MBUF_NMBCLUSTERS: 310 if (newval < nmbclusters) 311 return SET_ERROR(EINVAL); 312 if (newval > nmbclusters_limit()) 313 return SET_ERROR(EINVAL); 314 nmbclusters = newval; 315 pool_cache_sethardlimit(mcl_cache, nmbclusters, 316 mclpool_warnmsg, 60); 317 break; 318 case MBUF_MBLOWAT: 319 mblowat = newval; 320 pool_cache_setlowat(mb_cache, mblowat); 321 break; 322 case MBUF_MCLLOWAT: 323 mcllowat = newval; 324 pool_cache_setlowat(mcl_cache, mcllowat); 325 break; 326 } 327 328 return 0; 329 } 330 331 #ifdef MBUFTRACE 332 static void 333 mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 334 { 335 struct mowner_counter *mc = v1; 336 struct mowner_user *mo_user = v2; 337 int i; 338 339 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) { 340 mo_user->mo_counter[i] += mc->mc_counter[i]; 341 } 342 } 343 344 static void 345 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user) 346 { 347 348 memset(mo_user, 0, sizeof(*mo_user)); 349 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name)); 350 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr)); 351 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name)); 352 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr)); 353 percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user); 354 } 355 356 static int 357 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS) 358 { 359 struct mowner *mo; 360 size_t len = 0; 361 int error = 0; 362 363 if (namelen != 0) 364 return SET_ERROR(EINVAL); 365 if (newp != NULL) 366 return SET_ERROR(EPERM); 367 368 LIST_FOREACH(mo, &mowners, mo_link) { 369 struct mowner_user mo_user; 370 371 mowner_convert_to_user(mo, &mo_user); 372 373 if (oldp != NULL) { 374 if (*oldlenp - len < sizeof(mo_user)) { 375 error = SET_ERROR(ENOMEM); 376 break; 377 } 378 error = copyout(&mo_user, (char *)oldp + len, 379 sizeof(mo_user)); 380 if (error) 381 break; 382 } 383 len += sizeof(mo_user); 384 } 385 386 if (error == 0) 387 *oldlenp = len; 388 389 return error; 390 } 391 #endif /* MBUFTRACE */ 392 393 void 394 mbstat_type_add(int type, int diff) 395 { 396 struct mbstat_cpu *mb; 397 int s; 398 399 s = splvm(); 400 mb = percpu_getref(mbstat_percpu); 401 mb->m_mtypes[type] += diff; 402 percpu_putref(mbstat_percpu); 403 splx(s); 404 } 405 406 static void 407 mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci) 408 { 409 struct mbstat_cpu *mbsc = v1; 410 struct mbstat *mbs = v2; 411 int i; 412 413 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) { 414 mbs->m_mtypes[i] += mbsc->m_mtypes[i]; 415 } 416 } 417 418 static void 419 mbstat_convert_to_user(struct mbstat *mbs) 420 { 421 422 memset(mbs, 0, sizeof(*mbs)); 423 mbs->m_drain = mbstat.m_drain; 424 percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs); 425 } 426 427 static int 428 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS) 429 { 430 struct sysctlnode node; 431 struct mbstat mbs; 432 433 
mbstat_convert_to_user(&mbs); 434 node = *rnode; 435 node.sysctl_data = &mbs; 436 node.sysctl_size = sizeof(mbs); 437 return sysctl_lookup(SYSCTLFN_CALL(&node)); 438 } 439 440 static void 441 sysctl_kern_mbuf_setup(void) 442 { 443 444 KASSERT(mbuf_sysctllog == NULL); 445 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 446 CTLFLAG_PERMANENT, 447 CTLTYPE_NODE, "mbuf", 448 SYSCTL_DESCR("mbuf control variables"), 449 NULL, 0, NULL, 0, 450 CTL_KERN, KERN_MBUF, CTL_EOL); 451 452 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 453 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 454 CTLTYPE_INT, "msize", 455 SYSCTL_DESCR("mbuf base size"), 456 NULL, msize, NULL, 0, 457 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL); 458 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 459 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 460 CTLTYPE_INT, "mclbytes", 461 SYSCTL_DESCR("mbuf cluster size"), 462 NULL, mclbytes, NULL, 0, 463 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL); 464 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 465 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 466 CTLTYPE_INT, "nmbclusters", 467 SYSCTL_DESCR("Limit on the number of mbuf clusters"), 468 sysctl_kern_mbuf, 0, &nmbclusters, 0, 469 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL); 470 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 471 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 472 CTLTYPE_INT, "mblowat", 473 SYSCTL_DESCR("mbuf low water mark"), 474 sysctl_kern_mbuf, 0, &mblowat, 0, 475 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL); 476 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 477 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 478 CTLTYPE_INT, "mcllowat", 479 SYSCTL_DESCR("mbuf cluster low water mark"), 480 sysctl_kern_mbuf, 0, &mcllowat, 0, 481 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL); 482 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 483 CTLFLAG_PERMANENT, 484 CTLTYPE_STRUCT, "stats", 485 SYSCTL_DESCR("mbuf allocation statistics"), 486 sysctl_kern_mbuf_stats, 0, NULL, 0, 487 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL); 488 #ifdef MBUFTRACE 489 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 490 CTLFLAG_PERMANENT, 491 CTLTYPE_STRUCT, "mowners", 492 SYSCTL_DESCR("Information about mbuf owners"), 493 sysctl_kern_mbuf_mowners, 0, NULL, 0, 494 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL); 495 #endif 496 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL, 497 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 498 CTLTYPE_INT, "nmbclusters_limit", 499 SYSCTL_DESCR("Limit of nmbclusters"), 500 sysctl_kern_mbuf, 0, NULL, 0, 501 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS_LIMIT, CTL_EOL); 502 } 503 504 static int 505 mb_ctor(void *arg, void *object, int flags) 506 { 507 struct mbuf *m = object; 508 509 #ifdef POOL_VTOPHYS 510 m->m_paddr = POOL_VTOPHYS(m); 511 #else 512 m->m_paddr = M_PADDR_INVALID; 513 #endif 514 return 0; 515 } 516 517 /* 518 * Add mbuf to the end of a chain 519 */ 520 struct mbuf * 521 m_add(struct mbuf *c, struct mbuf *m) 522 { 523 struct mbuf *n; 524 525 if (c == NULL) 526 return m; 527 528 for (n = c; n->m_next != NULL; n = n->m_next) 529 continue; 530 n->m_next = m; 531 return c; 532 } 533 534 struct mbuf * 535 m_get(int how, int type) 536 { 537 struct mbuf *m; 538 539 KASSERT(type != MT_FREE); 540 541 m = pool_cache_get(mb_cache, 542 how == M_WAIT ? 
PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT); 543 if (m == NULL) 544 return NULL; 545 KASSERTMSG(((vaddr_t)m->m_dat & PAGE_MASK) + MLEN <= PAGE_SIZE, 546 "m=%p m->m_dat=%p" 547 " MLEN=%u PAGE_MASK=0x%x PAGE_SIZE=%u", 548 m, m->m_dat, 549 (unsigned)MLEN, (unsigned)PAGE_MASK, (unsigned)PAGE_SIZE); 550 551 mbstat_type_add(type, 1); 552 553 mowner_init(m, type); 554 m->m_ext_ref = m; /* default */ 555 m->m_type = type; 556 m->m_len = 0; 557 m->m_next = NULL; 558 m->m_nextpkt = NULL; /* default */ 559 m->m_data = m->m_dat; 560 m->m_flags = 0; /* default */ 561 562 return m; 563 } 564 565 struct mbuf * 566 m_gethdr(int how, int type) 567 { 568 struct mbuf *m; 569 570 m = m_get(how, type); 571 if (m == NULL) 572 return NULL; 573 574 m->m_data = m->m_pktdat; 575 m->m_flags = M_PKTHDR; 576 577 m_reset_rcvif(m); 578 m->m_pkthdr.len = 0; 579 m->m_pkthdr.csum_flags = 0; 580 m->m_pkthdr.csum_data = 0; 581 m->m_pkthdr.segsz = 0; 582 m->m_pkthdr.ether_vtag = 0; 583 m->m_pkthdr.pkthdr_flags = 0; 584 SLIST_INIT(&m->m_pkthdr.tags); 585 586 m->m_pkthdr.pattr_class = NULL; 587 m->m_pkthdr.pattr_af = AF_UNSPEC; 588 m->m_pkthdr.pattr_hdr = NULL; 589 590 return m; 591 } 592 593 struct mbuf * 594 m_get_n(int how, int type, size_t alignbytes, size_t nbytes) 595 { 596 struct mbuf *m; 597 598 if (alignbytes > MCLBYTES || nbytes > MCLBYTES - alignbytes) 599 return NULL; 600 if ((m = m_get(how, type)) == NULL) 601 return NULL; 602 if (nbytes + alignbytes > MLEN) { 603 m_clget(m, how); 604 if ((m->m_flags & M_EXT) == 0) { 605 m_free(m); 606 return NULL; 607 } 608 } 609 m->m_len = alignbytes + nbytes; 610 m_adj(m, alignbytes); 611 612 return m; 613 } 614 615 struct mbuf * 616 m_gethdr_n(int how, int type, size_t alignbytes, size_t nbytes) 617 { 618 struct mbuf *m; 619 620 if (nbytes > MCLBYTES || nbytes > MCLBYTES - alignbytes) 621 return NULL; 622 if ((m = m_gethdr(how, type)) == NULL) 623 return NULL; 624 if (alignbytes + nbytes > MHLEN) { 625 m_clget(m, how); 626 if ((m->m_flags & M_EXT) == 0) { 627 m_free(m); 628 return NULL; 629 } 630 } 631 m->m_len = m->m_pkthdr.len = alignbytes + nbytes; 632 m_adj(m, alignbytes); 633 634 return m; 635 } 636 637 void 638 m_clget(struct mbuf *m, int how) 639 { 640 m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache, 641 how == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT, 642 &m->m_ext_storage.ext_paddr); 643 644 if (m->m_ext_storage.ext_buf == NULL) 645 return; 646 647 KASSERTMSG((((vaddr_t)m->m_ext_storage.ext_buf & PAGE_MASK) + mclbytes 648 <= PAGE_SIZE), 649 "m=%p m->m_ext_storage.ext_buf=%p" 650 " mclbytes=%u PAGE_MASK=0x%x PAGE_SIZE=%u", 651 m, m->m_dat, 652 (unsigned)mclbytes, (unsigned)PAGE_MASK, (unsigned)PAGE_SIZE); 653 654 MCLINITREFERENCE(m); 655 m->m_data = m->m_ext.ext_buf; 656 m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) | 657 M_EXT|M_EXT_CLUSTER|M_EXT_RW; 658 m->m_ext.ext_size = MCLBYTES; 659 m->m_ext.ext_free = NULL; 660 m->m_ext.ext_arg = NULL; 661 /* ext_paddr initialized above */ 662 663 mowner_ref(m, M_EXT|M_EXT_CLUSTER); 664 } 665 666 struct mbuf * 667 m_getcl(int how, int type, int flags) 668 { 669 struct mbuf *mp; 670 671 if ((flags & M_PKTHDR) != 0) 672 mp = m_gethdr(how, type); 673 else 674 mp = m_get(how, type); 675 676 if (mp == NULL) 677 return NULL; 678 679 MCLGET(mp, how); 680 if ((mp->m_flags & M_EXT) != 0) 681 return mp; 682 683 m_free(mp); 684 return NULL; 685 } 686 687 /* 688 * Utility function for M_PREPEND. Do *NOT* use it directly. 
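 *
 * Callers should go through the M_PREPEND() macro, which reuses any
 * leading space in the first mbuf and only falls back to this routine
 * when a new mbuf is really needed.  A minimal sketch of that usage
 * ("struct foo_hdr" is a hypothetical header type, for illustration
 * only):
 *
 *	struct foo_hdr *fh;
 *
 *	M_PREPEND(m, sizeof(*fh), M_DONTWAIT);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	fh = mtod(m, struct foo_hdr *);
 *
 * On failure the original chain has already been freed, so the caller
 * must not touch it again.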
689 */ 690 struct mbuf * 691 m_prepend(struct mbuf *m, int len, int how) 692 { 693 struct mbuf *mn; 694 695 if (__predict_false(len > MHLEN)) { 696 panic("%s: len > MHLEN", __func__); 697 } 698 699 KASSERT(len != M_COPYALL); 700 mn = m_get(how, m->m_type); 701 if (mn == NULL) { 702 m_freem(m); 703 return NULL; 704 } 705 706 if (m->m_flags & M_PKTHDR) { 707 m_move_pkthdr(mn, m); 708 } else { 709 MCLAIM(mn, m->m_owner); 710 } 711 mn->m_next = m; 712 m = mn; 713 714 if (m->m_flags & M_PKTHDR) { 715 if (len < MHLEN) 716 m_align(m, len); 717 } else { 718 if (len < MLEN) 719 m_align(m, len); 720 } 721 722 m->m_len = len; 723 return m; 724 } 725 726 struct mbuf * 727 m_copym(struct mbuf *m, int off, int len, int wait) 728 { 729 /* Shallow copy on M_EXT. */ 730 return m_copy_internal(m, off, len, wait, false); 731 } 732 733 struct mbuf * 734 m_dup(struct mbuf *m, int off, int len, int wait) 735 { 736 /* Deep copy. */ 737 return m_copy_internal(m, off, len, wait, true); 738 } 739 740 static inline int 741 m_copylen(int len, int copylen) 742 { 743 return (len == M_COPYALL) ? copylen : uimin(len, copylen); 744 } 745 746 static struct mbuf * 747 m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep) 748 { 749 struct mbuf *m0 __diagused = m; 750 int len0 __diagused = len; 751 struct mbuf *n, **np; 752 int off = off0; 753 struct mbuf *top; 754 int copyhdr = 0; 755 756 if (off < 0 || (len != M_COPYALL && len < 0)) 757 panic("%s: off %d, len %d", __func__, off, len); 758 if (off == 0 && m->m_flags & M_PKTHDR) 759 copyhdr = 1; 760 while (off > 0) { 761 if (m == NULL) 762 panic("%s: m == NULL, off %d", __func__, off); 763 if (off < m->m_len) 764 break; 765 off -= m->m_len; 766 m = m->m_next; 767 } 768 769 np = ⊤ 770 top = NULL; 771 while (len == M_COPYALL || len > 0) { 772 if (m == NULL) { 773 if (len != M_COPYALL) 774 panic("%s: m == NULL, len %d [!COPYALL]", 775 __func__, len); 776 break; 777 } 778 779 n = m_get(wait, m->m_type); 780 *np = n; 781 if (n == NULL) 782 goto nospace; 783 MCLAIM(n, m->m_owner); 784 785 if (copyhdr) { 786 m_copy_pkthdr(n, m); 787 if (len == M_COPYALL) 788 n->m_pkthdr.len -= off0; 789 else 790 n->m_pkthdr.len = len; 791 copyhdr = 0; 792 } 793 n->m_len = m_copylen(len, m->m_len - off); 794 795 if (m->m_flags & M_EXT) { 796 if (!deep) { 797 n->m_data = m->m_data + off; 798 MCLADDREFERENCE(m, n); 799 } else { 800 /* 801 * We don't care if MCLGET fails. n->m_len is 802 * recomputed and handles that. 803 */ 804 MCLGET(n, wait); 805 n->m_len = 0; 806 n->m_len = M_TRAILINGSPACE(n); 807 n->m_len = m_copylen(len, n->m_len); 808 n->m_len = uimin(n->m_len, m->m_len - off); 809 memcpy(mtod(n, void *), mtod(m, char *) + off, 810 (unsigned)n->m_len); 811 } 812 } else { 813 memcpy(mtod(n, void *), mtod(m, char *) + off, 814 (unsigned)n->m_len); 815 } 816 817 if (len != M_COPYALL) 818 len -= n->m_len; 819 off += n->m_len; 820 821 KASSERTMSG(off <= m->m_len, 822 "m=%p m->m_len=%d off=%d len=%d m0=%p off0=%d len0=%d", 823 m, m->m_len, off, len, m0, off0, len0); 824 825 if (off == m->m_len) { 826 m = m->m_next; 827 off = 0; 828 } 829 np = &n->m_next; 830 } 831 832 return top; 833 834 nospace: 835 m_freem(top); 836 return NULL; 837 } 838 839 /* 840 * Copy an entire packet, including header (which must be present). 841 * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'. 
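 *
 * As with m_copym(), external (M_EXT) storage is shared by reference
 * rather than copied, so the result must be treated as read-only; use
 * m_dup() when a writable deep copy is needed.  A minimal sketch:
 *
 *	struct mbuf *n = m_copypacket(m, M_DONTWAIT);
 *	if (n == NULL)
 *		return ENOBUFS;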
842 */ 843 struct mbuf * 844 m_copypacket(struct mbuf *m, int how) 845 { 846 struct mbuf *top, *n, *o; 847 848 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 849 panic("%s: no header (m = %p)", __func__, m); 850 } 851 852 n = m_get(how, m->m_type); 853 top = n; 854 if (!n) 855 goto nospace; 856 857 MCLAIM(n, m->m_owner); 858 m_copy_pkthdr(n, m); 859 n->m_len = m->m_len; 860 if (m->m_flags & M_EXT) { 861 n->m_data = m->m_data; 862 MCLADDREFERENCE(m, n); 863 } else { 864 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 865 } 866 867 m = m->m_next; 868 while (m) { 869 o = m_get(how, m->m_type); 870 if (!o) 871 goto nospace; 872 873 MCLAIM(o, m->m_owner); 874 n->m_next = o; 875 n = n->m_next; 876 877 n->m_len = m->m_len; 878 if (m->m_flags & M_EXT) { 879 n->m_data = m->m_data; 880 MCLADDREFERENCE(m, n); 881 } else { 882 memcpy(mtod(n, char *), mtod(m, char *), n->m_len); 883 } 884 885 m = m->m_next; 886 } 887 return top; 888 889 nospace: 890 m_freem(top); 891 return NULL; 892 } 893 894 void 895 m_copydata(struct mbuf *m, int off, int len, void *cp) 896 { 897 unsigned int count; 898 struct mbuf *m0 = m; 899 int len0 = len; 900 int off0 = off; 901 void *cp0 = cp; 902 903 KASSERT(len != M_COPYALL); 904 if (off < 0 || len < 0) 905 panic("m_copydata: off %d, len %d", off, len); 906 while (off > 0) { 907 if (m == NULL) 908 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)", 909 m0, len0, off0, cp0, off, off0 - off); 910 if (off < m->m_len) 911 break; 912 off -= m->m_len; 913 m = m->m_next; 914 } 915 while (len > 0) { 916 if (m == NULL) 917 panic("m_copydata(%p,%d,%d,%p): " 918 "m=NULL, off=%d (%d), len=%d (%d)", 919 m0, len0, off0, cp0, 920 off, off0 - off, len, len0 - len); 921 count = uimin(m->m_len - off, len); 922 memcpy(cp, mtod(m, char *) + off, count); 923 len -= count; 924 cp = (char *)cp + count; 925 off = 0; 926 m = m->m_next; 927 } 928 } 929 930 /* 931 * Concatenate mbuf chain n to m. 932 * n might be copied into m (when n->m_len is small), therefore data portion of 933 * n could be copied into an mbuf of different mbuf type. 934 * Any m_pkthdr is not updated. 935 */ 936 void 937 m_cat(struct mbuf *m, struct mbuf *n) 938 { 939 940 while (m->m_next) 941 m = m->m_next; 942 while (n) { 943 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) { 944 /* just join the two chains */ 945 m->m_next = n; 946 return; 947 } 948 /* splat the data from one into the other */ 949 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 950 (u_int)n->m_len); 951 m->m_len += n->m_len; 952 n = m_free(n); 953 } 954 } 955 956 void 957 m_adj(struct mbuf *mp, int req_len) 958 { 959 int len = req_len; 960 struct mbuf *m; 961 int count; 962 963 if ((m = mp) == NULL) 964 return; 965 if (len >= 0) { 966 /* 967 * Trim from head. 968 */ 969 while (m != NULL && len > 0) { 970 if (m->m_len <= len) { 971 len -= m->m_len; 972 m->m_len = 0; 973 m = m->m_next; 974 } else { 975 m->m_len -= len; 976 m->m_data += len; 977 len = 0; 978 } 979 } 980 if (mp->m_flags & M_PKTHDR) 981 mp->m_pkthdr.len -= (req_len - len); 982 } else { 983 /* 984 * Trim from tail. Scan the mbuf chain, 985 * calculating its length and finding the last mbuf. 986 * If the adjustment only affects this mbuf, then just 987 * adjust and return. Otherwise, rescan and truncate 988 * after the remaining size. 
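		 *
		 * For example (illustrative numbers only): with a chain
		 * of lengths 10+20+30 and m_adj(mp, -35), the last mbuf
		 * cannot absorb the whole trim, so the chain is
		 * rescanned and cut back to 25 bytes total: the first
		 * mbuf keeps its 10 bytes, the second is reduced to 15,
		 * and the third is emptied.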
989 */ 990 len = -len; 991 count = 0; 992 for (;;) { 993 count += m->m_len; 994 if (m->m_next == NULL) 995 break; 996 m = m->m_next; 997 } 998 if (m->m_len >= len) { 999 m->m_len -= len; 1000 if (mp->m_flags & M_PKTHDR) 1001 mp->m_pkthdr.len -= len; 1002 return; 1003 } 1004 1005 count -= len; 1006 if (count < 0) 1007 count = 0; 1008 1009 /* 1010 * Correct length for chain is "count". 1011 * Find the mbuf with last data, adjust its length, 1012 * and toss data from remaining mbufs on chain. 1013 */ 1014 m = mp; 1015 if (m->m_flags & M_PKTHDR) 1016 m->m_pkthdr.len = count; 1017 for (; m; m = m->m_next) { 1018 if (m->m_len >= count) { 1019 m->m_len = count; 1020 break; 1021 } 1022 count -= m->m_len; 1023 } 1024 if (m) { 1025 while (m->m_next) 1026 (m = m->m_next)->m_len = 0; 1027 } 1028 } 1029 } 1030 1031 /* 1032 * m_ensure_contig: rearrange an mbuf chain that given length of bytes 1033 * would be contiguous and in the data area of an mbuf (therefore, mtod() 1034 * would work for a structure of given length). 1035 * 1036 * => On success, returns true and the resulting mbuf chain; false otherwise. 1037 * => The mbuf chain may change, but is always preserved valid. 1038 */ 1039 bool 1040 m_ensure_contig(struct mbuf **m0, int len) 1041 { 1042 struct mbuf *n = *m0, *m; 1043 size_t count, space; 1044 1045 KASSERT(len != M_COPYALL); 1046 /* 1047 * If first mbuf has no cluster, and has room for len bytes 1048 * without shifting current data, pullup into it, 1049 * otherwise allocate a new mbuf to prepend to the chain. 1050 */ 1051 if ((n->m_flags & M_EXT) == 0 && 1052 n->m_data + len < &n->m_dat[MLEN] && n->m_next) { 1053 if (n->m_len >= len) { 1054 return true; 1055 } 1056 m = n; 1057 n = n->m_next; 1058 len -= m->m_len; 1059 } else { 1060 if (len > MHLEN) { 1061 return false; 1062 } 1063 m = m_get(M_DONTWAIT, n->m_type); 1064 if (m == NULL) { 1065 return false; 1066 } 1067 MCLAIM(m, n->m_owner); 1068 if (n->m_flags & M_PKTHDR) { 1069 m_move_pkthdr(m, n); 1070 } 1071 } 1072 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1073 do { 1074 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len); 1075 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1076 (unsigned)count); 1077 len -= count; 1078 m->m_len += count; 1079 n->m_len -= count; 1080 space -= count; 1081 if (n->m_len) 1082 n->m_data += count; 1083 else 1084 n = m_free(n); 1085 } while (len > 0 && n); 1086 1087 m->m_next = n; 1088 *m0 = m; 1089 1090 return len <= 0; 1091 } 1092 1093 /* 1094 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error. 1095 */ 1096 struct mbuf * 1097 m_pullup(struct mbuf *n, int len) 1098 { 1099 struct mbuf *m = n; 1100 1101 KASSERT(len != M_COPYALL); 1102 if (!m_ensure_contig(&m, len)) { 1103 KASSERT(m != NULL); 1104 m_freem(m); 1105 m = NULL; 1106 } 1107 return m; 1108 } 1109 1110 /* 1111 * ensure that [off, off + len) is contiguous on the mbuf chain "m". 1112 * packet chain before "off" is kept untouched. 1113 * if offp == NULL, the target will start at <retval, 0> on resulting chain. 1114 * if offp != NULL, the target will start at <retval, *offp> on resulting chain. 1115 * 1116 * on error return (NULL return value), original "m" will be freed. 1117 * 1118 * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster) 1119 */ 1120 struct mbuf * 1121 m_pulldown(struct mbuf *m, int off, int len, int *offp) 1122 { 1123 struct mbuf *n, *o; 1124 int hlen, tlen, olen; 1125 int sharedcluster; 1126 1127 /* Check invalid arguments. 
*/ 1128 if (m == NULL) 1129 panic("%s: m == NULL", __func__); 1130 if (len > MCLBYTES) { 1131 m_freem(m); 1132 return NULL; 1133 } 1134 1135 n = m; 1136 while (n != NULL && off > 0) { 1137 if (n->m_len > off) 1138 break; 1139 off -= n->m_len; 1140 n = n->m_next; 1141 } 1142 /* Be sure to point non-empty mbuf. */ 1143 while (n != NULL && n->m_len == 0) 1144 n = n->m_next; 1145 if (!n) { 1146 m_freem(m); 1147 return NULL; /* mbuf chain too short */ 1148 } 1149 1150 sharedcluster = M_READONLY(n); 1151 1152 /* 1153 * The target data is on <n, off>. If we got enough data on the mbuf 1154 * "n", we're done. 1155 */ 1156 #ifdef __NO_STRICT_ALIGNMENT 1157 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster) 1158 #else 1159 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster && 1160 ALIGNED_POINTER((mtod(n, char *) + off), uint32_t)) 1161 #endif 1162 goto ok; 1163 1164 /* 1165 * When (len <= n->m_len - off) and (off != 0), it is a special case. 1166 * Len bytes from <n, off> sit in single mbuf, but the caller does 1167 * not like the starting position (off). 1168 * 1169 * Chop the current mbuf into two pieces, set off to 0. 1170 */ 1171 if (len <= n->m_len - off) { 1172 struct mbuf *mlast; 1173 1174 o = m_dup(n, off, n->m_len - off, M_DONTWAIT); 1175 if (o == NULL) { 1176 m_freem(m); 1177 return NULL; /* ENOBUFS */ 1178 } 1179 KASSERTMSG(o->m_len >= len, "o=%p o->m_len=%d len=%d", 1180 o, o->m_len, len); 1181 for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next) 1182 ; 1183 n->m_len = off; 1184 mlast->m_next = n->m_next; 1185 n->m_next = o; 1186 n = o; 1187 off = 0; 1188 goto ok; 1189 } 1190 1191 /* 1192 * We need to take hlen from <n, off> and tlen from <n->m_next, 0>, 1193 * and construct contiguous mbuf with m_len == len. 1194 * 1195 * Note that hlen + tlen == len, and tlen > 0. 1196 */ 1197 hlen = n->m_len - off; 1198 tlen = len - hlen; 1199 1200 /* 1201 * Ensure that we have enough trailing data on mbuf chain. If not, 1202 * we can do nothing about the chain. 1203 */ 1204 olen = 0; 1205 for (o = n->m_next; o != NULL; o = o->m_next) 1206 olen += o->m_len; 1207 if (hlen + olen < len) { 1208 m_freem(m); 1209 return NULL; /* mbuf chain too short */ 1210 } 1211 1212 /* 1213 * Easy cases first. We need to use m_copydata() to get data from 1214 * <n->m_next, 0>. 1215 */ 1216 if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen && 1217 !sharedcluster) { 1218 m_copydata(n->m_next, 0, tlen, mtod(n, char *) + n->m_len); 1219 n->m_len += tlen; 1220 m_adj(n->m_next, tlen); 1221 goto ok; 1222 } 1223 if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen && 1224 #ifndef __NO_STRICT_ALIGNMENT 1225 ALIGNED_POINTER((n->m_next->m_data - hlen), uint32_t) && 1226 #endif 1227 !sharedcluster && n->m_next->m_len >= tlen) { 1228 n->m_next->m_data -= hlen; 1229 n->m_next->m_len += hlen; 1230 memcpy(mtod(n->m_next, void *), mtod(n, char *) + off, hlen); 1231 n->m_len -= hlen; 1232 n = n->m_next; 1233 off = 0; 1234 goto ok; 1235 } 1236 1237 /* 1238 * Now, we need to do the hard way. Don't copy as there's no room 1239 * on both ends. 
1240 */ 1241 o = m_get(M_DONTWAIT, m->m_type); 1242 if (o && len > MLEN) { 1243 MCLGET(o, M_DONTWAIT); 1244 if ((o->m_flags & M_EXT) == 0) { 1245 m_free(o); 1246 o = NULL; 1247 } 1248 } 1249 if (!o) { 1250 m_freem(m); 1251 return NULL; /* ENOBUFS */ 1252 } 1253 /* get hlen from <n, off> into <o, 0> */ 1254 o->m_len = hlen; 1255 memcpy(mtod(o, void *), mtod(n, char *) + off, hlen); 1256 n->m_len -= hlen; 1257 /* get tlen from <n->m_next, 0> into <o, hlen> */ 1258 m_copydata(n->m_next, 0, tlen, mtod(o, char *) + o->m_len); 1259 o->m_len += tlen; 1260 m_adj(n->m_next, tlen); 1261 o->m_next = n->m_next; 1262 n->m_next = o; 1263 n = o; 1264 off = 0; 1265 1266 ok: 1267 if (offp) 1268 *offp = off; 1269 return n; 1270 } 1271 1272 /* 1273 * Like m_pullup(), except a new mbuf is always allocated, and we allow 1274 * the amount of empty space before the data in the new mbuf to be specified 1275 * (in the event that the caller expects to prepend later). 1276 */ 1277 struct mbuf * 1278 m_copyup(struct mbuf *n, int len, int dstoff) 1279 { 1280 struct mbuf *m; 1281 int count, space; 1282 1283 KASSERT(len != M_COPYALL); 1284 if (len > ((int)MHLEN - dstoff)) 1285 goto bad; 1286 m = m_get(M_DONTWAIT, n->m_type); 1287 if (m == NULL) 1288 goto bad; 1289 MCLAIM(m, n->m_owner); 1290 if (n->m_flags & M_PKTHDR) { 1291 m_move_pkthdr(m, n); 1292 } 1293 m->m_data += dstoff; 1294 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1295 do { 1296 count = uimin(uimin(uimax(len, max_protohdr), space), n->m_len); 1297 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *), 1298 (unsigned)count); 1299 len -= count; 1300 m->m_len += count; 1301 n->m_len -= count; 1302 space -= count; 1303 if (n->m_len) 1304 n->m_data += count; 1305 else 1306 n = m_free(n); 1307 } while (len > 0 && n); 1308 if (len > 0) { 1309 (void) m_free(m); 1310 goto bad; 1311 } 1312 m->m_next = n; 1313 return m; 1314 bad: 1315 m_freem(n); 1316 return NULL; 1317 } 1318 1319 struct mbuf * 1320 m_split(struct mbuf *m0, int len, int wait) 1321 { 1322 return m_split_internal(m0, len, wait, true); 1323 } 1324 1325 static struct mbuf * 1326 m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr) 1327 { 1328 struct mbuf *m, *n; 1329 unsigned len = len0, remain, len_save; 1330 1331 KASSERT(len0 != M_COPYALL); 1332 for (m = m0; m && len > m->m_len; m = m->m_next) 1333 len -= m->m_len; 1334 if (m == NULL) 1335 return NULL; 1336 1337 remain = m->m_len - len; 1338 if (copyhdr && (m0->m_flags & M_PKTHDR)) { 1339 n = m_gethdr(wait, m0->m_type); 1340 if (n == NULL) 1341 return NULL; 1342 1343 MCLAIM(n, m0->m_owner); 1344 m_copy_rcvif(n, m0); 1345 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1346 len_save = m0->m_pkthdr.len; 1347 m0->m_pkthdr.len = len0; 1348 1349 if ((m->m_flags & M_EXT) == 0 && remain > MHLEN) { 1350 /* m can't be the lead packet */ 1351 m_align(n, 0); 1352 n->m_len = 0; 1353 n->m_next = m_split(m, len, wait); 1354 if (n->m_next == NULL) { 1355 (void)m_free(n); 1356 m0->m_pkthdr.len = len_save; 1357 return NULL; 1358 } 1359 return n; 1360 } 1361 } else if (remain == 0) { 1362 n = m->m_next; 1363 m->m_next = NULL; 1364 return n; 1365 } else { 1366 n = m_get(wait, m->m_type); 1367 if (n == NULL) 1368 return NULL; 1369 MCLAIM(n, m->m_owner); 1370 } 1371 1372 if (m->m_flags & M_EXT) { 1373 n->m_data = m->m_data + len; 1374 MCLADDREFERENCE(m, n); 1375 } else { 1376 m_align(n, remain); 1377 memcpy(mtod(n, void *), mtod(m, char *) + len, remain); 1378 } 1379 1380 n->m_len = remain; 1381 m->m_len = len; 1382 n->m_next = m->m_next; 1383 m->m_next = NULL; 
1384 return n; 1385 } 1386 1387 /* 1388 * Routine to copy from device local memory into mbufs. 1389 */ 1390 struct mbuf * 1391 m_devget(char *buf, int totlen, int off, struct ifnet *ifp) 1392 { 1393 struct mbuf *m; 1394 struct mbuf *top = NULL, **mp = ⊤ 1395 char *cp, *epkt; 1396 int len; 1397 1398 cp = buf; 1399 epkt = cp + totlen; 1400 if (off) { 1401 /* 1402 * If 'off' is non-zero, packet is trailer-encapsulated, 1403 * so we have to skip the type and length fields. 1404 */ 1405 cp += off + 2 * sizeof(uint16_t); 1406 totlen -= 2 * sizeof(uint16_t); 1407 } 1408 1409 m = m_gethdr(M_DONTWAIT, MT_DATA); 1410 if (m == NULL) 1411 return NULL; 1412 m_set_rcvif(m, ifp); 1413 m->m_pkthdr.len = totlen; 1414 m->m_len = MHLEN; 1415 1416 while (totlen > 0) { 1417 if (top) { 1418 m = m_get(M_DONTWAIT, MT_DATA); 1419 if (m == NULL) { 1420 m_freem(top); 1421 return NULL; 1422 } 1423 m->m_len = MLEN; 1424 } 1425 1426 len = uimin(totlen, epkt - cp); 1427 1428 if (len >= MINCLSIZE) { 1429 MCLGET(m, M_DONTWAIT); 1430 if ((m->m_flags & M_EXT) == 0) { 1431 m_free(m); 1432 m_freem(top); 1433 return NULL; 1434 } 1435 m->m_len = len = uimin(len, MCLBYTES); 1436 } else { 1437 /* 1438 * Place initial small packet/header at end of mbuf. 1439 */ 1440 if (len < m->m_len) { 1441 if (top == 0 && len + max_linkhdr <= m->m_len) 1442 m->m_data += max_linkhdr; 1443 m->m_len = len; 1444 } else 1445 len = m->m_len; 1446 } 1447 1448 memcpy(mtod(m, void *), cp, (size_t)len); 1449 1450 cp += len; 1451 *mp = m; 1452 mp = &m->m_next; 1453 totlen -= len; 1454 if (cp == epkt) 1455 cp = buf; 1456 } 1457 1458 return top; 1459 } 1460 1461 /* 1462 * Copy data from a buffer back into the indicated mbuf chain, 1463 * starting "off" bytes from the beginning, extending the mbuf 1464 * chain if necessary. 1465 */ 1466 void 1467 m_copyback(struct mbuf *m0, int off, int len, const void *cp) 1468 { 1469 #if defined(DEBUG) 1470 struct mbuf *origm = m0; 1471 int error; 1472 #endif 1473 1474 if (m0 == NULL) 1475 return; 1476 1477 #if defined(DEBUG) 1478 error = 1479 #endif 1480 m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND, 1481 M_DONTWAIT); 1482 1483 #if defined(DEBUG) 1484 if (error != 0 || (m0 != NULL && origm != m0)) 1485 panic("m_copyback"); 1486 #endif 1487 } 1488 1489 struct mbuf * 1490 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) 1491 { 1492 int error; 1493 1494 /* don't support chain expansion */ 1495 KASSERT(len != M_COPYALL); 1496 KDASSERT(off + len <= m_length(m0)); 1497 1498 error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW, 1499 how); 1500 if (error) { 1501 /* 1502 * no way to recover from partial success. 1503 * just free the chain. 
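		 *
		 * Callers must therefore always use the return value
		 * and be prepared for NULL; an illustrative pattern
		 * (buf/off/len are caller-supplied):
		 *
		 *	m = m_copyback_cow(m, off, len, buf, M_DONTWAIT);
		 *	if (m == NULL)
		 *		return ENOBUFS;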
1504 */ 1505 m_freem(m0); 1506 return NULL; 1507 } 1508 return m0; 1509 } 1510 1511 int 1512 m_makewritable(struct mbuf **mp, int off, int len, int how) 1513 { 1514 int error; 1515 #if defined(DEBUG) 1516 int origlen = m_length(*mp); 1517 #endif 1518 1519 error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW, 1520 how); 1521 if (error) 1522 return error; 1523 1524 #if defined(DEBUG) 1525 int reslen = 0; 1526 for (struct mbuf *n = *mp; n; n = n->m_next) 1527 reslen += n->m_len; 1528 if (origlen != reslen) 1529 panic("m_makewritable: length changed"); 1530 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len) 1531 panic("m_makewritable: inconsist"); 1532 #endif 1533 1534 return 0; 1535 } 1536 1537 static int 1538 m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp, 1539 int flags, int how) 1540 { 1541 int mlen; 1542 struct mbuf *m, *n; 1543 struct mbuf **mp; 1544 int totlen = 0; 1545 const char *cp = vp; 1546 1547 KASSERT(mp0 != NULL); 1548 KASSERT(*mp0 != NULL); 1549 KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL); 1550 KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL); 1551 1552 if (len == M_COPYALL) 1553 len = m_length(*mp0) - off; 1554 1555 /* 1556 * we don't bother to update "totlen" in the case of CB_COW, 1557 * assuming that CB_EXTEND and CB_COW are exclusive. 1558 */ 1559 1560 KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0); 1561 1562 mp = mp0; 1563 m = *mp; 1564 while (off > (mlen = m->m_len)) { 1565 off -= mlen; 1566 totlen += mlen; 1567 if (m->m_next == NULL) { 1568 int tspace; 1569 extend: 1570 if ((flags & CB_EXTEND) == 0) 1571 goto out; 1572 1573 /* 1574 * try to make some space at the end of "m". 1575 */ 1576 1577 mlen = m->m_len; 1578 if (off + len >= MINCLSIZE && 1579 (m->m_flags & M_EXT) == 0 && m->m_len == 0) { 1580 MCLGET(m, how); 1581 } 1582 tspace = M_TRAILINGSPACE(m); 1583 if (tspace > 0) { 1584 tspace = uimin(tspace, off + len); 1585 KASSERT(tspace > 0); 1586 memset(mtod(m, char *) + m->m_len, 0, 1587 uimin(off, tspace)); 1588 m->m_len += tspace; 1589 off += mlen; 1590 totlen -= mlen; 1591 continue; 1592 } 1593 1594 /* 1595 * need to allocate an mbuf. 1596 */ 1597 1598 if (off + len >= MINCLSIZE) { 1599 n = m_getcl(how, m->m_type, 0); 1600 } else { 1601 n = m_get(how, m->m_type); 1602 } 1603 if (n == NULL) { 1604 goto out; 1605 } 1606 n->m_len = uimin(M_TRAILINGSPACE(n), off + len); 1607 memset(mtod(n, char *), 0, uimin(n->m_len, off)); 1608 m->m_next = n; 1609 } 1610 mp = &m->m_next; 1611 m = m->m_next; 1612 } 1613 while (len > 0) { 1614 mlen = m->m_len - off; 1615 if (mlen != 0 && M_READONLY(m)) { 1616 /* 1617 * This mbuf is read-only. Allocate a new writable 1618 * mbuf and try again. 1619 */ 1620 char *datap; 1621 int eatlen; 1622 1623 KASSERT((flags & CB_COW) != 0); 1624 1625 /* 1626 * if we're going to write into the middle of 1627 * a mbuf, split it first. 1628 */ 1629 if (off > 0) { 1630 n = m_split_internal(m, off, how, false); 1631 if (n == NULL) 1632 goto enobufs; 1633 m->m_next = n; 1634 mp = &m->m_next; 1635 m = n; 1636 off = 0; 1637 continue; 1638 } 1639 1640 /* 1641 * XXX TODO coalesce into the trailingspace of 1642 * the previous mbuf when possible. 1643 */ 1644 1645 /* 1646 * allocate a new mbuf. copy packet header if needed. 
1647 */ 1648 n = m_get(how, m->m_type); 1649 if (n == NULL) 1650 goto enobufs; 1651 MCLAIM(n, m->m_owner); 1652 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { 1653 m_move_pkthdr(n, m); 1654 n->m_len = MHLEN; 1655 } else { 1656 if (len >= MINCLSIZE) 1657 MCLGET(n, M_DONTWAIT); 1658 n->m_len = 1659 (n->m_flags & M_EXT) ? MCLBYTES : MLEN; 1660 } 1661 if (n->m_len > len) 1662 n->m_len = len; 1663 1664 /* 1665 * free the region which has been overwritten. 1666 * copying data from old mbufs if requested. 1667 */ 1668 if (flags & CB_PRESERVE) 1669 datap = mtod(n, char *); 1670 else 1671 datap = NULL; 1672 eatlen = n->m_len; 1673 while (m != NULL && M_READONLY(m) && 1674 n->m_type == m->m_type && eatlen > 0) { 1675 mlen = uimin(eatlen, m->m_len); 1676 if (datap) { 1677 m_copydata(m, 0, mlen, datap); 1678 datap += mlen; 1679 } 1680 m->m_data += mlen; 1681 m->m_len -= mlen; 1682 eatlen -= mlen; 1683 if (m->m_len == 0) 1684 *mp = m = m_free(m); 1685 } 1686 if (eatlen > 0) 1687 n->m_len -= eatlen; 1688 n->m_next = m; 1689 *mp = m = n; 1690 continue; 1691 } 1692 mlen = uimin(mlen, len); 1693 if (flags & CB_COPYBACK) { 1694 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen); 1695 cp += mlen; 1696 } 1697 len -= mlen; 1698 mlen += off; 1699 off = 0; 1700 totlen += mlen; 1701 if (len == 0) 1702 break; 1703 if (m->m_next == NULL) { 1704 goto extend; 1705 } 1706 mp = &m->m_next; 1707 m = m->m_next; 1708 } 1709 1710 out: 1711 if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { 1712 KASSERT((flags & CB_EXTEND) != 0); 1713 m->m_pkthdr.len = totlen; 1714 } 1715 1716 return 0; 1717 1718 enobufs: 1719 return SET_ERROR(ENOBUFS); 1720 } 1721 1722 /* 1723 * Compress the mbuf chain. Return the new mbuf chain on success, NULL on 1724 * failure. The first mbuf is preserved, and on success the pointer returned 1725 * is the same as the one passed. 1726 */ 1727 struct mbuf * 1728 m_defrag(struct mbuf *m, int how) 1729 { 1730 struct mbuf *m0, *mn, *n; 1731 int sz; 1732 1733 KASSERT((m->m_flags & M_PKTHDR) != 0); 1734 1735 if (m->m_next == NULL) 1736 return m; 1737 1738 /* Defrag to single mbuf if at all possible */ 1739 if ((m->m_flags & M_EXT) == 0 && m->m_pkthdr.len <= MCLBYTES) { 1740 if (m->m_pkthdr.len <= MHLEN) { 1741 if (M_TRAILINGSPACE(m) < (m->m_pkthdr.len - m->m_len)) { 1742 KASSERTMSG(M_LEADINGSPACE(m) + 1743 M_TRAILINGSPACE(m) >= 1744 (m->m_pkthdr.len - m->m_len), 1745 "too small leading %d trailing %d ro? 
%d" 1746 " pkthdr.len %d mlen %d", 1747 (int)M_LEADINGSPACE(m), 1748 (int)M_TRAILINGSPACE(m), 1749 M_READONLY(m), 1750 m->m_pkthdr.len, m->m_len); 1751 1752 memmove(m->m_pktdat, m->m_data, m->m_len); 1753 m->m_data = m->m_pktdat; 1754 1755 KASSERT(M_TRAILINGSPACE(m) >= 1756 (m->m_pkthdr.len - m->m_len)); 1757 } 1758 } else { 1759 /* Must copy data before adding cluster */ 1760 m0 = m_get(how, MT_DATA); 1761 if (m0 == NULL) 1762 return NULL; 1763 KASSERTMSG(m->m_len <= MHLEN, 1764 "m=%p m->m_len=%d MHLEN=%u", 1765 m, m->m_len, (unsigned)MHLEN); 1766 m_copydata(m, 0, m->m_len, mtod(m0, void *)); 1767 1768 MCLGET(m, how); 1769 if ((m->m_flags & M_EXT) == 0) { 1770 m_free(m0); 1771 return NULL; 1772 } 1773 memcpy(m->m_data, mtod(m0, void *), m->m_len); 1774 m_free(m0); 1775 } 1776 KASSERTMSG(M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len), 1777 "m=%p M_TRAILINGSPACE(m)=%zd m->m_pkthdr.len=%d" 1778 " m->m_len=%d", 1779 m, M_TRAILINGSPACE(m), m->m_pkthdr.len, m->m_len); 1780 m_copydata(m->m_next, 0, m->m_pkthdr.len - m->m_len, 1781 mtod(m, char *) + m->m_len); 1782 m->m_len = m->m_pkthdr.len; 1783 m_freem(m->m_next); 1784 m->m_next = NULL; 1785 return m; 1786 } 1787 1788 m0 = m_get(how, MT_DATA); 1789 if (m0 == NULL) 1790 return NULL; 1791 mn = m0; 1792 1793 sz = m->m_pkthdr.len - m->m_len; 1794 KASSERT(sz >= 0); 1795 1796 do { 1797 if (sz > MLEN) { 1798 MCLGET(mn, how); 1799 if ((mn->m_flags & M_EXT) == 0) { 1800 m_freem(m0); 1801 return NULL; 1802 } 1803 } 1804 1805 mn->m_len = MIN(sz, MCLBYTES); 1806 1807 m_copydata(m, m->m_pkthdr.len - sz, mn->m_len, 1808 mtod(mn, void *)); 1809 1810 sz -= mn->m_len; 1811 1812 if (sz > 0) { 1813 /* need more mbufs */ 1814 n = m_get(how, MT_DATA); 1815 if (n == NULL) { 1816 m_freem(m0); 1817 return NULL; 1818 } 1819 1820 mn->m_next = n; 1821 mn = n; 1822 } 1823 } while (sz > 0); 1824 1825 m_freem(m->m_next); 1826 m->m_next = m0; 1827 1828 return m; 1829 } 1830 1831 void 1832 m_remove_pkthdr(struct mbuf *m) 1833 { 1834 KASSERT(m->m_flags & M_PKTHDR); 1835 1836 m_tag_delete_chain(m); 1837 m->m_flags &= ~M_PKTHDR; 1838 memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr)); 1839 } 1840 1841 void 1842 m_copy_pkthdr(struct mbuf *to, struct mbuf *from) 1843 { 1844 KASSERT((to->m_flags & M_EXT) == 0); 1845 KASSERT((to->m_flags & M_PKTHDR) == 0 || 1846 SLIST_FIRST(&to->m_pkthdr.tags) == NULL); 1847 KASSERT((from->m_flags & M_PKTHDR) != 0); 1848 1849 to->m_pkthdr = from->m_pkthdr; 1850 to->m_flags = from->m_flags & M_COPYFLAGS; 1851 to->m_data = to->m_pktdat; 1852 1853 SLIST_INIT(&to->m_pkthdr.tags); 1854 m_tag_copy_chain(to, from); 1855 } 1856 1857 void 1858 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1859 { 1860 KASSERT((to->m_flags & M_EXT) == 0); 1861 KASSERT((to->m_flags & M_PKTHDR) == 0 || 1862 SLIST_FIRST(&to->m_pkthdr.tags) == NULL); 1863 KASSERT((from->m_flags & M_PKTHDR) != 0); 1864 1865 to->m_pkthdr = from->m_pkthdr; 1866 to->m_flags = from->m_flags & M_COPYFLAGS; 1867 to->m_data = to->m_pktdat; 1868 1869 from->m_flags &= ~M_PKTHDR; 1870 } 1871 1872 /* 1873 * Set the m_data pointer of a newly-allocated mbuf to place an object of the 1874 * specified size at the end of the mbuf, longword aligned. 
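 *
 * A minimal sketch of the intended use on a freshly allocated mbuf
 * ("struct foo" stands for whatever object the caller wants mapped
 * with mtod(); it is illustrative only):
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	m_align(m, sizeof(struct foo));
 *	m->m_len = m->m_pkthdr.len = sizeof(struct foo);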
1875 */ 1876 void 1877 m_align(struct mbuf *m, int len) 1878 { 1879 int buflen, adjust; 1880 1881 KASSERT(len != M_COPYALL); 1882 KASSERTMSG(M_LEADINGSPACE(m) == 0, "m=%p M_LEADINGSPACE(m)=%zd", 1883 m, M_LEADINGSPACE(m)); 1884 1885 buflen = M_BUFSIZE(m); 1886 1887 KASSERTMSG(len <= buflen, "m=%p len=%d buflen=%d", m, len, buflen); 1888 adjust = buflen - len; 1889 m->m_data += adjust &~ (sizeof(long)-1); 1890 } 1891 1892 /* 1893 * Apply function f to the data in an mbuf chain starting "off" bytes from the 1894 * beginning, continuing for "len" bytes. 1895 */ 1896 int 1897 m_apply(struct mbuf *m, int off, int len, 1898 int (*f)(void *, void *, unsigned int), void *arg) 1899 { 1900 unsigned int count; 1901 int rval; 1902 1903 KASSERT(len != M_COPYALL); 1904 KASSERT(len >= 0); 1905 KASSERT(off >= 0); 1906 1907 while (off > 0) { 1908 KASSERT(m != NULL); 1909 if (off < m->m_len) 1910 break; 1911 off -= m->m_len; 1912 m = m->m_next; 1913 } 1914 while (len > 0) { 1915 KASSERT(m != NULL); 1916 count = uimin(m->m_len - off, len); 1917 1918 rval = (*f)(arg, mtod(m, char *) + off, count); 1919 if (rval) 1920 return rval; 1921 1922 len -= count; 1923 off = 0; 1924 m = m->m_next; 1925 } 1926 1927 return 0; 1928 } 1929 1930 /* 1931 * Return a pointer to mbuf/offset of location in mbuf chain. 1932 */ 1933 struct mbuf * 1934 m_getptr(struct mbuf *m, int loc, int *off) 1935 { 1936 1937 while (loc >= 0) { 1938 /* Normal end of search */ 1939 if (m->m_len > loc) { 1940 *off = loc; 1941 return m; 1942 } 1943 1944 loc -= m->m_len; 1945 1946 if (m->m_next == NULL) { 1947 if (loc == 0) { 1948 /* Point at the end of valid data */ 1949 *off = m->m_len; 1950 return m; 1951 } 1952 return NULL; 1953 } else { 1954 m = m->m_next; 1955 } 1956 } 1957 1958 return NULL; 1959 } 1960 1961 /* 1962 * Release a reference to the mbuf external storage. 1963 * 1964 * => free the mbuf m itself as well. 1965 */ 1966 static void 1967 m_ext_free(struct mbuf *m) 1968 { 1969 const bool embedded = MEXT_ISEMBEDDED(m); 1970 bool dofree = true; 1971 u_int refcnt; 1972 1973 KASSERT((m->m_flags & M_EXT) != 0); 1974 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref)); 1975 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0); 1976 KASSERT((m->m_flags & M_EXT_CLUSTER) == 1977 (m->m_ext_ref->m_flags & M_EXT_CLUSTER)); 1978 1979 if (__predict_false(m->m_type == MT_FREE)) { 1980 panic("mbuf %p already freed", m); 1981 } 1982 1983 if (__predict_true(m->m_ext.ext_refcnt == 1)) { 1984 refcnt = m->m_ext.ext_refcnt = 0; 1985 } else { 1986 membar_release(); 1987 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); 1988 } 1989 1990 if (refcnt > 0) { 1991 if (embedded) { 1992 /* 1993 * other mbuf's m_ext_ref still points to us. 1994 */ 1995 dofree = false; 1996 } else { 1997 m->m_ext_ref = m; 1998 } 1999 } else { 2000 /* 2001 * dropping the last reference 2002 */ 2003 membar_acquire(); 2004 if (!embedded) { 2005 m->m_ext.ext_refcnt++; /* XXX */ 2006 m_ext_free(m->m_ext_ref); 2007 m->m_ext_ref = m; 2008 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) { 2009 pool_cache_put_paddr(mcl_cache, 2010 m->m_ext.ext_buf, m->m_ext.ext_paddr); 2011 } else if (m->m_ext.ext_free) { 2012 (*m->m_ext.ext_free)(m, 2013 m->m_ext.ext_buf, m->m_ext.ext_size, 2014 m->m_ext.ext_arg); 2015 /* 2016 * 'm' is already freed by the ext_free callback. 
2017 */ 2018 dofree = false; 2019 } else { 2020 free(m->m_ext.ext_buf, 0); 2021 } 2022 } 2023 2024 if (dofree) { 2025 m->m_type = MT_FREE; 2026 m->m_data = NULL; 2027 pool_cache_put(mb_cache, m); 2028 } 2029 } 2030 2031 /* 2032 * Free a single mbuf and associated external storage. Return the 2033 * successor, if any. 2034 */ 2035 struct mbuf * 2036 m_free(struct mbuf *m) 2037 { 2038 struct mbuf *n; 2039 2040 mowner_revoke(m, 1, m->m_flags); 2041 mbstat_type_add(m->m_type, -1); 2042 2043 if (m->m_flags & M_PKTHDR) 2044 m_tag_delete_chain(m); 2045 2046 n = m->m_next; 2047 2048 if (m->m_flags & M_EXT) { 2049 m_ext_free(m); 2050 } else { 2051 if (__predict_false(m->m_type == MT_FREE)) { 2052 panic("mbuf %p already freed", m); 2053 } 2054 m->m_type = MT_FREE; 2055 m->m_data = NULL; 2056 pool_cache_put(mb_cache, m); 2057 } 2058 2059 return n; 2060 } 2061 2062 void 2063 m_freem(struct mbuf *m) 2064 { 2065 if (m == NULL) 2066 return; 2067 do { 2068 m = m_free(m); 2069 } while (m); 2070 } 2071 2072 #if defined(DDB) 2073 void 2074 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...)) 2075 { 2076 char ch; 2077 bool opt_c = false; 2078 bool opt_d = false; 2079 #if NETHER > 0 2080 bool opt_v = false; 2081 const struct mbuf *m0 = NULL; 2082 #endif 2083 int no = 0; 2084 char buf[512]; 2085 2086 while ((ch = *(modif++)) != '\0') { 2087 switch (ch) { 2088 case 'c': 2089 opt_c = true; 2090 break; 2091 case 'd': 2092 opt_d = true; 2093 break; 2094 #if NETHER > 0 2095 case 'v': 2096 opt_v = true; 2097 m0 = m; 2098 break; 2099 #endif 2100 default: 2101 break; 2102 } 2103 } 2104 2105 nextchain: 2106 (*pr)("MBUF(%d) %p\n", no, m); 2107 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags); 2108 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n", 2109 m->m_data, m->m_len, m->m_type, buf); 2110 if (opt_d) { 2111 int i; 2112 unsigned char *p = m->m_data; 2113 2114 (*pr)(" data:"); 2115 2116 for (i = 0; i < m->m_len; i++) { 2117 if (i % 16 == 0) 2118 (*pr)("\n"); 2119 (*pr)(" %02x", p[i]); 2120 } 2121 2122 (*pr)("\n"); 2123 } 2124 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next, 2125 m->m_nextpkt); 2126 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n", 2127 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m), 2128 (int)M_READONLY(m)); 2129 if ((m->m_flags & M_PKTHDR) != 0) { 2130 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags); 2131 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%" 2132 PRIx32 ", segsz=%u\n", 2133 m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m), 2134 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz); 2135 } 2136 if ((m->m_flags & M_EXT)) { 2137 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, " 2138 "ext_free=%p, ext_arg=%p\n", 2139 m->m_ext.ext_refcnt, 2140 m->m_ext.ext_buf, m->m_ext.ext_size, 2141 m->m_ext.ext_free, m->m_ext.ext_arg); 2142 } 2143 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) { 2144 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf; 2145 vaddr_t eva = sva + m->m_ext.ext_size; 2146 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT; 2147 int i; 2148 2149 (*pr)(" pages:"); 2150 for (i = 0; i < n; i ++) { 2151 (*pr)(" %p", m->m_ext.ext_pgs[i]); 2152 } 2153 (*pr)("\n"); 2154 } 2155 2156 if (opt_c) { 2157 m = m->m_next; 2158 if (m != NULL) { 2159 no++; 2160 goto nextchain; 2161 } 2162 } 2163 2164 #if NETHER > 0 2165 if (opt_v && m0) 2166 m_examine(m0, AF_ETHER, modif, pr); 2167 #endif 2168 } 2169 #endif /* defined(DDB) */ 2170 2171 #if defined(MBUFTRACE) 2172 void 2173 mowner_init_owner(struct mowner *mo, const char *name, 
const char *descr) 2174 { 2175 memset(mo, 0, sizeof(*mo)); 2176 strlcpy(mo->mo_name, name, sizeof(mo->mo_name)); 2177 strlcpy(mo->mo_descr, descr, sizeof(mo->mo_descr)); 2178 } 2179 2180 void 2181 mowner_attach(struct mowner *mo) 2182 { 2183 2184 KASSERT(mo->mo_counters == NULL); 2185 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter)); 2186 2187 /* XXX lock */ 2188 LIST_INSERT_HEAD(&mowners, mo, mo_link); 2189 } 2190 2191 void 2192 mowner_detach(struct mowner *mo) 2193 { 2194 2195 KASSERT(mo->mo_counters != NULL); 2196 2197 /* XXX lock */ 2198 LIST_REMOVE(mo, mo_link); 2199 2200 percpu_free(mo->mo_counters, sizeof(struct mowner_counter)); 2201 mo->mo_counters = NULL; 2202 } 2203 2204 void 2205 mowner_init(struct mbuf *m, int type) 2206 { 2207 struct mowner_counter *mc; 2208 struct mowner *mo; 2209 int s; 2210 2211 m->m_owner = mo = &unknown_mowners[type]; 2212 s = splvm(); 2213 mc = percpu_getref(mo->mo_counters); 2214 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 2215 percpu_putref(mo->mo_counters); 2216 splx(s); 2217 } 2218 2219 void 2220 mowner_ref(struct mbuf *m, int flags) 2221 { 2222 struct mowner *mo = m->m_owner; 2223 struct mowner_counter *mc; 2224 int s; 2225 2226 s = splvm(); 2227 mc = percpu_getref(mo->mo_counters); 2228 if ((flags & M_EXT) != 0) 2229 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 2230 if ((flags & M_EXT_CLUSTER) != 0) 2231 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 2232 percpu_putref(mo->mo_counters); 2233 splx(s); 2234 } 2235 2236 void 2237 mowner_revoke(struct mbuf *m, bool all, int flags) 2238 { 2239 struct mowner *mo = m->m_owner; 2240 struct mowner_counter *mc; 2241 int s; 2242 2243 s = splvm(); 2244 mc = percpu_getref(mo->mo_counters); 2245 if ((flags & M_EXT) != 0) 2246 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++; 2247 if ((flags & M_EXT_CLUSTER) != 0) 2248 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++; 2249 if (all) 2250 mc->mc_counter[MOWNER_COUNTER_RELEASES]++; 2251 percpu_putref(mo->mo_counters); 2252 splx(s); 2253 if (all) 2254 m->m_owner = &revoked_mowner; 2255 } 2256 2257 static void 2258 mowner_claim(struct mbuf *m, struct mowner *mo) 2259 { 2260 struct mowner_counter *mc; 2261 int flags = m->m_flags; 2262 int s; 2263 2264 s = splvm(); 2265 mc = percpu_getref(mo->mo_counters); 2266 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++; 2267 if ((flags & M_EXT) != 0) 2268 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++; 2269 if ((flags & M_EXT_CLUSTER) != 0) 2270 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++; 2271 percpu_putref(mo->mo_counters); 2272 splx(s); 2273 m->m_owner = mo; 2274 } 2275 2276 void 2277 m_claim(struct mbuf *m, struct mowner *mo) 2278 { 2279 2280 if (m->m_owner == mo || mo == NULL) 2281 return; 2282 2283 mowner_revoke(m, true, m->m_flags); 2284 mowner_claim(m, mo); 2285 } 2286 2287 void 2288 m_claimm(struct mbuf *m, struct mowner *mo) 2289 { 2290 2291 for (; m != NULL; m = m->m_next) 2292 m_claim(m, mo); 2293 } 2294 #endif /* defined(MBUFTRACE) */ 2295 2296 #ifdef DIAGNOSTIC 2297 /* 2298 * Verify that the mbuf chain is not malformed. Used only for diagnostic. 2299 * Panics on error. 
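 *
 * Informally, the chain passes if: the first mbuf carries M_PKTHDR,
 * no mbuf is MT_FREE or has m_nextpkt set, each m_data/m_len window
 * lies inside its own buffer, and the summed m_len of the chain
 * equals m_pkthdr.len.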
2300 */ 2301 void 2302 m_verify_packet(struct mbuf *m) 2303 { 2304 struct mbuf *n = m; 2305 char *low, *high, *dat; 2306 int totlen = 0, len; 2307 2308 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) { 2309 panic("%s: mbuf doesn't have M_PKTHDR", __func__); 2310 } 2311 2312 while (n != NULL) { 2313 if (__predict_false(n->m_type == MT_FREE)) { 2314 panic("%s: mbuf already freed (n = %p)", __func__, n); 2315 } 2316 #if 0 2317 /* 2318 * This ought to be a rule of the mbuf API. Unfortunately, 2319 * many places don't respect that rule. 2320 */ 2321 if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) { 2322 panic("%s: M_PKTHDR set on secondary mbuf", __func__); 2323 } 2324 #endif 2325 if (__predict_false(n->m_nextpkt != NULL)) { 2326 panic("%s: m_nextpkt not null (m_nextpkt = %p)", 2327 __func__, n->m_nextpkt); 2328 } 2329 2330 dat = n->m_data; 2331 len = n->m_len; 2332 if (__predict_false(len < 0)) { 2333 panic("%s: incorrect length (len = %d)", __func__, len); 2334 } 2335 2336 low = M_BUFADDR(n); 2337 high = low + M_BUFSIZE(n); 2338 if (__predict_false((dat < low) || (dat + len > high))) { 2339 panic("%s: m_data not in packet" 2340 "(dat = %p, len = %d, low = %p, high = %p)", 2341 __func__, dat, len, low, high); 2342 } 2343 2344 totlen += len; 2345 n = n->m_next; 2346 } 2347 2348 if (__predict_false(totlen != m->m_pkthdr.len)) { 2349 panic("%s: inconsistent mbuf length (%d != %d)", __func__, 2350 totlen, m->m_pkthdr.len); 2351 } 2352 } 2353 #endif 2354 2355 struct m_tag * 2356 m_tag_get(int type, int len, int wait) 2357 { 2358 struct m_tag *t; 2359 2360 if (len < 0) 2361 return NULL; 2362 t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait); 2363 if (t == NULL) 2364 return NULL; 2365 t->m_tag_id = type; 2366 t->m_tag_len = len; 2367 return t; 2368 } 2369 2370 void 2371 m_tag_free(struct m_tag *t) 2372 { 2373 free(t, M_PACKET_TAGS); 2374 } 2375 2376 void 2377 m_tag_prepend(struct mbuf *m, struct m_tag *t) 2378 { 2379 KASSERT((m->m_flags & M_PKTHDR) != 0); 2380 SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); 2381 } 2382 2383 void 2384 m_tag_unlink(struct mbuf *m, struct m_tag *t) 2385 { 2386 KASSERT((m->m_flags & M_PKTHDR) != 0); 2387 SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); 2388 } 2389 2390 void 2391 m_tag_delete(struct mbuf *m, struct m_tag *t) 2392 { 2393 m_tag_unlink(m, t); 2394 m_tag_free(t); 2395 } 2396 2397 void 2398 m_tag_delete_chain(struct mbuf *m) 2399 { 2400 struct m_tag *p, *q; 2401 2402 KASSERT((m->m_flags & M_PKTHDR) != 0); 2403 2404 p = SLIST_FIRST(&m->m_pkthdr.tags); 2405 if (p == NULL) 2406 return; 2407 while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) 2408 m_tag_delete(m, q); 2409 m_tag_delete(m, p); 2410 } 2411 2412 struct m_tag * 2413 m_tag_find(const struct mbuf *m, int type) 2414 { 2415 struct m_tag *p; 2416 2417 KASSERT((m->m_flags & M_PKTHDR) != 0); 2418 2419 p = SLIST_FIRST(&m->m_pkthdr.tags); 2420 while (p != NULL) { 2421 if (p->m_tag_id == type) 2422 return p; 2423 p = SLIST_NEXT(p, m_tag_link); 2424 } 2425 return NULL; 2426 } 2427 2428 struct m_tag * 2429 m_tag_copy(struct m_tag *t) 2430 { 2431 struct m_tag *p; 2432 2433 p = m_tag_get(t->m_tag_id, t->m_tag_len, M_NOWAIT); 2434 if (p == NULL) 2435 return NULL; 2436 memcpy(p + 1, t + 1, t->m_tag_len); 2437 return p; 2438 } 2439 2440 /* 2441 * Copy two tag chains. The destination mbuf (to) loses any attached 2442 * tags even if the operation fails. This should not be a problem, as 2443 * m_tag_copy_chain() is typically called with a newly-allocated 2444 * destination mbuf. 
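 *
 * Returns 1 on success and 0 if copying any tag fails, in which case
 * the destination's tag chain is left empty.  For reference, a hedged
 * sketch of basic tag usage with the routines above (TAG_TYPE and the
 * uint32_t payload are purely illustrative):
 *
 *	struct m_tag *t = m_tag_get(TAG_TYPE, sizeof(uint32_t), M_NOWAIT);
 *	if (t != NULL) {
 *		*(uint32_t *)(t + 1) = value;
 *		m_tag_prepend(m, t);
 *	}
 *	...
 *	if ((t = m_tag_find(m, TAG_TYPE)) != NULL)
 *		value = *(uint32_t *)(t + 1);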
2445 */ 2446 int 2447 m_tag_copy_chain(struct mbuf *to, struct mbuf *from) 2448 { 2449 struct m_tag *p, *t, *tprev = NULL; 2450 2451 KASSERT((from->m_flags & M_PKTHDR) != 0); 2452 2453 m_tag_delete_chain(to); 2454 SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { 2455 t = m_tag_copy(p); 2456 if (t == NULL) { 2457 m_tag_delete_chain(to); 2458 return 0; 2459 } 2460 if (tprev == NULL) 2461 SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); 2462 else 2463 SLIST_INSERT_AFTER(tprev, t, m_tag_link); 2464 tprev = t; 2465 } 2466 return 1; 2467 } 2468
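/*
 * Illustrative example only (not part of the kernel proper): a typical
 * allocate/fill/free cycle using the routines above, assuming a
 * caller-supplied "payload" buffer of "paylen" bytes with
 * paylen <= MCLBYTES:
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	if (paylen > MHLEN) {
 *		MCLGET(m, M_DONTWAIT);
 *		if ((m->m_flags & M_EXT) == 0) {
 *			m_freem(m);
 *			return ENOBUFS;
 *		}
 *	}
 *	memcpy(mtod(m, void *), payload, paylen);
 *	m->m_len = m->m_pkthdr.len = paylen;
 *	...
 *	m_freem(m);
 */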