/*	$NetBSD: uipc_mbuf.c,v 1.170 2017/01/09 14:25:52 christos Exp $	*/

/*-
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.170 2017/01/09 14:25:52 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_mbuftrace.h"
#include "opt_nmbclusters.h"
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

pool_cache_t mb_cache;	/* mbuf cache */
pool_cache_t mcl_cache;	/* mbuf cluster cache */

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static int mb_ctor(void *, void *, int);

static void sysctl_kern_mbuf_setup(void);

static struct sysctllog *mbuf_sysctllog;

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */

static const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

static percpu_t *mbstat_percpu;

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	MOWNER_INIT("unknown", "free"),
	MOWNER_INIT("unknown", "data"),
	MOWNER_INIT("unknown", "header"),
	MOWNER_INIT("unknown", "soname"),
	MOWNER_INIT("unknown", "soopts"),
	MOWNER_INIT("unknown", "ftable"),
	MOWNER_INIT("unknown", "control"),
	MOWNER_INIT("unknown", "oobdata"),
};
struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
#endif

#define	MEXT_ISEMBEDDED(m)	((m)->m_ext_ref == (m))

#define	MCLADDREFERENCE(o, n)						\
do {									\
	KASSERT(((o)->m_flags & M_EXT) != 0);				\
	KASSERT(((n)->m_flags & M_EXT) == 0);				\
	KASSERT((o)->m_ext.ext_refcnt >= 1);				\
	(n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS);		\
	atomic_inc_uint(&(o)->m_ext.ext_refcnt);			\
	(n)->m_ext_ref = (o)->m_ext_ref;				\
	mowner_ref((n), (n)->m_flags);					\
	MCLREFDEBUGN((n), __FILE__, __LINE__);				\
} while (/* CONSTCOND */ 0)

static int
nmbclusters_limit(void)
{
#if defined(PMAP_MAP_POOLPAGE)
	/* direct mapping, doesn't use space in kmem_arena */
	vsize_t max_size = physmem / 4;
#else
	vsize_t max_size = MIN(physmem / 4, nkmempages / 4);
#endif

	max_size = max_size * PAGE_SIZE / MCLBYTES;
#ifdef NMBCLUSTERS_MAX
	max_size = MIN(max_size, NMBCLUSTERS_MAX);
#endif

#ifdef NMBCLUSTERS
	return MIN(max_size, NMBCLUSTERS);
#else
	return max_size;
#endif
}

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

	CTASSERT(sizeof(struct _m_ext) <= MHLEN);
	CTASSERT(sizeof(struct mbuf) == MSIZE);

	sysctl_kern_mbuf_setup();

	mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl",
	    NULL, IPL_VM, mb_ctor, NULL, NULL);
	KASSERT(mb_cache != NULL);

	mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl", NULL,
	    IPL_VM, NULL, NULL, NULL);
	KASSERT(mcl_cache != NULL);

	pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL);
	pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL);

	/*
	 * Set an arbitrary default limit on the number of mbuf clusters.
	 */
#ifdef NMBCLUSTERS
	nmbclusters = nmbclusters_limit();
#else
	nmbclusters = MAX(1024,
	    (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16);
	nmbclusters = MIN(nmbclusters, nmbclusters_limit());
#endif

	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the limit
	 * reached message at most once a minute.
	 */
	pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);

	mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));

	/*
	 * Set a low water mark for both mbufs and clusters.  This should
	 * help ensure that they can be allocated in a memory starvation
	 * situation.  This is important for e.g. diskless systems which
	 * must allocate mbufs in order for the pagedaemon to clean pages.
	 */
	pool_cache_setlowat(mb_cache, mblowat);
	pool_cache_setlowat(mcl_cache, mcllowat);

#ifdef MBUFTRACE
	{
		/*
		 * Attach the unknown mowners.
		 */
		int i;
		MOWNER_ATTACH(&revoked_mowner);
		for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
		     i-- > 0; )
			MOWNER_ATTACH(&unknown_mowners[i]);
	}
#endif
}

/*
 * sysctl helper routine for the kern.mbuf subtree.
 * nmbclusters, mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
	int error, newval;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	switch (rnode->sysctl_num) {
	case MBUF_NMBCLUSTERS:
	case MBUF_MBLOWAT:
	case MBUF_MCLLOWAT:
		newval = *(int*)rnode->sysctl_data;
		break;
	default:
		return (EOPNOTSUPP);
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);
	if (newval < 0)
		return (EINVAL);

	switch (node.sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (newval < nmbclusters)
			return (EINVAL);
		if (newval > nmbclusters_limit())
			return (EINVAL);
		nmbclusters = newval;
		pool_cache_sethardlimit(mcl_cache, nmbclusters,
		    mclpool_warnmsg, 60);
		break;
	case MBUF_MBLOWAT:
		mblowat = newval;
		pool_cache_setlowat(mb_cache, mblowat);
		break;
	case MBUF_MCLLOWAT:
		mcllowat = newval;
		pool_cache_setlowat(mcl_cache, mcllowat);
		break;
	}

	return (0);
}

#ifdef MBUFTRACE
static void
mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mowner_counter *mc = v1;
	struct mowner_user *mo_user = v2;
	int i;

	for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
		mo_user->mo_counter[i] += mc->mc_counter[i];
	}
}

static void
mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
{

	memset(mo_user, 0, sizeof(*mo_user));
	CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
	CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
	memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
	memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
	percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user);
}

static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *mo;
	size_t len = 0;
	int error = 0;

	if (namelen != 0)
		return (EINVAL);
	if (newp != NULL)
		return (EPERM);

	LIST_FOREACH(mo, &mowners, mo_link) {
		struct mowner_user mo_user;

		mowner_convert_to_user(mo, &mo_user);

		if (oldp != NULL) {
			if (*oldlenp - len < sizeof(mo_user)) {
				error = ENOMEM;
				break;
			}
			error = copyout(&mo_user, (char *)oldp + len,
			    sizeof(mo_user));
			if (error)
				break;
		}
		len += sizeof(mo_user);
	}

	if (error == 0)
		*oldlenp = len;

	return (error);
}
#endif /* MBUFTRACE */

static void
mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mbstat_cpu *mbsc = v1;
	struct mbstat *mbs = v2;
	int i;

	for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
		mbs->m_mtypes[i] += mbsc->m_mtypes[i];
	}
}

static void
mbstat_convert_to_user(struct mbstat *mbs)
{

	memset(mbs, 0, sizeof(*mbs));
	mbs->m_drain = mbstat.m_drain;
	percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs);
}

static int
sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct mbstat mbs;

	mbstat_convert_to_user(&mbs);
	node = *rnode;
	node.sysctl_data = &mbs;
	node.sysctl_size = sizeof(mbs);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static void
sysctl_kern_mbuf_setup(void)
{

	KASSERT(mbuf_sysctllog == NULL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "mbuf",
	    SYSCTL_DESCR("mbuf control variables"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "msize",
	    SYSCTL_DESCR("mbuf base size"),
	    NULL, msize, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "mclbytes",
	    SYSCTL_DESCR("mbuf cluster size"),
	    NULL, mclbytes, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "nmbclusters",
	    SYSCTL_DESCR("Limit on the number of mbuf clusters"),
	    sysctl_kern_mbuf, 0, &nmbclusters, 0,
	    CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "mblowat",
	    SYSCTL_DESCR("mbuf low water mark"),
	    sysctl_kern_mbuf, 0, &mblowat, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "mcllowat",
	    SYSCTL_DESCR("mbuf cluster low water mark"),
	    sysctl_kern_mbuf, 0, &mcllowat, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "stats",
	    SYSCTL_DESCR("mbuf allocation statistics"),
	    sysctl_kern_mbuf_stats, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "mowners",
	    SYSCTL_DESCR("Information about mbuf owners"),
	    sysctl_kern_mbuf_mowners, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}

static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return (0);
}

/*
 * Add mbuf to the end of a chain
 */
struct mbuf *
m_add(struct mbuf *c, struct mbuf *m)
{
	struct mbuf *n;

	if (c == NULL)
		return m;

	for (n = c; n->m_next != NULL; n = n->m_next)
		continue;
	n->m_next = m;
	return c;
}

/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int adjust;

	KASSERT(len != M_COPYALL);

	if (m->m_flags & M_EXT)
		adjust = m->m_ext.ext_size - len;
	else if (m->m_flags & M_PKTHDR)
		adjust = MHLEN - len;
	else
		adjust = MLEN - len;
	m->m_data += adjust &~ (sizeof(long)-1);
}

/*
 * Append the specified data to the indicated mbuf chain.
 * Extend the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, const void *cpv)
{
	struct mbuf *m, *n;
	int remainder, space;
	const char *cp = cpv;

	KASSERT(len != M_COPYALL);
	for (m = m0; m->m_next != NULL; m = m->m_next)
		continue;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		memmove(mtod(m, char *) + m->m_len, cp, space);
		m->m_len += space;
		cp = cp + space, remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_DONTWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = min(MLEN, remainder);
		memmove(mtod(n, void *), cp, n->m_len);
		cp += n->m_len, remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	const struct protosw *pr;
	struct ifnet *ifp;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splvm();
	DOMAIN_FOREACH(dp) {
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	}
	/* XXX we cannot use psref in H/W interrupt */
	if (!cpu_intr_p()) {
		int bound = curlwp_bind();
		IFNET_READER_FOREACH(ifp) {
			struct psref psref;

			psref_acquire(&psref, &ifp->if_psref,
			    ifnet_psref_class);

			if (ifp->if_drain)
				(*ifp->if_drain)(ifp);

			psref_release(&psref, &ifp->if_psref,
			    ifnet_psref_class);
		}
		curlwp_bindx(bound);
	}
	splx(s);
	mbstat.m_drain++;
	KERNEL_UNLOCK_ONE(NULL);
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;

	KASSERT(type != MT_FREE);

	m = pool_cache_get(mb_cache,
	    nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0);
	if (m == NULL)
		return NULL;

	mbstat_type_add(type, 1);

	m_hdr_init(m, type, NULL, m->m_dat, 0);

	return m;
}

struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;

	m = m_get(nowait, type);
	if (m == NULL)
		return NULL;

	m_pkthdr_init(m);

	return m;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	m = m_get(nowait, type);
	if (m == 0)
		return (NULL);
	memset(mtod(m, void *), 0, MLEN);
	return (m);
}

void
m_clget(struct mbuf *m, int nowait)
{

	MCLGET(m, nowait);
}

#ifdef MBUFTRACE
/*
 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain.
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	for (; m != NULL; m = m->m_next)
		MCLAIM(m, mo);
}
#endif

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	KASSERT(len != M_COPYALL);
	mn = m_get(how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(mn, m);
	} else {
		MCLAIM(mn, m->m_owner);
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
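
/*
 * Illustrative sketch, not part of the original file: how a caller might
 * combine m_gethdr(), m_append() and m_prepend() from above to build a
 * small packet.  "example_build_packet", "payload", "paylen" and "hdrlen"
 * are made-up names; hdrlen is assumed to fit in a single mbuf (<= MHLEN).
 */
#if 0
static struct mbuf *
example_build_packet(const void *payload, int paylen, int hdrlen)
{
	struct mbuf *m;

	/* Allocate a packet header mbuf; may fail under memory pressure. */
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return NULL;

	/* m_append() extends the chain as needed and returns 1 on success. */
	if (!m_append(m, paylen, payload)) {
		m_freem(m);
		return NULL;
	}

	/* m_prepend() frees the chain and returns NULL on failure. */
	m = m_prepend(m, hdrlen, M_DONTWAIT);
	return m;
}
#endif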
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 0);	/* shallow copy on M_EXT */
}

struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 1);	/* deep copy */
}

static inline int
m_copylen(int len, int copylen)
{

	return len == M_COPYALL ? copylen : min(len, copylen);
}

static struct mbuf *
m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || (len != M_COPYALL && len < 0))
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym: m == 0, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len == M_COPYALL || len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym: m == 0, len %d [!COPYALL]",
				    len);
			break;
		}
		n = m_get(wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		MCLAIM(n, m->m_owner);
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = m_copylen(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * We are unsure about the way m was allocated.
				 * Copy into multiple MCLBYTES cluster mbufs.
				 *
				 * Recompute m_len; it is no longer valid if
				 * MCLGET() fails to allocate a cluster.  Then
				 * we try to split the source into normal
				 * sized mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = m_copylen(len, n->m_len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, void *), mtod(m, char *) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, void *), mtod(m, char *) + off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun %d %d", off, m->m_len);
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	n = m_get(how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	MCLAIM(n, m->m_owner);
	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		o = m_get(how, m->m_type);
		if (!o)
			goto nospace;

		MCLAIM(o, m->m_owner);
		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return NULL;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *vp)
{
	unsigned count;
	void *cp = vp;
	struct mbuf *m0 = m;
	int len0 = len;
	int off0 = off;
	void *vp0 = vp;

	KASSERT(len != M_COPYALL);
	if (off < 0 || len < 0)
		panic("m_copydata: off %d, len %d", off, len);
	while (off > 0) {
		if (m == NULL)
			panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)",
			    m0, len0, off0, vp0, off, off0 - off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata(%p,%d,%d,%p): "
			    "m=NULL, off=%d (%d), len=%d (%d)",
			    m0, len0, off0, vp0,
			    off, off0 - off, len, len0 - len);
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, char *) + off, count);
		len -= count;
		cp = (char *)cp + count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), therefore the data
 * portion of n could end up in an mbuf of a different mbuf type.
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{

	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		if (m)
			while (m->m_next)
				(m = m->m_next)->m_len = 0;
	}
}

/*
 * m_ensure_contig: rearrange an mbuf chain so that the given length of
 * bytes is contiguous and in the data area of an mbuf (therefore, mtod()
 * will work for a structure of the given length).
 *
 * => On success, returns true and leaves the resulting chain in *m0;
 *    returns false otherwise.
 * => The mbuf chain may change, but is always preserved valid.
 */
bool
m_ensure_contig(struct mbuf **m0, int len)
{
	struct mbuf *n = *m0, *m;
	size_t count, space;

	KASSERT(len != M_COPYALL);
	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len) {
			return true;
		}
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN) {
			return false;
		}
		m = m_get(M_DONTWAIT, n->m_type);
		if (m == NULL) {
			return false;
		}
		MCLAIM(m, n->m_owner);
		if (n->m_flags & M_PKTHDR) {
			M_MOVE_PKTHDR(m, n);
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);

	m->m_next = n;
	*m0 = m;

	return len <= 0;
}

/*
 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error.
 */
int MPFail;

struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m = n;

	KASSERT(len != M_COPYALL);
	if (!m_ensure_contig(&m, len)) {
		KASSERT(m != NULL);
		m_freem(m);
		MPFail++;
		m = NULL;
	}
	return m;
}
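
/*
 * Illustrative sketch, not part of the original file: the usual m_pullup()
 * idiom used before casting the front of a chain with mtod().  "struct
 * exhdr" and "example_pullup_header" are made-up names for the example.
 */
#if 0
struct exhdr {
	uint32_t	word0;
	uint32_t	word1;
};

static struct exhdr *
example_pullup_header(struct mbuf **mp)
{
	struct mbuf *m = *mp;

	if (m->m_len < sizeof(struct exhdr)) {
		/* Make the first sizeof(struct exhdr) bytes contiguous. */
		m = m_pullup(m, sizeof(struct exhdr));
		if (m == NULL) {
			/* m_pullup() already freed the chain on failure. */
			*mp = NULL;
			return NULL;
		}
		*mp = m;
	}
	return mtod(m, struct exhdr *);
}
#endif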
/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
int MSFail;

struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	KASSERT(len != M_COPYALL);
	if (len > (MHLEN - dstoff))
		goto bad;
	m = m_get(M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	if (n->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(m, n);
	}
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MSFail++;
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{

	return m_split0(m0, len0, wait, 1);
}

static struct mbuf *
m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	KASSERT(len0 != M_COPYALL);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (NULL);
	remain = m->m_len - len;
	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return NULL;
		MCLAIM(n, m0->m_owner);
		m_copy_rcvif(n, m0);
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_len = 0;
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		n = m_get(wait, m->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m->m_owner);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, void *), mtod(m, char *) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(const void *from, void *to, size_t len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(uint16_t);
		totlen -= 2 * sizeof(uint16_t);
	}
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return NULL;
	m_set_rcvif(m, ifp);
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			m = m_get(M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (NULL);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, void *), (size_t)len);
		else
			memcpy(mtod(m, void *), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif /* defined(DEBUG) */

	if (m0 == NULL)
		return;

#if defined(DEBUG)
	error =
#endif /* defined(DEBUG) */
	m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif /* defined(DEBUG) */
}

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KASSERT(len != M_COPYALL);
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
	if (error) {
		/*
		 * No way to recover from partial success.
		 * Just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}

/*
 * m_makewritable: ensure the specified range is writable.
 */
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	int origlen = m_length(*mp);
#endif /* defined(DEBUG) */

	error = m_copyback0(mp, off, len, NULL,
	    M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);

	if (error)
		return error;

#if defined(DEBUG)
	int reslen = 0;
	for (struct mbuf *n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsist");
#endif /* defined(DEBUG) */

	return 0;
}
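
/*
 * Illustrative sketch, not part of the original file: a caller that wants
 * to rewrite bytes inside a possibly shared chain can first make the range
 * writable with m_makewritable() and then store the new bytes with
 * m_copyback().  "example_patch_bytes" and its parameters are made-up
 * names for the example.
 */
#if 0
static int
example_patch_bytes(struct mbuf **mp, int off, const void *buf, int len)
{
	int error;

	/* Copy-on-write any read-only (e.g. shared cluster) storage in range. */
	error = m_makewritable(mp, off, len, M_DONTWAIT);
	if (error)
		return error;

	/* The range is now writable; copy the new data into place. */
	m_copyback(*mp, off, len, buf);
	return 0;
}
#endif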
/*
 * Copy the mbuf chain to a new mbuf chain that is as short as possible.
 * Return the new mbuf chain on success, NULL on failure.  On success,
 * free the old mbuf chain.
 */
struct mbuf *
m_defrag(struct mbuf *mold, int flags)
{
	struct mbuf *m0, *mn, *n;
	size_t sz = mold->m_pkthdr.len;

#ifdef DIAGNOSTIC
	if ((mold->m_flags & M_PKTHDR) == 0)
		panic("m_defrag: not a mbuf chain header");
#endif

	m0 = m_gethdr(flags, MT_DATA);
	if (m0 == NULL)
		return NULL;
	M_COPY_PKTHDR(m0, mold);
	mn = m0;

	do {
		if (sz > MHLEN) {
			MCLGET(mn, M_DONTWAIT);
			if ((mn->m_flags & M_EXT) == 0) {
				m_freem(m0);
				return NULL;
			}
		}

		mn->m_len = MIN(sz, MCLBYTES);

		m_copydata(mold, mold->m_pkthdr.len - sz, mn->m_len,
		    mtod(mn, void *));

		sz -= mn->m_len;

		if (sz > 0) {
			/* need more mbufs */
			n = m_get(M_NOWAIT, MT_DATA);
			if (n == NULL) {
				m_freem(m0);
				return NULL;
			}

			mn->m_next = n;
			mn = n;
		}
	} while (sz > 0);

	m_freem(mold);

	return m0;
}

int
m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
    int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);

	if (len == M_COPYALL)
		len = m_length(*mp0) - off;

	/*
	 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW,
	 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive.
	 */

	KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0);

	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			int tspace;
extend:
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;

			/*
			 * try to make some space at the end of "m".
			 */

			mlen = m->m_len;
			if (off + len >= MINCLSIZE &&
			    (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
				MCLGET(m, how);
			}
			tspace = M_TRAILINGSPACE(m);
			if (tspace > 0) {
				tspace = min(tspace, off + len);
				KASSERT(tspace > 0);
				memset(mtod(m, char *) + m->m_len, 0,
				    min(off, tspace));
				m->m_len += tspace;
				off += mlen;
				totlen -= mlen;
				continue;
			}

			/*
			 * need to allocate an mbuf.
			 */

			if (off + len >= MINCLSIZE) {
				n = m_getcl(how, m->m_type, 0);
			} else {
				n = m_get(how, m->m_type);
			}
			if (n == NULL) {
				goto out;
			}
			n->m_len = min(M_TRAILINGSPACE(n), off + len);
			memset(mtod(n, char *), 0, min(n->m_len, off));
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			char *datap;
			int eatlen;

			/*
			 * this mbuf is read-only.
			 * allocate a new writable mbuf and try again.
			 */

#if defined(DIAGNOSTIC)
			if ((flags & M_COPYBACK0_COW) == 0)
				panic("m_copyback0: read-only");
#endif /* defined(DIAGNOSTIC) */

			/*
			 * if we're going to write into the middle of
			 * a mbuf, split it first.
			 */
			if (off > 0) {
				n = m_split0(m, off, how, 0);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf.  copy packet header if needed.
			 */
			n = m_get(how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				M_MOVE_PKTHDR(n, m);
				n->m_len = MHLEN;
			} else {
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * Free the region which has been overwritten,
			 * copying data from the old mbufs if requested.
			 */
			if (flags & M_COPYBACK0_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = min(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = min(mlen, len);
		if (flags & M_COPYBACK0_COPYBACK) {
			memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			goto extend;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
out:	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
		KASSERT((flags & M_COPYBACK0_EXTEND) != 0);
		m->m_pkthdr.len = totlen;
	}

	return 0;

enobufs:
	return ENOBUFS;
}

void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;

	from->m_flags &= ~M_PKTHDR;
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, unsigned int), void *arg)
{
	unsigned int count;
	int rval;

	KASSERT(len != M_COPYALL);
	KASSERT(len >= 0);
	KASSERT(off >= 0);

	while (off > 0) {
		KASSERT(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL);
		count = min(m->m_len - off, len);

		rval = (*f)(arg, mtod(m, char *) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}
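
/*
 * Illustrative sketch, not part of the original file: m_apply() hands the
 * callback one contiguous segment at a time, so a caller can fold a
 * function over a byte range without linearizing the chain.  Here the
 * made-up callback "example_sum_cb" simply sums bytes into *(uint32_t *)arg.
 */
#if 0
static int
example_sum_cb(void *arg, void *data, unsigned int len)
{
	uint32_t *sum = arg;
	uint8_t *p = data;

	while (len-- > 0)
		*sum += *p++;
	return 0;		/* non-zero would abort m_apply() */
}

static uint32_t
example_sum_range(struct mbuf *m, int off, int len)
{
	uint32_t sum = 0;

	(void)m_apply(m, off, len, example_sum_cb, &sum);
	return sum;
}
#endif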
/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else
					return (NULL);
			} else
				m = m->m_next;
		}
	}

	return (NULL);
}

/*
 * m_ext_free: release a reference to the mbuf external storage.
 *
 * => free the mbuf m itself as well.
 */

void
m_ext_free(struct mbuf *m)
{
	bool embedded = MEXT_ISEMBEDDED(m);
	bool dofree = true;
	u_int refcnt;

	KASSERT((m->m_flags & M_EXT) != 0);
	KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
	KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
	KASSERT((m->m_flags & M_EXT_CLUSTER) ==
	    (m->m_ext_ref->m_flags & M_EXT_CLUSTER));

	if (__predict_true(m->m_ext.ext_refcnt == 1)) {
		refcnt = m->m_ext.ext_refcnt = 0;
	} else {
		refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
	}
	if (refcnt > 0) {
		if (embedded) {
			/*
			 * other mbuf's m_ext_ref still points to us.
			 */
			dofree = false;
		} else {
			m->m_ext_ref = m;
		}
	} else {
		/*
		 * dropping the last reference
		 */
		if (!embedded) {
			m->m_ext.ext_refcnt++; /* XXX */
			m_ext_free(m->m_ext_ref);
			m->m_ext_ref = m;
		} else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
			pool_cache_put_paddr((struct pool_cache *)
			    m->m_ext.ext_arg,
			    m->m_ext.ext_buf, m->m_ext.ext_paddr);
		} else if (m->m_ext.ext_free) {
			(*m->m_ext.ext_free)(m,
			    m->m_ext.ext_buf, m->m_ext.ext_size,
			    m->m_ext.ext_arg);
			/*
			 * 'm' is already freed by the ext_free callback.
			 */
			dofree = false;
		} else {
			free(m->m_ext.ext_buf, m->m_ext.ext_type);
		}
	}
	if (dofree) {
		m->m_type = MT_FREE;
		pool_cache_put(mb_cache, m);
	}
}

#if defined(DDB)
void
m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
{
	char ch;
	bool opt_c = false;
	char buf[512];

	while ((ch = *(modif++)) != '\0') {
		switch (ch) {
		case 'c':
			opt_c = true;
			break;
		}
	}

nextchain:
	(*pr)("MBUF %p\n", m);
	snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags);
	(*pr)("  data=%p, len=%d, type=%d, flags=%s\n",
	    m->m_data, m->m_len, m->m_type, buf);
	(*pr)("  owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next,
	    m->m_nextpkt);
	(*pr)("  leadingspace=%u, trailingspace=%u, readonly=%u\n",
	    (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m),
	    (int)M_READONLY(m));
	if ((m->m_flags & M_PKTHDR) != 0) {
		snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags);
		(*pr)("  pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%"
		    PRIx32 ", segsz=%u\n",
		    m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m),
		    buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
	}
	if ((m->m_flags & M_EXT)) {
		(*pr)("  ext_refcnt=%u, ext_buf=%p, ext_size=%zd, "
		    "ext_free=%p, ext_arg=%p\n",
		    m->m_ext.ext_refcnt,
		    m->m_ext.ext_buf, m->m_ext.ext_size,
		    m->m_ext.ext_free, m->m_ext.ext_arg);
	}
	if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
		vaddr_t sva = (vaddr_t)m->m_ext.ext_buf;
		vaddr_t eva = sva + m->m_ext.ext_size;
		int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT;
		int i;

		(*pr)("  pages:");
		for (i = 0; i < n; i ++) {
			(*pr)(" %p", m->m_ext.ext_pgs[i]);
		}
		(*pr)("\n");
	}

	if (opt_c) {
		m = m->m_next;
		if (m != NULL) {
			goto nextchain;
		}
	}
}
#endif /* defined(DDB) */

void
mbstat_type_add(int type, int diff)
{
	struct mbstat_cpu *mb;
	int s;

	s = splvm();
	mb = percpu_getref(mbstat_percpu);
	mb->m_mtypes[type] += diff;
	percpu_putref(mbstat_percpu);
	splx(s);
}

#if defined(MBUFTRACE)
void
mowner_attach(struct mowner *mo)
{

	KASSERT(mo->mo_counters == NULL);
	mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));

	/* XXX lock */
	LIST_INSERT_HEAD(&mowners, mo, mo_link);
}

void
mowner_detach(struct mowner *mo)
{

	KASSERT(mo->mo_counters != NULL);

	/* XXX lock */
	LIST_REMOVE(mo, mo_link);

	percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
	mo->mo_counters = NULL;
}

void
mowner_init(struct mbuf *m, int type)
{
	struct mowner_counter *mc;
	struct mowner *mo;
	int s;

	m->m_owner = mo = &unknown_mowners[type];
	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
}

void
mowner_ref(struct mbuf *m, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
}

void
mowner_revoke(struct mbuf *m, bool all, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
	if (all)
		mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	if (all)
		m->m_owner = &revoked_mowner;
}

static void
mowner_claim(struct mbuf *m, struct mowner *mo)
{
	struct mowner_counter *mc;
	int flags = m->m_flags;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	m->m_owner = mo;
}

void
m_claim(struct mbuf *m, struct mowner *mo)
{

	if (m->m_owner == mo || mo == NULL)
		return;

	mowner_revoke(m, true, m->m_flags);
	mowner_claim(m, mo);
}
#endif /* defined(MBUFTRACE) */

/*
 * MFREE(struct mbuf *m, struct mbuf *n)
 * Free a single mbuf and associated external storage.
 * Place the successor, if any, in n.
 */
#define	MFREE(f, l, m, n)						\
	mowner_revoke((m), 1, (m)->m_flags);				\
	mbstat_type_add((m)->m_type, -1);				\
	if ((m)->m_flags & M_PKTHDR)					\
		m_tag_delete_chain((m), NULL);				\
	(n) = (m)->m_next;						\
	if ((m)->m_flags & M_EXT) {					\
		m_ext_free((m));					\
	} else {							\
		MBUFFREE(f, l, m);					\
	}								\

#ifdef DEBUG
#define	MBUFFREE(f, l, m)						\
	do {								\
		if ((m)->m_type == MT_FREE)				\
			panic("mbuf was already freed at %s,%d",	\
			    m->m_data, m->m_len);			\
		(m)->m_type = MT_FREE;					\
		(m)->m_data = __UNCONST(f);				\
		(m)->m_len = l;						\
		pool_cache_put(mb_cache, (m));				\
	} while (/*CONSTCOND*/0)

#else
#define	MBUFFREE(f, l, m)						\
	do {								\
		KASSERT((m)->m_type != MT_FREE);			\
		(m)->m_type = MT_FREE;					\
		pool_cache_put(mb_cache, (m));				\
	} while (/*CONSTCOND*/0)
#endif

struct mbuf *
m__free(const char *f, int l, struct mbuf *m)
{
	struct mbuf *n;

	MFREE(f, l, m, n);
	return (n);
}

void
m__freem(const char *f, int l, struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(f, l, m, n);
		m = n;
	} while (m);
}

#undef m_free
struct mbuf *m_free(struct mbuf *);
struct mbuf *
m_free(struct mbuf *m)
{
	return m__free(__func__, __LINE__, m);
}

#undef m_freem
void m_freem(struct mbuf *);
void
m_freem(struct mbuf *m)
{
	m__freem(__func__, __LINE__, m);
}