/*	$OpenBSD: uipc_mbuf.c,v 1.166 2012/04/13 09:38:32 deraadt Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

/*
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>

#include <machine/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

struct	mbstat mbstat;		/* mbuf stats */
struct	pool mbpool;		/* mbuf pool */

/* mbuf cluster pools */
u_int	mclsizes[] = {
	MCLBYTES,	/* must be at slot 0 */
	4 * 1024,
	8 * 1024,
	9 * 1024,
	12 * 1024,
	16 * 1024,
	64 * 1024
};
static	char mclnames[MCLPOOLS][8];
struct	pool mclpools[MCLPOOLS];

int	m_clpool(u_int);

int	max_linkhdr;		/* largest link-level header */
int	max_protohdr;		/* largest protocol header */
int	max_hdr;		/* largest link+protocol header */
int	max_datalen;		/* MHLEN - max_hdr */

struct	timeout m_cltick_tmo;
int	m_clticks;
void	m_cltick(void *);

void	m_extfree(struct mbuf *);
struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
void	nmbclust_update(void);


const char *mclpool_warnmsg =
    "WARNING: mclpools limit reached; increase kern.maxclusters";

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{
	int i;

#if DIAGNOSTIC
	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
		panic("mbinit: the largest cluster size != MAXMCLBYTES");
#endif

	pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", NULL);
	pool_set_constraints(&mbpool, &kp_dma_contig);
	pool_setlowat(&mbpool, mblowat);

	for (i = 0; i < nitems(mclsizes); i++) {
		snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
		    mclsizes[i] >> 10);
		pool_init(&mclpools[i], mclsizes[i], 0, 0, 0,
		    mclnames[i], NULL);
		pool_set_constraints(&mclpools[i], &kp_dma_contig);
		pool_setlowat(&mclpools[i], mcllowat);
	}

	nmbclust_update();

	timeout_set(&m_cltick_tmo, m_cltick, NULL);
	m_cltick(NULL);
}
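
/*
 * Illustrative note (not part of this file): nmbclust is the value
 * behind the kern.maxclusters sysctl, so the limits applied below can
 * be raised at runtime, e.g.
 *
 *	# sysctl kern.maxclusters=16384
 *
 * after which nmbclust_update() below re-applies the pool hard limits.
 */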

void
nmbclust_update(void)
{
	int i;
	/*
	 * Set the hard limit on the mclpools to the number of
	 * mbuf clusters the kernel is to support.  Log the limit-reached
	 * message at most once a minute.
	 */
	for (i = 0; i < nitems(mclsizes); i++) {
		(void)pool_sethardlimit(&mclpools[i], nmbclust,
		    mclpool_warnmsg, 60);
		/*
		 * XXX this needs to be reconsidered.
		 * Setting the high water mark to nmbclust is too high,
		 * but we need enough spare buffers around so that
		 * allocations in interrupt context don't fail or mclgeti()
		 * drivers may end up with empty rings.
		 */
		pool_sethiwat(&mclpools[i], nmbclust);
	}
	pool_sethiwat(&mbpool, nmbclust);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	struct protosw *pr;
	int s = splnet();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
	splx(s);
}

/*
 * Space allocation routines.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;
	int s;

	s = splnet();
	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m)
		mbstat.m_mtypes[type]++;
	splx(s);
	if (m) {
		m->m_type = type;
		m->m_next = (struct mbuf *)NULL;
		m->m_nextpkt = (struct mbuf *)NULL;
		m->m_data = m->m_dat;
		m->m_flags = 0;
	}
	return (m);
}

/*
 * ATTN: When changing anything here check m_inithdr() and m_defrag();
 * those may need to change as well.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;
	int s;

	s = splnet();
	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m)
		mbstat.m_mtypes[type]++;
	splx(s);
	if (m) {
		m->m_type = type;

		/* keep in sync with m_inithdr */
		m->m_next = (struct mbuf *)NULL;
		m->m_nextpkt = (struct mbuf *)NULL;
		m->m_data = m->m_pktdat;
		m->m_flags = M_PKTHDR;
		bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
		m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
	}
	return (m);
}

struct mbuf *
m_inithdr(struct mbuf *m)
{
	/* keep in sync with m_gethdr */
	m->m_next = (struct mbuf *)NULL;
	m->m_nextpkt = (struct mbuf *)NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;

	return (m);
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == NULL)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

int
m_clpool(u_int pktlen)
{
	int pi;

	for (pi = 0; pi < MCLPOOLS; pi++) {
		if (pktlen <= mclsizes[pi])
			return (pi);
	}

	return (-1);
}

void
m_clinitifp(struct ifnet *ifp)
{
	struct mclpool *mclp = ifp->if_data.ifi_mclpool;
	int i;

	/* Initialize high water marks for use of cluster pools */
	for (i = 0; i < MCLPOOLS; i++) {
		mclp = &ifp->if_data.ifi_mclpool[i];

		if (mclp->mcl_lwm == 0)
			mclp->mcl_lwm = 2;
		if (mclp->mcl_hwm == 0)
			mclp->mcl_hwm = 32768;

		mclp->mcl_cwm = MAX(4, mclp->mcl_lwm);
	}
}

void
m_clsetwms(struct ifnet *ifp, u_int pktlen, u_int lwm, u_int hwm)
{
	int pi;

	pi = m_clpool(pktlen);
	if (pi == -1)
		return;

	ifp->if_data.ifi_mclpool[pi].mcl_lwm = lwm;
	ifp->if_data.ifi_mclpool[pi].mcl_hwm = hwm;
}
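
/*
 * Illustrative sketch of the watermark API above; the softc and ring
 * size are hypothetical driver state.  A driver receiving into both 2k
 * and 9k clusters might bound its pool usage from its init path:
 *
 *	m_clsetwms(ifp, MCLBYTES, 2, sc->sc_rx_ring_count);
 *	m_clsetwms(ifp, 9 * 1024, 2, sc->sc_rx_ring_count);
 */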

/*
 * Record when the last timeout has been run.  If the delta is
 * too high, m_cldrop() will notice and decrease the interface
 * high water marks.
 */
void
m_cltick(void *arg)
{
	extern int ticks;

	m_clticks = ticks;
	timeout_add(&m_cltick_tmo, 1);
}

int m_livelock;
u_int mcllivelocks;

int
m_cldrop(struct ifnet *ifp, int pi)
{
	static int liveticks;
	struct mclpool *mclp;
	extern int ticks;
	int i;

	if (ticks - m_clticks > 1) {
		struct ifnet *aifp;

		/*
		 * Timeout did not run, so we are in some kind of livelock.
		 * Decrease the cluster allocation high water marks on all
		 * interfaces and prevent them from growing for the very
		 * near future.
		 */
		m_livelock = 1;
		mcllivelocks++;
		m_clticks = liveticks = ticks;
		TAILQ_FOREACH(aifp, &ifnet, if_list) {
			mclp = aifp->if_data.ifi_mclpool;
			for (i = 0; i < MCLPOOLS; i++) {
				int diff = max(mclp[i].mcl_cwm / 8, 2);
				mclp[i].mcl_cwm = max(mclp[i].mcl_lwm,
				    mclp[i].mcl_cwm - diff);
			}
		}
	} else if (m_livelock && (ticks - liveticks) > 4)
		m_livelock = 0;	/* Let the high water marks grow again */

	mclp = &ifp->if_data.ifi_mclpool[pi];
	if (m_livelock == 0 && ISSET(ifp->if_flags, IFF_RUNNING) &&
	    mclp->mcl_alive <= 4 && mclp->mcl_cwm < mclp->mcl_hwm &&
	    mclp->mcl_grown < ticks) {
		/* About to run out, so increase the current watermark */
		mclp->mcl_cwm++;
		mclp->mcl_grown = ticks;
	} else if (mclp->mcl_alive >= mclp->mcl_cwm)
		return (1);		/* No more packets given */

	return (0);
}

void
m_clcount(struct ifnet *ifp, int pi)
{
	ifp->if_data.ifi_mclpool[pi].mcl_alive++;
}

void
m_cluncount(struct mbuf *m, int all)
{
	struct mbuf_ext *me;

	do {
		me = &m->m_ext;
		if (((m->m_flags & (M_EXT|M_CLUSTER)) != (M_EXT|M_CLUSTER)) ||
		    (me->ext_ifp == NULL))
			continue;

		me->ext_ifp->if_data.ifi_mclpool[me->ext_backend].mcl_alive--;
		me->ext_ifp = NULL;
	} while (all && (m = m->m_next));
}
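
/*
 * Illustrative sketch: m_clget() below backs the MCLGETI() macro.  A
 * typical receive-ring refill loop (hypothetical driver code) relies
 * on m_cldrop() above to throttle the allocation:
 *
 *	m = MCLGETI(NULL, M_DONTWAIT, ifp, MCLBYTES);
 *	if (m == NULL)
 *		break;		(throttled or pool exhausted)
 */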

struct mbuf *
m_clget(struct mbuf *m, int how, struct ifnet *ifp, u_int pktlen)
{
	struct mbuf *m0 = NULL;
	int pi;
	int s;

	pi = m_clpool(pktlen);
#ifdef DIAGNOSTIC
	if (pi == -1)
		panic("m_clget: request for %u byte cluster", pktlen);
#endif

	s = splnet();

	if (ifp != NULL && m_cldrop(ifp, pi)) {
		splx(s);
		return (NULL);
	}

	if (m == NULL) {
		MGETHDR(m0, M_DONTWAIT, MT_DATA);
		if (m0 == NULL) {
			splx(s);
			return (NULL);
		}
		m = m0;
	}
	m->m_ext.ext_buf = pool_get(&mclpools[pi],
	    how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (!m->m_ext.ext_buf) {
		if (m0)
			m_freem(m0);
		splx(s);
		return (NULL);
	}
	if (ifp != NULL)
		m_clcount(ifp, pi);
	splx(s);

	m->m_data = m->m_ext.ext_buf;
	m->m_flags |= M_EXT|M_CLUSTER;
	m->m_ext.ext_size = mclpools[pi].pr_size;
	m->m_ext.ext_free = NULL;
	m->m_ext.ext_arg = NULL;
	m->m_ext.ext_backend = pi;
	m->m_ext.ext_ifp = ifp;
	MCLINITREFERENCE(m);
	return (m);
}

struct mbuf *
m_free_unlocked(struct mbuf *m)
{
	struct mbuf *n;

	mbstat.m_mtypes[m->m_type]--;
	if (m->m_flags & M_PKTHDR)
		m_tag_delete_chain(m);
	if (m->m_flags & M_EXT)
		m_extfree(m);
	n = m->m_next;
	pool_put(&mbpool, m);

	return (n);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	s = splnet();
	n = m_free_unlocked(m);
	splx(s);

	return (n);
}

void
m_extfree(struct mbuf *m)
{
	if (MCLISREFERENCED(m)) {
		m->m_ext.ext_nextref->m_ext.ext_prevref =
		    m->m_ext.ext_prevref;
		m->m_ext.ext_prevref->m_ext.ext_nextref =
		    m->m_ext.ext_nextref;
	} else if (m->m_flags & M_CLUSTER) {
		m_cluncount(m, 0);
		pool_put(&mclpools[m->m_ext.ext_backend],
		    m->m_ext.ext_buf);
	} else if (m->m_ext.ext_free)
		(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
		    m->m_ext.ext_size, m->m_ext.ext_arg);
	else
		panic("unknown type of extension buffer");
	m->m_ext.ext_size = 0;
	m->m_flags &= ~(M_EXT|M_CLUSTER);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	if (m == NULL)
		return;
	s = splnet();
	do {
		n = m_free_unlocked(m);
	} while ((m = n) != NULL);
	splx(s);
}

/*
 * mbuf chain defragmenter.  This function uses some evil tricks to
 * defragment an mbuf chain into a single buffer without changing the
 * mbuf pointer.  It needs to know a lot about the mbuf internals to
 * make this work.
 */
int
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0;

	if (m->m_next == NULL)
		return 0;

#ifdef DIAGNOSTIC
	if (!(m->m_flags & M_PKTHDR))
		panic("m_defrag: no packet hdr or not a chain");
#endif

	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
		return -1;
	if (m->m_pkthdr.len > MHLEN) {
		MCLGETI(m0, how, NULL, m->m_pkthdr.len);
		if (!(m0->m_flags & M_EXT)) {
			m_free(m0);
			return -1;
		}
	}
	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;

	/* free chain behind and possible ext buf on the first mbuf */
	m_freem(m->m_next);
	m->m_next = NULL;

	if (m->m_flags & M_EXT) {
		int s = splnet();
		m_extfree(m);
		splx(s);
	}

	/*
	 * Bounce copy mbuf over to the original mbuf and set everything up.
	 * This needs to reset or clear all pointers that may go into the
	 * original mbuf chain.
	 */
	if (m0->m_flags & M_EXT) {
		bcopy(&m0->m_ext, &m->m_ext, sizeof(struct mbuf_ext));
		MCLINITREFERENCE(m);
		m->m_flags |= M_EXT|M_CLUSTER;
		m->m_data = m->m_ext.ext_buf;
	} else {
		m->m_data = m->m_pktdat;
		bcopy(m0->m_data, m->m_data, m0->m_len);
	}
	m->m_pkthdr.len = m->m_len = m0->m_len;
	m->m_pkthdr.pf.hdr = NULL;	/* altq will cope */

	m0->m_flags &= ~(M_EXT|M_CLUSTER);	/* cluster is gone */
	m_free(m0);

	return 0;
}
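
/*
 * Illustrative sketch: m_defrag() above suits drivers whose DMA
 * engines limit the number of segments per packet.  A hypothetical
 * transmit path might retry a failed dmamap load once after
 * defragmenting:
 *
 *	if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT) ==
 *	    EFBIG) {
 *		if (m_defrag(m, M_DONTWAIT) != 0)
 *			goto drop;	(still too many segments)
 *		... retry the load; the pointer m itself is unchanged ...
 *	}
 */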

/*
 * Mbuffer utility routines.
 */

/*
 * Ensure len bytes of contiguous space at the beginning of the mbuf chain.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	if (M_LEADINGSPACE(m) >= len) {
		m->m_data -= len;
		m->m_len += len;
	} else {
		MGET(mn, how, m->m_type);
		if (mn == NULL) {
			m_freem(m);
			return (NULL);
		}
		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(mn, m);
		mn->m_next = m;
		m = mn;
		MH_ALIGN(m, len);
		m->m_len = len;
	}
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len += len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
struct mbuf *
m_copym(struct mbuf *m, int off, int len, int wait)
{
	return m_copym0(m, off, len, wait, 0);	/* shallow copy on M_EXT */
}

/*
 * m_copym2() is like m_copym(), except it COPIES cluster mbufs, instead
 * of merely bumping the reference count.
 */
struct mbuf *
m_copym2(struct mbuf *m, int off, int len, int wait)
{
	return m_copym0(m, off, len, wait, 1);	/* deep copy */
}

struct mbuf *
m_copym0(struct mbuf *m0, int off, int len, int wait, int deep)
{
	struct mbuf *m, *n, **np;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym0: off %d, len %d", off, len);
	if (off == 0 && m0->m_flags & M_PKTHDR)
		copyhdr = 1;
	if ((m = m_getptr(m0, off, &off)) == NULL)
		panic("m_copym0: short mbuf chain");
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("m_copym0: m == NULL and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (m_dup_pkthdr(n, m0, wait))
				goto nospace;
			if (len != M_COPYALL)
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				n->m_ext = m->m_ext;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * we are unsure about the way m was allocated.
				 * copy into multiple MCLBYTES cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
				    n->m_len);
			}
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
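
/*
 * Illustrative sketch of the copy variants above: m_copym() shares
 * cluster storage by reference, so the copy must be treated as
 * read-only; m_copym2() duplicates the data and is safe to modify:
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);	(read-only copy)
 *	n = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);	(writable copy)
 */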

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	if (off < 0)
		panic("m_copydata: off %d < 0", off);
	if (len < 0)
		panic("m_copydata: len %d < 0", len);
	if ((m = m_getptr(m, off, &off)) == NULL)
		panic("m_copydata: short mbuf chain");
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf");
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.  The mbuf needs to be properly initialized
 * including the setting of m_len.
 */
int
m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
{
	int mlen, totlen = 0;
	struct mbuf *m = m0, *n;
	caddr_t cp = (caddr_t)_cp;
	int error = 0;

	if (m0 == NULL)
		return (0);
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (off + len > MLEN) {
				MCLGETI(n, wait, NULL, off + len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			bzero(mtod(n, caddr_t), off);
			n->m_len = len + off;
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		/* extend last packet to be filled fully */
		if (m->m_next == NULL && (len > m->m_len - off))
			m->m_len += min(len - (m->m_len - off),
			    M_TRAILINGSPACE(m));
		mlen = min(m->m_len - off, len);
		bcopy(cp, mtod(m, caddr_t) + off, (size_t)mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;
		if (len == 0)
			break;
		off = 0;

		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (len > MLEN) {
				MCLGETI(n, wait, NULL, len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			n->m_len = len;
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return (error);
}

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), so the data
 * portion of n could end up in an mbuf of a different mbuf type.
 * Therefore both chains should be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
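
/*
 * Illustrative sketch: m_adj() below trims from the head for positive
 * req_len and from the tail for negative req_len, e.g. stripping an
 * Ethernet header and trailing CRC from a received frame:
 *
 *	m_adj(m, ETHER_HDR_LEN);	(drop link-level header)
 *	m_adj(m, -ETHER_CRC_LEN);	(drop trailing FCS)
 */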

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 && n->m_next &&
	    n->m_data + len < &n->m_dat[MLEN]) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else if ((n->m_flags & M_EXT) != 0 && len > MHLEN && n->m_next &&
	    n->m_data + len < &n->m_ext.ext_buf[n->m_ext.ext_size]) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MAXMCLBYTES)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		if (len > MHLEN) {
			MCLGETI(m, M_DONTWAIT, NULL, len);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				goto bad;
			}
		}
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}

	do {
		count = min(len, n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void)m_free(m);
		goto bad;
	}
	m->m_next = n;

	return (m);
bad:
	m_freem(n);
	return (NULL);
}
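
/*
 * Illustrative sketch: the canonical m_pullup() pattern from a
 * protocol input routine; on failure the chain has already been freed
 * (the counter bump is hypothetical):
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL) {
 *		ipstat.ips_toosmall++;
 *		return;
 *	}
 *	ip = mtod(m, struct ip *);
 */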

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else {
					return (NULL);
				}
			} else {
				m = m->m_next;
			}
		}
	}

	return (NULL);
}

/*
 * Inject a new mbuf chain of length siz in mbuf chain m0 at
 * position len0.  Returns a pointer to the first injected mbuf, or
 * NULL on failure (m0 is left undisturbed).  Note that if there is
 * enough space for an object of size siz in the appropriate position,
 * no memory will be allocated.  Also, there will be no data movement in
 * the first len0 bytes (pointers to that will remain valid).
 *
 * XXX It is assumed that siz is less than the size of an mbuf at the moment.
 */
struct mbuf *
m_inject(struct mbuf *m0, int len0, int siz, int wait)
{
	struct mbuf *m, *n, *n2 = NULL, *n3;
	unsigned len = len0, remain;

	if ((siz >= MHLEN) || (len0 <= 0))
		return (NULL);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (remain == 0) {
		if ((m->m_next) && (M_LEADINGSPACE(m->m_next) >= siz)) {
			m->m_next->m_len += siz;
			if (m0->m_flags & M_PKTHDR)
				m0->m_pkthdr.len += siz;
			m->m_next->m_data -= siz;
			return m->m_next;
		}
	} else {
		n2 = m_copym2(m, len, remain, wait);
		if (n2 == NULL)
			return (NULL);
	}

	MGET(n, wait, MT_DATA);
	if (n == NULL) {
		if (n2)
			m_freem(n2);
		return (NULL);
	}

	n->m_len = siz;
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += siz;
	m->m_len -= remain;	/* Trim */
	if (n2) {
		for (n3 = n; n3->m_next != NULL; n3 = n3->m_next)
			;
		n3->m_next = n2;
	} else
		n3 = n;
	for (; n3->m_next != NULL; n3 = n3->m_next)
		;
	n3->m_next = m->m_next;
	m->m_next = n;
	return n;
}
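
/*
 * Illustrative sketch: m_inject() above is used, e.g., by IPsec to
 * open a gap for an ESP header after the IP header; "skip" and "hlen"
 * are hypothetical (hlen must stay below MHLEN, see above):
 *
 *	n = m_inject(m, skip, hlen, M_DONTWAIT);
 *	if (n == NULL)
 *		goto drop;	(m is left undisturbed)
 */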

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		if (m_dup_pkthdr(n, m0, wait)) {
			m_freem(n);
			return (NULL);
		}
		n->m_pkthdr.len -= len0;
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(const void *, void *, size_t))
{
	struct mbuf *m;
	struct mbuf *top, **mp;
	int len;

	top = NULL;
	mp = &top;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;

	len = MHLEN;

	while (totlen > 0) {
		if (top != NULL) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}

		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/* Place initial small packet/header at end of mbuf. */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}

		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}

		m->m_len = len = min(totlen, len);

		if (copy)
			copy(buf, mtod(m, caddr_t), (size_t)len);
		else
			bcopy(buf, mtod(m, caddr_t), (size_t)len);

		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

void
m_zero(struct mbuf *m)
{
	while (m) {
#ifdef DIAGNOSTIC
		if (M_READONLY(m))
			panic("m_zero: M_READONLY");
#endif /* DIAGNOSTIC */
		if (m->m_flags & M_EXT)
			memset(m->m_ext.ext_buf, 0, m->m_ext.ext_size);
		else {
			if (m->m_flags & M_PKTHDR)
				memset(m->m_pktdat, 0, MHLEN);
			else
				memset(m->m_dat, 0, MLEN);
		}
		m = m->m_next;
	}
}
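
/*
 * Illustrative sketch: a legacy driver without DMA might use
 * m_devget() above to copy a received frame out of board memory and
 * hand it to the stack ("sc_rxbuf" and the input call are assumptions):
 *
 *	m = m_devget(sc->sc_rxbuf, pktlen, 0, ifp, NULL);
 *	if (m != NULL)
 *		ether_input_mbuf(ifp, m);
 */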

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
{
	int rval;
	unsigned int count;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);
	while (off > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		count = min(m->m_len - off, len);

		rval = f(fstate, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}

int
m_leadingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	return (m->m_flags & M_EXT ? m->m_data - m->m_ext.ext_buf :
	    m->m_flags & M_PKTHDR ? m->m_data - m->m_pktdat :
	    m->m_data - m->m_dat);
}

int
m_trailingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	return (m->m_flags & M_EXT ? m->m_ext.ext_buf +
	    m->m_ext.ext_size - (m->m_data + m->m_len) :
	    &m->m_dat[MLEN] - (m->m_data + m->m_len));
}

/*
 * Duplicate mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
{
	int error;

	KASSERT(from->m_flags & M_PKTHDR);

	to->m_flags = (to->m_flags & (M_EXT | M_CLUSTER));
	to->m_flags |= (from->m_flags & M_COPYFLAGS);
	to->m_pkthdr = from->m_pkthdr;

	SLIST_INIT(&to->m_pkthdr.tags);

	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
		return (error);

	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;

	return (0);
}
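
/*
 * Illustrative sketch: m_apply() above walks possibly non-contiguous
 * data with a callback, e.g. a software checksum; sum_cb() and
 * simple_sum() are hypothetical:
 *
 *	int
 *	sum_cb(caddr_t state, caddr_t data, unsigned int len)
 *	{
 *		*(u_int32_t *)state += simple_sum(data, len);
 *		return (0);	(non-zero would abort the walk)
 *	}
 *
 *	u_int32_t sum = 0;
 *	m_apply(m, 0, m->m_pkthdr.len, sum_cb, (caddr_t)&sum);
 */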

#ifdef DDB
void
m_print(void *v, int (*pr)(const char *, ...))
{
	struct mbuf *m = v;

	(*pr)("mbuf %p\n", m);
	(*pr)("m_type: %hi\tm_flags: %b\n", m->m_type, m->m_flags,
	    "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_CLUSTER\5M_PROTO1\6M_VLANTAG"
	    "\7M_LOOP\10M_FILDROP\11M_BCAST\12M_MCAST\13M_CONF\14M_AUTH"
	    "\15M_TUNNEL\16M_AUTH_AH\17M_LINK0");
	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
	(*pr)("m_dat: %p m_pktdat: %p\n", m->m_dat, m->m_pktdat);
	if (m->m_flags & M_PKTHDR) {
		(*pr)("m_pkthdr.len: %i\tm_pkthdr.rcvif: %p\t"
		    "m_pkthdr.rdomain: %u\n", m->m_pkthdr.len,
		    m->m_pkthdr.rcvif, m->m_pkthdr.rdomain);
		(*pr)("m_pkthdr.tags: %p\tm_pkthdr.tagsset: %hx\n",
		    SLIST_FIRST(&m->m_pkthdr.tags), m->m_pkthdr.tagsset);
		(*pr)("m_pkthdr.csum_flags: %hx\tm_pkthdr.ether_vtag: %hu\n",
		    m->m_pkthdr.csum_flags, m->m_pkthdr.ether_vtag);
		(*pr)("m_pkthdr.pf.flags: %b\n",
		    m->m_pkthdr.pf.flags, "\20\1GENERATED\2FRAGCACHE"
		    "\3TRANSLATE_LOCALHOST\4DIVERTED\5DIVERTED_PACKET"
		    "\6PF_TAG_REROUTE");
		(*pr)("m_pkthdr.pf.hdr: %p\tm_pkthdr.pf.statekey: %p\n",
		    m->m_pkthdr.pf.hdr, m->m_pkthdr.pf.statekey);
		(*pr)("m_pkthdr.pf.qid:\t%u m_pkthdr.pf.tag: %hu\n",
		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
		(*pr)("m_pkthdr.pf.prio:\t%u\n", m->m_pkthdr.pf.prio);
		(*pr)("m_pkthdr.pf.routed: %hx\n", m->m_pkthdr.pf.routed);
	}
	if (m->m_flags & M_EXT) {
		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
		    m->m_ext.ext_buf, m->m_ext.ext_size);
		(*pr)("m_ext.ext_type: %x\tm_ext.ext_backend: %i\n",
		    m->m_ext.ext_type, m->m_ext.ext_backend);
		(*pr)("m_ext.ext_ifp: %p\n", m->m_ext.ext_ifp);
		(*pr)("m_ext.ext_free: %p\tm_ext.ext_arg: %p\n",
		    m->m_ext.ext_free, m->m_ext.ext_arg);
		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
	}
}
#endif