/*	$OpenBSD: uipc_mbuf.c,v 1.160 2011/07/08 18:48:50 henning Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

/*
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>

#include <machine/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

struct	mbstat mbstat;		/* mbuf stats */
struct	pool mbpool;		/* mbuf pool */

/* mbuf cluster pools */
u_int	mclsizes[] = {
	MCLBYTES,	/* must be at slot 0 */
	4 * 1024,
	8 * 1024,
	9 * 1024,
	12 * 1024,
	16 * 1024,
	64 * 1024
};
static	char mclnames[MCLPOOLS][8];
struct	pool mclpools[MCLPOOLS];

int	m_clpool(u_int);

int	max_linkhdr;		/* largest link-level header */
int	max_protohdr;		/* largest protocol header */
int	max_hdr;		/* largest link+protocol header */
int	max_datalen;		/* MHLEN - max_hdr */

struct	timeout m_cltick_tmo;
int	m_clticks;
void	m_cltick(void *);

void	m_extfree(struct mbuf *);
struct	mbuf *m_copym0(struct mbuf *, int, int, int, int);
void	nmbclust_update(void);


const char *mclpool_warnmsg =
    "WARNING: mclpools limit reached; increase kern.maxclusters";

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{
	int i;

	pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", NULL);
	pool_set_constraints(&mbpool, &kp_dma_contig);
	pool_setlowat(&mbpool, mblowat);

	for (i = 0; i < nitems(mclsizes); i++) {
		snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
		    mclsizes[i] >> 10);
		pool_init(&mclpools[i], mclsizes[i], 0, 0, 0,
		    mclnames[i], NULL);
		pool_set_constraints(&mclpools[i], &kp_dma_contig);
		pool_setlowat(&mclpools[i], mcllowat);
	}

	nmbclust_update();

	timeout_set(&m_cltick_tmo, m_cltick, NULL);
	m_cltick(NULL);
}
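
/*
 * A note on the cluster size table above, with a small worked example
 * (illustrative only; it assumes the usual 2k MCLBYTES): a cluster
 * allocation is served by the first pool in mclsizes[] whose size is
 * large enough, as implemented by m_clpool() below.  For instance:
 *
 *	m_clpool(1500)	-> pool 0 ("mcl2k",  2k cluster)
 *	m_clpool(9000)	-> pool 3 ("mcl9k",  9k cluster)
 *	m_clpool(65536)	-> pool 6 ("mcl64k", 64k cluster)
 *	m_clpool(65537)	-> -1 (no backing pool; caller must cope)
 */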

void
nmbclust_update(void)
{
	int i;
	/*
	 * Set the hard limit on the mclpools to the number of
	 * mbuf clusters the kernel is to support.  Log the
	 * limit-reached message at most once a minute.
	 */
	for (i = 0; i < nitems(mclsizes); i++) {
		(void)pool_sethardlimit(&mclpools[i], nmbclust,
		    mclpool_warnmsg, 60);
		/*
		 * XXX this needs to be reconsidered.
		 * Setting the high water mark to nmbclust is too high
		 * but we need to have enough spare buffers around so that
		 * allocations in interrupt context don't fail or mclgeti()
		 * drivers may end up with empty rings.
		 */
		pool_sethiwat(&mclpools[i], nmbclust);
	}
	pool_sethiwat(&mbpool, nmbclust);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	struct protosw *pr;
	int s = splnet();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
	splx(s);
}

/*
 * Space allocation routines.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;
	int s;

	s = splnet();
	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m)
		mbstat.m_mtypes[type]++;
	splx(s);
	if (m) {
		m->m_type = type;
		m->m_next = (struct mbuf *)NULL;
		m->m_nextpkt = (struct mbuf *)NULL;
		m->m_data = m->m_dat;
		m->m_flags = 0;
	}
	return (m);
}

/*
 * ATTN: When changing anything here, check m_inithdr() and m_defrag();
 * those may need to change as well.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;
	int s;

	s = splnet();
	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m)
		mbstat.m_mtypes[type]++;
	splx(s);
	if (m) {
		m->m_type = type;

		/* keep in sync with m_inithdr */
		m->m_next = (struct mbuf *)NULL;
		m->m_nextpkt = (struct mbuf *)NULL;
		m->m_data = m->m_pktdat;
		m->m_flags = M_PKTHDR;
		bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
		m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
	}
	return (m);
}

struct mbuf *
m_inithdr(struct mbuf *m)
{
	/* keep in sync with m_gethdr */
	m->m_next = (struct mbuf *)NULL;
	m->m_nextpkt = (struct mbuf *)NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;

	return (m);
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == NULL)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

int
m_clpool(u_int pktlen)
{
	int pi;

	for (pi = 0; pi < MCLPOOLS; pi++) {
		if (pktlen <= mclsizes[pi])
			return (pi);
	}

	return (-1);
}

void
m_clinitifp(struct ifnet *ifp)
{
	struct mclpool *mclp = ifp->if_data.ifi_mclpool;
	int i;

	/* Initialize high water marks for use of cluster pools */
	for (i = 0; i < MCLPOOLS; i++) {
		mclp = &ifp->if_data.ifi_mclpool[i];

		if (mclp->mcl_lwm == 0)
			mclp->mcl_lwm = 2;
		if (mclp->mcl_hwm == 0)
			mclp->mcl_hwm = 32768;

		mclp->mcl_cwm = MAX(4, mclp->mcl_lwm);
	}
}

void
m_clsetwms(struct ifnet *ifp, u_int pktlen, u_int lwm, u_int hwm)
{
	int pi;

	pi = m_clpool(pktlen);
	if (pi == -1)
		return;

	ifp->if_data.ifi_mclpool[pi].mcl_lwm = lwm;
	ifp->if_data.ifi_mclpool[pi].mcl_hwm = hwm;
}
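
/*
 * Example (hedged sketch, not compiled here): a driver with a 256-slot
 * receive ring would typically bound its cluster usage from its attach
 * routine; the ring size below is an assumption for illustration:
 *
 *	m_clsetwms(ifp, MCLBYTES, 2, 256);
 *
 * i.e. keep at least 2 and at most 256 clusters of the pool that backs
 * MCLBYTES-sized packets alive on this interface.  m_cldrop() below
 * moves the effective limit between those two bounds.
 */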

/*
 * Record when the last timeout has been run.  If the delta is
 * too high, m_cldrop() will notice and decrease the interface
 * high water marks.
 */
void
m_cltick(void *arg)
{
	extern int ticks;

	m_clticks = ticks;
	timeout_add(&m_cltick_tmo, 1);
}

int m_livelock;
u_int mcllivelocks;

int
m_cldrop(struct ifnet *ifp, int pi)
{
	static int liveticks;
	struct mclpool *mclp;
	extern int ticks;
	int i;

	if (ticks - m_clticks > 1) {
		struct ifnet *aifp;

		/*
		 * Timeout did not run, so we are in some kind of livelock.
		 * Decrease the cluster allocation high water marks on all
		 * interfaces and prevent them from growing for the very
		 * near future.
		 */
		m_livelock = 1;
		mcllivelocks++;
		m_clticks = liveticks = ticks;
		TAILQ_FOREACH(aifp, &ifnet, if_list) {
			mclp = aifp->if_data.ifi_mclpool;
			for (i = 0; i < MCLPOOLS; i++) {
				int diff = max(mclp[i].mcl_cwm / 8, 2);
				mclp[i].mcl_cwm = max(mclp[i].mcl_lwm,
				    mclp[i].mcl_cwm - diff);
			}
		}
	} else if (m_livelock && (ticks - liveticks) > 4)
		m_livelock = 0;	/* Let the high water marks grow again */

	mclp = &ifp->if_data.ifi_mclpool[pi];
	if (m_livelock == 0 && ISSET(ifp->if_flags, IFF_RUNNING) &&
	    mclp->mcl_alive <= 4 && mclp->mcl_cwm < mclp->mcl_hwm &&
	    mclp->mcl_grown < ticks) {
		/* About to run out, so increase the current watermark */
		mclp->mcl_cwm++;
		mclp->mcl_grown = ticks;
	} else if (mclp->mcl_alive >= mclp->mcl_cwm)
		return (1);		/* No more packets given */

	return (0);
}

void
m_clcount(struct ifnet *ifp, int pi)
{
	ifp->if_data.ifi_mclpool[pi].mcl_alive++;
}

void
m_cluncount(struct mbuf *m, int all)
{
	struct mbuf_ext *me;

	do {
		me = &m->m_ext;
		if (((m->m_flags & (M_EXT|M_CLUSTER)) != (M_EXT|M_CLUSTER)) ||
		    (me->ext_ifp == NULL))
			continue;

		me->ext_ifp->if_data.ifi_mclpool[me->ext_backend].mcl_alive--;
		me->ext_ifp = NULL;
	} while (all && (m = m->m_next));
}

struct mbuf *
m_clget(struct mbuf *m, int how, struct ifnet *ifp, u_int pktlen)
{
	struct mbuf *m0 = NULL;
	int pi;
	int s;

	pi = m_clpool(pktlen);
#ifdef DIAGNOSTIC
	if (pi == -1)
		panic("m_clget: request for %u byte cluster", pktlen);
#endif

	s = splnet();

	if (ifp != NULL && m_cldrop(ifp, pi)) {
		splx(s);
		return (NULL);
	}

	if (m == NULL) {
		MGETHDR(m0, M_DONTWAIT, MT_DATA);
		if (m0 == NULL) {
			splx(s);
			return (NULL);
		}
		m = m0;
	}
	m->m_ext.ext_buf = pool_get(&mclpools[pi],
	    how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (!m->m_ext.ext_buf) {
		if (m0)
			m_freem(m0);
		splx(s);
		return (NULL);
	}
	if (ifp != NULL)
		m_clcount(ifp, pi);
	splx(s);

	m->m_data = m->m_ext.ext_buf;
	m->m_flags |= M_EXT|M_CLUSTER;
	m->m_ext.ext_size = mclpools[pi].pr_size;
	m->m_ext.ext_free = NULL;
	m->m_ext.ext_arg = NULL;
	m->m_ext.ext_backend = pi;
	m->m_ext.ext_ifp = ifp;
	MCLINITREFERENCE(m);
	return (m);
}
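
/*
 * Example (hedged sketch of the MCLGETI rx-ring pattern; the softc
 * fields are hypothetical): a driver refills its receive ring with
 * interface-accounted clusters and simply stops refilling when
 * m_cldrop() says the interface is over its current watermark:
 *
 *	struct mbuf *m;
 *
 *	while (sc->sc_rx_cnt < sc->sc_rx_size) {
 *		m = MCLGETI(NULL, M_DONTWAIT, ifp, MCLBYTES);
 *		if (m == NULL)
 *			break;
 *		m->m_len = m->m_pkthdr.len = MCLBYTES;
 *		(load m into an rx descriptor, sc->sc_rx_cnt++)
 *	}
 *
 * The cluster stays accounted against ifp until m_cluncount() runs,
 * e.g. from m_extfree() when the mbuf is freed.
 */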

struct mbuf *
m_free_unlocked(struct mbuf *m)
{
	struct mbuf *n;

	mbstat.m_mtypes[m->m_type]--;
	if (m->m_flags & M_PKTHDR)
		m_tag_delete_chain(m);
	if (m->m_flags & M_EXT)
		m_extfree(m);
	n = m->m_next;
	pool_put(&mbpool, m);

	return (n);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	s = splnet();
	n = m_free_unlocked(m);
	splx(s);

	return (n);
}

void
m_extfree(struct mbuf *m)
{
	if (MCLISREFERENCED(m)) {
		m->m_ext.ext_nextref->m_ext.ext_prevref =
		    m->m_ext.ext_prevref;
		m->m_ext.ext_prevref->m_ext.ext_nextref =
		    m->m_ext.ext_nextref;
	} else if (m->m_flags & M_CLUSTER) {
		m_cluncount(m, 0);
		pool_put(&mclpools[m->m_ext.ext_backend],
		    m->m_ext.ext_buf);
	} else if (m->m_ext.ext_free)
		(*(m->m_ext.ext_free))(m->m_ext.ext_buf,
		    m->m_ext.ext_size, m->m_ext.ext_arg);
	else
		panic("unknown type of extension buffer");
	m->m_ext.ext_size = 0;
	m->m_flags &= ~(M_EXT|M_CLUSTER);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	if (m == NULL)
		return;
	s = splnet();
	do {
		n = m_free_unlocked(m);
	} while ((m = n) != NULL);
	splx(s);
}

/*
 * mbuf chain defragmenter.  This function uses some evil tricks to defragment
 * an mbuf chain into a single buffer without changing the mbuf pointer.
 * It needs to know a lot of the mbuf internals to make this work.
 */
int
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0;

	if (m->m_next == NULL)
		return 0;

#ifdef DIAGNOSTIC
	if (!(m->m_flags & M_PKTHDR))
		panic("m_defrag: no packet hdr or not a chain");
#endif

	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
		return -1;
	if (m->m_pkthdr.len > MHLEN) {
		MCLGETI(m0, how, NULL, m->m_pkthdr.len);
		if (!(m0->m_flags & M_EXT)) {
			m_free(m0);
			return -1;
		}
	}
	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;

	/* free chain behind and possible ext buf on the first mbuf */
	m_freem(m->m_next);
	m->m_next = NULL;

	if (m->m_flags & M_EXT) {
		int s = splnet();
		m_extfree(m);
		splx(s);
	}

	/*
	 * Bounce copy mbuf over to the original mbuf and set everything up.
	 * This needs to reset or clear all pointers that may go into the
	 * original mbuf chain.
	 */
	if (m0->m_flags & M_EXT) {
		bcopy(&m0->m_ext, &m->m_ext, sizeof(struct mbuf_ext));
		MCLINITREFERENCE(m);
		m->m_flags |= M_EXT|M_CLUSTER;
		m->m_data = m->m_ext.ext_buf;
	} else {
		m->m_data = m->m_pktdat;
		bcopy(m0->m_data, m->m_data, m0->m_len);
	}
	m->m_pkthdr.len = m->m_len = m0->m_len;
	m->m_pkthdr.pf.hdr = NULL;	/* altq will cope */

	m0->m_flags &= ~(M_EXT|M_CLUSTER);	/* cluster is gone */
	m_free(m0);

	return 0;
}
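
/*
 * Example (hedged sketch, hypothetical driver code): m_defrag() is
 * typically a fallback when DMA mapping fails because a chain has too
 * many segments; since it keeps the mbuf pointer stable, the caller's
 * references remain valid:
 *
 *	if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
 *	    BUS_DMA_NOWAIT) == EFBIG) {
 *		if (m_defrag(m, M_DONTWAIT) ||
 *		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
 *		    BUS_DMA_NOWAIT)) {
 *			m_freem(m);
 *			return;
 *		}
 *	}
 */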

/*
 * Mbuffer utility routines.
 */

/*
 * Ensure len bytes of contiguous space at the beginning of the mbuf chain
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	if (M_LEADINGSPACE(m) >= len) {
		m->m_data -= len;
		m->m_len += len;
	} else {
		MGET(mn, how, m->m_type);
		if (mn == NULL) {
			m_freem(m);
			return (NULL);
		}
		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(mn, m);
		mn->m_next = m;
		m = mn;
		MH_ALIGN(m, len);
		m->m_len = len;
	}
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len += len;
	return (m);
}

/*
 * Make a copy of an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
struct mbuf *
m_copym(struct mbuf *m, int off, int len, int wait)
{
	return m_copym0(m, off, len, wait, 0);	/* shallow copy on M_EXT */
}

/*
 * m_copym2() is like m_copym(), except it COPIES cluster mbufs, instead
 * of merely bumping the reference count.
 */
struct mbuf *
m_copym2(struct mbuf *m, int off, int len, int wait)
{
	return m_copym0(m, off, len, wait, 1);	/* deep copy */
}

struct mbuf *
m_copym0(struct mbuf *m0, int off, int len, int wait, int deep)
{
	struct mbuf *m, *n, **np;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym0: off %d, len %d", off, len);
	if (off == 0 && m0->m_flags & M_PKTHDR)
		copyhdr = 1;
	if ((m = m_getptr(m0, off, &off)) == NULL)
		panic("m_copym0: short mbuf chain");
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("m_copym0: m == NULL and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (m_dup_pkthdr(n, m0, wait))
				goto nospace;
			if (len != M_COPYALL)
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				n->m_ext = m->m_ext;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * We are unsure about the way m was
				 * allocated; copy into multiple MCLBYTES
				 * cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
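
/*
 * Example (hedged sketch): the shallow/deep distinction matters when
 * the copy and the original have different lifetimes or writers.  A
 * retransmission-style path can use the cheap reference-counted copy,
 * while a writer must take a private one:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *		(clusters shared; n must be treated read-only,
 *		 cf. M_READONLY())
 *
 *	n = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
 *		(cluster data duplicated; n may be modified freely)
 */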

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
{
	unsigned count;

	if (off < 0)
		panic("m_copydata: off %d < 0", off);
	if (len < 0)
		panic("m_copydata: len %d < 0", len);
	if ((m = m_getptr(m, off, &off)) == NULL)
		panic("m_copydata: short mbuf chain");
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf");
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.  The mbuf needs to be properly initialized
 * including the setting of m_len.
 */
int
m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
{
	int mlen, totlen = 0;
	struct mbuf *m = m0, *n;
	caddr_t cp = (caddr_t)_cp;
	int error = 0;

	if (m0 == NULL)
		return (0);
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (off + len > MLEN) {
				MCLGETI(n, wait, NULL, off + len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			bzero(mtod(n, caddr_t), off);
			n->m_len = len + off;
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		/* extend last packet to be filled fully */
		if (m->m_next == NULL && (len > m->m_len - off))
			m->m_len += min(len - (m->m_len - off),
			    M_TRAILINGSPACE(m));
		mlen = min(m->m_len - off, len);
		bcopy(cp, mtod(m, caddr_t) + off, (size_t)mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;
		if (len == 0)
			break;
		off = 0;

		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (len > MLEN) {
				MCLGETI(n, wait, NULL, len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			n->m_len = len;
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return (error);
}

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), so the data portion
 * of n could end up in an mbuf of a different mbuf type.  Therefore both
 * chains should be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
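
/*
 * Example (hedged sketch): m_copydata() and m_copyback() are the safe
 * way to read or patch fields that may straddle mbuf boundaries, e.g.
 * rewriting a 16-bit checksum at a known offset without assuming it is
 * contiguous:
 *
 *	u_int16_t sum;
 *
 *	m_copydata(m, offset, sizeof(sum), (caddr_t)&sum);
 *	sum = recompute(sum);
 *		(recompute() is a hypothetical helper)
 *	if (m_copyback(m, offset, sizeof(sum), &sum, M_DONTWAIT))
 *		goto drop;
 *		(allocation failed while extending the chain)
 */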

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else if ((n->m_flags & M_EXT) != 0 && len > MHLEN &&
	    n->m_data + len < &n->m_data[MCLBYTES] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MCLBYTES)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		if (len > MHLEN) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				goto bad;
			}
		}
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}

	do {
		count = min(len, n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void)m_free(m);
		goto bad;
	}
	m->m_next = n;

	return (m);
bad:
	m_freem(n);
	return (NULL);
}
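
/*
 * Example (sketch of the canonical m_pullup() idiom in protocol input
 * routines): make sure the header can be dereferenced via mtod()
 * before touching it, and remember that the chain head may change:
 *
 *	struct ip *ip;
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL) {
 *		ipstat.ips_toosmall++;
 *		return;
 *	}
 *	ip = mtod(m, struct ip *);
 *
 * On failure the chain has already been freed, so the caller must not
 * touch the old pointer.
 */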

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else {
					return (NULL);
				}
			} else {
				m = m->m_next;
			}
		}
	}

	return (NULL);
}

/*
 * Inject a new mbuf chain of length siz in mbuf chain m0 at
 * position len0.  Returns a pointer to the first injected mbuf, or
 * NULL on failure (m0 is left undisturbed).  Note that if there is
 * enough space for an object of size siz in the appropriate position,
 * no memory will be allocated.  Also, there will be no data movement in
 * the first len0 bytes (pointers to that will remain valid).
 *
 * XXX It is assumed that siz is less than the size of an mbuf at the moment.
 */
struct mbuf *
m_inject(struct mbuf *m0, int len0, int siz, int wait)
{
	struct mbuf *m, *n, *n2 = NULL, *n3;
	unsigned len = len0, remain;

	if ((siz >= MHLEN) || (len0 <= 0))
		return (NULL);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (remain == 0) {
		if ((m->m_next) && (M_LEADINGSPACE(m->m_next) >= siz)) {
			m->m_next->m_len += siz;
			if (m0->m_flags & M_PKTHDR)
				m0->m_pkthdr.len += siz;
			m->m_next->m_data -= siz;
			return m->m_next;
		}
	} else {
		n2 = m_copym2(m, len, remain, wait);
		if (n2 == NULL)
			return (NULL);
	}

	MGET(n, wait, MT_DATA);
	if (n == NULL) {
		if (n2)
			m_freem(n2);
		return (NULL);
	}

	n->m_len = siz;
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += siz;
	m->m_len -= remain;		/* Trim */
	if (n2) {
		for (n3 = n; n3->m_next != NULL; n3 = n3->m_next)
			;
		n3->m_next = n2;
	} else
		n3 = n;
	for (; n3->m_next != NULL; n3 = n3->m_next)
		;
	n3->m_next = m->m_next;
	m->m_next = n;
	return n;
}
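
/*
 * Example (hedged sketch): m_getptr() resolves a chain-relative offset
 * into an (mbuf, in-mbuf offset) pair, which is how m_copydata() above
 * skips its leading bytes.  For instance, to locate the byte at offset
 * 100 of a packet:
 *
 *	struct mbuf *n;
 *	int off;
 *
 *	if ((n = m_getptr(m, 100, &off)) == NULL)
 *		return (EINVAL);
 *		(chain shorter than 100 bytes)
 *	byte = mtod(n, u_char *)[off];
 */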

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		if (m_dup_pkthdr(n, m0, wait)) {
			m_freem(n);
			return (NULL);
		}
		n->m_pkthdr.len -= len0;
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(const void *, void *, size_t))
{
	struct mbuf *m;
	struct mbuf *top, **mp;
	int len;

	top = NULL;
	mp = &top;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;

	len = MHLEN;

	while (totlen > 0) {
		if (top != NULL) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}

		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/* Place initial small packet/header at end of mbuf. */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}

		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}

		m->m_len = len = min(totlen, len);

		if (copy)
			copy(buf, mtod(m, caddr_t), (size_t)len);
		else
			bcopy(buf, mtod(m, caddr_t), (size_t)len);

		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

void
m_zero(struct mbuf *m)
{
	while (m) {
#ifdef DIAGNOSTIC
		if (M_READONLY(m))
			panic("m_zero: M_READONLY");
#endif /* DIAGNOSTIC */
		if (m->m_flags & M_EXT)
			memset(m->m_ext.ext_buf, 0, m->m_ext.ext_size);
		else {
			if (m->m_flags & M_PKTHDR)
				memset(m->m_pktdat, 0, MHLEN);
			else
				memset(m->m_dat, 0, MLEN);
		}
		m = m->m_next;
	}
}
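
/*
 * Example (hedged sketch, after the pattern used by IP fragmentation;
 * mtu_payload is a hypothetical variable): cut a packet at a payload
 * boundary; on success the original chain holds the first len0 bytes
 * and the returned chain the rest, each with a consistent
 * m_pkthdr.len:
 *
 *	struct mbuf *tail;
 *
 *	if ((tail = m_split(m, mtu_payload, M_DONTWAIT)) == NULL)
 *		goto dropit;
 *		(m is left unchanged on failure)
 *	(prepend a fresh header to tail, transmit both)
 */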

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
{
	int rval;
	unsigned int count;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);
	while (off > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		count = min(m->m_len - off, len);

		rval = f(fstate, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}

int
m_leadingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	return (m->m_flags & M_EXT ? m->m_data - m->m_ext.ext_buf :
	    m->m_flags & M_PKTHDR ? m->m_data - m->m_pktdat :
	    m->m_data - m->m_dat);
}

int
m_trailingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	return (m->m_flags & M_EXT ? m->m_ext.ext_buf +
	    m->m_ext.ext_size - (m->m_data + m->m_len) :
	    &m->m_dat[MLEN] - (m->m_data + m->m_len));
}


/*
 * Duplicate the mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
{
	int error;

	KASSERT(from->m_flags & M_PKTHDR);

	to->m_flags = (to->m_flags & (M_EXT | M_CLUSTER));
	to->m_flags |= (from->m_flags & M_COPYFLAGS);
	to->m_pkthdr = from->m_pkthdr;

	SLIST_INIT(&to->m_pkthdr.tags);

	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
		return (error);

	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;

	return (0);
}
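
/*
 * Example (hedged sketch): m_apply() runs a callback over each
 * contiguous region of the chain, which suits incremental digests.
 * A hypothetical checksum accumulator could look like:
 *
 *	int
 *	sum_cb(caddr_t state, caddr_t data, unsigned int len)
 *	{
 *		*(u_int32_t *)state += simple_sum(data, len);
 *		return (0);
 *		(a nonzero return would abort the walk)
 *	}
 *
 *	u_int32_t sum = 0;
 *	m_apply(m, 0, m->m_pkthdr.len, sum_cb, (caddr_t)&sum);
 *
 * where simple_sum() is an assumed helper, not a kernel primitive.
 */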

#ifdef DDB
void
m_print(void *v, int (*pr)(const char *, ...))
{
	struct mbuf *m = v;

	(*pr)("mbuf %p\n", m);
	(*pr)("m_type: %hi\tm_flags: %b\n", m->m_type, m->m_flags,
	    "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_CLUSTER\5M_PROTO1\6M_VLANTAG"
	    "\7M_LOOP\10M_FILDROP\11M_BCAST\12M_MCAST\13M_CONF\14M_AUTH"
	    "\15M_TUNNEL\16M_AUTH_AH\17M_LINK0");
	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
	(*pr)("m_dat: %p m_pktdat: %p\n", m->m_dat, m->m_pktdat);
	if (m->m_flags & M_PKTHDR) {
		(*pr)("m_pkthdr.len: %i\tm_pkthdr.rcvif: %p\t"
		    "m_pkthdr.rdomain: %u\n", m->m_pkthdr.len,
		    m->m_pkthdr.rcvif, m->m_pkthdr.rdomain);
		(*pr)("m_pkthdr.tags: %p\tm_pkthdr.tagsset: %hx\n",
		    SLIST_FIRST(&m->m_pkthdr.tags), m->m_pkthdr.tagsset);
		(*pr)("m_pkthdr.csum_flags: %hx\tm_pkthdr.ether_vtag: %hu\n",
		    m->m_pkthdr.csum_flags, m->m_pkthdr.ether_vtag);
		(*pr)("m_pkthdr.pf.flags: %b\n",
		    m->m_pkthdr.pf.flags, "\20\1GENERATED\2FRAGCACHE"
		    "\3TRANSLATE_LOCALHOST\4DIVERTED\5DIVERTED_PACKET"
		    "\6PF_TAG_REROUTE");
		(*pr)("m_pkthdr.pf.hdr: %p\tm_pkthdr.pf.statekey: %p\n",
		    m->m_pkthdr.pf.hdr, m->m_pkthdr.pf.statekey);
		(*pr)("m_pkthdr.pf.qid:\t%u m_pkthdr.pf.tag: %hu\n",
		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
		(*pr)("m_pkthdr.pf.prio:\t%u\n", m->m_pkthdr.pf.prio);
		(*pr)("m_pkthdr.pf.routed: %hhx\n", m->m_pkthdr.pf.routed);
	}
	if (m->m_flags & M_EXT) {
		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
		    m->m_ext.ext_buf, m->m_ext.ext_size);
		(*pr)("m_ext.ext_type: %x\tm_ext.ext_backend: %i\n",
		    m->m_ext.ext_type, m->m_ext.ext_backend);
		(*pr)("m_ext.ext_ifp: %p\n", m->m_ext.ext_ifp);
		(*pr)("m_ext.ext_free: %p\tm_ext.ext_arg: %p\n",
		    m->m_ext.ext_free, m->m_ext.ext_arg);
		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
	}
}
#endif