/*	$OpenBSD: bpf.c,v 1.169 2018/03/02 16:57:41 bluhm Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/srp.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/task.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/* from kern/kern_clock.c; incremented each clock tick. */
extern int ticks;

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if *, struct ifreq *);
int	_bpf_mtap(caddr_t, const struct mbuf *, u_int,
	    void (*)(const void *, void *, size_t));
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, u_int, struct mbuf **,
	    struct sockaddr *, struct bpf_insn *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    void (*)(const void *, void *, size_t), struct timeval *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);

/*
 * garbage collector srps
 */

void	bpf_d_ref(void *, void *);
void	bpf_d_unref(void *, void *);
struct srpl_rc bpf_d_rc = SRPL_RC_INITIALIZER(bpf_d_ref, bpf_d_unref, NULL);

void	bpf_insn_dtor(void *, void *);
struct srp_gc bpf_insn_gc = SRP_GC_INITIALIZER(bpf_insn_dtor, NULL);

struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, u_int linktype, struct mbuf **mp,
    struct sockaddr *sockp, struct bpf_insn *filter)
{
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETI(m, M_WAIT, NULL, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(filter, mtod(m, u_char *), len, len);
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
bad:
	m_freem(m);
	return (error);
}
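
/*
 * Illustrative userland sketch (not kernel code): bpf_movein() above
 * services write(2) on a bpf descriptor.  For a DLT_EN10MB descriptor
 * the buffer handed to write(2) must begin with a complete ethernet
 * header, which bpf_movein() peels off into the sockaddr.  The device
 * unit, interface name, and frame variables below are assumptions for
 * illustration only:
 *
 *	int fd = open("/dev/bpf0", O_RDWR);	// hypothetical unit
 *	struct ifreq ifr;
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	// frame: 14-byte ethernet header followed by the payload
 *	write(fd, frame, framelen);
 */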

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SRPL_INSERT_HEAD_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SRPL_REMOVE_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bpf_d, bd_next);

	if (SRPL_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device
 * number, EBUSY if the descriptor cannot be allocated.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);

	if (flag & FNONBLOCK)
		bd->bd_rtout = -1;

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
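
/*
 * Sketch of the triple-buffer scheme ROTATE_BUFFERS() implements
 * (illustrative only): packets are appended to the store buffer
 * (bd_sbuf/bd_slen), read(2) drains the hold buffer (bd_hbuf/bd_hlen),
 * and bd_fbuf is the spare.  A rotation shifts the roles:
 *
 *	before:  sbuf = A (slen bytes)   hbuf = NULL   fbuf = B
 *	after:   sbuf = B (slen = 0)     hbuf = A      fbuf = NULL
 *
 * bd_fbuf stays NULL until the reader hands back the drained hold
 * buffer, so a full store buffer with no spare means packets must be
 * dropped (see bpf_catchpacket()).
 */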

/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int hlen, error;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, bd_rdStart is tagged when we start the read.
	 * We can then figure out when we're done reading.
	 */
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	else
		d->bd_rdStart = 0;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rtout == -1) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else {
			if ((d->bd_rdStart + d->bd_rtout) < ticks) {
				error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
				    "bpf", d->bd_rtout);
			} else
				error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}
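
/*
 * Illustrative userland sketch: because bpfread() rejects any read
 * that is not exactly bd_bufsize bytes, consumers query the size with
 * BIOCGBLEN and always read whole buffers.  Variable names here are
 * assumptions for illustration:
 *
 *	u_int blen;
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);	// n may span many packets
 */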

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * As long as csignal() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	bpf_get(d);
	if (!task_add(systq, &d->bd_wake_task))
		bpf_put(d);
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	KERNEL_ASSERT_LOCKED();

	wakeup(d);
	if (d->bd_async && d->bd_sig)
		csignal(d->bd_pgid, d->bd_sig, d->bd_siguid, d->bd_sigeuid);

	selwakeup(&d->bd_sel);
	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	struct bpf_program *bf;
	struct bpf_insn *fcode = NULL;
	int error;
	struct sockaddr_storage dst;
	u_int dlt;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
	bf = srp_get_locked(&d->bd_wfilter);
	if (bf != NULL)
		fcode = bf->bf_insns;

	dlt = d->bd_bif->bif_dlt;

	error = bpf_movein(uio, dlt, &m, sstosa(&dst), fcode);
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}
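
/*
 * Illustrative userland sketch of a typical capture setup using the
 * ioctls handled below.  The interface name and ordering are
 * assumptions for illustration:
 *
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *	struct ifreq ifr;
 *	u_int imm = 1;
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	// attach to an interface
 *	ioctl(fd, BIOCIMMEDIATE, &imm);	// return packets as they arrive
 *	ioctl(fd, BIOCPROMISC, NULL);	// optional: promiscuous mode
 */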

/*
 * FIONREAD		Check for read packet available.
 * BIOCGBLEN		Get buffer len [for read()].
 * BIOCSETF		Set ethernet read filter.
 * BIOCFLUSH		Flush read packet buffer.
 * BIOCPROMISC		Put interface into promiscuous mode.
 * BIOCGDLTLIST		Get supported link layer types.
 * BIOCGDLT		Get link layer type.
 * BIOCSDLT		Set link layer type.
 * BIOCGETIF		Get interface name.
 * BIOCSETIF		Set interface.
 * BIOCSRTIMEOUT	Set read timeout.
 * BIOCGRTIMEOUT	Get read timeout.
 * BIOCGSTATS		Get packet stats.
 * BIOCIMMEDIATE	Set immediate mode.
 * BIOCVERSION		Get filter language version.
 * BIOCGHDRCMPLT	Get "header already complete" flag
 * BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
	{
		int n;

		mtx_enter(&d->bd_mtx);
		n = d->bd_slen;
		if (d->bd_hbuf != NULL)
			n += d->bd_hlen;
		mtx_leave(&d->bd_mtx);

		*(int *)addr = n;
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;

		/* Compute number of ticks. */
		d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
		if (d->bd_rtout == 0 && tv->tv_usec != 0)
			d->bd_rtout = 1;
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;

		tv->tv_sec = d->bd_rtout / hz;
		tv->tv_usec = (d->bd_rtout % hz) * tick;
		break;
	}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
	{
		struct bpf_stat *bs = (struct bpf_stat *)addr;

		bs->bs_recv = d->bd_rcount;
		bs->bs_drop = d->bd_dcount;
		break;
	}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
	{
		struct bpf_version *bv = (struct bpf_version *)addr;

		bv->bv_major = BPF_MAJOR_VERSION;
		bv->bv_minor = BPF_MINOR_VERSION;
		break;
	}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP:	/* set "filter-drop" flag */
		d->bd_fildrop = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rtout = -1;
		else
			d->bd_rtout = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	/*
	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
	 * TIOCSPGRP's arg is a process group if it's positive and a process
	 * id if it's negative.  This is exactly the opposite of what the
	 * other two functions want!  Therefore there is code in ioctl and
	 * fcntl to negate the arg before calling here.
	 */
	case TIOCSPGRP:		/* Process or group to send signals to */
		d->bd_pgid = *(int *)addr;
		d->bd_siguid = p->p_ucred->cr_ruid;
		d->bd_sigeuid = p->p_ucred->cr_uid;
		break;

	case TIOCGPGRP:
		*(int *)addr = d->bd_pgid;
		break;

	case BIOCSRSIG:		/* Set receive signal */
	{
		u_int sig;

		sig = *(u_int *)addr;

		if (sig >= NSIG)
			error = EINVAL;
		else
			d->bd_sig = sig;
		break;
	}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}
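
/*
 * Illustrative userland sketch: a filter program for BIOCSETF (handled
 * by bpf_setf() below) is an array of struct bpf_insn.  This example,
 * which accepts only IP over ethernet and truncates matches to 96
 * bytes, is an assumption for illustration, not part of this file:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),	// load ethertype
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, 96),		// accept 96 bytes
 *		BPF_STMT(BPF_RET+BPF_K, 0),		// drop
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = sizeof(insns) / sizeof(insns[0]),
 *		.bf_insns = insns,
 *	};
 *	ioctl(fd, BIOCSETF, &prog);
 */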

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program *bf;
	struct srp *filter;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();
	filter = wf ? &d->bd_wfilter : &d->bd_rfilter;

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		srp_update_locked(&bpf_insn_gc, filter, NULL);
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
	    M_WAITOK | M_CANFAIL);
	if (fcode == NULL)
		return (ENOMEM);

	size = flen * sizeof(*fp->bf_insns);
	if (copyin(fp->bf_insns, fcode, size) != 0 ||
	    bpf_validate(fcode, (int)flen) == 0) {
		free(fcode, M_DEVBUF, size);
		return (EINVAL);
	}

	bf = malloc(sizeof(*bf), M_DEVBUF, M_WAITOK);
	bf->bf_len = flen;
	bf->bf_insns = fcode;

	srp_update_locked(&bpf_insn_gc, filter, bf);

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}
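
/*
 * Illustrative userland sketch for bpfpoll() below: a descriptor is
 * readable once the hold buffer has data, or as soon as the store
 * buffer has data when immediate mode is on.  The timeout value is an
 * assumption for illustration:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN))
 *		read(fd, buf, blen);
 */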

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages it to trigger some race condition
	 * which causes bpfilter_lookup to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We still should recheck if we can fix the root
	 * cause of this issue.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			/*
			 * if there's a timeout, mark the time we
			 * started waiting.
			 */
			if (d->bd_rtout != -1 && d->bd_rdStart == 0)
				d->bd_rdStart = ticks;
			selrecord(p, &d->bd_sel);
		}
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);

	mtx_enter(&d->bd_mtx);
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	mtx_leave(&d->bd_mtx);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Like bpf_mtap(), but a copy function can be given.  Used by the
 * various bpf_mtap*() variants.
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct srp_ref sr;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct timeval tv;
	int gottime = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (cpfn == NULL)
		cpfn = bpf_mcopy;

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	SRPL_FOREACH(d, &sr, &bp->bif_dlist, bd_next) {
		atomic_inc_long(&d->bd_rcount);

		if ((direction & d->bd_dirfilt) != 0)
			slen = 0;
		else {
			struct srp_ref bsr;
			struct bpf_program *bf;
			struct bpf_insn *fcode = NULL;

			bf = srp_enter(&bsr, &d->bd_rfilter);
			if (bf != NULL)
				fcode = bf->bf_insns;
			slen = bpf_mfilter(fcode, m, pktlen);
			srp_leave(&bsr);
		}

		if (slen > 0) {
			if (!gottime++)
				microtime(&tv);

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn,
			    &tv);
			mtx_leave(&d->bd_mtx);

			if (d->bd_fildrop)
				drop = 1;
		}
	}
	SRPL_LEAVE(&sr);

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header.  In this situation we already have a
 * way of representing a chain of memory buffers, i.e. mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return _bpf_mtap(arg, m0, direction, bpf_mcopy);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, caddr_t data, u_int dlen, const struct mbuf *m,
    u_int direction, void (*cpfn)(const void *, void *, size_t))
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m0, direction, cpfn);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, (caddr_t)&afh, sizeof(afh),
	    m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return bpf_mtap(arg, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = m->m_data + ETHER_HDR_LEN;
	mh.mh_len = m->m_len - ETHER_HDR_LEN;
	mh.mh_next = m->m_next;

	return bpf_mtap_hdr(arg, (caddr_t)&evh, sizeof(evh),
	    (struct mbuf *)&mh, direction, NULL);
#endif
}
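
/*
 * Sketch of the store-buffer layout maintained by bpf_catchpacket()
 * below (illustrative only).  Each capture is a struct bpf_hdr, padding
 * up to bh_hdrlen, then bh_caplen bytes of packet data; the next record
 * starts at the following BPF_WORDALIGN() boundary:
 *
 *	+---------+-- pad --+-------------+---------+-...
 *	| bpf_hdr |         | packet data |  align  |
 *	+---------+---------+-------------+---------+-...
 *	0         ...       bh_hdrlen     +bh_caplen
 *
 * Readers walk the buffer the same way, advancing by
 * BPF_WORDALIGN(bh_hdrlen + bh_caplen) per record.
 */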

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * "copy" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (d->bd_rdStart && (d->bd_rtout + d->bd_rdStart < ticks)) {
		/*
		 * we could be selecting on the bpf, and we
		 * may have timeouts set.  We got here by getting
		 * a packet, so wake up the reader.
		 */
		if (d->bd_fbuf != NULL) {
			d->bd_rdStart = 0;
			ROTATE_BUFFERS(d);
			do_wakeup = 1;
		}
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Allocate the packet buffers for a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_get(struct bpf_d *bd)
{
	atomic_inc_int(&bd->bd_ref);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (atomic_dec_int_nv(&bd->bd_ref) > 0)
		return;

	free(bd->bd_sbuf, M_DEVBUF, 0);
	free(bd->bd_hbuf, M_DEVBUF, 0);
	free(bd->bd_fbuf, M_DEVBUF, 0);
	KERNEL_ASSERT_LOCKED();
	srp_update_locked(&bpf_insn_gc, &bd->bd_rfilter, NULL);
	srp_update_locked(&bpf_insn_gc, &bd->bd_wfilter, NULL);

	free(bd, M_DEVBUF, sizeof(*bd));
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SRPL_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}
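
/*
 * Worked example for the bif_hdrlen computation above (illustrative):
 * with an ethernet link header (hdrlen = 14) and assuming the
 * traditional SIZEOF_BPF_HDR of 18, BPF_WORDALIGN(14 + 18) = 32, so
 * bif_hdrlen = 32 - 14 = 18.  Each record then carries 18 bytes of bpf
 * header plus the 14-byte link header, putting the network layer
 * header at offset 32, a longword boundary.  The exact value of
 * SIZEOF_BPF_HDR depends on the layout of struct bpf_hdr.
 */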

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device. */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp, **pbp = &bpf_iflist;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp) {
			*pbp = nbp;

			bpfsdetach(bp);
		} else
			pbp = &bp->bif_next;
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p;
	struct bpf_d *bd;
	int maj;

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SRPL_FIRST_LOCKED(&bp->bif_dlist)))
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

	free(bp, M_DEVBUF, sizeof *bp);
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int newval;
	int error;

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		newval = bpf_bufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
			return (EINVAL);
		bpf_bufsize = newval;
		break;
	case NET_BPF_MAXBUFSIZE:
		newval = bpf_maxbufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE)
			return (EINVAL);
		bpf_maxbufsize = newval;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get a list of available data link types for the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}
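
/*
 * Illustrative userland sketch for BIOCGDLTLIST: since
 * bpf_getdltlist() above only reports the count when bfl_list is NULL,
 * the usual pattern is two calls.  The buffer handling here is an
 * assumption for illustration:
 *
 *	struct bpf_dltlist bfl = { 0, NULL };
 *	ioctl(fd, BIOCGDLTLIST, &bfl);		// bfl.bfl_len = count
 *	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
 *	ioctl(fd, BIOCGDLTLIST, &bfl);		// fills the array
 */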

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

void
bpf_d_ref(void *null, void *d)
{
	bpf_get(d);
}

void
bpf_d_unref(void *null, void *d)
{
	bpf_put(d);
}

void
bpf_insn_dtor(void *null, void *f)
{
	struct bpf_program *bf = f;
	struct bpf_insn *insns = bf->bf_insns;

	free(insns, M_DEVBUF, bf->bf_len * sizeof(*insns));
	free(bf, M_DEVBUF, sizeof(*bf));
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}
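
/*
 * Worked example for the mbuf loaders above (illustrative): given a
 * chain of two mbufs with m_len 10 and 20, bpf_mbuf_ldw(m, 8, &err)
 * returns a 32-bit word that straddles both mbufs.  bpf_mbuf_copy()
 * copies 2 bytes at offset 8 of the first mbuf, then the remaining 2
 * bytes from the start of the second, and the result is converted to
 * host order with ntohl().
 */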