/*	$OpenBSD: bpf.c,v 1.192 2020/06/18 23:32:00 dlg Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/sigio.h>
#include <sys/task.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/* from kern/kern_clock.c; incremented each clock tick. */
extern int ticks;

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if *, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);

struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETI(m, M_WAIT, NULL, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open the packet filter device.  Returns ENXIO for an illegal minor
 * device number, EBUSY if a descriptor cannot be allocated.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Do most of the initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);

	bd->bd_rtout = 0;	/* no timeout by default */
	bd->bd_rnonblock = ISSET(flag, FNONBLOCK);

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
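
/*
 * The descriptor cycles packets through three buffers: bd_sbuf (the
 * store buffer, filled by bpf_catchpacket()), bd_hbuf (the hold buffer,
 * drained by bpfread()) and bd_fbuf (a free buffer, ready to become the
 * next store buffer on rotation).
 */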

/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int hlen, error;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, bd_rdStart is tagged when we start the read.
	 * We can then figure out when we're done reading.
	 */
	if (d->bd_rnonblock == 0 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	else
		d->bd_rdStart = 0;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets arrived either since the
			 * previous read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rnonblock) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else {
			if (d->bd_rdStart <= ULONG_MAX - d->bd_rtout &&
			    d->bd_rdStart + d->bd_rtout < ticks) {
				error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
				    "bpf", d->bd_rtout);
			} else
				error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * As long as pgsigio() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	bpf_get(d);
	if (!task_add(systq, &d->bd_wake_task))
		bpf_put(d);
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	wakeup(d);
	if (d->bd_async && d->bd_sig)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeup(&d->bd_sel);
	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set link layer read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
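
/*
 * A typical userland consumer opens a /dev/bpf node, optionally sizes
 * the buffer with BIOCSBLEN (only allowed before an interface is
 * attached), binds an interface with BIOCSETIF (which allocates the
 * buffers), optionally loads a filter with BIOCSETF, and then read()s
 * one whole buffer at a time.
 */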

int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			u_long rtout;

			/* Compute number of ticks. */
			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			if (tv->tv_sec > INT_MAX / hz) {
				error = EOVERFLOW;
				break;
			}
			rtout = tv->tv_sec * hz;
			if (tv->tv_usec / tick > INT_MAX - rtout) {
				error = EOVERFLOW;
				break;
			}
			rtout += tv->tv_usec / tick;
			d->bd_rtout = rtout;
			if (d->bd_rtout == 0 && tv->tv_usec != 0)
				d->bd_rtout = 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
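
	/*
	 * Example of the tick conversion above: with hz=100 a clock tick
	 * is tick=10000us, so BIOCSRTIMEOUT with { tv_sec = 1, tv_usec =
	 * 500000 } stores 1 * 100 + 500000 / 10000 = 150 ticks in
	 * bd_rtout, and BIOCGRTIMEOUT converts the 150 ticks back to
	 * 1.5 seconds.
	 */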

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rnonblock = 1;
		else
			d->bd_rnonblock = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}
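
/*
 * The filter swap above relies on SMR: readers in _bpf_mtap() and
 * bpf_movein() dereference bd_rfilter/bd_wfilter inside an SMR read
 * section, so the old program is not freed until smr_call() runs
 * bpf_prog_smr() after all such sections have drained.
 */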

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages to trigger some race condition
	 * which causes bpfilter_lookup to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We should still check whether we can fix the root
	 * cause of this issue.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			/*
			 * if there's a timeout, mark the time we
			 * started waiting.
			 */
			if (d->bd_rnonblock == 0 && d->bd_rdStart == 0)
				d->bd_rdStart = ticks;
			selrecord(p, &d->bd_sel);
		}
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	klist_insert(klist, kn);

	mtx_enter(&d->bd_mtx);
	if (d->bd_rnonblock == 0 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	mtx_leave(&d->bd_mtx);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	klist_remove(&d->bd_sel.si_note, kn);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, m, direction);
}

int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);

					m_microtime(m, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}
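
/*
 * A nonzero return from _bpf_mtap() asks the caller to discard the
 * packet: at least one matching listener has set its filter-drop mode
 * (BIOCSFILDROP) to capture-and-drop or drop.
 */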

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header.  In this situation we already have a
 * way of representing a chain of memory buffers, ie, mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return bpf_mtap(arg, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);
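
	/*
	 * In 802.1p the two lowest priorities are inverted: 1
	 * ("background") ranks below 0 ("best effort"), so swap 0 and 1
	 * when encoding pf.prio into the tag's PCP field.
	 */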
	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = m->m_data + ETHER_HDR_LEN;
	mh.mh_len = m->m_len - ETHER_HDR_LEN;
	mh.mh_next = m->m_next;

	return bpf_mtap_hdr(arg, &evh, sizeof(evh),
	    (struct mbuf *)&mh, direction);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * "pkt" is really an mbuf chain here; bpf_mcopy() does the actual
 * data transfer.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (d->bd_rdStart && d->bd_rdStart <= ULONG_MAX - d->bd_rtout &&
	    d->bd_rdStart + d->bd_rtout < ticks) {
		/*
		 * we could be selecting on the bpf, and we
		 * may have timeouts set.  We got here by getting
		 * a packet, so wake up the reader.
		 */
		if (d->bd_fbuf != NULL) {
			d->bd_rdStart = 0;
			ROTATE_BUFFERS(d);
			do_wakeup = 1;
		}
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Allocate the free and store buffers for a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}

void
bpf_d_smr(void *smr)
{
	struct bpf_d *bd = smr;

	sigio_free(&bd->bd_sigio);
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	free(bd, M_DEVBUF, sizeof(*bd));
}

void
bpf_get(struct bpf_d *bd)
{
	atomic_inc_int(&bd->bd_ref);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (atomic_dec_int_nv(&bd->bd_ref) > 0)
		return;

	smr_call(&bd->bd_smr, bpf_d_smr, bd);
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist)))
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int newval;
	int error;

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		newval = bpf_bufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
			return (EINVAL);
		bpf_bufsize = newval;
		break;
	case NET_BPF_MAXBUFSIZE:
		newval = bpf_maxbufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE)
			return (EINVAL);
		bpf_maxbufsize = newval;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get the list of data link types available on the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
1820 */ 1821 int 1822 bpf_setdlt(struct bpf_d *d, u_int dlt) 1823 { 1824 const char *name; 1825 struct bpf_if *bp; 1826 1827 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1828 if (d->bd_bif->bif_dlt == dlt) 1829 return (0); 1830 name = d->bd_bif->bif_name; 1831 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1832 if (strcmp(name, bp->bif_name) != 0) 1833 continue; 1834 if (bp->bif_dlt == dlt) 1835 break; 1836 } 1837 if (bp == NULL) 1838 return (EINVAL); 1839 bpf_detachd(d); 1840 bpf_attachd(d, bp); 1841 bpf_resetd(d); 1842 return (0); 1843 } 1844 1845 u_int32_t bpf_mbuf_ldw(const void *, u_int32_t, int *); 1846 u_int32_t bpf_mbuf_ldh(const void *, u_int32_t, int *); 1847 u_int32_t bpf_mbuf_ldb(const void *, u_int32_t, int *); 1848 1849 int bpf_mbuf_copy(const struct mbuf *, u_int32_t, 1850 void *, u_int32_t); 1851 1852 const struct bpf_ops bpf_mbuf_ops = { 1853 bpf_mbuf_ldw, 1854 bpf_mbuf_ldh, 1855 bpf_mbuf_ldb, 1856 }; 1857 1858 int 1859 bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len) 1860 { 1861 u_int8_t *cp = buf; 1862 u_int32_t count; 1863 1864 while (off >= m->m_len) { 1865 off -= m->m_len; 1866 1867 m = m->m_next; 1868 if (m == NULL) 1869 return (-1); 1870 } 1871 1872 for (;;) { 1873 count = min(m->m_len - off, len); 1874 1875 memcpy(cp, m->m_data + off, count); 1876 len -= count; 1877 1878 if (len == 0) 1879 return (0); 1880 1881 m = m->m_next; 1882 if (m == NULL) 1883 break; 1884 1885 cp += count; 1886 off = 0; 1887 } 1888 1889 return (-1); 1890 } 1891 1892 u_int32_t 1893 bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err) 1894 { 1895 u_int32_t v; 1896 1897 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) { 1898 *err = 1; 1899 return (0); 1900 } 1901 1902 *err = 0; 1903 return ntohl(v); 1904 } 1905 1906 u_int32_t 1907 bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err) 1908 { 1909 u_int16_t v; 1910 1911 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) { 1912 *err = 1; 1913 return (0); 1914 } 1915 1916 *err = 0; 1917 return ntohs(v); 1918 } 1919 1920 u_int32_t 1921 bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err) 1922 { 1923 const struct mbuf *m = m0; 1924 u_int8_t v; 1925 1926 while (k >= m->m_len) { 1927 k -= m->m_len; 1928 1929 m = m->m_next; 1930 if (m == NULL) { 1931 *err = 1; 1932 return (0); 1933 } 1934 } 1935 v = m->m_data[k]; 1936 1937 *err = 0; 1938 return v; 1939 } 1940 1941 u_int 1942 bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen) 1943 { 1944 return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen); 1945 } 1946