1 /* $OpenBSD: bpf.c,v 1.216 2022/03/17 14:22:03 visa Exp $ */ 2 /* $NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $ */ 3 4 /* 5 * Copyright (c) 1990, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org> 8 * 9 * This code is derived from the Stanford/CMU enet packet filter, 10 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 11 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 12 * Berkeley Laboratory. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)bpf.c 8.2 (Berkeley) 3/28/94 39 */ 40 41 #include "bpfilter.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/mbuf.h> 46 #include <sys/proc.h> 47 #include <sys/signalvar.h> 48 #include <sys/ioctl.h> 49 #include <sys/conf.h> 50 #include <sys/vnode.h> 51 #include <sys/fcntl.h> 52 #include <sys/socket.h> 53 #include <sys/poll.h> 54 #include <sys/kernel.h> 55 #include <sys/sysctl.h> 56 #include <sys/rwlock.h> 57 #include <sys/atomic.h> 58 #include <sys/refcnt.h> 59 #include <sys/smr.h> 60 #include <sys/specdev.h> 61 #include <sys/selinfo.h> 62 #include <sys/sigio.h> 63 #include <sys/task.h> 64 #include <sys/time.h> 65 66 #include <net/if.h> 67 #include <net/bpf.h> 68 #include <net/bpfdesc.h> 69 70 #include <netinet/in.h> 71 #include <netinet/if_ether.h> 72 73 #include "vlan.h" 74 #if NVLAN > 0 75 #include <net/if_vlan_var.h> 76 #endif 77 78 #define BPF_BUFSIZE 32768 79 80 #define PRINET 26 /* interruptible */ 81 82 /* 83 * The default read buffer size is patchable. 
 */
int bpf_bufsize = BPF_BUFSIZE;		/* copied into bd_bufsize by bpfopen() */
int bpf_maxbufsize = BPF_MAXBUFSIZE;	/* upper clamp applied by BIOCSBLEN */

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

/*
 * Forward declarations for the functions of this file.
 */
int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

/* kqueue filter operations, wired up in bpfread_filtops. */
void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);
int	filt_bpfreadmodify(struct kevent *, struct knote *);
int	filt_bpfreadprocess(struct knote *, struct kevent *);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

/* Map a minor number to its descriptor. */
struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
122 */ 123 void bpf_attachd(struct bpf_d *, struct bpf_if *); 124 void bpf_detachd(struct bpf_d *); 125 void bpf_resetd(struct bpf_d *); 126 127 void bpf_prog_smr(void *); 128 void bpf_d_smr(void *); 129 130 /* 131 * Reference count access to descriptor buffers 132 */ 133 void bpf_get(struct bpf_d *); 134 void bpf_put(struct bpf_d *); 135 136 137 struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz"); 138 139 int 140 bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp, 141 struct sockaddr *sockp) 142 { 143 struct bpf_program_smr *bps; 144 struct bpf_insn *fcode = NULL; 145 struct mbuf *m; 146 struct m_tag *mtag; 147 int error; 148 u_int hlen, alen, mlen; 149 u_int len; 150 u_int linktype; 151 u_int slen; 152 153 /* 154 * Build a sockaddr based on the data link layer type. 155 * We do this at this level because the ethernet header 156 * is copied directly into the data field of the sockaddr. 157 * In the case of SLIP, there is no header and the packet 158 * is forwarded as is. 159 * Also, we are careful to leave room at the front of the mbuf 160 * for the link level header. 161 */ 162 linktype = d->bd_bif->bif_dlt; 163 switch (linktype) { 164 165 case DLT_SLIP: 166 sockp->sa_family = AF_INET; 167 hlen = 0; 168 break; 169 170 case DLT_PPP: 171 sockp->sa_family = AF_UNSPEC; 172 hlen = 0; 173 break; 174 175 case DLT_EN10MB: 176 sockp->sa_family = AF_UNSPEC; 177 /* XXX Would MAXLINKHDR be better? 
*/ 178 hlen = ETHER_HDR_LEN; 179 break; 180 181 case DLT_IEEE802_11: 182 case DLT_IEEE802_11_RADIO: 183 sockp->sa_family = AF_UNSPEC; 184 hlen = 0; 185 break; 186 187 case DLT_RAW: 188 case DLT_NULL: 189 sockp->sa_family = AF_UNSPEC; 190 hlen = 0; 191 break; 192 193 case DLT_LOOP: 194 sockp->sa_family = AF_UNSPEC; 195 hlen = sizeof(u_int32_t); 196 break; 197 198 default: 199 return (EIO); 200 } 201 202 if (uio->uio_resid > MAXMCLBYTES) 203 return (EMSGSIZE); 204 len = uio->uio_resid; 205 if (len < hlen) 206 return (EINVAL); 207 208 /* 209 * Get the length of the payload so we can align it properly. 210 */ 211 alen = len - hlen; 212 213 /* 214 * Allocate enough space for headers and the aligned payload. 215 */ 216 mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long)); 217 if (mlen > MAXMCLBYTES) 218 return (EMSGSIZE); 219 220 MGETHDR(m, M_WAIT, MT_DATA); 221 if (mlen > MHLEN) { 222 MCLGETL(m, M_WAIT, mlen); 223 if ((m->m_flags & M_EXT) == 0) { 224 error = ENOBUFS; 225 goto bad; 226 } 227 } 228 229 m_align(m, alen); /* Align the payload. 
*/ 230 m->m_data -= hlen; 231 232 m->m_pkthdr.ph_ifidx = 0; 233 m->m_pkthdr.len = len; 234 m->m_len = len; 235 236 error = uiomove(mtod(m, caddr_t), len, uio); 237 if (error) 238 goto bad; 239 240 smr_read_enter(); 241 bps = SMR_PTR_GET(&d->bd_wfilter); 242 if (bps != NULL) 243 fcode = bps->bps_bf.bf_insns; 244 slen = bpf_filter(fcode, mtod(m, u_char *), len, len); 245 smr_read_leave(); 246 247 if (slen < len) { 248 error = EPERM; 249 goto bad; 250 } 251 252 /* 253 * Make room for link header, and copy it to sockaddr 254 */ 255 if (hlen != 0) { 256 if (linktype == DLT_LOOP) { 257 u_int32_t af; 258 259 /* the link header indicates the address family */ 260 KASSERT(hlen == sizeof(u_int32_t)); 261 memcpy(&af, m->m_data, hlen); 262 sockp->sa_family = ntohl(af); 263 } else 264 memcpy(sockp->sa_data, m->m_data, hlen); 265 266 m->m_pkthdr.len -= hlen; 267 m->m_len -= hlen; 268 m->m_data += hlen; 269 } 270 271 /* 272 * Prepend the data link type as a mbuf tag 273 */ 274 mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT); 275 *(u_int *)(mtag + 1) = linktype; 276 m_tag_prepend(m, mtag); 277 278 *mp = m; 279 return (0); 280 bad: 281 m_freem(m); 282 return (error); 283 } 284 285 /* 286 * Attach file to the bpf interface, i.e. make d listen on bp. 287 */ 288 void 289 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 290 { 291 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 292 293 /* 294 * Point d at bp, and add d to the interface's list of listeners. 295 * Finally, point the driver's bpf cookie at the interface so 296 * it will divert packets to bpf. 297 */ 298 299 d->bd_bif = bp; 300 301 KERNEL_ASSERT_LOCKED(); 302 SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next); 303 304 *bp->bif_driverp = bp; 305 } 306 307 /* 308 * Detach a file from its interface. 309 */ 310 void 311 bpf_detachd(struct bpf_d *d) 312 { 313 struct bpf_if *bp; 314 315 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 316 317 bp = d->bd_bif; 318 /* Not attached. 
*/ 319 if (bp == NULL) 320 return; 321 322 /* Remove ``d'' from the interface's descriptor list. */ 323 KERNEL_ASSERT_LOCKED(); 324 SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next); 325 326 if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) { 327 /* 328 * Let the driver know that there are no more listeners. 329 */ 330 *bp->bif_driverp = NULL; 331 } 332 333 d->bd_bif = NULL; 334 335 /* 336 * Check if this descriptor had requested promiscuous mode. 337 * If so, turn it off. 338 */ 339 if (d->bd_promisc) { 340 int error; 341 342 KASSERT(bp->bif_ifp != NULL); 343 344 d->bd_promisc = 0; 345 346 bpf_get(d); 347 mtx_leave(&d->bd_mtx); 348 NET_LOCK(); 349 error = ifpromisc(bp->bif_ifp, 0); 350 NET_UNLOCK(); 351 mtx_enter(&d->bd_mtx); 352 bpf_put(d); 353 354 if (error && !(error == EINVAL || error == ENODEV || 355 error == ENXIO)) 356 /* 357 * Something is really wrong if we were able to put 358 * the driver into promiscuous mode, but can't 359 * take it out. 360 */ 361 panic("bpf: ifpromisc failed"); 362 } 363 } 364 365 void 366 bpfilterattach(int n) 367 { 368 LIST_INIT(&bpf_d_list); 369 } 370 371 /* 372 * Open ethernet device. Returns ENXIO for illegal minor device number, 373 * EBUSY if file is open by another process. 374 */ 375 int 376 bpfopen(dev_t dev, int flag, int mode, struct proc *p) 377 { 378 struct bpf_d *bd; 379 int unit = minor(dev); 380 381 if (unit & ((1 << CLONE_SHIFT) - 1)) 382 return (ENXIO); 383 384 KASSERT(bpfilter_lookup(unit) == NULL); 385 386 /* create on demand */ 387 if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) 388 return (EBUSY); 389 390 /* Mark "free" and do most initialization. 
*/ 391 bd->bd_unit = unit; 392 bd->bd_bufsize = bpf_bufsize; 393 bd->bd_sig = SIGIO; 394 mtx_init(&bd->bd_mtx, IPL_NET); 395 task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd); 396 smr_init(&bd->bd_smr); 397 sigio_init(&bd->bd_sigio); 398 klist_init_mutex(&bd->bd_sel.si_note, &bd->bd_mtx); 399 400 bd->bd_rtout = 0; /* no timeout by default */ 401 402 refcnt_init(&bd->bd_refcnt); 403 LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list); 404 405 return (0); 406 } 407 408 /* 409 * Close the descriptor by detaching it from its interface, 410 * deallocating its buffers, and marking it free. 411 */ 412 int 413 bpfclose(dev_t dev, int flag, int mode, struct proc *p) 414 { 415 struct bpf_d *d; 416 417 d = bpfilter_lookup(minor(dev)); 418 mtx_enter(&d->bd_mtx); 419 bpf_detachd(d); 420 bpf_wakeup(d); 421 LIST_REMOVE(d, bd_list); 422 mtx_leave(&d->bd_mtx); 423 bpf_put(d); 424 425 return (0); 426 } 427 428 /* 429 * Rotate the packet buffers in descriptor d. Move the store buffer 430 * into the hold slot, and the free buffer into the store slot. 431 * Zero the length of the new store buffer. 432 */ 433 #define ROTATE_BUFFERS(d) \ 434 KASSERT(d->bd_in_uiomove == 0); \ 435 MUTEX_ASSERT_LOCKED(&d->bd_mtx); \ 436 (d)->bd_hbuf = (d)->bd_sbuf; \ 437 (d)->bd_hlen = (d)->bd_slen; \ 438 (d)->bd_sbuf = (d)->bd_fbuf; \ 439 (d)->bd_slen = 0; \ 440 (d)->bd_fbuf = NULL; 441 442 /* 443 * bpfread - read next chunk of packets from buffers 444 */ 445 int 446 bpfread(dev_t dev, struct uio *uio, int ioflag) 447 { 448 uint64_t end, now; 449 struct bpf_d *d; 450 caddr_t hbuf; 451 int error, hlen; 452 453 KERNEL_ASSERT_LOCKED(); 454 455 d = bpfilter_lookup(minor(dev)); 456 if (d->bd_bif == NULL) 457 return (ENXIO); 458 459 bpf_get(d); 460 mtx_enter(&d->bd_mtx); 461 462 /* 463 * Restrict application to use a buffer the same size as 464 * as kernel buffers. 465 */ 466 if (uio->uio_resid != d->bd_bufsize) { 467 error = EINVAL; 468 goto out; 469 } 470 471 /* 472 * If there's a timeout, mark when the read should end. 
473 */ 474 if (d->bd_rtout != 0) { 475 now = nsecuptime(); 476 end = now + d->bd_rtout; 477 if (end < now) 478 end = UINT64_MAX; 479 } 480 481 /* 482 * If the hold buffer is empty, then do a timed sleep, which 483 * ends when the timeout expires or when enough packets 484 * have arrived to fill the store buffer. 485 */ 486 while (d->bd_hbuf == NULL) { 487 if (d->bd_bif == NULL) { 488 /* interface is gone */ 489 if (d->bd_slen == 0) { 490 error = EIO; 491 goto out; 492 } 493 ROTATE_BUFFERS(d); 494 break; 495 } 496 if (d->bd_immediate && d->bd_slen != 0) { 497 /* 498 * A packet(s) either arrived since the previous 499 * read or arrived while we were asleep. 500 * Rotate the buffers and return what's here. 501 */ 502 ROTATE_BUFFERS(d); 503 break; 504 } 505 if (ISSET(ioflag, IO_NDELAY)) { 506 /* User requested non-blocking I/O */ 507 error = EWOULDBLOCK; 508 } else if (d->bd_rtout == 0) { 509 /* No read timeout set. */ 510 d->bd_nreaders++; 511 error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH, 512 "bpf", INFSLP); 513 d->bd_nreaders--; 514 } else if ((now = nsecuptime()) < end) { 515 /* Read timeout has not expired yet. */ 516 d->bd_nreaders++; 517 error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH, 518 "bpf", end - now); 519 d->bd_nreaders--; 520 } else { 521 /* Read timeout has expired. */ 522 error = EWOULDBLOCK; 523 } 524 if (error == EINTR || error == ERESTART) 525 goto out; 526 if (error == EWOULDBLOCK) { 527 /* 528 * On a timeout, return what's in the buffer, 529 * which may be nothing. If there is something 530 * in the store buffer, we can rotate the buffers. 531 */ 532 if (d->bd_hbuf != NULL) 533 /* 534 * We filled up the buffer in between 535 * getting the timeout and arriving 536 * here, so we don't need to rotate. 537 */ 538 break; 539 540 if (d->bd_slen == 0) { 541 error = 0; 542 goto out; 543 } 544 ROTATE_BUFFERS(d); 545 break; 546 } 547 } 548 /* 549 * At this point, we know we have something in the hold slot. 
550 */ 551 hbuf = d->bd_hbuf; 552 hlen = d->bd_hlen; 553 d->bd_hbuf = NULL; 554 d->bd_hlen = 0; 555 d->bd_fbuf = NULL; 556 d->bd_in_uiomove = 1; 557 558 /* 559 * Move data from hold buffer into user space. 560 * We know the entire buffer is transferred since 561 * we checked above that the read buffer is bpf_bufsize bytes. 562 */ 563 mtx_leave(&d->bd_mtx); 564 error = uiomove(hbuf, hlen, uio); 565 mtx_enter(&d->bd_mtx); 566 567 /* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */ 568 KASSERT(d->bd_fbuf == NULL); 569 KASSERT(d->bd_hbuf == NULL); 570 d->bd_fbuf = hbuf; 571 d->bd_in_uiomove = 0; 572 out: 573 mtx_leave(&d->bd_mtx); 574 bpf_put(d); 575 576 return (error); 577 } 578 579 /* 580 * If there are processes sleeping on this descriptor, wake them up. 581 */ 582 void 583 bpf_wakeup(struct bpf_d *d) 584 { 585 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 586 587 if (d->bd_nreaders) 588 wakeup(d); 589 590 KNOTE(&d->bd_sel.si_note, 0); 591 592 /* 593 * As long as pgsigio() and selwakeup() need to be protected 594 * by the KERNEL_LOCK() we have to delay the wakeup to 595 * another context to keep the hot path KERNEL_LOCK()-free. 
596 */ 597 if ((d->bd_async && d->bd_sig) || d->bd_sel.si_seltid != 0) { 598 bpf_get(d); 599 if (!task_add(systq, &d->bd_wake_task)) 600 bpf_put(d); 601 } 602 } 603 604 void 605 bpf_wakeup_cb(void *xd) 606 { 607 struct bpf_d *d = xd; 608 609 if (d->bd_async && d->bd_sig) 610 pgsigio(&d->bd_sigio, d->bd_sig, 0); 611 612 mtx_enter(&d->bd_mtx); 613 selwakeup(&d->bd_sel); 614 mtx_leave(&d->bd_mtx); 615 bpf_put(d); 616 } 617 618 int 619 bpfwrite(dev_t dev, struct uio *uio, int ioflag) 620 { 621 struct bpf_d *d; 622 struct ifnet *ifp; 623 struct mbuf *m; 624 int error; 625 struct sockaddr_storage dst; 626 627 KERNEL_ASSERT_LOCKED(); 628 629 d = bpfilter_lookup(minor(dev)); 630 if (d->bd_bif == NULL) 631 return (ENXIO); 632 633 bpf_get(d); 634 ifp = d->bd_bif->bif_ifp; 635 636 if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) { 637 error = ENETDOWN; 638 goto out; 639 } 640 641 if (uio->uio_resid == 0) { 642 error = 0; 643 goto out; 644 } 645 646 error = bpf_movein(uio, d, &m, sstosa(&dst)); 647 if (error) 648 goto out; 649 650 if (m->m_pkthdr.len > ifp->if_mtu) { 651 m_freem(m); 652 error = EMSGSIZE; 653 goto out; 654 } 655 656 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 657 m->m_pkthdr.pf.prio = ifp->if_llprio; 658 659 if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC) 660 dst.ss_family = pseudo_AF_HDRCMPLT; 661 662 NET_LOCK(); 663 error = ifp->if_output(ifp, m, sstosa(&dst), NULL); 664 NET_UNLOCK(); 665 666 out: 667 bpf_put(d); 668 return (error); 669 } 670 671 /* 672 * Reset a descriptor by flushing its packet buffer and clearing the 673 * receive and drop counts. 674 */ 675 void 676 bpf_resetd(struct bpf_d *d) 677 { 678 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 679 KASSERT(d->bd_in_uiomove == 0); 680 681 if (d->bd_hbuf != NULL) { 682 /* Free the hold buffer. */ 683 d->bd_fbuf = d->bd_hbuf; 684 d->bd_hbuf = NULL; 685 } 686 d->bd_slen = 0; 687 d->bd_hlen = 0; 688 d->bd_rcount = 0; 689 d->bd_dcount = 0; 690 } 691 692 /* 693 * FIONREAD Check for read packet available. 
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *
 * The switch below handles further commands that this summary does not
 * list (BIOCSBLEN, BIOCSETWF, BIOCLOCK, the filter-drop and direction
 * filter flags, the receive signal and the generic FIO and TIOC owner
 * requests).
 *
 * Once a descriptor is locked (BIOCLOCK), callers for which suser()
 * fails are limited to the first switch's whitelist; anything else
 * gets EPERM.  Unknown commands return EINVAL.  A reference on the
 * descriptor is held while the command runs.
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 * Reports the store buffer length, plus the hold buffer length
	 * when a hold buffer is present.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 * Only allowed before an interface is attached.  The request is
	 * clamped to [BPF_MINBUFSIZE, bpf_maxbufsize] and the clamped
	 * value is written back to the caller.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 * Nothing is done (and 0 is returned) when the bpf_if has no
	 * ifnet or the descriptor is already promiscuous.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 * The timeval is stored in nanoseconds; negative or invalid
	 * values give EINVAL and values above MAXTSLP give EOVERFLOW.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			uint64_t rtout;

			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			rtout = TIMEVAL_TO_NSEC(tv);
			if (rtout > MAXTSLP) {
				error = EOVERFLOW;
				break;
			}
			mtx_enter(&d->bd_mtx);
			d->bd_rtout = rtout;
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			memset(tv, 0, sizeof(*tv));
			mtx_enter(&d->bd_mtx);
			NSEC_TO_TIMEVAL(d->bd_rtout, tv);
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get packet stats.
	 * The counters are read without taking ``bd_mtx''.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	/*
	 * Get filter language version.
	 */
	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		/* Only the three known modes are accepted. */
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		/* Bits other than the two direction flags are dropped. */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/* let vfs keep track of this */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:		/* Process or group signals are sent to */
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:		/* Get receive signal */
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
1034 */ 1035 int 1036 bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf) 1037 { 1038 struct bpf_program_smr *bps, *old_bps; 1039 struct bpf_insn *fcode; 1040 u_int flen, size; 1041 1042 KERNEL_ASSERT_LOCKED(); 1043 1044 if (fp->bf_insns == 0) { 1045 if (fp->bf_len != 0) 1046 return (EINVAL); 1047 bps = NULL; 1048 } else { 1049 flen = fp->bf_len; 1050 if (flen > BPF_MAXINSNS) 1051 return (EINVAL); 1052 1053 fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF, 1054 M_WAITOK | M_CANFAIL); 1055 if (fcode == NULL) 1056 return (ENOMEM); 1057 1058 size = flen * sizeof(*fp->bf_insns); 1059 if (copyin(fp->bf_insns, fcode, size) != 0 || 1060 bpf_validate(fcode, (int)flen) == 0) { 1061 free(fcode, M_DEVBUF, size); 1062 return (EINVAL); 1063 } 1064 1065 bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK); 1066 smr_init(&bps->bps_smr); 1067 bps->bps_bf.bf_len = flen; 1068 bps->bps_bf.bf_insns = fcode; 1069 } 1070 1071 if (wf == 0) { 1072 old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter); 1073 SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps); 1074 } else { 1075 old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter); 1076 SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps); 1077 } 1078 1079 mtx_enter(&d->bd_mtx); 1080 bpf_resetd(d); 1081 mtx_leave(&d->bd_mtx); 1082 if (old_bps != NULL) 1083 smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps); 1084 1085 return (0); 1086 } 1087 1088 /* 1089 * Detach a file from its current interface (if attached at all) and attach 1090 * to the interface indicated by the name stored in ifr. 1091 * Return an errno or 0. 1092 */ 1093 int 1094 bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1095 { 1096 struct bpf_if *bp, *candidate = NULL; 1097 int error = 0; 1098 1099 /* 1100 * Look through attached interfaces for the named one. 1101 */ 1102 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1103 if (strcmp(bp->bif_name, ifr->ifr_name) != 0) 1104 continue; 1105 1106 if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt) 1107 candidate = bp; 1108 } 1109 1110 /* Not found. 
*/ 1111 if (candidate == NULL) 1112 return (ENXIO); 1113 1114 /* 1115 * Allocate the packet buffers if we need to. 1116 * If we're already attached to requested interface, 1117 * just flush the buffer. 1118 */ 1119 mtx_enter(&d->bd_mtx); 1120 if (d->bd_sbuf == NULL) { 1121 if ((error = bpf_allocbufs(d))) 1122 goto out; 1123 } 1124 if (candidate != d->bd_bif) { 1125 /* 1126 * Detach if attached to something else. 1127 */ 1128 bpf_detachd(d); 1129 bpf_attachd(d, candidate); 1130 } 1131 bpf_resetd(d); 1132 out: 1133 mtx_leave(&d->bd_mtx); 1134 return (error); 1135 } 1136 1137 /* 1138 * Copy the interface name to the ifreq. 1139 */ 1140 void 1141 bpf_ifname(struct bpf_if *bif, struct ifreq *ifr) 1142 { 1143 bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name)); 1144 } 1145 1146 /* 1147 * Support for poll() system call 1148 */ 1149 int 1150 bpfpoll(dev_t dev, int events, struct proc *p) 1151 { 1152 struct bpf_d *d; 1153 int revents; 1154 1155 KERNEL_ASSERT_LOCKED(); 1156 1157 /* 1158 * An imitation of the FIONREAD ioctl code. 1159 */ 1160 d = bpfilter_lookup(minor(dev)); 1161 1162 /* 1163 * XXX The USB stack manages it to trigger some race condition 1164 * which causes bpfilter_lookup to return NULL when a USB device 1165 * gets detached while it is up and has an open bpf handler (e.g. 1166 * dhclient). We still should recheck if we can fix the root 1167 * cause of this issue. 
1168 */ 1169 if (d == NULL) 1170 return (POLLERR); 1171 1172 /* Always ready to write data */ 1173 revents = events & (POLLOUT | POLLWRNORM); 1174 1175 if (events & (POLLIN | POLLRDNORM)) { 1176 mtx_enter(&d->bd_mtx); 1177 if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0)) 1178 revents |= events & (POLLIN | POLLRDNORM); 1179 else 1180 selrecord(p, &d->bd_sel); 1181 mtx_leave(&d->bd_mtx); 1182 } 1183 return (revents); 1184 } 1185 1186 const struct filterops bpfread_filtops = { 1187 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 1188 .f_attach = NULL, 1189 .f_detach = filt_bpfrdetach, 1190 .f_event = filt_bpfread, 1191 .f_modify = filt_bpfreadmodify, 1192 .f_process = filt_bpfreadprocess, 1193 }; 1194 1195 int 1196 bpfkqfilter(dev_t dev, struct knote *kn) 1197 { 1198 struct bpf_d *d; 1199 struct klist *klist; 1200 1201 KERNEL_ASSERT_LOCKED(); 1202 1203 d = bpfilter_lookup(minor(dev)); 1204 if (d == NULL) 1205 return (ENXIO); 1206 1207 switch (kn->kn_filter) { 1208 case EVFILT_READ: 1209 klist = &d->bd_sel.si_note; 1210 kn->kn_fop = &bpfread_filtops; 1211 break; 1212 default: 1213 return (EINVAL); 1214 } 1215 1216 bpf_get(d); 1217 kn->kn_hook = d; 1218 klist_insert(klist, kn); 1219 1220 return (0); 1221 } 1222 1223 void 1224 filt_bpfrdetach(struct knote *kn) 1225 { 1226 struct bpf_d *d = kn->kn_hook; 1227 1228 klist_remove(&d->bd_sel.si_note, kn); 1229 bpf_put(d); 1230 } 1231 1232 int 1233 filt_bpfread(struct knote *kn, long hint) 1234 { 1235 struct bpf_d *d = kn->kn_hook; 1236 1237 if (hint == NOTE_SUBMIT) /* ignore activation from selwakeup */ 1238 return (0); 1239 1240 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1241 1242 kn->kn_data = d->bd_hlen; 1243 if (d->bd_immediate) 1244 kn->kn_data += d->bd_slen; 1245 1246 return (kn->kn_data > 0); 1247 } 1248 1249 int 1250 filt_bpfreadmodify(struct kevent *kev, struct knote *kn) 1251 { 1252 struct bpf_d *d = kn->kn_hook; 1253 int active; 1254 1255 mtx_enter(&d->bd_mtx); 1256 active = knote_modify_fn(kev, kn, filt_bpfread); 
1257 mtx_leave(&d->bd_mtx); 1258 1259 return (active); 1260 } 1261 1262 int 1263 filt_bpfreadprocess(struct knote *kn, struct kevent *kev) 1264 { 1265 struct bpf_d *d = kn->kn_hook; 1266 int active; 1267 1268 mtx_enter(&d->bd_mtx); 1269 active = knote_process_fn(kn, kev, filt_bpfread); 1270 mtx_leave(&d->bd_mtx); 1271 1272 return (active); 1273 } 1274 1275 /* 1276 * Copy data from an mbuf chain into a buffer. This code is derived 1277 * from m_copydata in sys/uipc_mbuf.c. 1278 */ 1279 void 1280 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len) 1281 { 1282 const struct mbuf *m; 1283 u_int count; 1284 u_char *dst; 1285 1286 m = src_arg; 1287 dst = dst_arg; 1288 while (len > 0) { 1289 if (m == NULL) 1290 panic("bpf_mcopy"); 1291 count = min(m->m_len, len); 1292 bcopy(mtod(m, caddr_t), (caddr_t)dst, count); 1293 m = m->m_next; 1294 dst += count; 1295 len -= count; 1296 } 1297 } 1298 1299 int 1300 bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction) 1301 { 1302 return _bpf_mtap(arg, m, m, direction); 1303 } 1304 1305 int 1306 _bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m, 1307 u_int direction) 1308 { 1309 struct bpf_if *bp = (struct bpf_if *)arg; 1310 struct bpf_d *d; 1311 size_t pktlen, slen; 1312 const struct mbuf *m0; 1313 struct bpf_hdr tbh; 1314 int gothdr = 0; 1315 int drop = 0; 1316 1317 if (m == NULL) 1318 return (0); 1319 1320 if (bp == NULL) 1321 return (0); 1322 1323 pktlen = 0; 1324 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1325 pktlen += m0->m_len; 1326 1327 smr_read_enter(); 1328 SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1329 struct bpf_program_smr *bps; 1330 struct bpf_insn *fcode = NULL; 1331 1332 atomic_inc_long(&d->bd_rcount); 1333 1334 if (ISSET(d->bd_dirfilt, direction)) 1335 continue; 1336 1337 bps = SMR_PTR_GET(&d->bd_rfilter); 1338 if (bps != NULL) 1339 fcode = bps->bps_bf.bf_insns; 1340 slen = bpf_mfilter(fcode, m, pktlen); 1341 1342 if (slen == 0) 1343 continue; 1344 if (d->bd_fildrop != 
BPF_FILDROP_PASS) 1345 drop = 1; 1346 if (d->bd_fildrop != BPF_FILDROP_DROP) { 1347 if (!gothdr) { 1348 struct timeval tv; 1349 memset(&tbh, 0, sizeof(tbh)); 1350 1351 if (ISSET(mp->m_flags, M_PKTHDR)) { 1352 tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx; 1353 tbh.bh_flowid = mp->m_pkthdr.ph_flowid; 1354 tbh.bh_flags = mp->m_pkthdr.pf.prio; 1355 if (ISSET(mp->m_pkthdr.csum_flags, 1356 M_FLOWID)) 1357 SET(tbh.bh_flags, BPF_F_FLOWID); 1358 1359 m_microtime(mp, &tv); 1360 } else 1361 microtime(&tv); 1362 1363 tbh.bh_tstamp.tv_sec = tv.tv_sec; 1364 tbh.bh_tstamp.tv_usec = tv.tv_usec; 1365 SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT); 1366 1367 gothdr = 1; 1368 } 1369 1370 mtx_enter(&d->bd_mtx); 1371 bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh); 1372 mtx_leave(&d->bd_mtx); 1373 } 1374 } 1375 smr_read_leave(); 1376 1377 return (drop); 1378 } 1379 1380 /* 1381 * Incoming linkage from device drivers, where a data buffer should be 1382 * prepended by an arbitrary header. In this situation we already have a 1383 * way of representing a chain of memory buffers, ie, mbufs, so reuse 1384 * the existing functionality by attaching the buffers to mbufs. 1385 * 1386 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a 1387 * struct m_hdr each for the header and data on the stack. 
1388 */ 1389 int 1390 bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen, 1391 const void *buf, unsigned int buflen, u_int direction) 1392 { 1393 struct m_hdr mh, md; 1394 struct mbuf *m0 = NULL; 1395 struct mbuf **mp = &m0; 1396 1397 if (hdr != NULL) { 1398 mh.mh_flags = 0; 1399 mh.mh_next = NULL; 1400 mh.mh_len = hdrlen; 1401 mh.mh_data = (void *)hdr; 1402 1403 *mp = (struct mbuf *)&mh; 1404 mp = &mh.mh_next; 1405 } 1406 1407 if (buf != NULL) { 1408 md.mh_flags = 0; 1409 md.mh_next = NULL; 1410 md.mh_len = buflen; 1411 md.mh_data = (void *)buf; 1412 1413 *mp = (struct mbuf *)&md; 1414 } 1415 1416 return bpf_mtap(arg, m0, direction); 1417 } 1418 1419 /* 1420 * Incoming linkage from device drivers, where we have a mbuf chain 1421 * but need to prepend some arbitrary header from a linear buffer. 1422 * 1423 * Con up a minimal dummy header to pacify bpf. Allocate (only) a 1424 * struct m_hdr on the stack. This is safe as bpf only reads from the 1425 * fields in this header that we initialize, and will not try to free 1426 * it or keep a pointer to it. 1427 */ 1428 int 1429 bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m, 1430 u_int direction) 1431 { 1432 struct m_hdr mh; 1433 const struct mbuf *m0; 1434 1435 if (dlen > 0) { 1436 mh.mh_flags = 0; 1437 mh.mh_next = (struct mbuf *)m; 1438 mh.mh_len = dlen; 1439 mh.mh_data = (void *)data; 1440 m0 = (struct mbuf *)&mh; 1441 } else 1442 m0 = m; 1443 1444 return _bpf_mtap(arg, m, m0, direction); 1445 } 1446 1447 /* 1448 * Incoming linkage from device drivers, where we have a mbuf chain 1449 * but need to prepend the address family. 1450 * 1451 * Con up a minimal dummy header to pacify bpf. We allocate (only) a 1452 * struct m_hdr on the stack. This is safe as bpf only reads from the 1453 * fields in this header that we initialize, and will not try to free 1454 * it or keep a pointer to it. 
1455 */ 1456 int 1457 bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction) 1458 { 1459 u_int32_t afh; 1460 1461 afh = htonl(af); 1462 1463 return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction); 1464 } 1465 1466 /* 1467 * Incoming linkage from device drivers, where we have a mbuf chain 1468 * but need to prepend a VLAN encapsulation header. 1469 * 1470 * Con up a minimal dummy header to pacify bpf. Allocate (only) a 1471 * struct m_hdr on the stack. This is safe as bpf only reads from the 1472 * fields in this header that we initialize, and will not try to free 1473 * it or keep a pointer to it. 1474 */ 1475 int 1476 bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction) 1477 { 1478 #if NVLAN > 0 1479 struct ether_vlan_header evh; 1480 struct m_hdr mh, md; 1481 1482 if ((m->m_flags & M_VLANTAG) == 0) 1483 #endif 1484 { 1485 return _bpf_mtap(arg, m, m, direction); 1486 } 1487 1488 #if NVLAN > 0 1489 KASSERT(m->m_len >= ETHER_HDR_LEN); 1490 1491 memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN); 1492 evh.evl_proto = evh.evl_encap_proto; 1493 evh.evl_encap_proto = htons(ETHERTYPE_VLAN); 1494 evh.evl_tag = htons(m->m_pkthdr.ether_vtag); 1495 1496 mh.mh_flags = 0; 1497 mh.mh_data = (caddr_t)&evh; 1498 mh.mh_len = sizeof(evh); 1499 mh.mh_next = (struct mbuf *)&md; 1500 1501 md.mh_flags = 0; 1502 md.mh_data = m->m_data + ETHER_HDR_LEN; 1503 md.mh_len = m->m_len - ETHER_HDR_LEN; 1504 md.mh_next = m->m_next; 1505 1506 return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction); 1507 #endif 1508 } 1509 1510 /* 1511 * Move the packet data from interface memory (pkt) into the 1512 * store buffer. Wake up listeners if needed. 1513 * "copy" is the routine called to do the actual data 1514 * transfer. bcopy is passed in to copy contiguous chunks, while 1515 * bpf_mcopy is passed in to copy mbuf chains. In the latter case, 1516 * pkt is really an mbuf. 
1517 */ 1518 void 1519 bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen, 1520 const struct bpf_hdr *tbh) 1521 { 1522 struct bpf_hdr *bh; 1523 int totlen, curlen; 1524 int hdrlen, do_wakeup = 0; 1525 1526 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1527 if (d->bd_bif == NULL) 1528 return; 1529 1530 hdrlen = d->bd_bif->bif_hdrlen; 1531 1532 /* 1533 * Figure out how many bytes to move. If the packet is 1534 * greater or equal to the snapshot length, transfer that 1535 * much. Otherwise, transfer the whole packet (unless 1536 * we hit the buffer size limit). 1537 */ 1538 totlen = hdrlen + min(snaplen, pktlen); 1539 if (totlen > d->bd_bufsize) 1540 totlen = d->bd_bufsize; 1541 1542 /* 1543 * Round up the end of the previous packet to the next longword. 1544 */ 1545 curlen = BPF_WORDALIGN(d->bd_slen); 1546 if (curlen + totlen > d->bd_bufsize) { 1547 /* 1548 * This packet will overflow the storage buffer. 1549 * Rotate the buffers if we can, then wakeup any 1550 * pending reads. 1551 */ 1552 if (d->bd_fbuf == NULL) { 1553 /* 1554 * We haven't completed the previous read yet, 1555 * so drop the packet. 1556 */ 1557 ++d->bd_dcount; 1558 return; 1559 } 1560 ROTATE_BUFFERS(d); 1561 do_wakeup = 1; 1562 curlen = 0; 1563 } 1564 1565 /* 1566 * Append the bpf header. 1567 */ 1568 bh = (struct bpf_hdr *)(d->bd_sbuf + curlen); 1569 *bh = *tbh; 1570 bh->bh_datalen = pktlen; 1571 bh->bh_hdrlen = hdrlen; 1572 bh->bh_caplen = totlen - hdrlen; 1573 1574 /* 1575 * Copy the packet data into the store buffer and update its length. 1576 */ 1577 bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen); 1578 d->bd_slen = curlen + totlen; 1579 1580 if (d->bd_immediate) { 1581 /* 1582 * Immediate mode is set. A packet arrived so any 1583 * reads should be woken up. 1584 */ 1585 do_wakeup = 1; 1586 } 1587 1588 if (do_wakeup) 1589 bpf_wakeup(d); 1590 } 1591 1592 /* 1593 * Initialize all nonzero fields of a descriptor. 
1594 */ 1595 int 1596 bpf_allocbufs(struct bpf_d *d) 1597 { 1598 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1599 1600 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1601 if (d->bd_fbuf == NULL) 1602 return (ENOMEM); 1603 1604 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1605 if (d->bd_sbuf == NULL) { 1606 free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize); 1607 d->bd_fbuf = NULL; 1608 return (ENOMEM); 1609 } 1610 1611 d->bd_slen = 0; 1612 d->bd_hlen = 0; 1613 1614 return (0); 1615 } 1616 1617 void 1618 bpf_prog_smr(void *bps_arg) 1619 { 1620 struct bpf_program_smr *bps = bps_arg; 1621 1622 free(bps->bps_bf.bf_insns, M_DEVBUF, 1623 bps->bps_bf.bf_len * sizeof(struct bpf_insn)); 1624 free(bps, M_DEVBUF, sizeof(struct bpf_program_smr)); 1625 } 1626 1627 void 1628 bpf_d_smr(void *smr) 1629 { 1630 struct bpf_d *bd = smr; 1631 1632 sigio_free(&bd->bd_sigio); 1633 free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize); 1634 free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize); 1635 free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize); 1636 1637 if (bd->bd_rfilter != NULL) 1638 bpf_prog_smr(bd->bd_rfilter); 1639 if (bd->bd_wfilter != NULL) 1640 bpf_prog_smr(bd->bd_wfilter); 1641 1642 klist_free(&bd->bd_sel.si_note); 1643 free(bd, M_DEVBUF, sizeof(*bd)); 1644 } 1645 1646 void 1647 bpf_get(struct bpf_d *bd) 1648 { 1649 refcnt_take(&bd->bd_refcnt); 1650 } 1651 1652 /* 1653 * Free buffers currently in use by a descriptor 1654 * when the reference count drops to zero. 
1655 */ 1656 void 1657 bpf_put(struct bpf_d *bd) 1658 { 1659 if (refcnt_rele(&bd->bd_refcnt) == 0) 1660 return; 1661 1662 smr_call(&bd->bd_smr, bpf_d_smr, bd); 1663 } 1664 1665 void * 1666 bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen) 1667 { 1668 struct bpf_if *bp; 1669 1670 if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL) 1671 panic("bpfattach"); 1672 SMR_SLIST_INIT(&bp->bif_dlist); 1673 bp->bif_driverp = (struct bpf_if **)bpfp; 1674 bp->bif_name = name; 1675 bp->bif_ifp = NULL; 1676 bp->bif_dlt = dlt; 1677 1678 bp->bif_next = bpf_iflist; 1679 bpf_iflist = bp; 1680 1681 *bp->bif_driverp = NULL; 1682 1683 /* 1684 * Compute the length of the bpf header. This is not necessarily 1685 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1686 * that the network layer header begins on a longword boundary (for 1687 * performance reasons and to alleviate alignment restrictions). 1688 */ 1689 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1690 1691 return (bp); 1692 } 1693 1694 void 1695 bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen) 1696 { 1697 struct bpf_if *bp; 1698 1699 bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen); 1700 bp->bif_ifp = ifp; 1701 } 1702 1703 /* Detach an interface from its attached bpf device. */ 1704 void 1705 bpfdetach(struct ifnet *ifp) 1706 { 1707 struct bpf_if *bp, *nbp; 1708 1709 KERNEL_ASSERT_LOCKED(); 1710 1711 for (bp = bpf_iflist; bp; bp = nbp) { 1712 nbp = bp->bif_next; 1713 if (bp->bif_ifp == ifp) 1714 bpfsdetach(bp); 1715 } 1716 ifp->if_bpf = NULL; 1717 } 1718 1719 void 1720 bpfsdetach(void *p) 1721 { 1722 struct bpf_if *bp = p, *tbp; 1723 struct bpf_d *bd; 1724 int maj; 1725 1726 KERNEL_ASSERT_LOCKED(); 1727 1728 /* Locate the major number. 
*/ 1729 for (maj = 0; maj < nchrdev; maj++) 1730 if (cdevsw[maj].d_open == bpfopen) 1731 break; 1732 1733 while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) { 1734 vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR); 1735 klist_invalidate(&bd->bd_sel.si_note); 1736 } 1737 1738 for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) { 1739 if (tbp->bif_next == bp) { 1740 tbp->bif_next = bp->bif_next; 1741 break; 1742 } 1743 } 1744 1745 if (bpf_iflist == bp) 1746 bpf_iflist = bp->bif_next; 1747 1748 free(bp, M_DEVBUF, sizeof(*bp)); 1749 } 1750 1751 int 1752 bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp, 1753 void *newp, size_t newlen) 1754 { 1755 switch (name[0]) { 1756 case NET_BPF_BUFSIZE: 1757 return sysctl_int_bounded(oldp, oldlenp, newp, newlen, 1758 &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize); 1759 case NET_BPF_MAXBUFSIZE: 1760 return sysctl_int_bounded(oldp, oldlenp, newp, newlen, 1761 &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX); 1762 default: 1763 return (EOPNOTSUPP); 1764 } 1765 } 1766 1767 int 1768 bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 1769 size_t newlen) 1770 { 1771 int flags = RW_INTR; 1772 int error; 1773 1774 if (namelen != 1) 1775 return (ENOTDIR); 1776 1777 flags |= (newp == NULL) ? RW_READ : RW_WRITE; 1778 1779 error = rw_enter(&bpf_sysctl_lk, flags); 1780 if (error != 0) 1781 return (error); 1782 1783 error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen); 1784 1785 rw_exit(&bpf_sysctl_lk); 1786 1787 return (error); 1788 } 1789 1790 struct bpf_d * 1791 bpfilter_lookup(int unit) 1792 { 1793 struct bpf_d *bd; 1794 1795 KERNEL_ASSERT_LOCKED(); 1796 1797 LIST_FOREACH(bd, &bpf_d_list, bd_list) 1798 if (bd->bd_unit == unit) 1799 return (bd); 1800 return (NULL); 1801 } 1802 1803 /* 1804 * Get a list of available data link type of the interface. 
1805 */ 1806 int 1807 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 1808 { 1809 int n, error; 1810 struct bpf_if *bp; 1811 const char *name; 1812 1813 name = d->bd_bif->bif_name; 1814 n = 0; 1815 error = 0; 1816 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1817 if (strcmp(name, bp->bif_name) != 0) 1818 continue; 1819 if (bfl->bfl_list != NULL) { 1820 if (n >= bfl->bfl_len) 1821 return (ENOMEM); 1822 error = copyout(&bp->bif_dlt, 1823 bfl->bfl_list + n, sizeof(u_int)); 1824 if (error) 1825 break; 1826 } 1827 n++; 1828 } 1829 1830 bfl->bfl_len = n; 1831 return (error); 1832 } 1833 1834 /* 1835 * Set the data link type of a BPF instance. 1836 */ 1837 int 1838 bpf_setdlt(struct bpf_d *d, u_int dlt) 1839 { 1840 const char *name; 1841 struct bpf_if *bp; 1842 1843 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1844 if (d->bd_bif->bif_dlt == dlt) 1845 return (0); 1846 name = d->bd_bif->bif_name; 1847 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1848 if (strcmp(name, bp->bif_name) != 0) 1849 continue; 1850 if (bp->bif_dlt == dlt) 1851 break; 1852 } 1853 if (bp == NULL) 1854 return (EINVAL); 1855 bpf_detachd(d); 1856 bpf_attachd(d, bp); 1857 bpf_resetd(d); 1858 return (0); 1859 } 1860 1861 u_int32_t bpf_mbuf_ldw(const void *, u_int32_t, int *); 1862 u_int32_t bpf_mbuf_ldh(const void *, u_int32_t, int *); 1863 u_int32_t bpf_mbuf_ldb(const void *, u_int32_t, int *); 1864 1865 int bpf_mbuf_copy(const struct mbuf *, u_int32_t, 1866 void *, u_int32_t); 1867 1868 const struct bpf_ops bpf_mbuf_ops = { 1869 bpf_mbuf_ldw, 1870 bpf_mbuf_ldh, 1871 bpf_mbuf_ldb, 1872 }; 1873 1874 int 1875 bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len) 1876 { 1877 u_int8_t *cp = buf; 1878 u_int32_t count; 1879 1880 while (off >= m->m_len) { 1881 off -= m->m_len; 1882 1883 m = m->m_next; 1884 if (m == NULL) 1885 return (-1); 1886 } 1887 1888 for (;;) { 1889 count = min(m->m_len - off, len); 1890 1891 memcpy(cp, m->m_data + off, count); 1892 len -= count; 1893 
1894 if (len == 0) 1895 return (0); 1896 1897 m = m->m_next; 1898 if (m == NULL) 1899 break; 1900 1901 cp += count; 1902 off = 0; 1903 } 1904 1905 return (-1); 1906 } 1907 1908 u_int32_t 1909 bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err) 1910 { 1911 u_int32_t v; 1912 1913 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) { 1914 *err = 1; 1915 return (0); 1916 } 1917 1918 *err = 0; 1919 return ntohl(v); 1920 } 1921 1922 u_int32_t 1923 bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err) 1924 { 1925 u_int16_t v; 1926 1927 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) { 1928 *err = 1; 1929 return (0); 1930 } 1931 1932 *err = 0; 1933 return ntohs(v); 1934 } 1935 1936 u_int32_t 1937 bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err) 1938 { 1939 const struct mbuf *m = m0; 1940 u_int8_t v; 1941 1942 while (k >= m->m_len) { 1943 k -= m->m_len; 1944 1945 m = m->m_next; 1946 if (m == NULL) { 1947 *err = 1; 1948 return (0); 1949 } 1950 } 1951 v = m->m_data[k]; 1952 1953 *err = 0; 1954 return v; 1955 } 1956 1957 u_int 1958 bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen) 1959 { 1960 return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen); 1961 } 1962