/*	$OpenBSD: bpf.c,v 1.219 2022/07/09 12:48:21 visa Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/event.h>
#include <sys/mutex.h>
#include <sys/refcnt.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/sigio.h>
#include <sys/task.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE	32768

#define PRINET		26	/* interruptible */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 * bpf_d_list is the list of descriptors.
 */
struct bpf_if *bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if *, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);
int	filt_bpfreadmodify(struct kevent *, struct knote *);
int	filt_bpfreadprocess(struct knote *, struct kevent *);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers.
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);

struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen, alen, mlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EMSGSIZE);
	len = uio->uio_resid;
	if (len < hlen)
		return (EINVAL);

	/*
	 * Get the length of the payload so we can align it properly.
	 */
	alen = len - hlen;

	/*
	 * Allocate enough space for headers and the aligned payload.
	 */
	mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long));
	if (mlen > MAXMCLBYTES)
		return (EMSGSIZE);

	MGETHDR(m, M_WAIT, MT_DATA);
	if (mlen > MHLEN) {
		MCLGETL(m, M_WAIT, mlen);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	m_align(m, alen);		/* Align the payload. */
	m->m_data -= hlen;

	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len;
	m->m_len = len;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr.
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);

		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen;
	}

	/*
	 * Prepend the data link type as a mbuf tag.
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	*mp = m;
	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}
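
/*
 * Illustrative userland usage of this device (a sketch based on the
 * bpf(4) semantics implemented below, not code from this file): a
 * capture program opens a /dev/bpf node, binds it to an interface
 * with BIOCSETIF, and then read()s whole kernel buffers.  bpfread()
 * insists on reads of exactly the kernel buffer size, so the size is
 * queried first with BIOCGBLEN:
 *
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *	struct ifreq ifr;
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);		// bpf_setif() below
 *	u_int bufsize;
 *	ioctl(fd, BIOCGBLEN, &bufsize);		// read() must use this size
 *	char *buf = malloc(bufsize);
 *	ssize_t n = read(fd, buf, bufsize);
 */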

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);
	klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);

	bd->bd_rtout = 0;	/* no timeout by default */

	refcnt_init(&bd->bd_refcnt);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;

/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)
			end = UINT64_MAX;
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.  Rotate the
			 * buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (ISSET(ioflag, IO_NDELAY)) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	if (d->bd_nreaders)
		wakeup(d);

	KNOTE(&d->bd_klist, 0);

	/*
	 * As long as pgsigio() needs to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	if (d->bd_async && d->bd_sig) {
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);
	}
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	if (d->bd_async && d->bd_sig)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}
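
/*
 * Illustrative packet injection from userland (a sketch of how the
 * write path above is driven; the frame-building helper is
 * hypothetical): on a DLT_EN10MB descriptor, a write() of a complete
 * ethernet frame ends up in bpf_movein(), is checked against the
 * write filter, and is handed to the interface's if_output():
 *
 *	char frame[64];
 *	build_frame(frame);		// hypothetical: dst mac, src mac,
 *					// ethertype, payload
 *	write(fd, frame, sizeof(frame));
 *
 * With BIOCSHDRCMPLT set, the link-level header supplied by the caller
 * is used as-is (see the pseudo_AF_HDRCMPLT assignment above) instead
 * of having the source address filled in by the driver.
 */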

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
	    {
		int n;

		mtx_enter(&d->bd_mtx);
		n = d->bd_slen;
		if (d->bd_hbuf != NULL)
			n += d->bd_hlen;
		mtx_leave(&d->bd_mtx);

		*(int *)addr = n;
		break;
	    }

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
	    {
		struct timeval *tv = (struct timeval *)addr;
		uint64_t rtout;

		if (tv->tv_sec < 0 || !timerisvalid(tv)) {
			error = EINVAL;
			break;
		}
		rtout = TIMEVAL_TO_NSEC(tv);
		if (rtout > MAXTSLP) {
			error = EOVERFLOW;
			break;
		}
		mtx_enter(&d->bd_mtx);
		d->bd_rtout = rtout;
		mtx_leave(&d->bd_mtx);
		break;
	    }

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
	    {
		struct timeval *tv = (struct timeval *)addr;

		memset(tv, 0, sizeof(*tv));
		mtx_enter(&d->bd_mtx);
		NSEC_TO_TIMEVAL(d->bd_rtout, tv);
		mtx_leave(&d->bd_mtx);
		break;
	    }

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
	    {
		struct bpf_stat *bs = (struct bpf_stat *)addr;

		bs->bs_recv = d->bd_rcount;
		bs->bs_drop = d->bd_dcount;
		break;
	    }

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
	    {
		struct bpf_version *bv = (struct bpf_version *)addr;

		bv->bv_major = BPF_MAJOR_VERSION;
		bv->bv_minor = BPF_MINOR_VERSION;
		break;
	    }

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/* let vfs keep track of this */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
	    {
		u_int sig;

		sig = *(u_int *)addr;

		if (sig >= NSIG)
			error = EINVAL;
		else
			d->bd_sig = sig;
		break;
	    }
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
	.f_modify	= filt_bpfreadmodify,
	.f_process	= filt_bpfreadprocess,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d == NULL)
		return (ENXIO);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_klist;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	klist_insert(klist, kn);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	klist_remove(&d->bd_klist, kn);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;

	return (kn->kn_data > 0);
}

int
filt_bpfreadmodify(struct kevent *kev, struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_modify_fn(kev, kn, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}

int
filt_bpfreadprocess(struct knote *kn, struct kevent *kev)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_process_fn(kn, kev, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}
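
/*
 * Illustrative kqueue usage against the read filter above (a userland
 * sketch, not code from this file): EVFILT_READ on a bpf fd fires once
 * the hold buffer has data, or, in immediate mode, as soon as anything
 * sits in the store buffer (see filt_bpfread()):
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *	EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);	// register
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	// wait; ev.data = bytes ready
 */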

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, m, direction);
}

int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}
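
/*
 * Illustrative driver-side use of bpf_mtap() (a sketch of the usual
 * OpenBSD driver idiom, not code from this file): a network driver's
 * receive path taps each packet before handing it up, skipping the
 * call entirely when no listener is attached, since if_bpf is NULL
 * then (see bpf_attachd()/bpf_detachd()):
 *
 *	#if NBPFILTER > 0
 *		if (ifp->if_bpf)
 *			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
 *	#endif
 */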

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header.  In this situation we already have a
 * way of representing a chain of memory buffers, ie, mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh, md;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return _bpf_mtap(arg, m, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag);

	mh.mh_flags = 0;
	mh.mh_data = (caddr_t)&evh;
	mh.mh_len = sizeof(evh);
	mh.mh_next = (struct mbuf *)&md;

	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
#endif
}
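
/*
 * A note on the buffer layout produced below (the record-walking loop
 * is a userland sketch based on bpf(4), and handle() is hypothetical):
 * bpf_catchpacket() packs one word-aligned record per captured packet
 * into the store buffer, so a reader steps through a read() result
 * like this:
 *
 *	char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		handle(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */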

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * bpf_mcopy() does the actual data transfer from the mbuf chain,
 * so pkt is really an mbuf.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Allocate the packet buffers for a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		d->bd_fbuf = NULL;
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}

void
bpf_d_smr(void *smr)
{
	struct bpf_d *bd = smr;

	sigio_free(&bd->bd_sigio);
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	klist_free(&bd->bd_klist);
	free(bd, M_DEVBUF, sizeof(*bd));
}

void
bpf_get(struct bpf_d *bd)
{
	refcnt_take(&bd->bd_refcnt);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (refcnt_rele(&bd->bd_refcnt) == 0)
		return;

	smr_call(&bd->bd_smr, bpf_d_smr, bd);
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device. */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) {
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);
		klist_invalidate(&bd->bd_klist);
	}

	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize);
	case NET_BPF_MAXBUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX);
	default:
		return (EOPNOTSUPP);
	}
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get a list of available data link types of the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}