/*	$OpenBSD: bpf.c,v 1.201 2021/01/02 07:25:42 dlg Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/sigio.h>
#include <sys/task.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if *, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);


struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETL(m, M_WAIT, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);

	bd->bd_rtout = 0;	/* no timeout by default */
	bd->bd_rnonblock = ISSET(flag, FNONBLOCK);

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}
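
/*
 * Illustrative userland sketch (not kernel code, not compiled here) of how a
 * process typically prepares a freshly opened bpf descriptor before reading.
 * The device path, the function name and the interface name are examples
 * only; each open() of the device goes through bpfopen() above.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/bpf.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
bpf_open_example(const char *ifname, u_int *buflenp)
{
	struct ifreq ifr;
	u_int immediate = 1;
	int fd;

	if ((fd = open("/dev/bpf0", O_RDWR)) == -1)
		err(1, "open /dev/bpf0");

	/* BIOCSETIF -> bpf_setif(): attach the descriptor to an interface. */
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) == -1)
		err(1, "BIOCSETIF %s", ifname);

	/* BIOCIMMEDIATE: wake readers as soon as a packet is captured. */
	if (ioctl(fd, BIOCIMMEDIATE, &immediate) == -1)
		err(1, "BIOCIMMEDIATE");

	/* read() must later use exactly the buffer size reported here. */
	if (ioctl(fd, BIOCGBLEN, buflenp) == -1)
		err(1, "BIOCGBLEN");

	return (fd);
}
#endif
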
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;

/*
 * TODO Move nsecuptime() into kern_tc.c and document it when we have
 * more users elsewhere in the kernel.
 */
static uint64_t
nsecuptime(void)
{
	struct timespec now;

	nanouptime(&now);
	return TIMESPEC_TO_NSEC(&now);
}

/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)
			end = UINT64_MAX;
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rnonblock) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}
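
/*
 * Illustrative userland sketch (not compiled here) of walking the records
 * returned by a single read().  Each record is a struct bpf_hdr followed by
 * bh_caplen bytes of packet data, and records are padded so that the next
 * header starts on a BPF_WORDALIGN()ed boundary, mirroring the layout
 * produced by bpf_catchpacket() below.  Names are examples only.
 */
#if 0
#include <sys/types.h>
#include <net/bpf.h>
#include <string.h>
#include <unistd.h>

static void
bpf_walk_records_example(int fd, char *buf, size_t buflen)
{
	ssize_t n;
	char *p;

	/* buflen must equal the size reported by BIOCGBLEN */
	n = read(fd, buf, buflen);
	if (n <= 0)
		return;

	for (p = buf; p < buf + n; ) {
		struct bpf_hdr bh;

		memcpy(&bh, p, sizeof(bh));
		/* packet bytes start bh_hdrlen bytes into the record:
		 * const u_char *pkt = (u_char *)p + bh.bh_hdrlen; */

		/* advance to the next aligned record */
		p += BPF_WORDALIGN(bh.bh_hdrlen + bh.bh_caplen);
	}
}
#endif
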
/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	if (d->bd_nreaders)
		wakeup(d);

	/*
	 * As long as pgsigio() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	if ((d->bd_async && d->bd_sig) ||
	    (!klist_empty(&d->bd_sel.si_note) || d->bd_sel.si_seltid != 0)) {
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);
	}
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	if (d->bd_async && d->bd_sig)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeup(&d->bd_sel);
	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}
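
/*
 * Illustrative userland sketch (not compiled here) of injecting a frame with
 * write().  On a DLT_EN10MB descriptor the buffer must start with the full
 * Ethernet header (see bpf_movein() above); BIOCSHDRCMPLT asks the stack to
 * leave the supplied link-level source address alone.  bpfwrite() rejects
 * packets whose payload exceeds the interface MTU.  Names are examples only.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <err.h>
#include <unistd.h>

static void
bpf_inject_example(int fd, const void *frame, size_t framelen)
{
	u_int hdrcmplt = 1;

	if (ioctl(fd, BIOCSHDRCMPLT, &hdrcmplt) == -1)
		err(1, "BIOCSHDRCMPLT");

	if (write(fd, frame, framelen) == -1)
		err(1, "write");
}
#endif
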
/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			uint64_t rtout;

			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			rtout = TIMEVAL_TO_NSEC(tv);
			if (rtout > MAXTSLP) {
				error = EOVERFLOW;
				break;
			}
			mtx_enter(&d->bd_mtx);
			d->bd_rtout = rtout;
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			memset(tv, 0, sizeof(*tv));
			mtx_enter(&d->bd_mtx);
			NSEC_TO_TIMEVAL(d->bd_rtout, tv);
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rnonblock = 1;
		else
			d->bd_rnonblock = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}
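
/*
 * Illustrative userland sketch (not compiled here) of installing a read
 * filter via BIOCSETF; the program is copied in and validated by bpf_setf()
 * below.  This example keeps ARP frames on a DLT_EN10MB descriptor and
 * rejects everything else.  Names are examples only.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/ethertypes.h>
#include <err.h>

static void
bpf_setf_example(int fd)
{
	/* accept ARP (ethertype at offset 12), capture the whole packet */
	struct bpf_insn insns[] = {
		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_ARP, 0, 1),
		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
		BPF_STMT(BPF_RET+BPF_K, 0),
	};
	struct bpf_program prog = {
		.bf_len = sizeof(insns) / sizeof(insns[0]),
		.bf_insns = insns,
	};

	if (ioctl(fd, BIOCSETF, &prog) == -1)
		err(1, "BIOCSETF");
}
#endif
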
/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}
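
/*
 * Illustrative userland sketch (not compiled here) of waiting for captured
 * packets with poll(); the request is served by bpfpoll() below.  Names are
 * examples only.
 */
#if 0
#include <poll.h>
#include <err.h>

static int
bpf_poll_example(int bpffd, int timeout_ms)
{
	struct pollfd pfd;

	pfd.fd = bpffd;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, timeout_ms) == -1)
		err(1, "poll");

	return (pfd.revents & POLLIN);
}
#endif
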
/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages to trigger a race condition which
	 * causes bpfilter_lookup() to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We should still check whether we can fix the root
	 * cause of this issue.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(p, &d->bd_sel);
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	klist_insert_locked(klist, kn);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	klist_remove_locked(&d->bd_sel.si_note, kn);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}
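
/*
 * Illustrative userland sketch (not compiled here) of waiting for captured
 * packets with kqueue.  EVFILT_READ on a bpf descriptor is served by
 * filt_bpfread() above; kn_data reports the bytes ready to read.  Names are
 * examples only.
 */
#if 0
#include <sys/types.h>
#include <sys/time.h>
#include <sys/event.h>
#include <err.h>

static void
bpf_kq_wait_example(int bpffd)
{
	struct kevent kev;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	EV_SET(&kev, bpffd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent register");

	/* blocks until the hold buffer (or, in immediate mode, the
	 * store buffer) has data */
	if (kevent(kq, NULL, 0, &kev, 1, NULL) == -1)
		err(1, "kevent wait");
}
#endif
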
/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, m, direction);
}

int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid =
					    mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header.  In this situation we already have a
 * way of representing a chain of memory buffers, ie, mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}
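
/*
 * Sketch of one plausible driver-side call pattern, for illustration only
 * and not compiled here: a transmit path checks the interface's bpf listener
 * pointer and taps the frame before handing it to the hardware.  Exactly
 * where a tap happens differs between the generic input/output paths and
 * individual drivers; the function name below is hypothetical.
 */
#if 0
void
driver_start_sketch(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
	/* ... then hand m to the hardware ... */
}
#endif
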
/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh, md;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return _bpf_mtap(arg, m, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = (caddr_t)&evh;
	mh.mh_len = sizeof(evh);
	mh.mh_next = (struct mbuf *)&md;

	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * The data is copied with bpf_mcopy(), so pkt is really an mbuf chain.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}

void
bpf_d_smr(void *smr)
{
	struct bpf_d *bd = smr;

	sigio_free(&bd->bd_sigio);
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	free(bd, M_DEVBUF, sizeof(*bd));
}

void
bpf_get(struct bpf_d *bd)
{
	atomic_inc_int(&bd->bd_ref);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (atomic_dec_int_nv(&bd->bd_ref) > 0)
		return;

	smr_call(&bd->bd_smr, bpf_d_smr, bd);
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist)))
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize);
	case NET_BPF_MAXBUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX);
	default:
		return (EOPNOTSUPP);
	}
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get a list of the available data link types for the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}
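
/*
 * Illustrative userland sketch (not compiled here) of enumerating the data
 * link types an attached interface offers and switching to one of them.  A
 * first BIOCGDLTLIST call with bfl_list == NULL only reports the count, as
 * bpf_getdltlist() above shows.  Names are examples only.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <err.h>
#include <stdlib.h>
#include <string.h>

static void
bpf_set_dlt_example(int fd, u_int wanted_dlt)
{
	struct bpf_dltlist bfl;
	u_int i;

	memset(&bfl, 0, sizeof(bfl));
	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)	/* get the count */
		err(1, "BIOCGDLTLIST");

	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
	if (bfl.bfl_list == NULL)
		err(1, "calloc");
	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)	/* fill the list */
		err(1, "BIOCGDLTLIST");

	for (i = 0; i < bfl.bfl_len; i++) {
		if (bfl.bfl_list[i] == wanted_dlt) {
			if (ioctl(fd, BIOCSDLT, &wanted_dlt) == -1)
				err(1, "BIOCSDLT");
			break;
		}
	}
	free(bfl.bfl_list);
}
#endif
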
/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}