/*	$OpenBSD: bpf.c,v 1.177 2019/06/13 21:14:53 mpi Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/task.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE	32768

#define PRINET		26	/* interruptible */

/* from kern/kern_clock.c; incremented each clock tick. */
extern int ticks;

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if *, struct ifreq *);
int	_bpf_mtap(caddr_t, const struct mbuf *, u_int,
	    void (*)(const void *, void *, size_t));
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    void (*)(const void *, void *, size_t), struct timeval *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETI(m, M_WAIT, NULL, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if the descriptor cannot be allocated.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);

	if (flag & FNONBLOCK)
		bd->bd_rtout = -1;

	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);

	/*
	 * Wait for the task to finish here, before proceeding to garbage
	 * collection.
	 */
	taskq_barrier(systq);
	smr_call(&d->bd_smr, bpf_d_smr, d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
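
/*
 * A note on the scheme: bpf keeps three equal-sized buffers per
 * descriptor.  bd_sbuf (store) is the one bpf_catchpacket() appends
 * to, bd_hbuf (hold) is the one bpfread() hands to userland, and
 * bd_fbuf (free) is the spare that becomes the next store buffer.
 * A rotation is only possible while a free buffer exists; without
 * one, bpf_catchpacket() drops the packet and bumps bd_dcount.
 */
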
/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int hlen, error;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, bd_rdStart is tagged when we start the read.
	 * We can then figure out when we're done reading.
	 */
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	else
		d->bd_rdStart = 0;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rtout == -1) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else {
			if (d->bd_rdStart <= ULONG_MAX - d->bd_rtout &&
			    d->bd_rdStart + d->bd_rtout < ticks) {
				error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
				    "bpf", d->bd_rtout);
			} else
				error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * As long as csignal() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	task_add(systq, &d->bd_wake_task);
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	KERNEL_ASSERT_LOCKED();

	wakeup(d);
	if (d->bd_async && d->bd_sig)
		csignal(d->bd_pgid, d->bd_sig, d->bd_siguid, d->bd_sigeuid);

	selwakeup(&d->bd_sel);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	return (error);
}
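
/*
 * Example (userland, illustrative; the device path, interface name and
 * frame are placeholders): a write() on a bpf descriptor injects one
 * complete link-level frame on the attached interface, subject to the
 * write filter installed with BIOCSETWF and to the interface MTU.
 *
 *	int fd = open("/dev/bpf0", O_WRONLY);
 *	struct ifreq ifr;
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	write(fd, frame, framelen);
 *	close(fd);
 */
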
/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			u_long rtout;

			/* Compute number of ticks. */
			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			if (tv->tv_sec > INT_MAX / hz) {
				error = EOVERFLOW;
				break;
			}
			rtout = tv->tv_sec * hz;
			if (tv->tv_usec / tick > INT_MAX - rtout) {
				error = EOVERFLOW;
				break;
			}
			rtout += tv->tv_usec / tick;
			d->bd_rtout = rtout;
			if (d->bd_rtout == 0 && tv->tv_usec != 0)
				d->bd_rtout = 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rtout = -1;
		else
			d->bd_rtout = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	/*
	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
	 * TIOCSPGRP's arg is a process group if it's positive and a process
	 * id if it's negative.  This is exactly the opposite of what the
	 * other two functions want!  Therefore there is code in ioctl and
	 * fcntl to negate the arg before calling here.
	 */
	case TIOCSPGRP:		/* Process or group to send signals to */
		d->bd_pgid = *(int *)addr;
		d->bd_siguid = p->p_ucred->cr_ruid;
		d->bd_sigeuid = p->p_ucred->cr_uid;
		break;

	case TIOCGPGRP:
		*(int *)addr = d->bd_pgid;
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	return (error);
}
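
/*
 * Example (userland, illustrative): installing a read filter with
 * BIOCSETF.  This classic program accepts IP-over-Ethernet packets in
 * full and truncates everything else to zero bytes; the instructions
 * are validated by bpf_validate() before being installed.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *		BPF_STMT(BPF_RET+BPF_K, 0),
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = sizeof(insns) / sizeof(insns[0]),
 *		.bf_insns = insns,
 *	};
 *
 *	ioctl(fd, BIOCSETF, &prog);
 */
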
/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to the requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages to trigger some race condition
	 * which causes bpfilter_lookup to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We should still recheck if we can fix the root
	 * cause of this issue.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			/*
			 * If there's a timeout, mark the time we
			 * started waiting.
			 */
			if (d->bd_rtout != -1 && d->bd_rdStart == 0)
				d->bd_rdStart = ticks;
			selrecord(p, &d->bd_sel);
		}
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	kn->kn_hook = d;
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);

	mtx_enter(&d->bd_mtx);
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	mtx_leave(&d->bd_mtx);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}
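
/*
 * The tap functions below run on the packet hot path.  The listener
 * list is walked under smr_read_enter(), so delivery never takes the
 * descriptor mutex until a filter has matched.  A nonzero return from
 * _bpf_mtap() tells the caller that at least one matching descriptor
 * has bd_fildrop set and the packet should be dropped after capture.
 */
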
/*
 * Like bpf_mtap(), but the copy function can be given; used by the
 * various bpf_mtap*() variants.
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct timeval tv;
	int gottime = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (cpfn == NULL)
		cpfn = bpf_mcopy;

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gottime) {
				if (ISSET(m->m_flags, M_PKTHDR))
					m_microtime(m, &tv);
				else
					microtime(&tv);

				gottime = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn,
			    &tv);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header.  In this situation we already have a
 * way of representing a chain of memory buffers, ie, mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return _bpf_mtap(arg, m0, direction, bpf_mcopy);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, direction, NULL);
}
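
/*
 * Sketch of the conventional call site in a driver's receive path
 * (illustrative; the surrounding driver code is assumed):
 *
 *	#if NBPFILTER > 0
 *		if (ifp->if_bpf)
 *			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
 *	#endif
 *
 * The if_bpf cookie is the bif_driverp that bpf_attachd() points at
 * the interface, so the test is cheap when nobody is listening.
 */
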
/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, caddr_t data, u_int dlen, const struct mbuf *m,
    u_int direction, void (*cpfn)(const void *, void *, size_t))
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m0, direction, cpfn);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, (caddr_t)&afh, sizeof(afh),
	    m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return bpf_mtap(arg, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = m->m_data + ETHER_HDR_LEN;
	mh.mh_len = m->m_len - ETHER_HDR_LEN;
	mh.mh_next = m->m_next;

	return bpf_mtap_hdr(arg, (caddr_t)&evh, sizeof(evh),
	    (struct mbuf *)&mh, direction, NULL);
#endif
}
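
/*
 * Layout of a store buffer as filled in by bpf_catchpacket() below:
 * each record is a struct bpf_hdr followed by bh_caplen bytes of
 * packet data, and the next record starts at the following
 * BPF_WORDALIGN() boundary.
 *
 *	+---------+---------------+-pad-+---------+------------- ...
 *	| bpf_hdr | captured data |     | bpf_hdr | captured data
 *	+---------+---------------+-----+---------+------------- ...
 *	 bh_hdrlen   bh_caplen
 */
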
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * "copy" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (d->bd_rdStart && d->bd_rdStart <= ULONG_MAX - d->bd_rtout &&
	    d->bd_rdStart + d->bd_rtout < ticks) {
		/*
		 * We could be selecting on the bpf, and we
		 * may have timeouts set.  We got here by getting
		 * a packet, so wake up the reader.
		 */
		if (d->bd_fbuf != NULL) {
			d->bd_rdStart = 0;
			ROTATE_BUFFERS(d);
			do_wakeup = 1;
		}
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Allocate the initial packet buffers of a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}

void
bpf_d_smr(void *smr)
{
	struct bpf_d *bd = smr;

	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	free(bd, M_DEVBUF, sizeof(*bd));
}
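
/*
 * Sketch of how a driver (or the generic ethernet attach code) hooks
 * an interface up at attach time (illustrative):
 *
 *	#if NBPFILTER > 0
 *		bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
 *	#endif
 *
 * bpfsattach() is the variant for callers without an ifnet; they pass
 * an explicit name and the resulting bif_ifp stays NULL.
 */
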
void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp, **pbp = &bpf_iflist;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp) {
			*pbp = nbp;

			bpfsdetach(bp);
		} else
			pbp = &bp->bif_next;
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p;
	struct bpf_d *bd;
	int maj;

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist)))
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int newval;
	int error;

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		newval = bpf_bufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
			return (EINVAL);
		bpf_bufsize = newval;
		break;
	case NET_BPF_MAXBUFSIZE:
		newval = bpf_maxbufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE)
			return (EINVAL);
		bpf_maxbufsize = newval;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}
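
/*
 * Example (userland, illustrative): the usual two-pass use of
 * BIOCGDLTLIST, relying on the NULL-list behaviour implemented below.
 *
 *	struct bpf_dltlist bfl = { 0, NULL };
 *
 *	ioctl(fd, BIOCGDLTLIST, &bfl);		// sets bfl.bfl_len only
 *	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
 *	ioctl(fd, BIOCGDLTLIST, &bfl);		// copies out the types
 */
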
/*
 * Get a list of available data link types of the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}
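
/*
 * Example (userland, illustrative; device path and interface name are
 * placeholders): a minimal capture loop.  The buffer passed to read()
 * must be exactly the size reported by BIOCGBLEN, and records are
 * walked with BPF_WORDALIGN() just as bpf_catchpacket() wrote them.
 *
 *	u_int blen;
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *	struct ifreq ifr;
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	char *buf = malloc(blen);
 *
 *	for (;;) {
 *		ssize_t n = read(fd, buf, blen);
 *		char *p = buf;
 *
 *		while (n > 0 && p < buf + n) {
 *			struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *
 *			// bh->bh_caplen bytes of packet data start
 *			// at p + bh->bh_hdrlen
 *			p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *		}
 *	}
 */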