/*	$OpenBSD: bpf.c,v 1.230 2025/01/19 03:27:27 dlg Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/event.h>
#include <sys/mutex.h>
#include <sys/refcnt.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/sigio.h>
#include <sys/task.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE 32768
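
/*
 * Capture state of a descriptor's store buffer: BPF_S_IDLE while
 * packets accumulate, BPF_S_WAIT once bpf_catchpacket() has armed the
 * wait timeout (bd_wait_tmo), and BPF_S_DONE once the buffer may be
 * rotated out to a reader.
 */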
#define BPF_S_IDLE	0
#define BPF_S_WAIT	1
#define BPF_S_DONE	2

#define PRINET	26		/* interruptible */

/*
 * Locks used to protect data:
 *	a	atomic
 */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;		/* [a] */
int bpf_maxbufsize = BPF_MAXBUFSIZE;	/* [a] */

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
TAILQ_HEAD(, bpf_if) bpf_iflist = TAILQ_HEAD_INITIALIZER(bpf_iflist);
LIST_HEAD(, bpf_d) bpf_d_list = LIST_HEAD_INITIALIZER(bpf_d_list);

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_wait_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);
int	filt_bpfreadmodify(struct kevent *, struct knote *);
int	filt_bpfreadprocess(struct knote *, struct kevent *);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen, alen, mlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EMSGSIZE);
	len = uio->uio_resid;
	if (len < hlen)
		return (EINVAL);

	/*
	 * Get the length of the payload so we can align it properly.
	 */
	alen = len - hlen;

	/*
	 * Allocate enough space for headers and the aligned payload.
	 */
	mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long));
	if (mlen > MAXMCLBYTES)
		return (EMSGSIZE);

	MGETHDR(m, M_WAIT, MT_DATA);
	if (mlen > MHLEN) {
		MCLGETL(m, M_WAIT, mlen);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	m_align(m, alen);		/* Align the payload. */
	m->m_data -= hlen;

	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len;
	m->m_len = len;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

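	/*
	 * The write filter (BIOCSETWF) must accept the entire packet,
	 * otherwise refuse the write.
	 */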
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);

		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen;
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	*mp = m;
	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}
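
/*
 * Pseudo-device attach routine.  Nothing to do at attach time;
 * descriptors are created on demand by bpfopen().
 */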
void
bpfilterattach(int n)
{
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = atomic_load_int(&bpf_bufsize);
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	timeout_set(&bd->bd_wait_tmo, bpf_wait_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);
	klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);

	bd->bd_rtout = 0;	/* no timeout by default */
	bd->bd_wtout = INFSLP;	/* wait for the buffer to fill by default */

	refcnt_init(&bd->bd_refcnt);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_state = BPF_S_IDLE; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
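
/*
 * Rotation consumes the free buffer: bd_fbuf stays NULL until the hold
 * buffer is handed back (by bpfread() or bpf_resetd()), which is why
 * bpf_catchpacket() drops packets when no free buffer is available.
 */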

/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)
			end = UINT64_MAX;
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_state == BPF_S_DONE) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (ISSET(ioflag, IO_NDELAY)) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	if (d->bd_nreaders)
		wakeup(d);

	knote_locked(&d->bd_klist, 0);

	/*
	 * As long as pgsigio() needs to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	if (d->bd_async && d->bd_sig) {
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);
	}
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	if (d->bd_async && d->bd_sig)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	bpf_put(d);
}

void
bpf_wait_cb(void *xd)
{
	struct bpf_d *d = xd;

	mtx_enter(&d->bd_mtx);
	if (d->bd_state == BPF_S_WAIT) {
		d->bd_state = BPF_S_DONE;
		bpf_wakeup(d);
	}
	mtx_leave(&d->bd_mtx);

	bpf_put(d);
}
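
/*
 * bpfwrite - inject a packet on the interface the descriptor is
 * attached to.  bpf_movein() runs the packet through the write filter
 * (if any) before it is handed to the interface output routine.
 */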
int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (timeout_del(&d->bd_wait_tmo))
		bpf_put(d);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_state = BPF_S_IDLE;
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}
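
/*
 * Set the wait timeout, in nanoseconds: 0 selects immediate mode,
 * INFSLP disables the timeout so reads wait for a full buffer.
 */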
static int
bpf_set_wtout(struct bpf_d *d, uint64_t wtout)
{
	mtx_enter(&d->bd_mtx);
	d->bd_wtout = wtout;
	mtx_leave(&d->bd_mtx);

	return (0);
}

static int
bpf_set_wtimeout(struct bpf_d *d, const struct timeval *tv)
{
	uint64_t nsec;

	if (tv->tv_sec < 0 || !timerisvalid(tv))
		return (EINVAL);

	nsec = TIMEVAL_TO_NSEC(tv);
	if (nsec > SEC_TO_NSEC(300))
		return (EINVAL);
	if (nsec > MAXTSLP)
		return (EOVERFLOW);

	return (bpf_set_wtout(d, nsec));
}

static int
bpf_get_wtimeout(struct bpf_d *d, struct timeval *tv)
{
	uint64_t nsec;

	mtx_enter(&d->bd_mtx);
	nsec = d->bd_wtout;
	mtx_leave(&d->bd_mtx);

	if (nsec == INFSLP)
		return (ENXIO);

	memset(tv, 0, sizeof(*tv));
	NSEC_TO_TIMEVAL(nsec, tv);

	return (0);
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set read filter.
 *  BIOCSETFNR		Set read filter without resetting descriptor.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCSWTIMEOUT	Set wait timeout.
 *  BIOCGWTIMEOUT	Get wait timeout.
 *  BIOCDWTIMEOUT	Del wait timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGWTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCSWTIMEOUT:
		case BIOCDWTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
	{
		int n;

		mtx_enter(&d->bd_mtx);
		n = d->bd_slen;
		if (d->bd_hbuf != NULL)
			n += d->bd_hlen;
		mtx_leave(&d->bd_mtx);

		*(int *)addr = n;
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;
			int bpf_maxbufsize_local =
			    atomic_load_int(&bpf_maxbufsize);

			if (size > bpf_maxbufsize_local)
				*(u_int *)addr = size = bpf_maxbufsize_local;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read/write filter.
	 */
	case BIOCSETF:
	case BIOCSETFNR:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;
		uint64_t rtout;

		if (tv->tv_sec < 0 || !timerisvalid(tv)) {
			error = EINVAL;
			break;
		}
		rtout = TIMEVAL_TO_NSEC(tv);
		if (rtout > MAXTSLP) {
			error = EOVERFLOW;
			break;
		}
		mtx_enter(&d->bd_mtx);
		d->bd_rtout = rtout;
		mtx_leave(&d->bd_mtx);
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;

		memset(tv, 0, sizeof(*tv));
		mtx_enter(&d->bd_mtx);
		NSEC_TO_TIMEVAL(d->bd_rtout, tv);
		mtx_leave(&d->bd_mtx);
		break;
	}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
	{
		struct bpf_stat *bs = (struct bpf_stat *)addr;

		bs->bs_recv = d->bd_rcount;
		bs->bs_drop = d->bd_dcount;
		break;
	}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		error = bpf_set_wtout(d, *(int *)addr ? 0 : INFSLP);
		break;

	/*
	 * Wait timeout.
	 */
	case BIOCSWTIMEOUT:
		error = bpf_set_wtimeout(d, (const struct timeval *)addr);
		break;
	case BIOCGWTIMEOUT:
		error = bpf_get_wtimeout(d, (struct timeval *)addr);
		break;
	case BIOCDWTIMEOUT:
		error = bpf_set_wtout(d, INFSLP);
		break;

	case BIOCVERSION:
	{
		struct bpf_version *bv = (struct bpf_version *)addr;

		bv->bv_major = BPF_MAJOR_VERSION;
		bv->bv_minor = BPF_MINOR_VERSION;
		break;
	}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
	{
		u_int sig;

		sig = *(u_int *)addr;

		if (sig >= NSIG)
			error = EINVAL;
		else
			d->bd_sig = sig;
		break;
	}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (cmd != BIOCSETWF) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	if (cmd == BIOCSETF) {
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
	}
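
	/*
	 * Free the old program only once readers in bpf_mtap() and
	 * bpf_movein() can no longer hold a reference to it.
	 */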
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) == 0)
			break;
	}

	/* Not found. */
	if (bp == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (bp != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, bp);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
	.f_modify	= filt_bpfreadmodify,
	.f_process	= filt_bpfreadprocess,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d == NULL)
		return (ENXIO);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_klist;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	klist_insert(klist, kn);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	klist_remove(&d->bd_klist, kn);
	bpf_put(d);
}
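
/*
 * The read event fires as soon as the hold buffer contains data, or,
 * once the store buffer has been marked BPF_S_DONE, as soon as any
 * packet has been captured into it.
 */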
int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	kn->kn_data = d->bd_hlen;
	if (d->bd_state == BPF_S_DONE)
		kn->kn_data += d->bd_slen;

	return (kn->kn_data > 0);
}

int
filt_bpfreadmodify(struct kevent *kev, struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_modify_fn(kev, kn, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}

int
filt_bpfreadprocess(struct knote *kn, struct kevent *kev)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_process_fn(kn, kev, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}
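
/*
 * Tap an mbuf chain.  The return value is non-zero if any matching
 * descriptor asked for the packet to be dropped (BIOCSFILDROP), so the
 * caller knows whether to discard it.
 */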
int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, m, direction);
}

int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);
					tbh.bh_csumflags =
					    mp->m_pkthdr.csum_flags;

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header. In this situation we already have a
 * way of representing a chain of memory buffers, ie, mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh, md;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return _bpf_mtap(arg, m, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag);

	mh.mh_flags = 0;
	mh.mh_data = (caddr_t)&evh;
	mh.mh_len = sizeof(evh);
	mh.mh_next = (struct mbuf *)&md;

	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * pkt is really an mbuf chain; bpf_mcopy() is used to copy the
 * (possibly non-contiguous) data into the store buffer.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}

		/* cancel pending wtime */
		if (timeout_del(&d->bd_wait_tmo))
			bpf_put(d);

		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;
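
	/*
	 * Decide whether readers should be woken up now: bd_wtout == 0
	 * means immediate mode, INFSLP means wait until the store buffer
	 * fills, and any other value arms the wait timeout so that
	 * bpf_wait_cb() wakes the readers later.
	 */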
	switch (d->bd_wtout) {
	case 0:
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		if (d->bd_state == BPF_S_IDLE)
			d->bd_state = BPF_S_DONE;
		do_wakeup = 1;
		break;
	case INFSLP:
		break;
	default:
		if (d->bd_state == BPF_S_IDLE) {
			d->bd_state = BPF_S_WAIT;

			bpf_get(d);
			if (!timeout_add_nsec(&d->bd_wait_tmo, d->bd_wtout))
				bpf_put(d);
		}
		break;
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Allocate the packet buffers for a descriptor and reset its
 * length counters.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		d->bd_fbuf = NULL;
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}
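
/*
 * smr_call() callback: free a descriptor once no SMR read-side critical
 * section can still be referencing it.
 */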
void
bpf_d_smr(void *smr)
{
	struct bpf_d *bd = smr;

	sigio_free(&bd->bd_sigio);
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	klist_free(&bd->bd_klist);
	free(bd, M_DEVBUF, sizeof(*bd));
}

void
bpf_get(struct bpf_d *bd)
{
	refcnt_take(&bd->bd_refcnt);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (refcnt_rele(&bd->bd_refcnt) == 0)
		return;

	smr_call(&bd->bd_smr, bpf_d_smr, bd);
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	TAILQ_INSERT_TAIL(&bpf_iflist, bp, bif_next);

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	TAILQ_FOREACH_SAFE(bp, &bpf_iflist, bif_next, nbp) {
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) {
		bpf_get(bd);
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);
		klist_invalidate(&bd->bd_klist);
		bpf_put(bd);
	}

	TAILQ_REMOVE(&bpf_iflist, bp, bif_next);

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_bufsize, BPF_MINBUFSIZE,
		    atomic_load_int(&bpf_maxbufsize));
	case NET_BPF_MAXBUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX);
	default:
		return (EOPNOTSUPP);
	}

	/* NOTREACHED */
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get a list of the data link types available for the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}